## **Model**

In [46]:
# HW_adaboost.py

import numpy as np
from sklearn.tree import DecisionTreeClassifier

    
class AdaBoostClassifier:
    """
    AdaBoost (Adaptive Boosting) classifier for binary labels in {-1, +1}.

    An ensemble learning algorithm that combines multiple weak classifiers
    (shallow decision trees) into a strong classifier. Each round fits a tree
    on re-weighted samples, so later trees focus on the samples that earlier
    trees misclassified.

    Attributes:
    - n_estimators: Maximum number of boosting rounds.
    - max_depth: Depth of each weak decision tree.
    - w: List of learner weights (alpha_t), one per stored model.
    - models: List of fitted weak classifiers.
    """

    def __init__(self, n_estimators=50, max_depth=2):
        """
        Initialize the AdaBoost classifier.

        Parameters:
        - n_estimators: Number of weak classifiers to use.
        - max_depth: Maximum depth of each weak decision tree (default 2,
          the value that was previously hard-coded in `train`).
        """
        self.n_estimators = n_estimators
        self.max_depth = max_depth
        self.w = []  # Store the weights of the classifiers
        self.models = []  # Store the weak classifiers

    def train(self, X, y):
        """
        Fit the AdaBoost model to the training data.

        Any previously fitted ensemble is replaced, so calling `train` twice
        does not accumulate learners. If validation fails, the previously
        fitted state is left untouched.

        Parameters:
        - X: Training data, shape (n_samples, n_features)
        - y: Target labels in {-1, +1}, shape (n_samples,)

        Raises:
        - ValueError: If X is not a non-empty 2-D array, if y does not have
          one label per sample, or if y contains values other than -1 / +1.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        if X.ndim != 2 or X.shape[0] == 0:
            raise ValueError("X must be a non-empty 2-D array of shape (n_samples, n_features).")
        n_samples = X.shape[0]
        if y.shape != (n_samples,):
            raise ValueError("y must be a 1-D array with one label per sample.")
        # The weight update and the sign-based vote below are only valid for +/-1 labels.
        if not np.all((y == -1) | (y == 1)):
            raise ValueError("Labels must be -1 or 1.")

        eps = 1e-10  # Lower bound on the error; keeps the log finite for a perfect learner
        models = []
        learner_weights = []

        # Initialize weights uniformly
        D = np.ones(n_samples) / n_samples

        for _ in range(self.n_estimators):
            # Create a weak classifier (shallow decision tree)
            model = DecisionTreeClassifier(max_depth=self.max_depth)
            model.fit(X, y, sample_weight=D)  # Fit model with sample weights
            y_pred = model.predict(X)  # Predictions from the model

            # Weighted error rate; D sums to 1, so this lies in [0, 1]
            error = np.sum(D * (y_pred != y))

            # A learner with weighted error >= 0.5 is no better than random
            # guessing: its weight would be <= 0 and it cannot improve the
            # ensemble, so boosting stops here. (>= instead of == so that
            # floating-point noise around 0.5 cannot slip through.)
            if error >= 0.5:
                print("Warning: Error is 0.5, stopping training.")
                break

            # Calculate the weight for the weak classifier
            error = max(error, eps)  # Avoid division by zero
            w_t = 0.5 * np.log((1.0 - error) / error)

            # Increase the weight of misclassified samples (y * y_pred == -1)
            # and decrease the weight of correctly classified ones.
            D *= np.exp(-w_t * y * y_pred)
            # Normalize so the weights form a distribution again. The sum must
            # be taken over the already-updated D, not over D times the
            # exponential factor a second time.
            D /= np.sum(D)

            models.append(model)  # Store the model
            learner_weights.append(w_t)  # Store the w_t

        # Publish the new ensemble only after the loop has finished
        self.models = models
        self.w = learner_weights

    def predict(self, X):
        """
        Predict the class labels for the input data.

        Parameters:
        - X: Input data, shape (n_samples, n_features)

        Returns:
        - Predicted class labels, shape (n_samples,). Values are the sign of
          the weighted vote, so an exact tie (or an ensemble with no stored
          models) yields 0 for that sample.

        Raises:
        - ValueError: If X is not a 2-D array.
        """
        X = np.asarray(X)
        if X.ndim != 2:
            raise ValueError("X must be a 2-D array of shape (n_samples, n_features).")

        pred = np.zeros(X.shape[0])  # Initialize predictions
        for w_i, model in zip(self.w, self.models):
            pred += w_i * model.predict(X)  # Weighted sum of predictions
        return np.sign(pred)  # Return the sign of the predictions

    def accuracy(self, X, y):
        """
        Calculate the accuracy of the model.

        Parameters:
        - X: Input data, shape (n_samples, n_features)
        - y: True labels, shape (n_samples,)

        Returns:
        - Accuracy as a float (fraction of samples whose prediction equals y).

        Raises:
        - ValueError: If X is not a 2-D array (raised by `predict`).
        """
        predictions = self.predict(X)  # Get predictions
        return np.mean(predictions == np.asarray(y))  # Calculate accuracy


## **Check Model**

In [48]:
import pytest
import numpy as np

# Seed NumPy's global random generator for testing purposes
np.random.seed(0)

# Three classifiers under test, differing only in the number of estimators
test_model1, test_model2, test_model3 = (
    AdaBoostClassifier(n_estimators=size) for size in (10, 50, 20)
)

# Dataset 1: XOR layout on the unit square (labels are -1 / +1 for AdaBoost)
x1 = np.array([
    [0, 0],
    [1, 1],
    [1, 0],
    [0, 1],
])
y1 = np.array([-1, -1, 1, 1])

# Dataset 2: dataset 1 plus two extra points at height 2
x2 = np.array([
    [0, 0],
    [1, 1],
    [1, 0],
    [0, 1],
    [0, 2],
    [1, 2],
])
y2 = np.array([-1, -1, 1, 1, -1, 1])

# Dataset 3: points on the diagonal; the label flips between (1, 1) and (2, 2)
x3 = np.array([
    [0, 0],
    [1, 1],
    [2, 2],
    [3, 3],
    [4, 4],
    [5, 5],
])
y3 = np.array([-1, -1, 1, 1, 1, 1])

# Test Model Train
def check_train_dtype(model, X, y):
    """
    Assert that a trained model stores a non-empty list of weak learners
    and exactly one weight per learner.

    X and y are part of the signature but are not inspected here.
    Raises AssertionError when a check fails.
    """
    learners = model.models
    assert isinstance(learners, list)
    n_learners = len(learners)
    assert n_learners > 0, "Model should have trained at least one weak learner."
    assert len(model.w) == n_learners, "Weights should match the number of models."

# Train the models
# Each model is fitted on its own dataset and then checked immediately:
# check_train_dtype asserts that at least one weak learner was stored and
# that there is exactly one weight per stored learner.
test_model1.train(x1, y1)
check_train_dtype(test_model1, x1, y1)

test_model2.train(x2, y2)
check_train_dtype(test_model2, x2, y2)

test_model3.train(x3, y3)
check_train_dtype(test_model3, x3, y3)

# Test Model Predictions
def check_test_dtype(pred, X_test):
    """
    Assert that `pred` is a 1-D NumPy array holding one entry per row of X_test.

    Raises AssertionError when a check fails.
    """
    assert isinstance(pred, np.ndarray)
    assert pred.ndim == 1
    assert len(pred) == X_test.shape[0]

# Make predictions
# Each model predicts on the same data it was trained on. For every model the
# result must be a 1-D ndarray with one entry per sample (check_test_dtype)
# and must reproduce the training labels exactly.
pred1 = test_model1.predict(x1)
check_test_dtype(pred1, x1)
assert (pred1 == y1).all(), "Predictions should match the expected labels for model 1."

pred2 = test_model2.predict(x2)
check_test_dtype(pred2, x2)
assert (pred2 == y2).all(), "Predictions should match the expected labels for model 2."

pred3 = test_model3.predict(x3)
check_test_dtype(pred3, x3)
assert (pred3 == y3).all(), "Predictions should match the expected labels for model 3."

# Test Model Accuracy
def check_accuracy(model, X, y, expected_accuracy, tol=1e-9):
    """
    Assert that the model's accuracy on (X, y) matches the expected value.

    Parameters:
    - model: Object exposing `accuracy(X, y)` that returns a number.
    - X: Input data passed through to `model.accuracy`.
    - y: True labels passed through to `model.accuracy`.
    - expected_accuracy: Accuracy the model is expected to reach.
    - tol: Absolute tolerance for the comparison. Accuracy is a floating-point
      mean, so an exact `==` can fail on values such as 0.3 that are not
      exactly representable.

    Raises:
    - AssertionError: If the accuracy differs from the expectation by more than `tol`.
    """
    accuracy = model.accuracy(X, y)
    assert np.isclose(accuracy, expected_accuracy, rtol=0.0, atol=tol), (
        f"Expected accuracy: {expected_accuracy}, but got: {accuracy}"
    )

# Check accuracy
# Accuracy is measured on the training data of each model, so a value of 1.0
# means the ensemble reproduces every training label.
check_accuracy(test_model1, x1, y1, 1.0)  # Expecting 100% accuracy for this simple case
check_accuracy(test_model2, x2, y2, 1.0)  # Expecting 100% accuracy for this dataset
check_accuracy(test_model3, x3, y3, 1.0)  # Expecting 100% accuracy for this dataset

# Additional Tests for Edge Cases
def test_empty_train():
    """Training on empty arrays must raise ValueError."""
    empty_X, empty_y = np.array([]), np.array([])
    with pytest.raises(ValueError):
        test_model1.train(empty_X, empty_y)

def test_empty_predict():
    """Predicting on an empty array with the trained model must raise ValueError."""
    empty_X = np.array([])
    with pytest.raises(ValueError):
        test_model1.predict(empty_X)

def test_accuracy_empty():
    """Scoring the trained model on empty arrays must raise ValueError."""
    empty_X, empty_y = np.array([]), np.array([])
    with pytest.raises(ValueError):
        test_model1.accuracy(empty_X, empty_y)

# Run additional edge case tests
# The test functions are called directly here rather than collected by the
# pytest runner; each one fails with an exception from pytest.raises if the
# expected ValueError is not raised.
test_empty_train()
test_empty_predict()
test_accuracy_empty()

# Print a message indicating the tests have completed
# This line is only reached if none of the assertions or tests above raised.
print("All tests completed successfully.")

All tests completed successfully.
