In [6]:
import numpy as np
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
import random

In [7]:
"""
    Multi-Class Perceptron using One-vs-All (OvA) strategy.
"""
class MultiClassPerceptron:
    """
    Linear multi-class perceptron.

    Keeps one weight vector and one bias per class. A sample is assigned to
    the class with the highest linear score; when that prediction is wrong,
    the true class is moved towards the sample and the wrongly predicted
    class is moved away from it.
    """

    def __init__(self, alpha=0.01):
        """
        Initialize the multi-class perceptron model.
        :param alpha: Learning rate (positive float).
        :raises ValueError: If alpha is not positive.
        """
        if alpha <= 0:
            raise ValueError("Learning rate should be positive.")

        self.alpha = alpha  # Learning rate
        self.num_classes = None  # Number of classes (set during training)
        self.weights = None  # Per-class weight vectors, shape (C, D)
        self.biases = None  # Per-class biases, shape (C,)

    def train(self, X, y, epochs=10):
        """
        Train the perceptron with mistake-driven updates.
        :param X: NxD array, input features (N samples, D features).
        :param y: N (or Nx1) array of non-negative integer class labels.
                  Labels are used directly as row indices into the weight
                  matrix, so the model allocates max(y) + 1 classes even if
                  some label values do not occur in y.
        :param epochs: Number of passes over the training data.
        :raises ValueError: On empty input, mismatched sample counts,
                            non-positive epochs, or labels that are not
                            non-negative integers.
        """
        X = np.asarray(X)
        y = np.asarray(y)

        # Validate inputs
        if X.size == 0 or y.size == 0:
            raise ValueError("Input data X and y cannot be empty.")
        if X.shape[0] != y.shape[0]:
            raise ValueError("Number of samples in X and y must match.")
        if epochs <= 0:
            raise ValueError("Number of epochs should be positive.")

        # Labels index rows of the weight matrix, so they must be valid
        # non-negative integers (a negative index would silently wrap around).
        flat_labels = y.reshape(-1)
        labels = flat_labels.astype(int)
        if np.any(labels != flat_labels) or labels.min() < 0:
            raise ValueError("Class labels must be non-negative integers.")

        # Size the model by the largest label rather than the number of
        # distinct labels: a training subset that misses a class would
        # otherwise produce an out-of-range row index during the update.
        self.num_classes = int(labels.max()) + 1
        n_samples, n_features = X.shape

        # Initialize weights and biases for each class
        self.weights = np.zeros((self.num_classes, n_features))  # Shape: (C, D)
        self.biases = np.zeros(self.num_classes)  # Shape: (C,)

        # Training loop
        for _ in range(epochs):
            for x_i, y_i in zip(X, labels):
                # Score every class and pick the best one
                scores = np.dot(self.weights, x_i) + self.biases
                y_pred = int(np.argmax(scores))

                if y_pred == y_i:
                    continue

                # Penalize the wrongly predicted class
                self.weights[y_pred] -= self.alpha * x_i
                self.biases[y_pred] -= self.alpha

                # Reward the correct class
                self.weights[y_i] += self.alpha * x_i
                self.biases[y_i] += self.alpha

    def predict(self, X_new):
        """
        Predict the class labels for new samples.
        :param X_new: MxD array of samples; a single 1-D sample of length D
                      is also accepted and treated as one row.
        :return: Length-M numpy array of predicted class labels.
        :raises ValueError: If the model has not been trained yet.
        """
        if self.weights is None or self.biases is None:
            raise ValueError("The perceptron is not trained yet. Train it before predicting.")

        # Promote a lone sample to a 1xD batch so argmax over axis 1 is valid
        X_new = np.atleast_2d(np.asarray(X_new))

        # Compute scores for all classes and pick the highest per sample
        scores = np.dot(X_new, self.weights.T) + self.biases
        return np.argmax(scores, axis=1)



In [87]:
"""
    SAMME - Multi-class AdaBoost algorithm
"""
class SAMME:
    """
    SAMME multi-class AdaBoost over a pool of already-trained weak learners.

    The learners are ranked once by their unweighted training error and then
    boosted in that order: each learner receives a vote weight derived from
    its weighted error, and the sample weights are increased on the samples
    it got wrong.
    """

    def __init__(self, num_learner: int, num_cats: int):
        """
        Initialize the SAMME model.
        :param num_learner: number of weak learners.
        :param num_cats: number of classes (at least 2).
        :raises ValueError: If num_cats is smaller than 2.
        """
        if num_cats < 2:
            raise ValueError(f"num_cats should be at least 2, but got {num_cats}")
        self.num_learner = num_learner
        self.num_cats = num_cats
        self.entry_weights = None  # Per-sample weights, set by train()
        self.learner_weights = None  # Per-learner vote weights, set by train()
        self.sorted_learners = None  # Learners in boosting order, set by train()
        # Created here so visualize_performance() is safe to call before train()
        self.performance_metrics = []

    def get_num_learner(self):
        """Return the number of weak learners given at construction time."""
        return self.num_learner

    @staticmethod
    def _predict_labels(learner, features):
        """Run one learner on a 2-D feature batch; return a flat int label array."""
        return np.asarray(learner.predict(features)).reshape(-1).astype(int)

    def train(self, train_data: list, learners: list):
        """
        Train the AdaBoost model.
        :param train_data: List of (features, label) tuples.
        :param learners: List of weak learner objects with a `predict(X)`
                         method that accepts an (M, D) array and returns M
                         labels.
        :raises ValueError: If train_data is empty.
        """
        n = len(train_data)
        m = len(learners)
        if n == 0:
            raise ValueError("train_data cannot be empty.")

        print("Starting boosting...")

        # One row per sample so each learner is evaluated in a single call
        features = np.stack([np.asarray(X).reshape(-1) for X, _ in train_data])
        labels = np.array([int(label) for _, label in train_data])

        self.entry_weights = np.full(n, 1 / n, dtype=np.float32)
        self.learner_weights = np.zeros(m, dtype=np.float32)
        self.performance_metrics = []  # Store error rates for visualization

        # Evaluate every learner once; the mistake masks are reused both for
        # the ranking and for the boosting rounds below.
        mistakes = [self._predict_labels(learner, features) != labels for learner in learners]

        # Sort learners by unweighted error count (stable, ties keep input order)
        order = sorted(range(m), key=lambda j: int(mistakes[j].sum()))
        self.sorted_learners = [learners[j] for j in order]

        # Boost each learner
        for idx, j in enumerate(order):
            is_wrong = mistakes[j].astype(np.float64)

            # Weighted error, clamped away from 0 and 1 so the log below is finite
            weighted_error = np.sum(is_wrong * self.entry_weights) / self.entry_weights.sum()
            weighted_error = max(1e-6, min(1 - 1e-6, weighted_error))

            self.performance_metrics.append(weighted_error)

            # SAMME learner weight; learners no better than chance get 0
            self.learner_weights[idx] = max(
                0, np.log((1 - weighted_error) / weighted_error) + np.log(self.num_cats - 1)
            )

            # Up-weight the samples this learner misclassified, then renormalize
            self.entry_weights *= np.exp(self.learner_weights[idx] * is_wrong)
            self.entry_weights /= self.entry_weights.sum()

        # Normalize the votes; skipped when every weight is 0 so the weights
        # stay finite instead of becoming NaN through 0/0.
        total_weight = np.sum(self.learner_weights)
        if total_weight > 0:
            self.learner_weights /= total_weight
        print("Boosting completed.")

    def predict(self, data):
        """
        Predict the label for each sample in data.
        :param data: List or array of features, one entry per sample.
        :return: Numpy array of predicted class labels, one per sample.
        :raises ValueError: If the model has not been trained yet.
        """
        if self.sorted_learners is None or self.learner_weights is None:
            raise ValueError("The SAMME model is not trained yet. Train it before predicting.")

        n = len(data)
        pooled_predictions = np.zeros((n, self.num_cats), dtype=np.float32)
        if n == 0:
            return np.argmax(pooled_predictions, axis=1)

        features = np.asarray(data).reshape(n, -1)
        rows = np.arange(n)
        for weight, learner in zip(self.learner_weights, self.sorted_learners):
            if weight == 0:
                continue  # A zero-weight vote cannot change the result
            # Each row appears once in `rows`, so the fancy-indexed += is exact
            pooled_predictions[rows, self._predict_labels(learner, features)] += weight
        return np.argmax(pooled_predictions, axis=1)

    def visualize_performance(self):
        """
        Visualize the performance of each weak learner during training.

        Plots the clamped weighted error recorded for each boosting round.
        """
        # Imported here because the file's top-level imports do not bring
        # `plt` into scope.
        import matplotlib.pyplot as plt

        plt.figure(figsize=(10, 6))
        plt.plot(range(1, len(self.performance_metrics) + 1), self.performance_metrics, marker='o', label="Error Rate")
        plt.title("Performance of Weak Learners During Training")
        plt.xlabel("Iteration")
        plt.ylabel("Error Rate")
        plt.legend()
        plt.grid(True)
        plt.show()

In [57]:
"""
Train weak learners separately.
"""
def train_weak_learners(num_weak_learners, alpha, epochs, X_train, y_train):
    """
    Train weak learners (perceptrons) with different alphas and return them.

    Each learner gets its own learning rate drawn uniformly from
    [0.1 * alpha, 10 * alpha] and sees the training data in its own random
    order, so the resulting learners differ from one another.

    :param num_weak_learners: Number of weak learners (positive integer).
    :param alpha: Base learning rate for the perceptrons.
    :param epochs: Number of epochs for training weak learners.
    :param X_train: Training data features.
    :param y_train: Training data labels.
    :return: List of trained weak learners.
    :raises ValueError: If num_weak_learners is not positive.
    """
    # Reject an empty pool up front; the average below would divide by zero.
    if num_weak_learners <= 0:
        raise ValueError("Number of weak learners should be positive.")

    print("Training weak learners...")

    # Index-array shuffling below needs numpy arrays, not plain lists
    X_train = np.asarray(X_train)
    y_train = np.asarray(y_train)

    alpha_min = 0.1 * alpha
    alpha_max = 10 * alpha
    weak_learners = []
    accuracies = []

    for _ in range(num_weak_learners):
        learner = MultiClassPerceptron(alpha=random.uniform(alpha_min, alpha_max))

        # Shuffle training data for each learner to ensure different weight updates
        shuffled_indices = np.random.permutation(len(X_train))
        X_train_shuffled = X_train[shuffled_indices]
        y_train_shuffled = y_train[shuffled_indices]

        # Train each weak learner
        learner.train(X_train_shuffled, y_train_shuffled, epochs=epochs)
        weak_learners.append(learner)

        # Record the training accuracy of the current learner
        predictions = learner.predict(X_train_shuffled)
        accuracies.append(np.mean(predictions == y_train_shuffled))

    # Compute and print the average training accuracy
    average_accuracy = sum(accuracies) / num_weak_learners
    print(f"Average Training Accuracy of Weak Learners = {average_accuracy:.2f}")

    return weak_learners

In [94]:
def main():
    """
    Run the digits experiment end to end.

    Loads the digits data, splits it into weak-learner, boosting, validation
    and test parts, trains one SAMME ensemble per candidate pool size, keeps
    the ensemble with the highest validation accuracy and reports its test
    accuracy.
    """
    # Load the dataset and divide the features by 16
    features, targets = load_digits(return_X_y=True)
    features = features / 16.0

    # 80% training; the remaining 20% is halved into validation and test
    X_train, X_holdout, y_train, y_holdout = train_test_split(
        features, targets, test_size=0.2, random_state=42
    )
    X_val, X_test, y_val, y_test = train_test_split(
        X_holdout, y_holdout, test_size=0.5, random_state=42
    )

    # Small slice for the weak learners, the rest for boosting
    X_weak_train, X_boost_train, y_weak_train, y_boost_train = train_test_split(
        X_train, y_train, test_size=0.93, random_state=42
    )

    # Weak learner configuration and the pool sizes to try
    weak_learner_config = {"alpha": 0.05, "epochs": 1}
    num_weak_learner_list = [20, 50, 100, 500, 1000]

    # These depend only on the boosting split, so build them once
    num_classes = np.unique(y_boost_train).size
    train_data = list(zip(X_boost_train, y_boost_train))

    best_accuracy, best_model = 0, None

    for num_weak_learners in num_weak_learner_list:
        print(f"\nTraining AdaBoost model with {num_weak_learners} weak learners...")

        # Fresh pool of weak learners for this pool size
        weak_learners = train_weak_learners(
            num_weak_learners,
            weak_learner_config["alpha"],
            weak_learner_config["epochs"],
            X_weak_train,
            y_weak_train,
        )

        # Boost the pool on the boosting split
        model = SAMME(len(weak_learners), num_classes)
        model.train(train_data, weak_learners)

        # Accuracy on the boosting split
        accuracy = accuracy_score(y_boost_train, model.predict(X_boost_train))
        print(f"Training accuracy: {accuracy * 100:.2f}%")

        # Accuracy on the validation split decides which model is kept
        accuracy = accuracy_score(y_val, model.predict(X_val))
        print(f"Validation accuracy: {accuracy * 100:.2f}%")

        if accuracy > best_accuracy:
            best_accuracy, best_model = accuracy, model

    # Nothing beat the initial accuracy of 0
    if not best_model:
        print("No model was selected.")
        return

    # Test the best model
    test_accuracy = accuracy_score(y_test, best_model.predict(X_test))
    print(f"\nBest model is the one with {best_model.get_num_learner()} learners:")
    print(f"Test accuracy: {test_accuracy * 100:.2f}%")

if __name__ == "__main__":
    main()



Training AdaBoost model with 20 weak learners...
Training weak learners...
Average Training Accuracy of Weak Learners = 0.69
Starting boosting...
Boosting completed.
Training accuracy: 83.69%
Validation accuracy: 79.44%

Training AdaBoost model with 50 weak learners...
Training weak learners...
Average Training Accuracy of Weak Learners = 0.64
Starting boosting...
Boosting completed.
Training accuracy: 85.94%
Validation accuracy: 82.22%

Training AdaBoost model with 100 weak learners...
Training weak learners...
Average Training Accuracy of Weak Learners = 0.64
Starting boosting...
Boosting completed.
Training accuracy: 87.51%
Validation accuracy: 83.89%

Training AdaBoost model with 500 weak learners...
Training weak learners...
Average Training Accuracy of Weak Learners = 0.63
Starting boosting...
Boosting completed.
Training accuracy: 88.93%
Validation accuracy: 86.11%

Training AdaBoost model with 1000 weak learners...
Training weak learners...
Average Training Accuracy of Weak Le