# AdaBoost

In [124]:
import numpy as np
from matplotlib import pyplot as plt

In [125]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from scipy.optimize import minimize
from sklearn.metrics import accuracy_score

In [126]:
# Generate a binary classification dataset
X, y = make_classification(n_samples=1000,   # Number of samples
                           n_features=20,    # Number of features
                           n_informative=15, # Number of informative features
                           n_redundant=5,    # Number of redundant features
                           n_classes=2,      # Binary classification
                           random_state=42)  # Seed for reproducibility

# Convert labels from {0, 1} to {-1, 1}
y = np.where(y == 0, -1, 1)
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8)

In [127]:
print(f'X_train shape: {X_train.shape}')
print(f'y_train shape: {y_train.shape}')
print(f'X_test shape: {X_test.shape}')
print(f'y_test shape: {y_test.shape}')

X_train shape: (800, 20)
y_train shape: (800,)
X_test shape: (200, 20)
y_test shape: (200,)


In [128]:
class AdaBoostClassifier:
    def __init__(self,
                      n_trees: int=100) -> None:
        self.n_trees = n_trees
        self.weights = [] 
        self.alphas = [] 
        self.trees = []

    def fit(self,
            X: np.ndarray,
            y: np.ndarray) -> None:

        self.weights = np.array([1. / X.shape[0] for _ in range(X.shape[0])])
        self.alphas = [] 
        self.trees = []
        
        for _ in range(self.n_trees):
            clf = DecisionTreeClassifier()
            clf.fit(X, y, sample_weight=self.weights)
            y_pred = clf.predict(X)

            indicator = np.where(y != y_pred, 1, 0) 
            eps = np.sum(self.weights * indicator) / np.sum(self.weights)
            eps = max(eps, 1e-8) 
            alpha = np.log((1 - eps) / eps)
            self.weights = self.weights * np.exp(alpha * indicator)

            self.alphas.append(alpha)
            self.trees.append(clf)


    def predict(self, x: np.ndarray) -> np.ndarray:
        out = np.array([alpha * clf.predict(x) for alpha, clf in zip(self.alphas, self.trees)])
        out = np.sum(out, axis=0) 
        return np.sign(out)

In [129]:
abc = AdaBoostClassifier(n_trees=100)
abc.fit(X_train, y_train)
y_pred = abc.predict(X_test)
r2 = accuracy_score(y_test, y_pred)
print(f'Accuracy: {r2}')

Accuracy: 0.81
