In [59]:
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

In [125]:
class AdaBoost:
    """AdaBoost.M1-style ensemble of weighted decision trees.

    Each boosting round fits a ``DecisionTreeClassifier`` using the current
    sample weights, measures its weighted training error ``eps`` and stores
    the tree together with ``beta = eps / (1 - eps)``. Correctly classified
    samples are then multiplied by ``beta`` and the weights renormalised, so
    the next tree concentrates on the samples that were misclassified.
    Prediction is a weighted vote in which each tree counts ``log(1 / beta)``.

    Parameters
    ----------
    B : int
        Maximum number of boosting rounds.
    params : dict, optional
        Keyword arguments forwarded to ``DecisionTreeClassifier``.
        Defaults to ``{"max_depth": 4}``.

    Attributes
    ----------
    ensemble : list of [estimator, beta]
        Fitted trees paired with their ``beta``; rebuilt on every ``fit``.
    """

    def __init__(self, B, params=None):
        self.B = B
        # Build a fresh dict per instance: a dict literal used as a default
        # argument would be shared (and mutated) across all instances.
        self.params = {"max_depth": 4} if params is None else dict(params)

        self.ensemble = []

    def fit(self, X, y):
        """Fit up to ``B`` weighted trees on ``(X, y)``.

        Boosting stops early when a tree classifies the training set
        perfectly (nothing is left to reweight) or when its weighted error
        reaches 0.5 (such a tree is discarded, following AdaBoost.M1).

        Returns
        -------
        self
        """
        y = np.asarray(y)
        # Start from scratch so that refitting does not stack new trees on
        # top of the ones kept from a previous call.
        self.ensemble = []
        weights = np.full(X.shape[0], 1 / X.shape[0])

        for _ in range(self.B):
            dtc = DecisionTreeClassifier(**self.params)
            dtc.fit(X, y, sample_weight=weights)
            correct = dtc.predict(X) == y

            # Weighted training error; the weights sum to one.
            eps = np.sum(weights[~correct])
            if eps >= 0.5:
                # beta would be >= 1 (or a division by zero at eps == 1):
                # the tree carries no usable vote, so drop it and stop.
                break

            beta = eps / (1 - eps)
            self.ensemble.append([dtc, beta])

            if eps == 0:
                # Perfect tree: beta is 0 and every weight would vanish.
                break

            weights[correct] *= beta

            if (weights_sum := np.sum(weights)) != 0:
                weights /= weights_sum
            else:
                break

        return self

    def predict(self, X):
        """Predict one label per row of ``X`` by weighted majority vote.

        Every stored tree votes for the label it predicts with weight
        ``log(1 / beta)``. Ties go to the smallest label in sorted order.

        Raises
        ------
        RuntimeError
            If the ensemble is empty (``fit`` not called, or no tree was kept).
        """
        if not self.ensemble:
            raise RuntimeError(
                "AdaBoost ensemble is empty; call fit() before predict()."
            )

        # One row of predictions per tree (assumes each tree returns a 1-D
        # label array, as a single-output classifier does).
        member_preds = np.stack(
            [np.asarray(tree.predict(X)) for tree, _ in self.ensemble]
        )
        betas = np.array([beta for _, beta in self.ensemble], dtype=float)
        # A perfect tree has beta == 0; floor it so its vote stays finite
        # while still dominating every imperfect tree.
        alphas = -np.log(np.clip(betas, np.finfo(float).tiny, None))

        classes = np.unique(member_preds)
        # votes[k, i] is the total weight cast for classes[k] on sample i.
        votes = np.stack(
            [alphas @ (member_preds == label).astype(float) for label in classes]
        )
        return classes[np.argmax(votes, axis=0)]


# Test: simulated two-class problem.
# Each row has 10 independent standard-normal features. The label is +1 when
# the row's squared norm exceeds an independent chi-square(10) draw, else -1.

# Seed NumPy's global generator so the simulated data are identical on every
# Restart & Run All. scikit-learn estimators left at random_state=None draw
# from this same global state.
np.random.seed(0)

n_train = 2000
n_test = 10000

X_train = np.random.normal(0, 1, (n_train, 10))
chi_train = np.random.chisquare(10, n_train)
X_train_power = np.sum(X_train ** 2, axis=1)  # squared norm of each row
y_train = np.where(X_train_power > chi_train, 1, -1)

# Held-out set drawn the same way as the training set.
X_test = np.random.normal(0, 1, (n_test, 10))
chi_test = np.random.chisquare(10, n_test)
X_test_power = np.sum(X_test ** 2, axis=1)
y_test = np.where(X_test_power > chi_test, 1, -1)

ada = AdaBoost(B=10)
ada.fit(X_train, y_train)
# Last expression of the cell: display the fitted [tree, beta] pairs.
ada.ensemble

[[DecisionTreeClassifier(max_depth=4), 0.6233766233766234],
 [DecisionTreeClassifier(max_depth=4), 0.6485510781539663],
 [DecisionTreeClassifier(max_depth=4), 0.6470709334379222],
 [DecisionTreeClassifier(max_depth=4), 0.613922459739718],
 [DecisionTreeClassifier(max_depth=4), 0.6113204016556619],
 [DecisionTreeClassifier(max_depth=4), 0.7221995684948358],
 [DecisionTreeClassifier(max_depth=4), 0.7051759083723045],
 [DecisionTreeClassifier(max_depth=4), 0.7397272584639196],
 [DecisionTreeClassifier(max_depth=4), 0.6524450736494002],
 [DecisionTreeClassifier(max_depth=4), 0.707511329951186]]