### Exercise 2

Implement an `AdaBoost` class with a `fit` and a `predict` method for binary classification problems. As its base learner, the `AdaBoost` class should accept any sklearn classifier whose `fit` method takes a `sample_weight` argument.

Test your implementation against sklearn's `AdaBoostClassifier`, using 'SAMME' as the boosting algorithm, on a dataset of your choice.

---
### Imports

In [1]:
import numpy as np
from sklearn.base import clone
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

---
### AdaBoost Class

In [2]:
class AdaBoost:
    """Discrete AdaBoost for binary classification with labels in {-1, +1}.

    Parameters
    ----------
    base_learner : estimator
        Unfitted classifier used as a template for every boosting round.
        It is cloned each round, and its ``fit`` method is called with a
        ``sample_weight`` keyword argument.
    n_estimators : int, default=10
        Maximum number of boosting rounds.

    Attributes
    ----------
    estimators : list of (float, estimator) or None
        ``(alpha, fitted_learner)`` pairs in training order; ``None`` until
        ``fit`` has been called.
    """

    def __init__(self, base_learner, n_estimators=10):
        self.T = n_estimators
        self.base_clf = base_learner
        self.estimators = None

    def fit(self, X, y):
        """Fit the boosted ensemble on ``(X, y)``.

        Parameters
        ----------
        X : array-like
            Training samples, passed unchanged to the base learner.
        y : array-like
            Training labels; every entry must be -1 or +1.

        Returns
        -------
        list of (float, estimator)
            The fitted ``(alpha, learner)`` pairs, also stored in
            ``self.estimators``.

        Raises
        ------
        ValueError
            If ``y`` contains a value other than -1 or +1.
        """
        y = np.asarray(y)
        # The weight update and the signed vote in ``predict`` both rely on
        # the {-1, +1} encoding; other encodings would give wrong results
        # without any error, so reject them up front.
        if not np.all(np.isin(y, (-1, 1))):
            raise ValueError("AdaBoost expects labels in {-1, +1}")

        n_samples = y.shape[0]
        w = np.ones(n_samples)
        self.estimators = []
        eps = np.finfo(float).eps

        for _ in range(self.T):
            w = w / np.sum(w)

            h = clone(self.base_clf)
            h.fit(X, y, sample_weight=w)

            miss = y != h.predict(X)
            # w sums to one, so the weighted error is the mass of the misses.
            e = np.sum(w[miss])
            perfect = e <= 0

            # Keep e strictly inside (0, 1): e == 0 would divide by zero and
            # e == 1 would take log(0), giving an infinite alpha and NaN
            # weights on the next round.
            e = np.clip(e, eps, 1 - eps)
            a = 0.5 * np.log((1 - e) / e)

            self.estimators.append((a, h))

            if perfect:
                # No sample is misclassified, so the reweighting would leave
                # the distribution unchanged; further rounds add nothing.
                break

            # Up-weight the misses, down-weight the hits.
            w = w * np.exp(np.where(miss, a, -a))

        return self.estimators

    def predict(self, X):
        """Predict labels for ``X`` by an alpha-weighted vote.

        Parameters
        ----------
        X : array-like with a ``shape`` attribute
            Samples to classify.

        Returns
        -------
        numpy.ndarray
            Float array with entries -1.0 or +1.0.

        Raises
        ------
        RuntimeError
            If called before ``fit``.
        """
        if self.estimators is None:
            raise RuntimeError(
                "This AdaBoost instance is not fitted yet; call fit() first"
            )

        pred = np.zeros(X.shape[0])
        for a, h in self.estimators:
            pred += a * h.predict(X)

        # np.sign would return 0 on an exact tie, which is not a valid label;
        # ties are resolved to the negative class instead.
        return np.where(pred > 0, 1.0, -1.0)

    def score(self, X, y):
        """Return the accuracy of ``predict(X)`` with respect to ``y``."""
        y_hat = self.predict(X)
        return accuracy_score(y, y_hat)

---
### Half-Moons Dataset

In [6]:
# Two interleaving half-moons with Gaussian noise; the fixed seed keeps the
# data reproducible.
X, y = make_moons(random_state=10, noise=0.25, n_samples=500)
# Recode the 0 labels as -1 so the targets are in {-1, +1}.
y = np.where(y == 0, -1, y)

---
### Test

In [7]:
def test(clf, X, y):
    """Fit ``clf`` on half of the data and print its train and test error.

    The data is split 50/50 with a fixed seed, ``clf.fit`` is called on the
    training half, and ``1 - clf.score(...)`` is printed as a percentage for
    both halves.
    """
    split = train_test_split(X, y, test_size=0.5, random_state=10)
    X_train, X_test, y_train, y_test = split

    clf.fit(X_train, y_train)

    # Both errors are computed before anything is printed.
    train_error = 1. - clf.score(X_train, y_train)
    test_error = 1. - clf.score(X_test, y_test)

    print(f"train error = {100*train_error:.1f}%")
    print(f"test error  = {100*test_error:.1f}%")

In [8]:
# Both ensembles get the same number of boosting rounds so that their
# errors can be compared directly.
n_estimators = 20

# Custom implementation, boosting depth-1 decision trees.
print("My AdaBoost")
clf1 = AdaBoost(
    base_learner=DecisionTreeClassifier(max_depth=1),
    n_estimators=n_estimators,
)
test(clf1, X, y)

print()

# Reference implementation from scikit-learn, SAMME variant.
print("Sklearn's AdaBoost")
clf2 = AdaBoostClassifier(
    n_estimators=n_estimators,
    algorithm='SAMME',
)
test(clf2, X, y)

My AdaBoost
train error = 7.2%
test error  = 12.0%

Sklearn's AdaBoost
train error = 7.2%
test error  = 12.0%
