In [None]:
import numpy as np
from sklearn.tree import DecisionTreeClassifier
from pathlib import Path

In [7]:
def adaboost(X, y, num_iter, max_depth=1):
    """Given an numpy matrix X, a array y and num_iter return trees and weights 
   
    Input: X, y, num_iter
    Outputs: array of trees from DecisionTreeClassifier
             trees_weights array of floats
    Assumes y is {-1, 1}
    """
    trees = []
    trees_weights = [] 
    N, _ = X.shape
    d = np.ones(N) / N

    for m in range(num_iter):
        # Train decision stump
        weak_classifier = DecisionTreeClassifier(max_depth=1, random_state=0)
        weak_classifier.fit(X, y, sample_weight=d)
        y_pred = weak_classifier.predict(X)
        err = np.sum(d * (y != y_pred))
        alpha = 0.5*np.log((1 - err) / err)
        
        for i in range(len(d)):
            if y[i] == y_pred[i]: 
                continue
            else:
                d[i] *= alpha

        d /= np.sum(d)
        trees_weights.append(alpha)
        trees.append(weak_classifier)

    return trees, trees_weights


def adaboost_predict(X, trees, trees_weights):
    """Given X, trees and weights predict Y
    """
    N, _ =  X.shape
    y = np.zeros(N)
    for alpha,tree in zip(trees_weights,trees):
        y=y+alpha*tree.predict(X)
    
    return np.sign(y)


---
## Test on a sample data


In [8]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

# Load breast cancer dataset
data = load_breast_cancer()
X, y = data.data, data.target
# Convert targets to {-1, 1}
y[y == 0] = -1

# Split the dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Applying Adaboost
num_iter = 10
trees, trees_weights = adaboost(X_train, y_train, num_iter)

# Predicting
y_pred = adaboost_predict(X_test, trees, trees_weights)

# Accuracy
accuracy = np.sum(y_test == y_pred) / float(len(y_test))
accuracy

0.9385964912280702