In [2]:
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.ensemble import AdaBoostClassifier

In [27]:
class Stump:
    
    def __init__(self):
        self.polarity = 1
        self.feature_idx = None
        self.threshold = None
        self.alpha = None
        
    def predict(self, X):
        n_samples = X.shape[0]
        X_column = X[:, self.feature_idx]
        
        predictions = np.ones(n_samples)
        if self.polarity == 1:
            predictions[X_column < self.threshold] = -1
        else:
            predictions[X_column > self.threshold] = -1
        
        return predictions

class AdaBoost:

    def __init__(self, n_clf = 5):
        self.n_clf = n_clf
        self.y = None
        
    def fit(self, X, y):
        n_samples, n_features = X.shape
        weights = np.full(n_samples, (1/n_samples))
        self.y = np.where(y == 0, -1, y)
        self.classifiers = []
        for _ in range(0, self.n_clf):
            clf = Stump()
            min_error = float('inf')
            for feature_idx in range(0, n_features):
                X_column = X[:, feature_idx]
                thresholds = np.unique(X_column)
                for threshold in thresholds:
                    p = 1
                    predictions = np.ones(n_samples)
                    predictions[X_column < threshold] = -1
                    misclassified = weights[self.y != predictions]
                    error = sum(misclassified)
                    
                    if error > 0.5:
                        error = 1 - error
                        p = -1
                    
                    if error < min_error:
                        min_error = error
                        clf.polarity = p
                        clf.threshold = threshold
                        clf.feature_idx = feature_idx
            eps = 1e-10
            clf.alpha = (1/2)*np.log((1 - error)/(error + eps))
            
            predictions = clf.predict(X)
            weights *= np.exp(-clf.alpha * y * predictions)
            weights /= np.sum(weights)
            
            self.classifiers.append(clf)

    def predict(self, X):
        clf_pred = [clf.predict(X) * clf.alpha for clf in self.classifiers]
        y_pred = np.sum(clf_pred, axis = 0)
        y_pred = np.sign(y_pred)
        y_pred = np.where(y_pred == -1, 0, y_pred)
        
        return y_pred

def fcnAccuracy(y_true, y_pred):
    accuracy = np.sum(y_true == y_pred) / len(y_true)
    return accuracy

In [30]:
data = datasets.load_breast_cancer()
X = data.data
y = data.target

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=3)

In [31]:
dt = AdaBoost()
dt.fit(X_train, y_train)
    
y_pred = dt.predict(X_test)
accuracy = fcnAccuracy(y_test, y_pred)

print ("Accuracy:", accuracy)

Accuracy: 0.9300699300699301


In [32]:
dt2 = AdaBoostClassifier(n_estimators=5)
dt2.fit(X_train, y_train)
    
y_pred2 = dt.predict(X_test)
accuracy2 = fcnAccuracy(y_test, y_pred)

print ("Accuracy:", accuracy2)

Accuracy: 0.9300699300699301
