In [1]:
import numpy as np
import pandas as pd

In [2]:
class DecisionStump:
    def __init__(self):
        self.polarity = 1
        self.feature_idx = None
        self.threshold = None
        self.alpha = None
        
    def predict(self,X):
        n_samples = X.shape[0]
        X_c = X[:,self.feature_idx]
        preds = np.ones(n_samples)
        if self.polarity == 1:
            preds[X_c < self.threshold] = -1
        else:
            preds[X_c > self.threshold] = -1
            
        return preds

In [3]:
class Adaboost:
    def __init__(self,n_clf):
        self.n_clf = n_clf 
        
    def fit(self,X,y):
        n_samples,n_features = X.shape
        
        #initialize weights for the dataset
        w = np.full(n_samples,1/n_samples)
        
        
        
        self.clfs = []
        
        for _ in range(self.n_clf):
            
            clf = DecisionStump()
            
            min_error = float('inf')
        
            #find the best feature and threshold
            for feature_idx in range(n_features):
                X_c = X[:,feature_idx]
                thresholds = np.unique(X_c)
            
                #looping all possible thresholds
                for thr in thresholds:
                    polarity = 1
                    preds = np.ones(n_samples)
                    preds[X_c < thr] = -1
                
                    #error
                    misclassification = w[preds != y]
                    error = sum(misclassification)
                
                    if error > 0.5:
                        error = 1 - error
                        polarity = -1
                
                    if error < min_error:
                        min_error = error
                        clf.polarity = polarity
                        clf.feature_idx = feature_idx
                        clf.threshold = thr
                
                
                EPS = 1e-10
                clf.alpha = 0.5 * np.log((1-min_error+EPS)/(min_error + EPS))
            
                preds = clf.predict(X)
            
                #updating weights of data points
                w *= np.exp(-clf.alpha * y * preds)
                w /= np.sum(w)
            
                self.clfs.append(clf)
        
    def predict(self,X):
        preds = [clf.alpha * clf.predict(X) for clf in self.clfs]
        y_preds = np.sum(preds,axis=0)
        y_preds = np.sign(y_preds)
        return y_preds

In [4]:
import sklearn
from sklearn import datasets

bc = datasets.load_breast_cancer()

X,y = bc.data,bc.target

y[y==0] = -1


from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test = train_test_split(X,y)

In [17]:
clf = Adaboost(5)
clf.fit(X_train,y_train)
preds = clf.predict(X_test)

In [18]:
acc = np.sum(preds == y_test)/ len(preds)
acc

0.9300699300699301

In [5]:
X = np.array([2,2])
X.shape[0]

2

In [6]:
np.ones(3)

array([1., 1., 1.])