In [1]:
import numpy as np

In [18]:
class DecisionStump:
    def __init__(self,polarity=None,feature=None,threshold=None,alpha=None):
        self.polarity = polarity
        self.feature = feature
        self.threshold = threshold 
        self.alpha = alpha
    
    def predict(self, X):
        n_samples,n_features = X.shape
        preds = np.ones(n_samples)
        
        X_c = X[:,self.feature]
        
        if self.polarity == 1:
            preds[X_c < self.threshold] = -1
        else:
            preds[X_c > self.threshold] = -1
            
        return preds
    

In [32]:
class Adaboost:
    def __init__(self,n_classifiers):
        self.n_classifiers = n_classifiers
        
    def fit(self,X,y):
        n_samples,n_features = X.shape
        
        self.classifiers = []
        
        w = np.full(n_samples,1/n_samples)
        for _ in range(self.n_classifiers):
            clf = DecisionStump()
            
            min_error = 1000000
            
            for f_idx in range(n_features):
                X_c = X[:,f_idx]
                
                thresholds = np.unique(X_c)
                
                for thr in thresholds:
                    polarity = 1
                    
                    preds = np.ones(n_samples)
                    
                    preds[X_c < thr] = -1
                    
                    misclassified = w[preds != y]
                    
                    error = sum(misclassified)
                    
                    if error > 0.5:
                        error = 1 - error
                        polarity = -1
                    
                    if min_error > error:
                        min_error = error
                        clf.polarity = polarity
                        clf.feature = f_idx
                        clf.threshold = thr
                    
            EPS = 1e-10
            clf.alpha = 0.5 * np.log((1 - min_error + EPS) / (min_error+EPS))
            
            preds = clf.predict(X)
            
            w *= np.exp(-clf.alpha * preds * y)
            w /= np.sum(w)
            self.classifiers.append(clf)
            
     
    def predict(self,X):
        preds = [clf.alpha * clf.predict(X) for clf in self.classifiers]
        
        preds = np.sum(preds,axis=0)
        preds = np.sign(preds)
        
        return preds
                    
                    
                    
        

In [7]:
import sklearn
from sklearn import datasets

bc = datasets.load_breast_cancer()

X,y = bc.data,bc.target

y[y==0] = -1


from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test = train_test_split(X,y)

In [33]:
clf = Adaboost(10)
clf.fit(X_train,y_train)
preds = clf.predict(X_test)

In [34]:
acc = np.sum(preds == y_test) /len(y_test) 
acc

0.951048951048951

In [30]:

for i in range(0,30,5):
    clf = Adaboost(i)
    clf.fit(X_train,y_train)
    preds = clf.predict(X_test)
    acc = np.sum(preds == y_test) /len(y_test) 
    print(i,acc)

0 0.0
5 0.9230769230769231
10 0.951048951048951
15 0.951048951048951
20 0.9440559440559441
25 0.9370629370629371
