In [5]:
import numpy as np

from sklearn.tree import DecisionTreeClassifier

#from sklearn.ensemble import AdaBoostClassifier

from sklearn.datasets import make_classification
from sklearn.metrics import confusion_matrix as CM


def I(flag):
    """Indicator function: return 1 when `flag` is truthy, otherwise 0."""
    if flag:
        return 1
    return 0

def sign(x):
    """Return the sign of a scalar, treating zero as positive.

    Parameters
    ----------
    x : real scalar (int, float or NumPy scalar)

    Returns
    -------
    -1.0 for negative input, 1.0 for positive input, the int 1 for zero,
    and `x` itself when `x` is NaN.

    Comparisons are used instead of `abs(x)/x` because that quotient is
    inf/inf = NaN for infinite inputs, which silently loses the sign.
    """
    if x == 0:
        return 1
    if x < 0:
        return -1.0
    if x > 0:
        return 1.0
    # Only NaN fails all three comparisons above; hand it back unchanged.
    return x

class AdaBoost:
    """Discrete AdaBoost for binary labels in {-1, 1} using depth-1 decision trees.

    Each boosting round fits a stump (`DecisionTreeClassifier(max_depth=1)`)
    on the current sample weights, scores it by weighted error, converts that
    error into a vote weight (alpha) and multiplies the weights of the rows
    the stump got wrong by exp(alpha).

    Parameters
    ----------
    n_estimators : int, default 50
        Number of boosting rounds (stumps) to fit.
    verbose : bool, default True
        When true, print the per-round diagnostics (alpha, weights, error)
        during `fit` and the raw score vector during `predict`.

    Attributes
    ----------
    models : list
        After `fit`, one `(alpha, predict_fn)` pair per round. Before `fit`
        it holds `n_estimators` placeholders set to None.
    """

    def __init__(self, n_estimators=50, verbose=True):
        self.n_estimators = n_estimators
        self.models = [None]*n_estimators
        self.verbose = verbose

    @staticmethod
    def _stage_weight(err, eps=1e-10):
        """Turn a weighted error rate into the stump's vote weight log((1-err)/err).

        `err` is clipped to [eps, 1-eps] first: a perfect stump (err == 0)
        would otherwise give alpha = inf, and inf * 0 in the weight update
        turns every sample weight into NaN.
        """
        err = np.clip(err, eps, 1 - eps)
        return np.log((1 - err)/err)

    def fit(self, X, y):
        """Fit `n_estimators` boosted stumps on the training data.

        Parameters
        ----------
        X : array-like, one row per sample; converted to float64.
        y : array-like of labels, one per row of X. `predict` combines the
            stumps' outputs by sign, so labels are expected to be in {-1, 1}.

        Raises
        ------
        ValueError
            If `y` is empty.
        """
        X = np.asarray(X, dtype=np.float64)
        y = np.asarray(y)
        N = len(y)
        if N == 0:
            raise ValueError("cannot fit AdaBoost on an empty training set")

        # Start from uniform weights. They are deliberately left unnormalised
        # between rounds; the error below divides by their sum instead.
        w = np.full(N, 1/N)
        if self.verbose:
            print("Number of Training Rows: ", N)
            print('Initial weights: ', w)
            print()

        models = []
        for m in range(self.n_estimators):

            # Build the stump for this round. DecisionTreeClassifier does the
            # heavy lifting of choosing which feature to split on and where.
            Gm = DecisionTreeClassifier(max_depth=1).fit(X,y,sample_weight=w).predict

            # One vectorised predict call instead of one call per row.
            missed = Gm(X) != y

            errM = np.dot(w, missed)/np.sum(w) # weighted error for this round

            AlphaM = self._stage_weight(errM) # vote weight for this round

            # Only misclassified rows are up-weighted (exp(0) == 1 elsewhere).
            w = w*np.exp(AlphaM*missed)

            if self.verbose:
                print("M = ", m, "of ", self.n_estimators, '. Alpha for this m:', AlphaM)
                print(w, ' w')
                print(Gm, ' Gm')
                print(errM, ' errM')

            models.append((AlphaM, Gm))

        self.models = models

    def predict(self, X):
        """Predict labels in {-1, 1} for the rows of X.

        The score of each row is the alpha-weighted sum of the stumps'
        predictions; a negative score maps to -1, anything else
        (including exactly zero) maps to 1.
        """
        score = 0
        for AlphaM, Gm in self.models:
            score = score + AlphaM*Gm(X)

        if self.verbose:
            print(score, ' y *')

        return np.where(score < 0, -1, 1)


# Main

In [6]:
# Small synthetic problem: 25 rows. No random_state is passed, so the data
# presumably differs from run to run.
x, y = make_classification(n_samples=25)

#x = x[:,0:5] # reducing the feature space

#print(x,y, ' data')

# This implementation of AdaBoost needs y in {-1, 1}:
# label 0 becomes -1 and every other label becomes 1.
y = np.where(y == 0, -1, 1)

clf = AdaBoost(n_estimators=5) # try 5 10 50 and press Run over and over again
clf.fit(x, y)
print(' predict ...')

# Run the prediction method defined above, on the same rows used for fitting.
y_pred = clf.predict(x)


Number of Training Rows:  25
Initial weights:  [0.04 0.04 0.04 0.04 0.04 0.04 0.04 0.04 0.04 0.04 0.04 0.04 0.04 0.04
 0.04 0.04 0.04 0.04 0.04 0.04 0.04 0.04 0.04 0.04 0.04]

M =  0 of  5 . Alpha for this m: 1.9924301646902065
[0.29333333333333345, 0.04, 0.04, 0.04, 0.04, 0.04, 0.04, 0.04, 0.04, 0.04, 0.04, 0.04, 0.04, 0.04, 0.04, 0.04, 0.29333333333333345, 0.04, 0.04, 0.04, 0.04, 0.04, 0.04, 0.04, 0.29333333333333345]  w
<bound method BaseDecisionTree.predict of DecisionTreeClassifier(max_depth=1)>  Gm
0.11999999999999997  errM
M =  1 of  5 . Alpha for this m: 2.0541237336955462
[0.29333333333333345, 0.04, 0.04, 0.04, 0.04, 0.31200000000000006, 0.04, 0.04, 0.04, 0.04, 0.31200000000000006, 0.04, 0.04, 0.04, 0.04, 0.31200000000000006, 0.29333333333333345, 0.04, 0.04, 0.04, 0.04, 0.04, 0.31200000000000006, 0.31200000000000006, 0.29333333333333345]  w
<bound method BaseDecisionTree.predict of DecisionTreeClassifier(max_depth=1)>  Gm
0.1136363636363636  errM
M =  2 of  5 . Alpha for this 

In [7]:
# Show the predicted labels; y_pred was produced by clf.predict(x) on the training rows.
print(y_pred, ' predicted')

[-1  1  1 -1 -1  1  1 -1 -1 -1  1 -1 -1 -1 -1  1 -1  1  1  1  1 -1  1  1
 -1]  predicted


In [8]:
# Accuracy (in percent) and confusion matrix of the predictions against y.
# y_pred was computed on the rows the model was fitted on, so these are
# training-set figures.
matches = (y_pred == y)
print("Performance:", 100*matches.sum()/len(y))
print("Confusion Matrix:", CM(y, y_pred))

Performance: 100.0
Confusion Matrix: [[13  0]
 [ 0 12]]
