In [28]:
import numpy as np
import pandas as pd

In [29]:
class DecisionStump:
    """One-level decision tree: thresholds a single feature and outputs -1 or +1.

    All attributes except ``polarity`` start as ``None`` and are meant to be
    filled in by whoever trains the stump.
    """

    def __init__(self):
        # Column of X that this stump compares against the threshold.
        self.feature_idx = None
        # Cut-off value the selected feature is compared with.
        self.threshold = None
        # Which side of the threshold is labelled -1:
        #   1  -> values strictly below the threshold
        #   otherwise -> values strictly above the threshold
        self.polarity = 1
        # Weight of this stump's vote in an ensemble.
        self.alpha = None

    def predict(self, X):
        """Return a float array with one -1/+1 label per row of ``X``.

        ``X`` must be two-dimensional (rows are samples, columns are features).
        Rows whose feature value equals the threshold are always labelled +1.
        """
        n_rows, _ = X.shape
        feature_values = X[:, self.feature_idx]

        if self.polarity == 1:
            negative_side = feature_values < self.threshold
        else:
            negative_side = feature_values > self.threshold

        labels = np.where(negative_side, -1.0, 1.0)
        return labels

In [38]:
class Adaboost:
    """AdaBoost binary classifier that combines weighted decision stumps.

    Labels must be encoded as -1 / +1: the weight update multiplies each label
    by the stump's -1/+1 prediction to tell hits from misses.
    """

    def __init__(self, n_clf = 5):
        self.n_clf = n_clf  # number of decision stumps (boosting rounds)
        self.clfs = []  # fitted stumps, in training order

    def fit(self, X, y):
        """Train ``n_clf`` stumps on ``X`` (n_samples x n_features) and ``y``.

        Each round picks the stump with the lowest weighted error, assigns it a
        vote weight ``alpha``, then re-weights the samples so that the ones it
        misclassified count more in the next round. Any previously fitted
        stumps are discarded.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        n_samples, n_features = X.shape

        # Start from a uniform distribution over the samples.
        w = np.full(n_samples, 1 / n_samples)

        self.clfs = []

        for _ in range(self.n_clf):
            clf, min_error = self._fit_stump(X, y, w)

            # The small epsilon keeps alpha finite when the stump is perfect
            # (min_error == 0).
            clf.alpha = 0.5 * np.log((1.0 - min_error + 1e-10) / (min_error + 1e-10))

            # Misclassified samples (y * prediction == -1) are scaled up,
            # correctly classified ones are scaled down.
            w = w * np.exp(-clf.alpha * y * clf.predict(X))
            # Renormalise with the sum of the *updated* weights so that w stays
            # a probability distribution and weighted errors stay in [0, 1].
            w = w / np.sum(w)

            self.clfs.append(clf)

    def _fit_stump(self, X, y, w):
        """Return ``(stump, error)`` for the stump with the lowest weighted error."""
        n_samples, n_features = X.shape
        clf = DecisionStump()
        min_error = float("inf")

        for feature_i in range(n_features):
            X_column = X[:, feature_i]

            # Every distinct observed value is a candidate threshold.
            for threshold in np.unique(X_column):
                # Score both polarities with exactly the rule that
                # DecisionStump.predict applies. Using "1 - error" for the
                # flipped stump would be wrong because samples equal to the
                # threshold are labelled +1 under either polarity.
                for polarity in (1, -1):
                    predictions = np.ones(n_samples)
                    if polarity == 1:
                        predictions[X_column < threshold] = -1
                    else:
                        predictions[X_column > threshold] = -1

                    # Weighted error: total weight of the misclassified samples.
                    error = np.sum(w[y != predictions])

                    if error < min_error:
                        clf.polarity = polarity
                        clf.threshold = threshold
                        clf.feature_idx = feature_i
                        min_error = error

        return clf, min_error

    def predict(self, X):
        """Return the sign of the alpha-weighted vote of all fitted stumps.

        The result is +1 or -1 per row of ``X``; an exact tie in the weighted
        vote yields 0 because ``np.sign(0) == 0``.
        """
        X = np.asarray(X)
        clf_preds = [clf.alpha * clf.predict(X) for clf in self.clfs]
        y_pred = np.sum(clf_preds, axis=0)
        return np.sign(y_pred)


In [39]:
from sklearn import datasets
from sklearn.model_selection import train_test_split

def accuracy(y_true, y_pred):
    """Return the fraction of positions where ``y_pred`` equals ``y_true``.

    Both arguments may be any array-like (NumPy array, list, tuple, ...); they
    are converted to arrays so the comparison is element-wise. Without the
    conversion, comparing two Python lists yields a single bool rather than
    one result per element.

    Raises:
        ValueError: if the two inputs do not have the same shape.
    """
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    if y_true.shape != y_pred.shape:
        raise ValueError(
            f"shape mismatch: y_true {y_true.shape} vs y_pred {y_pred.shape}"
        )
    return np.sum(y_true == y_pred) / len(y_true)

# Load scikit-learn's bundled breast-cancer dataset.
data = datasets.load_breast_cancer()
X = data.data
y = data.target

# Recode label 0 as -1 (in place): Adaboost multiplies labels by the stumps'
# -1/+1 predictions, so it needs the labels in the same encoding.
y[y == 0] = -1

# Hold out 20% of the samples for evaluation; the fixed seed keeps the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

# Fit a 5-stump ensemble on the training split and score it on the hold-out.
clf = Adaboost(n_clf=5)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

acc = accuracy(y_test, y_pred)
print("Accuracy:", acc)

Accuracy: 0.9385964912280702
