### Adaboost Classifier

#### Steps:
 - Load Dataset
 - Assign Initial Weights
 - for loop:
     - Fit the Model on data plus weights
     - Predict on X
     - Find the misclassifications
     - Compute Total Error (TE) - (Sum of weights of samples that have been misclassified)
     - Compute Performance of Stump - (1/2 * log((1 - TE) / TE))
     - Update Weights
         - Misclassified Samples = Weight * exp(Performance of Stump)
         - Correctly Classified Samples = Weight * exp(-Performance of Stump)
     - Normalize the Weights
     - Re-Sample the Dataset based on the weights

In [1]:
from sklearn import datasets
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
import random
import numpy as np

In [2]:
def load_dataset():
    """Build the binary-classification frame used by the notebook.

    Loads the Iris data as a DataFrame, appends the labels as a ``target``
    column, keeps only the rows whose target is 0 or 1, and drops the two
    petal measurement columns.

    Returns:
        DataFrame holding the remaining feature columns followed by
        ``target``.
    """
    features, labels = datasets.load_iris(return_X_y=True, as_frame=True)
    frame = features.assign(target=list(labels))

    # Restrict to the first two classes so the problem is binary.
    two_class = frame.loc[frame["target"].isin([0, 1])]

    return two_class.drop(columns=['petal length (cm)', 'petal width (cm)'])

In [3]:
class AdaBoostCustom:
    """Binary AdaBoost over depth-1 decision trees, using weighted re-sampling.

    Each boosting round fits a stump on the current training frame, scores it
    by its weighted error, re-weights the samples, and then draws a new
    training frame (with replacement) in proportion to those weights.

    Attributes:
        X: Working copy of the training frame. ``fit`` adds a ``weights``
            column and replaces the frame with a re-sampled one every round.
        clf_stumps: Fitted stumps, one per boosting round.
        stump_performances: Vote weight ("amount of say") of each stump.
        total_errors: Weighted error of each stump on its training frame.
    """

    # Bounds the total error away from 0 and 1 so the log-odds stay finite.
    _EPSILON = 1e-10

    def __init__(self, X, random_state=42):
        """Store a private copy of the training frame and seed the samplers.

        Args:
            X: Training DataFrame containing the feature and target columns.
            random_state: Seed for the re-sampling step. ``None`` gives
                non-reproducible sampling.
        """
        self.X = X.copy()
        self.clf_stumps = []
        self.stump_performances = []
        self.total_errors = []
        # Kept so the global `random` state is seeded exactly as before.
        random.seed(random_state)
        # DataFrame.sample draws from NumPy, not `random`, so it needs its
        # own generator for the re-sampling to be reproducible.
        self._rng = np.random.RandomState(random_state)

    def initialize_weights(self):
        """Give every row of ``self.X`` the same weight, ``1 / n_rows``."""
        self.X['weights'] = 1/self.X.shape[0]

    def find_misclassifications(self, y_pred, y):
        """Return a list of booleans, True where prediction and label differ.

        Args:
            y_pred: Predicted labels, compared position by position.
            y: True labels in the same positional order (Series, array or
                list).
        """
        return [bool(predicted != actual) for predicted, actual in zip(y_pred, y)]

    def calculate_total_error(self, missclassifications, weights):
        """Return the summed weight of the misclassified samples.

        Args:
            missclassifications: Per-sample booleans, True for a miss.
            weights: Per-sample weights in the same order.
        """
        return float(sum(
            weight
            for missed, weight in zip(missclassifications, weights)
            if missed
        ))

    def calculate_stump_performance(self, total_error):
        """Return the stump's vote weight, ``0.5 * log((1 - TE) / TE)``.

        The error is clipped to ``[_EPSILON, 1 - _EPSILON]`` so that a perfect
        stump (TE = 0) or a fully wrong one (TE = 1) yields a large finite
        value instead of a division by zero or an infinite logarithm.
        """
        clipped = min(max(total_error, self._EPSILON), 1 - self._EPSILON)
        return 0.5 * np.log((1 - clipped) / clipped)

    def update_weights(self, missclassifications, weights, stump_performance):
        """Return the re-weighted (not yet normalised) sample weights.

        Misclassified samples are multiplied by ``exp(stump_performance)``;
        correctly classified samples are divided by it.
        """
        scale = np.exp(stump_performance)
        return [
            weight * scale if missed else weight / scale
            for missed, weight in zip(missclassifications, weights)
        ]

    def fit(self, iterations, features = (0,2), target = 'target'):
        """Run the boosting rounds, appending one stump per round.

        Args:
            iterations: Number of boosting rounds.
            features: ``(start, stop)`` positional column slice selecting the
                feature columns of ``self.X``.
            target: Name of the label column; its values are mapped with
                ``label * 2 - 1`` (so 0/1 become -1/+1).

        After the call ``self.X`` holds the last re-sampled frame.
        """
        self.initialize_weights()
        n_samples = self.X.shape[0]

        for _ in range(iterations):
            # Features and labels are both taken from the *current* frame so
            # they stay row-aligned after each re-sampling.
            X_features = self.X.iloc[:, features[0]:features[1]]
            y = self.X[target] * 2 - 1

            clf_stump = DecisionTreeClassifier(criterion = 'gini', random_state = 100, max_depth = 1).fit(X_features, y)
            y_pred = clf_stump.predict(X_features)

            missclassifications = self.find_misclassifications(y_pred, y)
            current_weights = self.X['weights'].tolist()
            total_error = self.calculate_total_error(missclassifications, current_weights)
            stump_performance = self.calculate_stump_performance(total_error)

            self.clf_stumps.append(clf_stump)
            self.total_errors.append(total_error)
            self.stump_performances.append(stump_performance)

            updated = np.asarray(
                self.update_weights(missclassifications, current_weights, stump_performance),
                dtype=float,
            )
            self.X['weights'] = updated / updated.sum()

            self.X = self.X.sample(
                n_samples,
                replace = True,
                weights = self.X['weights'],
                random_state = self._rng,
            )
            # The re-sampled frame already encodes the weights through row
            # duplication, so start the next round from uniform weights.
            # Keeping the old values would double-count them and leave a
            # weight column that no longer sums to 1.
            self.initialize_weights()

    def predict(self, X_test, noofcols):
        """Return the sign of the performance-weighted stump votes.

        Args:
            X_test: One sample (a flat sequence or Series of ``noofcols``
                values) or several samples that reshape to
                ``(-1, noofcols)``.
            noofcols: Number of feature columns the stumps were trained on.

        Returns:
            Array with one entry per sample: +1.0 or -1.0, or 0.0 on a tie.
        """
        samples = np.asarray(X_test, dtype=float).reshape(-1, noofcols)
        stump_preds = np.array([clf_stump.predict(samples) for clf_stump in self.clf_stumps])
        return np.sign(np.dot(self.stump_performances, stump_preds))

In [9]:
# Load the two-class frame, hand it to the booster, and train for ten
# boosting rounds with the default feature slice and target column.
X_new = load_dataset()
adaboost_clf = AdaBoostCustom(X_new)
adaboost_clf.fit(iterations = 10)

In [17]:
# Spot-check one row: compare the ensemble's vote with the row's label.
predict_index = 90

# The first two columns are the features the stumps were trained on.
sample_features = X_new.iloc[predict_index, 0:2]
predicted_value = adaboost_clf.predict(sample_features, 2)

# Column 2 is the target; apply the same label * 2 - 1 mapping used in fit.
original_value = X_new.iloc[predict_index, 2] * 2 -1

print(f"Predicted Value : {predicted_value}")
print(f"Original Value : {original_value}")

Predicted Value : [1.]
Original Value : 1
