In [1]:
import numpy as np
from sklearn.datasets import load_iris
from collections import Counter
from sklearn.model_selection import train_test_split
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report
import matplotlib.pyplot as plt
from sklearn.tree import plot_tree

In [2]:
# Baseline: scikit-learn's own AdaBoostClassifier on the iris data.
iris = load_iris()
X = iris.data
y = iris.target

# Hold out 30% of the samples for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Build the 100-round boosted ensemble and train it (fit returns the estimator itself).
clf = AdaBoostClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# Label the held-out samples.
y_pred = clf.predict(X_test)

# Report overall accuracy followed by the per-class breakdown.
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy * 100:.2f}%")

print("Classification Report:")
print(classification_report(y_test, y_pred))


Accuracy: 100.00%
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        19
           1       1.00      1.00      1.00        13
           2       1.00      1.00      1.00        13

    accuracy                           1.00        45
   macro avg       1.00      1.00      1.00        45
weighted avg       1.00      1.00      1.00        45



## AdaBoost From Scratch

In [3]:
class AdaBoost:
    """Multi-class AdaBoost (SAMME) over depth-1 decision trees (stumps).

    Each boosting round fits a stump on the re-weighted training set, gives it
    the vote weight ``alpha = log((1 - err) / err) + log(K - 1)`` (``K`` is the
    number of classes, ``err`` the stump's weighted training error) and then
    multiplies the weight of every misclassified sample by ``exp(alpha)``.

    Parameters
    ----------
    n_estimators : int, default=100
        Maximum number of boosting rounds. Fewer stumps are kept when boosting
        stops early (see ``fit``).
    random_state : int or None, default=None
        Passed to every ``DecisionTreeClassifier`` so that the choice between
        equally good splits is repeatable. ``None`` keeps the previous,
        unseeded behaviour.

    Attributes
    ----------
    trees : list
        The fitted stumps, in boosting order.
    alphas : list of float
        Vote weight of each stump; same length as ``trees``.
    classes_ : ndarray
        Sorted unique training labels. Only available after ``fit``.
    """

    def __init__(self, n_estimators=100, random_state=None):
        self.trees = []
        self.alphas = []
        self.n_estimators = n_estimators
        self.random_state = random_state

    def fit(self, X, y):
        """Fit the boosted ensemble on ``X`` / ``y``.

        Boosting stops before ``n_estimators`` rounds when a stump classifies
        the weighted training set perfectly (its vote weight would be
        infinite) or when a stump is no better than random guessing
        (``err >= 1 - 1/K``, i.e. a non-positive vote weight).

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training samples.
        y : array-like of shape (n_samples,)
            Class labels.

        Returns
        -------
        self : AdaBoost
            The fitted instance.
        """
        y = np.asarray(y)

        # Start from a clean slate so a second fit() replaces the previous
        # ensemble instead of stacking a new one on top of it.
        self.trees = []
        self.alphas = []
        self.classes_ = np.unique(y)

        n_classes = len(self.classes_)
        n_samples = np.shape(X)[0]
        w = np.full(n_samples, 1 / n_samples)

        for _ in range(self.n_estimators):
            # Fit the weak classifier on the current sample weights.
            stump = DecisionTreeClassifier(max_depth=1, random_state=self.random_state)
            stump.fit(X, y, sample_weight=w)
            missed = stump.predict(X) != y

            # Weighted training error of this stump.
            err = np.sum(w * missed) / np.sum(w)

            if err <= 0:
                # Perfect stump: log((1 - err) / err) is infinite and the
                # weight update would turn every weight into NaN. Keep the
                # stump with a finite vote and stop; nothing is left to boost.
                self.trees.append(stump)
                self.alphas.append(1.0)
                break

            if err >= 1 - 1 / n_classes:
                # No better than chance: alpha would be <= 0. Discard the
                # stump and stop, unless it is the only one, in which case it
                # is kept with unit weight so predict() still has a voter.
                if not self.trees:
                    self.trees.append(stump)
                    self.alphas.append(1.0)
                break

            # SAMME vote weight.
            alpha = np.log((1 - err) / err) + np.log(n_classes - 1)

            self.trees.append(stump)
            self.alphas.append(alpha)

            # Up-weight the misclassified samples, then renormalise.
            w = w * np.exp(alpha * missed)
            w /= np.sum(w)

        return self

    def predict(self, X):
        """Predict class labels by alpha-weighted majority vote of the stumps.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Samples to classify.

        Returns
        -------
        ndarray of shape (n_samples,)
            Predicted labels, taken from the training label set.

        Raises
        ------
        ValueError
            If the ensemble holds no stumps (``fit`` was not called).
        """
        if not self.trees:
            raise ValueError("This AdaBoost instance is not fitted yet; call fit() first.")

        # Prefer the label set recorded by fit(); fall back to the first
        # stump's for ensembles whose trees/alphas were assigned by hand.
        classes = getattr(self, "classes_", None)
        if classes is None:
            classes = self.trees[0].classes_
        classes = np.asarray(classes)

        n_samples = np.shape(X)[0]
        rows = np.arange(n_samples)
        class_votes = np.zeros((n_samples, len(classes)))

        # Iterate over the stumps actually stored, which can be fewer than
        # n_estimators after an early stop.
        for tree, alpha in zip(self.trees, self.alphas):
            # Map predicted labels to vote columns; classes is sorted, so
            # searchsorted yields each label's position.
            cols = np.searchsorted(classes, tree.predict(X))
            class_votes[rows, cols] += alpha

        return classes[np.argmax(class_votes, axis=1)]

In [4]:
# Train the from-scratch ensemble on the same training split as the baseline,
# then label the held-out samples.
model = AdaBoost()
model.fit(X_train, y_train)

pred = model.predict(X_test)

In [5]:
# Score the from-scratch ensemble against the held-out labels.
accuracy = accuracy_score(y_true=y_test, y_pred=pred)
print(f"Accuracy: {accuracy * 100:.2f}%")

# Per-class precision / recall / F1 for the same predictions.
print("Classification Report:")
print(classification_report(y_true=y_test, y_pred=pred))

Accuracy: 100.00%
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        19
           1       1.00      1.00      1.00        13
           2       1.00      1.00      1.00        13

    accuracy                           1.00        45
   macro avg       1.00      1.00      1.00        45
weighted avg       1.00      1.00      1.00        45

