# ML from scratch : AdaBoost, a step by step code with python

By Sabrine Bendimerad

[Lien de l'article (FR)](https://medium.com/@sabrine.bendimerad1/adaboost-d%C3%A9cryptage-%C3%A9tape-par-%C3%A9tape-d53878335cdf)

[Article link (EN)](https://)

## Step 1 : AdaBoost implementation

In [None]:
import numpy as np

def decision_stump(X, y, weights):
    """
    Finds the best decision stump using a single feature.
    Parameters:
    - X: numpy array of features
    - y: numpy array of labels
    - weights: numpy array of instance weights
    Returns:
    - best_stump: dictionary containing the best stump parameters
    """
    num_features = X.shape[1]
    best_stump = {'feature': None, 'threshold': None, 'polarity': None, 'error': float('inf'), 'predictions': None}

    for feature in range(num_features):
        feature_values = X[:, feature]
        thresholds = np.unique(feature_values)

        for threshold in thresholds:
            for polarity in [1, -1]:
                predictions = np.ones(len(y))  # Initialize all predictions to 1.
                # Apply threshold: if polarity is 1, predict -1 if feature value is below threshold.
                if polarity == 1:
                    predictions[X[:, feature] < threshold] = -1
                else:  # If polarity is -1, predict 1 if feature value is above threshold.
                    predictions[X[:, feature] > threshold] = 1

                error = sum(weights[y != predictions])  # Calculate weighted error.

                if error < best_stump['error']:  # Update best stump if current one has lower error.
                    best_stump['error'] = error
                    best_stump['feature'] = feature
                    best_stump['threshold'] = threshold
                    best_stump['polarity'] = polarity
                    best_stump['predictions'] = predictions  # Store predictions in the stump.


    return best_stump

In [2]:
class AdaBoost:
    """
    AdaBoost algorithm implementation.
    """
    def __init__(self, n_learners=5):
        self.n_learners = n_learners  # Number of weak learners to use.
        self.learners = []  # List of weak learners.
        self.alphas = []  # List of learner weights.

    def fit(self, X, y):
        """
        Trains the AdaBoost model.
        Parameters:
        - X: numpy array of features
        - y: numpy array of labels
        """
        n_samples, n_features = X.shape
        weights = np.full(n_samples, (1 / n_samples))  # Initialize weights equally.

        for _ in range(self.n_learners):
            stump = decision_stump(X, y, weights)
            predictions = stump['predictions']  # Use predictions from the decision stump.


            error = sum(weights[y != predictions])
            alpha = 0.5 * np.log((1.0 - error) / (error + 1e-10))  # Calculate alpha.

            weights *= np.exp(-alpha * y * predictions)  # Update weights.
            weights /= np.sum(weights)  # Normalize weights.

            self.learners.append(stump)  # Save the current stump.
            self.alphas.append(alpha)  # Save the current alpha.

    def predict(self, X):
        """
        Makes predictions using the trained AdaBoost model.
        Parameters:
        - X: numpy array of features to predict
        Returns:
        - final_predictions: numpy array of predicted labels
        """
        final_predictions = np.zeros(X.shape[0])
        for alpha, learner in zip(self.alphas, self.learners):
            predictions = np.ones(X.shape[0])
            if learner['polarity'] == 1:
                predictions[X[:, learner['feature']] < learner['threshold']] = -1
            else:
                predictions[X[:, learner['feature']] > learner['threshold']] = 1
            final_predictions += alpha * predictions

        return np.sign(final_predictions)  # Return sign of predictions for classification.

## Step 2 : AdaBoost test

In [8]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
import numpy as np

# Assuming AdaBoost class is defined as previously discussed

# Load the breast cancer dataset
data = load_breast_cancer()
X, y = data.data, data.target

# Standardize the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Convert labels to -1 and 1
y[y == 0] = -1
y[y == 1] = 1

# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Initialize and train the AdaBoost model
model = AdaBoost(n_learners=10)
model.fit(X_train, y_train)

# Predict on the test set
predictions = model.predict(X_test)

# Calculate and print the accuracy on the test set
accuracy = accuracy_score(y_test, predictions)
print(f"Accuracy on the test set: {accuracy:.2f}")


Accuracy on the test set: 0.61
