# ✅ Step-by-Step AdaBoost in Python

In [1]:
import numpy as np
from sklearn.tree import DecisionTreeClassifier
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Step 1: Create a Simple Dataset

In [2]:
# Synthetic two-feature, two-class problem; the fixed seed makes reruns identical
X, y = make_classification(
    n_samples=100,
    n_features=2,
    n_informative=2,
    n_redundant=0,
    n_clusters_per_class=1,
    random_state=42,
)

# Remap the class labels from {0, 1} to {-1, +1}: anything that is not
# class 0 becomes +1, class 0 becomes -1
y = np.where(y != 0, 1, -1)

# Hold out 20% of the samples as a test set (seeded split)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)


# Step 2: Initialize Parameters

In [3]:
# How many boosting rounds (one weak learner is fitted per round)
M = 10

# Uniform starting distribution over the training samples: each gets 1/N
N = X_train.shape[0]
w = np.ones(N) / N

# Fitted weak learners and their vote weights, filled in round by round
learners, alphas = [], []


# Step 3: AdaBoost Iterations

In [4]:
# Boosting loop: fit M weighted decision stumps, re-weighting the training
# samples after every round so the next stump concentrates on earlier mistakes.

# Reset the boosting state here so that re-running this cell rebuilds the same
# ensemble instead of appending to, and continuing from, a previous run.
w = np.full(len(X_train), 1 / len(X_train))
learners = []
alphas = []

# Bound used to keep the weighted error strictly inside (0, 1). Guarding only
# the denominator leaves log(0) = -inf when a stump is wrong on all the weight.
ERR_EPS = 1e-10

for m in range(M):
    # Train a depth-1 tree (decision stump) on the currently weighted samples
    stump = DecisionTreeClassifier(max_depth=1, random_state=42)
    stump.fit(X_train, y_train, sample_weight=w)
    y_pred = stump.predict(X_train)

    # Weighted error: share of the total sample weight on misclassified points
    err = np.sum(w * (y_pred != y_train)) / np.sum(w)

    # Learner weight alpha = 0.5 * ln((1 - err) / err), computed on the clipped
    # error so it stays finite for err == 0 and err == 1
    err_safe = np.clip(err, ERR_EPS, 1 - ERR_EPS)
    alpha = 0.5 * np.log((1 - err_safe) / err_safe)

    # Labels and predictions are in {-1, +1}, so y_train * y_pred is +1 for a
    # correct prediction (weight shrinks for alpha > 0) and -1 for a wrong one
    # (weight grows for alpha > 0)
    w *= np.exp(-alpha * y_train * y_pred)
    w /= np.sum(w)  # renormalize so the weights sum to 1 again

    # Keep the stump and its vote weight for the final ensemble
    learners.append(stump)
    alphas.append(alpha)

    print(f"Iteration {m+1}: Error={err:.4f}, Alpha={alpha:.4f}")


Iteration 1: Error=0.0125, Alpha=2.1847
Iteration 2: Error=0.0506, Alpha=1.4656
Iteration 3: Error=0.1133, Alpha=1.0286
Iteration 4: Error=0.1485, Alpha=0.8732
Iteration 5: Error=0.2126, Alpha=0.6546
Iteration 6: Error=0.1825, Alpha=0.7498
Iteration 7: Error=0.1942, Alpha=0.7115
Iteration 8: Error=0.1898, Alpha=0.7258
Iteration 9: Error=0.1915, Alpha=0.7203
Iteration 10: Error=0.1908, Alpha=0.7224


# Step 4: Make Final Predictions

In [5]:
def adaboost_predict(X, models=None, weights=None):
    """Predict {-1, +1} labels by the alpha-weighted vote of the weak learners.

    Parameters
    ----------
    X : array with a ``shape`` attribute
        Samples to classify; one prediction is produced per row
        (``X.shape[0]`` entries).
    models : sequence of fitted estimators exposing ``predict``, optional
        Weak learners to combine. Defaults to the notebook-level ``learners``.
    weights : sequence of float, optional
        Vote weight of each learner, paired with ``models`` in order.
        Defaults to the notebook-level ``alphas``.

    Returns
    -------
    numpy.ndarray of float
        One label per sample, either +1.0 or -1.0. A weighted vote that sums
        to exactly zero is resolved to +1.0, because the 0.0 that ``np.sign``
        returns for a tie is not a valid class label.
    """
    if models is None:
        models = learners
    if weights is None:
        weights = alphas

    # Accumulate the weighted votes of every weak learner
    final_pred = np.zeros(X.shape[0])
    for alpha, learner in zip(weights, models):
        final_pred += alpha * learner.predict(X)

    labels = np.sign(final_pred)
    labels[labels == 0] = 1.0  # break exact ties deterministically
    return labels


# Step 5: Evaluate

In [7]:
# Ensemble predictions for the training split and the held-out split
y_pred_train, y_pred_test = (
    adaboost_predict(features) for features in (X_train, X_test)
)

# Fraction of correctly labelled samples on each split
train_accuracy = accuracy_score(y_train, y_pred_train)
test_accuracy = accuracy_score(y_test, y_pred_test)

print("Train Accuracy:", train_accuracy)
print("Test Accuracy:", test_accuracy)


Train Accuracy: 1.0
Test Accuracy: 1.0


# 🧠 Key Concepts Covered:
1. Sample weighting
2. Error and alpha calculation
3. Updating sample weights (misclassified points gain weight, correctly classified points lose weight)
4. Final weighted majority vote