In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier # Often used as the base estimator
from sklearn.datasets import make_classification, make_moons # For generating sample data
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [3]:
# Section banner: marks the start of the AdaBoostClassifier walkthrough.
print("--- AdaBoostClassifier Example ---")

--- AdaBoostClassifier Example ---


# 1. Generate Sample Data (Binary Classification)
# Using make_classification for a slightly more complex scenario

In [6]:
# Synthetic binary-classification problem: 1000 samples, 20 features.
X, y = make_classification(
    # --- size and class layout ---
    n_samples=1000,
    n_classes=2,
    n_clusters_per_class=2,
    weights=[0.5, 0.5],  # request an even split between the two classes
    # --- feature make-up (20 columns in total) ---
    n_features=20,
    n_informative=10,    # features that actually carry signal
    n_redundant=5,       # features derived from the informative ones
    n_repeated=0,        # no duplicated features
    # --- noise and reproducibility ---
    flip_y=0.05,         # fraction of samples whose label is assigned at random
    random_state=42,     # fixed seed so the dataset is identical on every run
)

# 2. Split Data into Training and Testing sets

In [10]:
# Hold out 30% of the samples for testing. Stratifying on y keeps the class
# proportions the same in both splits; the seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    stratify=y,
    random_state=42,
)

# 3. Initialize the AdaBoost Classifier
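# Base estimator: the weak learner model. It is the first constructor argument
# (named `estimator` in scikit-learn >= 1.2, `base_estimator` in earlier releases).
# - If None, it defaults to DecisionTreeClassifier(max_depth=1) - a decision stump.
# - You can specify other estimators, e.g., a slightly deeper tree.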

In [14]:
# Weak learner handed to AdaBoost: a depth-1 decision tree (a "stump"),
# which is what classic AdaBoost uses.
base_estimator = DecisionTreeClassifier(max_depth=1)

# n_estimators: The maximum number of weak learners to train.
# learning_rate: Shrinks the contribution of each classifier. There's a trade-off
#                between learning_rate and n_estimators. Lower learning_rate
#                often requires more n_estimators for similar performance but
#                can improve generalization.
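# algorithm: In older scikit-learn releases the default is 'SAMME.R', which generally
#            converges faster than 'SAMME' and achieves better results if the base
#            estimator supports probability prediction. 'SAMME' uses discrete boosting.
#            Note: 'SAMME.R' was deprecated in scikit-learn 1.4 and removed in 1.6,
#            where 'SAMME' is the only algorithm. This notebook leaves the parameter
#            unset, so the installed version's default applies.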

In [22]:
# Boosted ensemble built on the `base_estimator` stump.
ada_clf = AdaBoostClassifier(
    base_estimator,     # weak learner, passed positionally as the first argument
    random_state=42,    # fixed seed for reproducible boosting
    learning_rate=1.0,  # scales the contribution of each weak learner
    n_estimators=50,    # number of boosting stages (weak learners)
)

# --- Alternative Initialization (using default stump) ---
# ada_clf = AdaBoostClassifier(n_estimators=50, learning_rate=1.0, random_state=42)
# ---

# 4. Train the AdaBoost Model

In [24]:
# Announce the step, fit the stump-based ensemble on the training split,
# then confirm completion.
print("\nTraining AdaBoost model...")
ada_clf.fit(X_train, y_train)
print("Training complete.")


Training AdaBoost model...




Training complete.


# 5. Make Predictions on the Test Set

In [27]:
# Predict class labels for the held-out test split with the fitted ensemble.
y_pred = ada_clf.predict(X_test)

In [29]:
# 6. Evaluate the Model

In [31]:
# Fraction of test samples whose predicted label matches the true label.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"\nModel Accuracy: {accuracy:.4f}")


Model Accuracy: 0.8267


In [33]:
# Per-class precision / recall / F1 and support on the test split.
print("\nClassification Report:")
print(classification_report(y_true=y_test, y_pred=y_pred))


Classification Report:
              precision    recall  f1-score   support

           0       0.82      0.83      0.83       148
           1       0.83      0.82      0.83       152

    accuracy                           0.83       300
   macro avg       0.83      0.83      0.83       300
weighted avg       0.83      0.83      0.83       300



In [35]:
# Confusion matrix on the test split: rows are true classes,
# columns are predicted classes.
print("\nConfusion Matrix:")
print(confusion_matrix(y_true=y_test, y_pred=y_pred))


Confusion Matrix:
[[123  25]
 [ 27 125]]


In [37]:
# --- Optional: Explore Model Properties ---
# `estimators_` is the collection of fitted weak learners, so its length is
# the number of learners the fitted ensemble actually contains.
print(f"\nNumber of estimators used: {len(ada_clf.estimators_)}")


Number of estimators used: 50


# print("\nEstimator weights (importance):", ada_clf.estimator_weights_) # Weights (alpha) assigned to each learner

# print("\nEstimator errors:", ada_clf.estimator_errors_) # Error rate of each learner during training

# --- Example with a different base estimator (slightly deeper tree) ---

In [41]:
# Section banner: second experiment, boosting depth-3 trees instead of stumps.
print("\n--- Trying AdaBoost with a deeper base tree (max_depth=3) ---")


--- Trying AdaBoost with a deeper base tree (max_depth=3) ---


In [45]:
# Same boosting setup as before, but each weak learner is a depth-3 tree.
ada_deep_clf = AdaBoostClassifier(
    DecisionTreeClassifier(random_state=42, max_depth=3),
    random_state=42,
    # A smaller learning rate is often a good idea with more complex base models.
    learning_rate=0.5,
    n_estimators=50,
)

In [47]:
# Fit the deeper-tree ensemble on the same training split as the stump model.
ada_deep_clf.fit(X_train, y_train)



In [49]:
# Predict test-split labels with the deeper-tree ensemble.
y_pred_deep = ada_deep_clf.predict(X_test)

In [51]:
# Test accuracy of the deeper-tree ensemble, for comparison with `accuracy`.
accuracy_deep = accuracy_score(y_true=y_test, y_pred=y_pred_deep)

In [53]:
# Report the deeper-tree accuracy, with a reminder that the comparison with
# the stump model is data- and parameter-dependent.
print(f"Model Accuracy (Deeper Base Tree): {accuracy_deep:.4f}")
print("(Note: Performance might increase or decrease depending on the data and parameters)")

Model Accuracy (Deeper Base Tree): 0.8533
(Note: Performance might increase or decrease depending on the data and parameters)


# --- AdaBoostRegressor Example (Brief) ---
# For regression problems, use AdaBoostRegressor

In [56]:
from sklearn.ensemble import AdaBoostRegressor
from sklearn.datasets import make_regression
from sklearn.metrics import mean_squared_error

In [58]:
# Section banner: marks the start of the AdaBoostRegressor walkthrough.
print("\n--- AdaBoostRegressor Example ---")


--- AdaBoostRegressor Example ---


In [60]:
# Synthetic regression problem: 500 samples, 5 features, noise level 10 on
# the target, and a fixed seed for reproducibility.
X_reg, y_reg = make_regression(
    n_samples=500,
    n_features=5,
    noise=10,
    random_state=42,
)

In [62]:
# Hold out 30% of the regression samples for testing (seeded, no stratification).
X_reg_train, X_reg_test, y_reg_train, y_reg_test = train_test_split(
    X_reg,
    y_reg,
    test_size=0.3,
    random_state=42,
)

# Default base estimator for Regressor is DecisionTreeRegressor(max_depth=3)

In [65]:
# Boosted regressor using the library's default base estimator.
ada_reg = AdaBoostRegressor(
    loss='linear',      # loss used when updating sample weights between rounds
    learning_rate=1.0,  # weight applied to each regressor's contribution
    n_estimators=50,    # upper bound on the number of boosting rounds
    random_state=42,    # fixed seed for reproducible boosting
)

In [67]:
# Fit the boosted regressor on the regression training split.
ada_reg.fit(X_reg_train, y_reg_train)

In [69]:
# Predict targets for the held-out regression test split.
y_reg_pred = ada_reg.predict(X_reg_test)

In [71]:
# Mean squared error between true and predicted test targets
# (in squared target units).
mse = mean_squared_error(y_true=y_reg_test, y_pred=y_reg_pred)

In [73]:
# Report the test-set MSE to four decimal places.
print(f"AdaBoost Regressor Mean Squared Error: {mse:.4f}")

AdaBoost Regressor Mean Squared Error: 1726.7472
