In [None]:
from sklearn.ensemble import AdaBoostClassifier, AdaBoostRegressor
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_classification

import pandas as pd

In [2]:
# Synthetic binary classification data with a 70/30 class imbalance.
x, y = make_classification(
    n_samples=1000,
    n_classes=2,
    weights=[0.7, 0.3],
    random_state=42,
)

# Hold out 20% of the samples as a test set; the fixed seed keeps the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

**AdaBoost**

**Weight Calculation:**
$$\alpha_t = \frac{1}{2} \ln\left(\frac{1 - \varepsilon_t}{\varepsilon_t}\right)$$

Where:
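- $\alpha_t$: Weight of $t$-th weak learner
- $\varepsilon_t$: Weighted error rate of $t$-th weak learner (misclassified instances weighted by $w_i^{(t)}$)

Note: for two classes, scikit-learn's SAMME implementation uses $\alpha_t = \eta \cdot \ln\left(\frac{1 - \varepsilon_t}{\varepsilon_t}\right)$, where $\eta$ is `learning_rate`, i.e. without the factor $\frac{1}{2}$, so the values in `estimator_weights_` are on that scale.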

**Instance Weight Update:**
$$w_i^{(t+1)} = \frac{w_i^{(t)} \exp(-\alpha_t \cdot y_i \cdot h_t(x_i))}{Z_t}$$

Where:
- $w_i$: Weight of instance $i$
- $y_i$: True label, encoded as $-1$ or $+1$
- $h_t(x_i)$: Prediction of $t$-th learner, also in $\{-1, +1\}$
- $Z_t$: Normalization factor

**Final Prediction:**
$$H(x) = \text{sign}\left(\sum_t \alpha_t \cdot h_t(x)\right)$$

Where:
- $H(x)$: Final prediction
- $h_t(x)$: Prediction of $t$-th learner
- $\alpha_t$: Weight of $t$-th learner

In [3]:
# Boost 50 depth-1 decision trees (stumps) with the SAMME algorithm.
# fit() returns the fitted estimator, so construction and training are chained.
adaboost = AdaBoostClassifier(
    estimator=DecisionTreeClassifier(max_depth=1),
    n_estimators=50,
    learning_rate=1.0,
    algorithm='SAMME',
    random_state=42,
).fit(x_train, y_train)

# Score the ensemble on the held-out test split.
y_pred = adaboost.predict(x_test)
print(f"AdaBoost Accuracy: {accuracy_score(y_test, y_pred):.3f}")

AdaBoost Accuracy: 0.870




In [4]:
# One row per input column, ranked from most to least important.
importance = pd.DataFrame(
    {
        'feature': [f"feature_{i}" for i in range(x_train.shape[1])],
        'importance': adaboost.feature_importances_,
    }
).sort_values(by='importance', ascending=False)

# Heading and table in one call; sep="\n" puts the table on its own line.
print("\nFeature Importance:", importance, sep="\n")


Feature Importance:
       feature  importance
5    feature_5    0.308520
14  feature_14    0.173387
1    feature_1    0.106111
10  feature_10    0.080602
6    feature_6    0.054288
0    feature_0    0.039846
17  feature_17    0.038460
11  feature_11    0.035936
2    feature_2    0.031066
13  feature_13    0.024249
8    feature_8    0.023117
18  feature_18    0.023043
12  feature_12    0.021761
15  feature_15    0.021258
7    feature_7    0.018356
3    feature_3    0.000000
9    feature_9    0.000000
4    feature_4    0.000000
16  feature_16    0.000000
19  feature_19    0.000000


In [5]:
# Voting weight of each of the first five boosting stages
# (a larger weight means that stage contributes more to the final prediction).
print(
    f"\nEstimator Weights (first 5): {adaboost.estimator_weights_[:5]}"
)


Estimator Weights (first 5): [2.19722458 0.51972446 0.60663303 0.66779734 0.68589935]


In [8]:
# Standalone test-set error of each of the first 10 weak learners, next to its voting weight.
# NOTE(review): each learner is scored on its own with plain (unweighted) accuracy on x_test,
# while the weights were produced during training on x_train, so this error is not necessarily
# the one that determined the weight; values above 0.5 can therefore appear.
print("\nError Rate by Stage:")
for i, (estimator, weight) in enumerate(
    zip(adaboost.estimators_[:10], adaboost.estimator_weights_)
):
    error = 1 - accuracy_score(y_test, estimator.predict(x_test))
    print(f"Stage {i+1}: Error={error:.3f}, Weight={weight:.3f}")


Error Rate by Stage:
Stage 1: Error=0.140, Weight=2.197
Stage 2: Error=0.300, Weight=0.520
Stage 3: Error=0.260, Weight=0.607
Stage 4: Error=0.460, Weight=0.668
Stage 5: Error=0.595, Weight=0.686
Stage 6: Error=0.185, Weight=0.609
Stage 7: Error=0.385, Weight=0.494
Stage 8: Error=0.415, Weight=0.388
Stage 9: Error=0.460, Weight=0.448
Stage 10: Error=0.640, Weight=0.542


**AdaBoost Components in Detail**
1. Weak Learners

In [None]:
# The classic weak learner: a decision stump (a tree limited to depth 1).
stump = DecisionTreeClassifier(max_depth=1)

# Any other estimator can be boosted instead; here, Gaussian Naive Bayes with 10 rounds.
# The ensemble is only constructed in this cell, not fitted.
adaboost_nb = AdaBoostClassifier(estimator=GaussianNB(), n_estimators=10, random_state=42)

2. Learning Rate

    ```
    Lower learning rate = more conservative updates
    ```

In [None]:
# Compare test accuracy across learning rates; ensemble size and seed are held fixed.
learning_rate = [0.5, 1.0, 1.5]
for lr in learning_rate:
    # fit() returns the estimator itself, so the fitted model is bound directly.
    model = AdaBoostClassifier(
        n_estimators=50, learning_rate=lr, random_state=42
    ).fit(x_train, y_train)
    score = model.score(x_test, y_test)
    print(f"Learning Rate {lr}: {score:.3f}")

Learning Rate 0.5: 0.875
Learning Rate 1.0: 0.870
Learning Rate 1.5: 0.840


3. Number of Estimators
    
    ```
    More estimators = usually better up to a point (with diminishing returns), but slower to train
    ```

In [11]:
# Compare test accuracy across ensemble sizes; all other settings stay at their defaults.
n_estimator_list = [10, 25, 50, 100, 200]
for n in n_estimator_list:
    model = AdaBoostClassifier(n_estimators=n, random_state=42)
    # Train, then evaluate on the held-out split in one chained expression.
    score = model.fit(x_train, y_train).score(x_test, y_test)
    print(f"n_estimators {n}: {score:.3f}")

n_estimators 10: 0.870
n_estimators 25: 0.870
n_estimators 50: 0.870
n_estimators 100: 0.875
n_estimators 200: 0.875


**AdaBoost for Regression**

In [None]:
# AdaBoostRegressor with the default base estimator (estimator=None) and a small learning rate.
# `loss` may be 'linear', 'square' or 'exponential'.
# NOTE(review): y_train comes from make_classification, so the target here is the 0/1 class
# label rather than a continuous value; confirm this is intended for the demo.
adaboost_reg = AdaBoostRegressor(
    estimator=None,
    n_estimators=50,
    learning_rate=0.1,
    loss='linear',
    random_state=42,
).fit(x_train, y_train)

# NOTE: this rebinds y_pred, replacing the classifier predictions stored under the same name.
y_pred = adaboost_reg.predict(x_test)