# AdaBoost Classifier

In [1]:
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import warnings
warnings.filterwarnings('ignore')

In [2]:
X,y = make_classification(n_samples=1000, n_features=20, n_classes=2, random_state=1)

In [3]:
X

array([[-2.04582165, -0.13791624, -0.08071423, ...,  2.48194524,
         0.74236675,  0.23154789],
       [-0.98726024,  1.30120189,  2.37734888, ...,  0.55445754,
        -0.21892143, -0.37608578],
       [ 0.57335921,  0.09375582,  0.4662521 , ..., -0.6088508 ,
         0.79903499, -0.17121177],
       ...,
       [-0.70737159,  1.07650943,  0.58510456, ..., -1.51337602,
         0.90239871, -0.69230951],
       [-0.20706849,  1.17319848, -1.94478665, ..., -0.32820676,
         1.5711921 ,  1.14877729],
       [-2.16769231, -2.54871672,  2.89359255, ...,  0.71535366,
         0.34329241,  1.07350284]])

In [4]:
y

array([0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1,
       1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0,
       0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1,
       0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1,
       1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0,
       0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0,
       1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1,
       0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0,
       0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0,
       0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1,
       0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0,
       1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0,
       0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0,
       0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

In [5]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1)

In [6]:
model = AdaBoostClassifier()
model

In [7]:
model.fit(X_train, y_train)

In [8]:
y_pred = model.predict(X_test)
y_pred

array([0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1,
       0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1,
       0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1,
       0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0,
       1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1,
       1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1,
       1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0,
       0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0,
       1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
       1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0,
       0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0,
       1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1,
       0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1,
       1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1])

In [9]:
print('Current model performance')
print('Accuracy Score:', accuracy_score(y_test, y_pred))
print('Confusion Matrix:')
print(confusion_matrix(y_test, y_pred))
print('Classification Report:')
print(classification_report(y_test, y_pred))

Current model performance
Accuracy Score: 0.8533333333333334
Confusion Matrix:
[[119  20]
 [ 24 137]]
Classification Report:
              precision    recall  f1-score   support

           0       0.83      0.86      0.84       139
           1       0.87      0.85      0.86       161

    accuracy                           0.85       300
   macro avg       0.85      0.85      0.85       300
weighted avg       0.85      0.85      0.85       300



In [10]:
from sklearn.model_selection import GridSearchCV
param_grid = {
    'n_estimators': [50, 100, 200],
    'learning_rate': [0.01, 0.1, 1.0, 1.5, 2.0],
    'algorithm': ['SAMME']
}

In [11]:
ada_clf = AdaBoostClassifier()
grid_search = GridSearchCV(estimator=ada_clf, param_grid=param_grid, cv=5, verbose=3)
grid_search.fit(X_train, y_train)

Fitting 5 folds for each of 15 candidates, totalling 75 fits
[CV 1/5] END algorithm=SAMME, learning_rate=0.01, n_estimators=50;, score=0.857 total time=   0.5s
[CV 2/5] END algorithm=SAMME, learning_rate=0.01, n_estimators=50;, score=0.857 total time=   0.5s
[CV 3/5] END algorithm=SAMME, learning_rate=0.01, n_estimators=50;, score=0.857 total time=   0.5s
[CV 4/5] END algorithm=SAMME, learning_rate=0.01, n_estimators=50;, score=0.807 total time=   0.5s
[CV 5/5] END algorithm=SAMME, learning_rate=0.01, n_estimators=50;, score=0.821 total time=   0.6s
[CV 1/5] END algorithm=SAMME, learning_rate=0.01, n_estimators=100;, score=0.857 total time=   1.0s
[CV 2/5] END algorithm=SAMME, learning_rate=0.01, n_estimators=100;, score=0.857 total time=   0.9s
[CV 3/5] END algorithm=SAMME, learning_rate=0.01, n_estimators=100;, score=0.857 total time=   1.0s
[CV 4/5] END algorithm=SAMME, learning_rate=0.01, n_estimators=100;, score=0.807 total time=   1.0s
[CV 5/5] END algorithm=SAMME, learning_rate=

In [12]:
grid_search.best_estimator_

In [13]:
grid_search.best_params_

{'algorithm': 'SAMME', 'learning_rate': 0.1, 'n_estimators': 200}

In [14]:
grid_search.best_score_

np.float64(0.8585714285714285)

In [15]:
print('Current model performance')
print('Accuracy Score:', accuracy_score(y_test, y_pred))
print('Confusion Matrix:')
print(confusion_matrix(y_test, y_pred))
print('Classification Report:')
print(classification_report(y_test, y_pred))

Current model performance
Accuracy Score: 0.8533333333333334
Confusion Matrix:
[[119  20]
 [ 24 137]]
Classification Report:
              precision    recall  f1-score   support

           0       0.83      0.86      0.84       139
           1       0.87      0.85      0.86       161

    accuracy                           0.85       300
   macro avg       0.85      0.85      0.85       300
weighted avg       0.85      0.85      0.85       300



# AdaBoost Regressor

In [16]:
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.ensemble import AdaBoostRegressor
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error

In [17]:
X,y = make_regression(n_samples=1000, n_features=2, noise=10, random_state=1)

In [18]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1)

In [19]:
model = AdaBoostRegressor()
model

In [20]:
model.fit(X_train, y_train)

In [21]:
y_pred = model.predict(X_test)
y_pred

array([ -50.98615286,  -99.9180911 ,  160.41380621,    2.05335839,
         67.04731625,   19.8945943 ,  -19.2989683 ,  133.84580878,
       -164.26151144,  120.97802592,   59.40607794,   29.51319919,
        -96.1242144 ,   92.78762284,   31.59920085, -170.97465092,
        149.09709716,   37.74628902,  -23.30518176,   -9.63736439,
       -170.97465092,   26.09440813,   29.51319919,   48.55576418,
          5.47609097,   37.33326853,  -59.91393244,  -96.1242144 ,
        145.11590736,  200.50940256,  196.60554613,   39.25961235,
         96.34091491,   34.40620941,   61.2679308 ,   34.20851076,
        -74.83959747,   95.8884113 , -171.3581158 ,  -22.24960399,
        149.09709716,  133.84580878,  -46.98840356, -176.40123074,
        -22.24960399,   19.8945943 ,  -22.24960399,   67.48011818,
        -92.53627033,  -46.98840356,   37.74628902,   19.8945943 ,
         11.15860964,  -45.60084156,   19.8945943 ,  -36.97545028,
        143.473669  ,   59.40607794, -108.20645003,  142.56522

In [22]:
print('Current model performance')
print('R2 Score:', r2_score(y_test, y_pred))
print('Mean Absolute Error:', mean_absolute_error(y_test, y_pred))
print('Mean Squared Error:', mean_squared_error(y_test, y_pred))

Current model performance
R2 Score: 0.9504882533536726
Mean Absolute Error: 15.141453523544433
Mean Squared Error: 409.2381751631791


In [23]:
from sklearn.model_selection import GridSearchCV
param_grid = {
    'n_estimators': [50, 100, 200],
    'learning_rate': [0.01, 0.1, 0.5, 1.0, 1.5],
    'loss': ['linear', 'square', 'exponential']
}
grid_reg = GridSearchCV(estimator=model, param_grid=param_grid, cv=5, verbose=3)
grid_reg.fit(X_train, y_train)

Fitting 5 folds for each of 45 candidates, totalling 225 fits
[CV 1/5] END learning_rate=0.01, loss=linear, n_estimators=50;, score=0.937 total time=   0.1s
[CV 2/5] END learning_rate=0.01, loss=linear, n_estimators=50;, score=0.935 total time=   0.1s
[CV 3/5] END learning_rate=0.01, loss=linear, n_estimators=50;, score=0.952 total time=   0.1s
[CV 4/5] END learning_rate=0.01, loss=linear, n_estimators=50;, score=0.930 total time=   0.1s
[CV 5/5] END learning_rate=0.01, loss=linear, n_estimators=50;, score=0.934 total time=   0.1s
[CV 1/5] END learning_rate=0.01, loss=linear, n_estimators=100;, score=0.936 total time=   0.2s
[CV 2/5] END learning_rate=0.01, loss=linear, n_estimators=100;, score=0.935 total time=   0.2s
[CV 3/5] END learning_rate=0.01, loss=linear, n_estimators=100;, score=0.955 total time=   0.2s
[CV 4/5] END learning_rate=0.01, loss=linear, n_estimators=100;, score=0.932 total time=   0.2s
[CV 5/5] END learning_rate=0.01, loss=linear, n_estimators=100;, score=0.938 to

In [24]:
grid_reg.best_estimator_

In [25]:
grid_reg.best_params_

{'learning_rate': 1.5, 'loss': 'square', 'n_estimators': 100}

In [26]:
y_pred = grid_reg.predict(X_test)
y_pred

array([ -51.19591459,  -98.05115539,  167.6644754 ,    7.49430891,
         74.82445136,   10.4460737 ,  -18.50655059,  131.62878799,
       -159.29741696,  123.6652676 ,   59.32107825,   21.38288707,
        -97.79202393,   93.10995944,   23.35080127, -173.5139742 ,
        157.1945845 ,   37.01525804,  -25.20639194,   -5.6650059 ,
       -173.5139742 ,   19.46695448,   24.42825857,   47.80673249,
          9.49711057,   24.66282817,  -81.44280443,  -90.36000871,
        155.17080442,  207.77462424,  207.77462424,   34.791582  ,
         96.91579168,   32.61060986,   72.80545963,   26.74409895,
        -83.22519559,   94.61276075, -174.66146698,  -20.44055075,
        159.97401567,  131.62878799,  -47.95475592, -181.96783618,
        -22.43212537,    7.82462924,  -22.43212537,   74.89536675,
        -87.93656643,  -47.95475592,   36.17979428,   28.94170252,
          4.84481504,  -43.56546434,    9.49711057,  -33.91745943,
        150.53809811,   60.46315514, -104.27258611,  145.87792

In [27]:
print('Current model performance')
print('R2 Score:', r2_score(y_test, y_pred))
print('Mean Absolute Error:', mean_absolute_error(y_test, y_pred))
print('Mean Squared Error:', mean_squared_error(y_test, y_pred))

Current model performance
R2 Score: 0.9593150836550478
Mean Absolute Error: 13.858356249994213
Mean Squared Error: 336.2802173110139
