# AdaBoost Classifier

In [1]:
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

import warnings
warnings.filterwarnings('ignore')

In [2]:
# Synthetic two-class dataset: 1000 samples with 20 features each,
# generated from a fixed seed.
X, y = make_classification(
    n_samples=1000,
    n_features=20,
    n_classes=2,
    random_state=1,
)

In [3]:
# Hold out 33% of the samples as the test split (seeded for a stable split).
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)

In [4]:
# Baseline AdaBoost classifier with default hyperparameters.
# The fixed random_state seeds the boosting procedure so that
# "Restart Kernel -> Run All" reproduces the same fitted model and metrics.
classifier = AdaBoostClassifier(random_state=42)
classifier.fit(X_train, y_train)

In [5]:
# Predict class labels for the held-out test samples.
y_pred = classifier.predict(X=X_test)

In [6]:
# Display the predicted labels; a bare last expression is rendered as the
# cell's output.
y_pred

array([1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0,
       1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0,
       0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1,
       1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1,
       1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1,
       1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0,
       1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1,
       0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1,
       0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0,
       0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0,
       1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1,
       0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1,
       0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1,
       1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1,

In [7]:
# Report the baseline classifier's test-split metrics: accuracy, the
# per-class classification report, and the confusion matrix.
# A single print call with a newline separator emits one item per line.
print(
    "Current model performance:",
    f"Accuracy: {accuracy_score(y_test, y_pred)}",
    "Classification Report:",
    classification_report(y_test, y_pred),
    "Confusion Matrix:",
    confusion_matrix(y_test, y_pred),
    sep="\n",
)

Current model performance:
Accuracy: 0.806060606060606
Classification Report:
              precision    recall  f1-score   support

           0       0.83      0.80      0.82       176
           1       0.78      0.81      0.80       154

    accuracy                           0.81       330
   macro avg       0.81      0.81      0.81       330
weighted avg       0.81      0.81      0.81       330

Confusion Matrix:
[[141  35]
 [ 29 125]]


In [8]:
from sklearn.model_selection import GridSearchCV

# Search space: 3 x 5 x 2 = 30 hyperparameter combinations.
# NOTE(review): 'SAMME.R' is deprecated in recent scikit-learn releases —
# confirm the installed version still accepts it before re-running.
param_grid = {
    'n_estimators': [50, 100, 200],
    'learning_rate': [0.01, 0.1, 1.0, 1.5, 2.0],
    'algorithm': ['SAMME', 'SAMME.R'],
}

# Seed the estimator so every candidate is fitted deterministically and the
# selected best_params_ is stable across reruns.
ada = AdaBoostClassifier(random_state=42)

# 5-fold cross-validated grid search.
# With n_jobs=-1 the fits run in parallel workers, so the per-fit progress
# lines requested by verbose=3 may not all appear in the notebook output.
clf = GridSearchCV(
    estimator=ada,
    param_grid=param_grid,
    cv=5,
    verbose=3,
    n_jobs=-1,
)

In [9]:
# Run the cross-validated search using the training split only.
clf.fit(X=X_train, y=y_train)

Fitting 5 folds for each of 30 candidates, totalling 150 fits


In [10]:
# Show the hyperparameter combination the grid search selected.
clf.best_params_

{'algorithm': 'SAMME', 'learning_rate': 0.1, 'n_estimators': 200}

In [11]:
# Keep a handle on the estimator the grid search selected as best.
best_model = clf.best_estimator_

In [12]:
# Predict test-split labels with the tuned estimator.
y_pred_tuned = best_model.predict(X=X_test)

# Report the tuned classifier's metrics on the same test split: accuracy,
# the per-class classification report, and the confusion matrix.
print(
    "Tuned model performance:",
    f"Accuracy: {accuracy_score(y_test, y_pred_tuned)}",
    "Classification Report:",
    classification_report(y_test, y_pred_tuned),
    "Confusion Matrix:",
    confusion_matrix(y_test, y_pred_tuned),
    sep="\n",
)

Tuned model performance:
Accuracy: 0.8484848484848485
Classification Report:
              precision    recall  f1-score   support

           0       0.87      0.85      0.86       176
           1       0.83      0.85      0.84       154

    accuracy                           0.85       330
   macro avg       0.85      0.85      0.85       330
weighted avg       0.85      0.85      0.85       330

Confusion Matrix:
[[149  27]
 [ 23 131]]


# AdaBoost Regressor

In [13]:
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.ensemble import AdaBoostRegressor
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error

In [14]:
# Synthetic regression dataset: 1000 samples with 2 features and noise=10,
# generated from a fixed seed. Rebinds X and y from the classification section.
X, y = make_regression(
    n_samples=1000,
    n_features=2,
    noise=10,
    random_state=1,
)

In [15]:
# Hold out 33% of the regression samples as the test split (seeded).
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)

In [16]:
# Baseline AdaBoost regressor with default hyperparameters.
# The fixed random_state makes the fitted model, and the metrics reported
# from it, reproducible under "Restart Kernel -> Run All".
regressor = AdaBoostRegressor(random_state=42)
regressor.fit(X_train, y_train)

In [17]:
# Predict targets for the held-out test samples, then display them
# (the bare last expression is rendered as the cell's output).
y_pred = regressor.predict(X=X_test)
y_pred

array([  41.44143469,   46.73414036,   85.49133739,  -53.53032952,
        -77.82174826,   76.71894132,  -24.44287431,  -41.54497454,
        -10.10876874,    0.36173215,  215.23512984,   31.81703712,
       -175.33144895, -175.33144895,    3.66652518,   67.3446786 ,
        151.62972803,  -57.34573157,  -37.12363529,  132.51108115,
         89.00335082,  -82.16437678,  150.35271031,   43.87137407,
        -14.99725503,   -1.38923132, -183.57474971,  -78.2278252 ,
         76.71894132,  -78.2278252 ,   86.07182712,   63.67194269,
         75.6399619 ,  134.1326274 ,   44.65890163,   76.71894132,
        -10.7621995 ,   48.29105715,   54.20613697, -142.1360243 ,
         25.22854084,   77.63261207,   75.51152796,  -59.11549569,
        134.1326274 ,   38.73113931,   -2.76722674,  139.09037523,
        -24.44287431,  -55.50723125,  -78.2278252 ,  110.10762737,
         37.07375379, -175.33144895,   61.13625426,   59.36582775,
       -108.48649369,   74.48083239,  -53.53032952,   41.93926

In [18]:
# Report the baseline regressor's test-split metrics: R2, MAE and MSE.
# A single print call with a newline separator emits one item per line.
print(
    "Current Model Performance:",
    f"R2 Score: {r2_score(y_test, y_pred)}",
    f"Mean Absolute Error: {mean_absolute_error(y_test, y_pred)}",
    f"Mean Squared Error: {mean_squared_error(y_test, y_pred)}",
    sep="\n",
)

Current Model Performance:
R2 Score: 0.9624785160807772
Mean Absolute Error: 13.064806698383137
Mean Squared Error: 293.94094368607153


In [19]:
# Hyperparameter tuning for the AdaBoost regressor.
from sklearn.model_selection import GridSearchCV

# Search space: 3 x 5 x 3 = 45 hyperparameter combinations.
param_grid = {
    'n_estimators': [50, 100, 200],
    'learning_rate': [0.01, 0.1, 0.5, 1.0, 1.5],
    'loss': ['linear', 'square', 'exponential'],
}

# Tune a fresh, seeded estimator rather than reusing the already-fitted
# `regressor`: the search then does not depend on that cell's state, and the
# fixed random_state makes the selected best_params_ reproducible.
grid_search = GridSearchCV(
    estimator=AdaBoostRegressor(random_state=42),
    param_grid=param_grid,
    cv=5,
    verbose=3,
)

In [20]:
# Display the configured (not yet fitted) grid-search object.
grid_search

In [21]:
# Run the cross-validated search using the training split only.
grid_search.fit(X=X_train, y=y_train)

Fitting 5 folds for each of 45 candidates, totalling 225 fits
[CV 1/5] END learning_rate=0.01, loss=linear, n_estimators=50;, score=0.921 total time=   0.2s
[CV 2/5] END learning_rate=0.01, loss=linear, n_estimators=50;, score=0.927 total time=   0.1s
[CV 3/5] END learning_rate=0.01, loss=linear, n_estimators=50;, score=0.939 total time=   0.1s
[CV 4/5] END learning_rate=0.01, loss=linear, n_estimators=50;, score=0.939 total time=   0.1s
[CV 5/5] END learning_rate=0.01, loss=linear, n_estimators=50;, score=0.937 total time=   0.2s
[CV 1/5] END learning_rate=0.01, loss=linear, n_estimators=100;, score=0.927 total time=   0.3s
[CV 2/5] END learning_rate=0.01, loss=linear, n_estimators=100;, score=0.926 total time=   0.3s
[CV 3/5] END learning_rate=0.01, loss=linear, n_estimators=100;, score=0.941 total time=   0.3s
[CV 4/5] END learning_rate=0.01, loss=linear, n_estimators=100;, score=0.944 total time=   0.3s
[CV 5/5] END learning_rate=0.01, loss=linear, n_estimators=100;, score=0.938 to

In [22]:
# Show the hyperparameter combination the grid search selected.
grid_search.best_params_

{'learning_rate': 1.5, 'loss': 'square', 'n_estimators': 200}

In [23]:
# Take the estimator selected by the grid search (rebinding `best_model`
# from the classifier section), predict the test split, and display the
# predictions as the cell's output.
best_model = grid_search.best_estimator_
y_pred_tuned = best_model.predict(X=X_test)
y_pred_tuned

array([ 3.31242347e+01,  4.24057234e+01,  9.18803576e+01, -5.84024744e+01,
       -8.94867894e+01,  8.89348183e+01, -2.68361709e+01, -3.36841028e+01,
       -6.08852502e+00,  6.22900019e+00,  2.06155841e+02,  2.34948804e+01,
       -1.74005431e+02, -1.79216512e+02,  7.58001913e+00,  7.36520369e+01,
        1.57432287e+02, -6.21987482e+01, -3.02989418e+01,  1.30940247e+02,
        9.48393042e+01, -8.75095593e+01,  1.49044882e+02,  3.32831126e+01,
       -8.26097380e+00,  1.58343860e+00, -1.83411321e+02, -8.54071435e+01,
        7.95689949e+01, -8.87746317e+01,  9.34618361e+01,  6.83253626e+01,
        8.05052845e+01,  1.47349823e+02,  4.00463179e+01,  8.89814129e+01,
       -8.70928728e+00,  4.31231981e+01,  5.43709646e+01, -1.37578978e+02,
        2.35982963e+01,  8.89814129e+01,  7.74982130e+01, -5.85681200e+01,
        1.47349823e+02,  2.69248366e+01, -3.06198284e+00,  1.44424458e+02,
       -2.69426446e+01, -6.12427230e+01, -8.54071435e+01,  1.12317472e+02,
        2.65331304e+01, -

In [25]:
# Report the tuned regressor's metrics on the same test split: R2, MAE and MSE.
print(
    "Tuned Model Performance:",
    f"R2 score: {r2_score(y_test, y_pred_tuned)}",
    f"Mean Absolute Error: {mean_absolute_error(y_test, y_pred_tuned)}",
    f"Mean Squared Error: {mean_squared_error(y_test, y_pred_tuned)}",
    sep="\n",
)

Tuned Model Performance:
R2 score: 0.9706863673206597
Mean Absolute Error: 11.56054591582316
Mean Squared Error: 229.6411536170031
