In [None]:
import pandas as pd
import numpy as np

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import accuracy_score, classification_report

%run util.ipynb

# Adaptive Boosting

In [89]:
# Reference: scikit-learn AdaBoostClassifier API documentation
# https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.AdaBoostClassifier.html

## Load Data

In [90]:
# Load the features X and the target labels Y (they are used that way by the
# train/test split and the classifier fits below).
# get_data() is not defined in this notebook — presumably it comes from
# util.ipynb, which the import cell runs; check there for the data source.
X, Y = get_data()

In [91]:
# Hold out 30% of the samples for testing; the fixed seed keeps the split
# reproducible across runs.
# NOTE(review): the split is not stratified on Y — consider `stratify=Y` if the
# class balance of the two partitions matters.
Xtrain, Xtest, Ytrain, Ytest = train_test_split(
    X,
    Y,
    test_size=0.3,
    random_state=42,
)

# Learn the standardisation statistics from the training partition only, then
# apply that same transformation to both partitions so that no test-set
# information influences the scaling.
scalar = StandardScaler().fit(Xtrain)
Xtrain, Xtest = scalar.transform(Xtrain), scalar.transform(Xtest)

In [92]:
# Baseline model: AdaBoost with the library's default hyperparameters; only the
# random seed is pinned so the result is reproducible.
clf = AdaBoostClassifier(random_state=42)
# Kept as the cell's final expression so the fitted estimator is displayed.
clf.fit(X=Xtrain, y=Ytrain)

In [93]:
# Baseline performance: predict the held-out test partition and report the
# fraction of labels predicted correctly.
Ypred = clf.predict(Xtest)
accuracy_score(y_true=Ytest, y_pred=Ypred)

0.8125

### Hyperparameter tuning

In [94]:
# Show the baseline classifier's current hyperparameter settings; these are the
# names that can be used as keys in a tuning grid.
clf.get_params()

{'algorithm': 'deprecated',
 'estimator': None,
 'learning_rate': 1.0,
 'n_estimators': 50,
 'random_state': 42}

In [95]:
# Following the paper's methodology for n_estimators does not work with
# non-integer values (possibly a typo in the paper), so the grid below uses
# integer estimator counts only.

# Candidate values: 7 learning rates x 8 ensemble sizes = 56 combinations.
param_grid_adaBt = {
    'learning_rate': [1.0, 0.15, 0.1, 0.05, 0.01, 0.005, 0.001],
    'n_estimators': [1, 10, 50, 100, 150, 200, 250, 300],
}

# Exhaustive search over the grid, ranking candidates by accuracy under 10-fold
# cross-validation. n_jobs=-1 lets scikit-learn use every available processor.
grid_search_adaBt = GridSearchCV(
    estimator=AdaBoostClassifier(random_state=42),
    param_grid=param_grid_adaBt,
    scoring='accuracy',
    cv=10,
    n_jobs=-1,
)

In [96]:
# Run the search on the training partition only: 56 parameter combinations x
# 10 folds = 560 model fits, plus the final refit of the best candidate
# (GridSearchCV's default refit=True). This is the slow cell of the notebook.
# NOTE(review): Xtrain was standardised on the whole training partition before
# this call, so each validation fold was scaled with statistics that include
# it. Wrap the scaler and classifier in a Pipeline if strict fold isolation is
# required.
grid_search_adaBt.fit(Xtrain, Ytrain)

In [97]:
# Winning hyperparameter combination and its mean cross-validated accuracy
# (the search was configured with scoring='accuracy' and cv=10).
grid_search_adaBt.best_params_, grid_search_adaBt.best_score_

({'learning_rate': 1.0, 'n_estimators': 150}, np.float64(0.7804347826086956))

In [98]:
# Take the estimator that the grid search selected as best.
model = grid_search_adaBt.best_estimator_

# Evaluate it on the held-out test partition. Ypred is overwritten here, so
# from this point on it holds the tuned model's predictions, not the baseline's.
Ypred = model.predict(Xtest)
accuracy_score(y_true=Ytest, y_pred=Ypred)

0.8125

In [99]:
# Per-class precision / recall / F1 for the tuned model's test predictions.
# The report is a plain-text table, so print() is used to render its layout.
print(classification_report(y_true=Ytest, y_pred=Ypred))

              precision    recall  f1-score   support

           0       0.82      0.82      0.82        50
           1       0.80      0.80      0.80        46

    accuracy                           0.81        96
   macro avg       0.81      0.81      0.81        96
weighted avg       0.81      0.81      0.81        96



### Check for overfitting

In [100]:
# Score the tuned model on the training partition it was fitted on. A training
# accuracy well above the held-out test accuracy is a sign of overfitting.
YtrainPred = model.predict(Xtrain)
accuracy_score(y_true=Ytrain, y_pred=YtrainPred)

0.9910313901345291