# Adaptive Boosting Technique

## Load required libraries

In [None]:
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
import pandas as pd
from sklearn.model_selection import train_test_split
import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix, classification_report

In [None]:
# !pip uninstall -y scikit-learn
!pip install scikit-learn==0.23.2

In [None]:
# Show which scikit-learn release is actually loaded in this kernel.
import sklearn
print(sklearn.__version__)

## Load the dataset and binarize popularity

In [None]:
# Load the merged dataset (path is relative to the notebook's working directory).
df = pd.read_csv('../../Dataset/data_merged.csv')

# Cut-off used to turn popularity into a binary label.
# NOTE(review): hard-coded despite the name -- presumably the (rounded) mean
# popularity of the dataset; confirm against the data.
mean_popularity = 42

# Binarize the target: 1 when popularity >= cut-off, otherwise 0.
# A vectorised comparison replaces the per-row Python loop.
df["popularity"] = (df["popularity"] >= mean_popularity).astype(int)
df["popularity"].head()

## Instantiate sklearn's AdaBoostClassifier

In [None]:
# Boosted ensemble of 200 depth-1 decision trees (stumps) as weak learners,
# using the SAMME.R boosting variant with an unshrunk learning rate of 1.
ada_clf = AdaBoostClassifier(
    DecisionTreeClassifier(max_depth=1),
    n_estimators=200,
    learning_rate=1,
    algorithm="SAMME.R",
)

## Prepare the data and split it into train and test sets

In [None]:
# Target: the binarized popularity column.
y = df['popularity']
# Features: every remaining column of the frame.
X = df.drop(columns=['popularity'])

In [None]:
# Hold out 20% of the rows for testing. Stratifying on y keeps the class
# ratio the same in both partitions; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.20,
    random_state=21,
    stratify=y,
)

## Train the model

In [None]:
# Fit the ensemble on the training split; the labels are passed as a flat 1-D array.
ada_clf.fit(X_train, np.asarray(y_train).ravel())

## Test the model

In [None]:
# Predict on the training split, to compare against the test-split results.
y_train_pred = ada_clf.predict(X_train)

In [None]:
# Training accuracy: fraction of training rows whose label is predicted correctly.
accuracy_score(y_true=y_train, y_pred=y_train_pred)

In [None]:
# Predict on the held-out test split.
y_pred = ada_clf.predict(X_test)

In [None]:
# Test accuracy: fraction of held-out rows whose label is predicted correctly.
accuracy_score(y_true=y_test, y_pred=y_pred)

In [None]:
# Confusion matrix for the test split, followed by the per-class
# precision / recall / F1 report.
print(confusion_matrix(y_true=y_test, y_pred=y_pred))
print("Classification report\n")
print(classification_report(y_true=y_test, y_pred=y_pred))

## Bayesian Optimization for finding appropriate hyperparameters

In [None]:
!pip install scikit-optimize

In [None]:
!pip install matplotlib

In [None]:
from skopt import BayesSearchCV
from skopt.space import Real, Categorical, Integer
from skopt.plots import plot_objective
from matplotlib import pyplot as plt

In [None]:
# Bayesian hyperparameter search over a default AdaBoostClassifier:
# 32 sampled configurations, each scored with 3-fold cross-validation.
opt = BayesSearchCV(
    estimator=AdaBoostClassifier(),
    search_spaces={
        # Number of boosting rounds (integer range 50..250).
        'n_estimators': Integer(50, 250),
        # Shrinkage applied to each weak learner (real range 0.01..1.0).
        'learning_rate': Real(0.01, 1.0),
    },
    n_iter=32,
    cv=3,
)

In [None]:
# Imports come first so that a missing dependency fails before the long
# search starts rather than after it has finished.
import os
from joblib import dump

# Run the Bayesian search on the training split.
opt.fit(X_train, y_train)

# Persist the fitted search object. The target directory is created first:
# dump() would otherwise raise once the search is already done, losing the result.
model_path = "Saved models/AdaBoost"
os.makedirs(os.path.dirname(model_path), exist_ok=True)
dump(opt, model_path)

## Best parameters and scores obtained

In [None]:
# Report the best cross-validated score found by the search, the score on
# the held-out test split, and the hyperparameters of the best configuration.
print(f"val. score: {opt.best_score_!s}")
print(f"test score: {opt.score(X_test, y_test)!s}")
print(f"best params: {opt.best_params_!s}")

In [None]:
# Predict on the test split with the fitted search object; this rebinds y_pred.
y_pred = opt.predict(X_test)

In [None]:
# Confusion matrix for the test split, followed by the per-class
# precision / recall / F1 report.
print(confusion_matrix(y_true=y_test, y_pred=y_pred))
print("Classification report\n")
print(classification_report(y_true=y_test, y_pred=y_pred))

## Visualize results with partial dependence plots

In [None]:
# Partial-dependence plot of the search objective for the tuned hyperparameters,
# built from the first entry of the search's optimizer results.
# The labels must follow the optimizer's dimension order. BayesSearchCV builds
# its dimensions from the search-space dict sorted by parameter name, so
# "learning_rate" comes before "n_estimators"; passing the labels the other
# way round would swap the axis captions.
plot_objective(
    opt.optimizer_results_[0],
    dimensions=["learning_rate", "n_estimators"],
    n_minimum_search=int(1e8),
)