# Hyperparameter Tuning



In [None]:
#Make sure this is installed
# !pip install scikit-optimize

In [1]:
#import relevant libraries
import pandas as pd
import numpy as np
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import f1_score
from skopt import BayesSearchCV
# parameter ranges are specified by one of below
from skopt.space import Real, Integer

In [3]:
import os
os.getcwd()

'c:\\Users\\20805973\\OneDrive - ChampionX\\Documents\\SNAJAM\\01_Work\\02_Trainings\\03_Coursera\\03_Fractal_Data_Science_Professional_Certificate\\05_Advanced_Algorithms\\04_Module4\\Lesson-2---Hyperparameter-Tuning-ResourcesArchive'

In [4]:
#read train, validation and test data
df_train = pd.read_csv('train_new.csv')
df_test = pd.read_csv('test_new.csv')
df_val = pd.read_csv('val_new.csv')

In [5]:
x_train = df_train.drop(['Units_sold>1000'], axis = 1)
y_train = df_train['Units_sold>1000']

In [6]:
x_test = df_test.drop(['Units_sold>1000'], axis = 1)
y_test = df_test['Units_sold>1000']

In [7]:
x_val = df_val.drop(['Units_sold>1000'], axis = 1)
y_val = df_val['Units_sold>1000']

## Video 2 : Grid Search

<p style = 'color:green'><b>Run all the cells above before you begin</b><p>

- n-estimator controls the number of decision trees which will be built in sequence in a boosted ensemble model. So, increaing the nummber of estimators will lead to better performance of model, but a very high number will lead to overfitting. Since our goal is to increase the performance of the model on test data, we will check the performance of the model across a list of values - [50, 100, 200, 300, 400].

- Now max_depth is used to set the depth of the tree. Higher the number, more the model learns from training data and hence may result in overfitting. Since our model was slightly overfitting at the end of feature engineering  at a max_depth of 9. Let us set the range between 6 to 14 with a gap of 2.

- The learning_rate ontrols the weightage each model gets for the final prediction. A higher value of learning_rate can lead to overfit models. So far we have been using the default value of 0.1. But we will be trying two other values i.e. 0.2 and 0.3 along with the default value this time. 

In [8]:
from sklearn.model_selection import GridSearchCV

In [9]:
#define the grid
param_grid = {
    'n_estimators': [50, 100, 200, 300, 400],
    'max_depth': range(6, 13, 2),
    'learning_rate': [0.1, 0.2, 0.3]
}

In [11]:
#define the model instance 
model_GBC = GradientBoostingClassifier(random_state=42)

In [12]:
#define grid search cv
grid_search_cv = GridSearchCV(estimator=model_GBC, param_grid=param_grid, n_jobs=-1, verbose=2,
                      cv=5, scoring='f1')

In [14]:
# fit the grid search model
%time
grid_search_cv.fit(x_train, y_train)

CPU times: total: 0 ns
Wall time: 0 ns
Fitting 5 folds for each of 60 candidates, totalling 300 fits


In [15]:
# Best performing model
grid_search_cv.best_estimator_

In [16]:
# Best performing model
grid_search_cv.best_params_

{'learning_rate': 0.3, 'max_depth': 10, 'n_estimators': 400}

In [17]:
# Mean cross-validated F1 score of the best estimator
grid_search_cv.best_score_

0.9082855275520932

In [18]:
#predict the dependent values
y_train_grid_search_pred = grid_search_cv.predict(x_train)
y_val_grid_search_pred = grid_search_cv.predict(x_val)

f1_train_grid_search = f1_score(y_train, y_train_grid_search_pred)
f1_val_grid_search = f1_score(y_val, y_val_grid_search_pred)

print("F1 Score on Train data:", f1_train_grid_search)
print("F1 Score on Val data:", f1_val_grid_search)

F1 Score on Train data: 1.0
F1 Score on Val data: 0.9140120757859671


## Video 3 - RandomizedSearchCV
<p style = 'color:green'><b>Run all the cells above before you begin</b><p>


In [19]:
from sklearn.model_selection import RandomizedSearchCV

In [20]:
random_cv = RandomizedSearchCV(estimator=model_GBC, n_iter=10, 
                               param_distributions=param_grid, n_jobs=-1, 
                               cv=5, scoring='f1')

In [21]:
%%time
random_cv.fit(x_train, y_train)

CPU times: total: 34.6 s
Wall time: 1min 58s


In [22]:
#best model estimator as per random search
random_cv.best_estimator_

In [23]:
random_cv.best_params_

{'n_estimators': 400, 'max_depth': 8, 'learning_rate': 0.3}

In [24]:
#cross validation score for the best model after random search
random_cv.best_score_

0.9072248881879574

In [25]:
y_train_randomcv_pred = random_cv.predict(x_train)

y_val_randomcv_pred = random_cv.predict(x_val)

f1_train_randomcv = f1_score(y_train, y_train_randomcv_pred)
f1_val_randomcv = f1_score(y_val, y_val_randomcv_pred)

print("F1 Score on Train data:", f1_train_randomcv)
print("F1 Score on Val data:", f1_val_randomcv)

F1 Score on Train data: 1.0
F1 Score on Val data: 0.9157279933249896


## Video 5 - Bayesian Optimization
<p style = 'color:green'><b>Run all the cells above before you begin</b><p>


In [26]:
from skopt import BayesSearchCV
# parameter ranges are specified by one of below
from skopt.space import Real, Integer

In [27]:
# log-uniform: understand as search over p = exp(x) by varying x
opt = BayesSearchCV(
    GradientBoostingClassifier(),
    {
        'learning_rate': Real(0.1, 0.3),
        'max_depth': Integer(6, 13),
        'n_estimators': Integer(50, 500),
    },
    n_iter=10,
    random_state=0,
    n_jobs=-1
)

In [28]:
%%time
_ = opt.fit(x_train, y_train)

CPU times: total: 41.7 s
Wall time: 6min 40s


In [29]:
opt.best_params_

OrderedDict([('learning_rate', 0.20619283299042945),
             ('max_depth', 11),
             ('n_estimators', 330)])

In [30]:
print(opt.score(x_train, y_train))
print(opt.score(x_val, y_val))

1.0
0.8981151299032094
