# <b> Notebook 5 : Hyperparameter Tuning

In [1]:
# Import packages, functions
%run ../klicp/klicp_00_import_and_functions.ipynb
%run ../klicp/klicp_01_data_import_tools.ipynb
%run ../klicp/klicp_06_hyperparameter_tuning.ipynb

# Import dataframes
%run 01_processing.ipynb

#### We will tune the hyperparameters of our four best performing algorithms to see if we can improve the accuracy and f-score. We have to tune our parameters by the F-score because our dataset is unbalanced.

## <b> Hyperopt Method (Latest)

In [3]:
mlflow.set_tracking_uri("http://127.0.0.1:5000")

In [4]:
algo = 'XGBClassifier'
mode = 'classification'
parameters_space = spaces[algo]
num_eval = 500
trials = Trials()

partial_func = partial(objective_function, algo = algo, mode=mode, X_train=X_train, X_test=X_test, y_train=y_train, y_test=y_test)

best_param = fmin(partial_func, parameters_space, algo=tpe.suggest, max_evals=num_eval, trials=trials,
                      rstate= np.random.default_rng(1), early_stop_fn=no_progress_loss(round(num_eval/4)))

Parameters: { "colsample_bylevel", "colsample_bynode", "colsample_bytree", "gamma", "max_delta_step", "max_depth", "max_leaves", "min_child_weight", "subsample", "tree_method" } are not used.

Parameters: { "colsample_bylevel", "colsample_bynode", "colsample_bytree", "gamma", "max_delta_step", "max_depth", "max_leaves", "min_child_weight", "scale_pos_weight", "subsample", "tree_method" } are not used.

Parameters: { "scale_pos_weight" } are not used.

Parameters: { "colsample_bylevel", "colsample_bynode", "colsample_bytree", "gamma", "max_delta_step", "max_depth", "max_leaves", "min_child_weight", "scale_pos_weight", "subsample", "tree_method" } are not used.

Parameters: { "scale_pos_weight" } are not used.

Parameters: { "colsample_bylevel", "colsample_bynode", "colsample_bytree", "gamma", "max_delta_step", "max_depth", "max_leaves", "min_child_weight", "scale_pos_weight", "subsample", "tree_method" } are not used.

Parameters: { "colsample_bylevel", "colsample_bynode", "colsample_by

In [5]:
best_param_export = convert_best_param(best_param, parameters_space)
best_param_export

# Pickling the dictionary
with open("best_param_%s.pickle"%(algo), "wb") as f:
    pickle.dump(best_param_export, f)

In [6]:
best_param_export

{'objective': 'binary:hinge',
 'eval_metric': 'merror',
 'booster': 'dart',
 'learning_rate': 0.027190854916362413,
 'gamma': 3,
 'max_depth': 838,
 'min_child_weight': 2,
 'max_delta_step': 155,
 'subsample': 0.6812427820750524,
 'colsample_bytree': 0.8081818371086672,
 'colsample_bylevel': 0.9587281464522058,
 'colsample_bynode': 0.6388300410263845,
 'reg_lambda': 0.013703407002288373,
 'reg_alpha': 0.02994454816035446,
 'tree_method': 'auto',
 'scale_pos_weight': 524.0,
 'max_leaves': 6}

### Quick comparison

In [7]:
# Baseline Model
clf = XGBClassifier()
clf.fit(X_train,y_train)
y_pred = clf.predict(X_test)
f1_score(y_test,y_pred)

0.92629179331307

In [8]:
# Best Params Model
clf = XGBClassifier(**best_param_export)
clf.fit(X_train,y_train)
y_pred = clf.predict(X_test)
f1_score(y_test,y_pred)

Parameters: { "scale_pos_weight" } are not used.



0.9353233830845772

#### We can see that the hyperparameter optimized model is performing better than the baseline one

## <b> Grid Search Method (Deprecated)

## Gradient Boosting

In [15]:
parameters = [{
    "loss":["deviance"],
    "learning_rate": [0.2,0.3,0.4],
    "min_samples_split": [0.01,0.1, 0.5],
    "min_samples_leaf": [0.0001,0.001, 0.01],
    "max_depth":[8,10,15],
    "max_features":["log2","sqrt"],
    "criterion": ["friedman_mse",  "mae"],
    "subsample":[0.7,0.8,0.9],
    "n_estimators":[10,20,30]
    }]
gbm = GridSearchCV(GradientBoostingClassifier(), parameters, cv=5, n_jobs=-1,scoring='f1', verbose=True)
gbm.fit(X_train,y_train)


Fitting 5 folds for each of 2916 candidates, totalling 14580 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  26 tasks      | elapsed:    2.3s
[Parallel(n_jobs=-1)]: Done 176 tasks      | elapsed:    4.8s
[Parallel(n_jobs=-1)]: Done 576 tasks      | elapsed:    8.8s
[Parallel(n_jobs=-1)]: Done 1276 tasks      | elapsed:   16.9s
[Parallel(n_jobs=-1)]: Done 2176 tasks      | elapsed:   30.9s
[Parallel(n_jobs=-1)]: Done 3276 tasks      | elapsed:   42.8s
[Parallel(n_jobs=-1)]: Done 4576 tasks      | elapsed:   59.3s
[Parallel(n_jobs=-1)]: Done 6076 tasks      | elapsed:  1.3min
[Parallel(n_jobs=-1)]: Done 7566 tasks      | elapsed:  3.7min
[Parallel(n_jobs=-1)]: Done 8516 tasks      | elapsed: 10.9min
[Parallel(n_jobs=-1)]: Done 9566 tasks      | elapsed: 19.9min
[Parallel(n_jobs=-1)]: Done 10716 tasks      | elapsed: 29.2min
[Parallel(n_jobs=-1)]: Done 11966 tasks      | elapsed: 39.6min
[Parallel(n_jobs=-1)]: Done 13316 tasks      | elapsed: 50.2min
[Parallel(n_jobs=-1)]: Done 14580 out of 

GridSearchCV(cv=5, estimator=GradientBoostingClassifier(), n_jobs=-1,
             param_grid=[{'criterion': ['friedman_mse', 'mae'],
                          'learning_rate': [0.2, 0.3, 0.4],
                          'loss': ['deviance'], 'max_depth': [8, 10, 15],
                          'max_features': ['log2', 'sqrt'],
                          'min_samples_leaf': [0.0001, 0.001, 0.01],
                          'min_samples_split': [0.01, 0.1, 0.5],
                          'n_estimators': [10, 20, 30],
                          'subsample': [0.7, 0.8, 0.9]}],
             scoring='f1', verbose=True)

In [16]:
print(gbm.best_params_)

{'criterion': 'friedman_mse', 'learning_rate': 0.2, 'loss': 'deviance', 'max_depth': 10, 'max_features': 'sqrt', 'min_samples_leaf': 0.0001, 'min_samples_split': 0.1, 'n_estimators': 30, 'subsample': 0.7}


In [18]:
# Gradient boosting with best parameters
gs_gbm = GradientBoostingClassifier( 
    criterion='friedman_mse',
    learning_rate=0.2,
    loss='deviance',
    max_depth=10,
    max_features='sqrt',
    min_samples_leaf= 0.0001,
    min_samples_split= 0.1,
    n_estimators = 30,
    subsample = 0.7
)
gs_gbm.fit(X_train, y_train)
gs_gbm.score(X_test,y_test)

0.8926666666666667

## Stochastic gradient descent

In [18]:
parameters = [{
    "max_iter": [100,200,300],
    "loss": ["hinge","log","squared_hinge","perceptron"],
    "penalty": ["elasticnet", "l1", "l2"],
    "alpha": [0.0001, 0.001, 0.01, 0.1], 
    "learning_rate": ['constant', 'optimal', 'invscaling', 'adaptive'],
    "class_weight": [{1:0.5, 0:0.5}, {1:0.4, 0:0.6}, {1:0.6, 0:0.4}, {1:0.7, 0:0.3}],
    "eta0": [1, 10] 
             }]
sgd = GridSearchCV(SGDClassifier(), parameters, cv=5, n_jobs=-1,scoring='f1', verbose=True)
sgd.fit(X_train,y_train)


Fitting 5 folds for each of 4608 candidates, totalling 23040 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  28 tasks      | elapsed:    0.2s
[Parallel(n_jobs=-1)]: Done 952 tasks      | elapsed:    5.7s
[Parallel(n_jobs=-1)]: Done 1828 tasks      | elapsed:   13.3s
[Parallel(n_jobs=-1)]: Done 3224 tasks      | elapsed:   24.6s
[Parallel(n_jobs=-1)]: Done 5024 tasks      | elapsed:   39.9s
[Parallel(n_jobs=-1)]: Done 7292 tasks      | elapsed:   56.8s
[Parallel(n_jobs=-1)]: Done 9848 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 13368 tasks      | elapsed:  1.6min
[Parallel(n_jobs=-1)]: Done 16992 tasks      | elapsed:  2.0min
[Parallel(n_jobs=-1)]: Done 23040 out of 23040 | elapsed:  2.5min finished


GridSearchCV(cv=5, estimator=SGDClassifier(), n_jobs=-1,
             param_grid=[{'alpha': [0.0001, 0.001, 0.01, 0.1],
                          'class_weight': [{0: 0.5, 1: 0.5}, {0: 0.6, 1: 0.4},
                                           {0: 0.4, 1: 0.6}, {0: 0.3, 1: 0.7}],
                          'eta0': [1, 10],
                          'learning_rate': ['constant', 'optimal', 'invscaling',
                                            'adaptive'],
                          'loss': ['hinge', 'log', 'squared_hinge',
                                   'perceptron'],
                          'max_iter': [100, 200, 300],
                          'penalty': ['elasticnet', 'l1', 'l2']}],
             scoring='f1', verbose=True)

In [35]:
print(sgd.best_params_)

{'alpha': 0.001, 'class_weight': {1: 0.7, 0: 0.3}, 'eta0': 10, 'learning_rate': 'adaptive', 'loss': 'perceptron', 'max_iter': 300, 'penalty': 'elasticnet'}


In [19]:
# Stochastic gradient descent with best parameters
gs_sgd = SGDClassifier(alpha =0.001, 
                       class_weight ={1: 0.7, 0: 0.3}, 
                       eta0 =10, 
                       learning_rate = 'adaptive', 
                       loss= 'perceptron', 
                       max_iter=300, 
                       penalty= 'elasticnet')
gs_sgd.fit(X_train, y_train)
gs_sgd.score(X_test,y_test)

0.8846666666666667

## Random Forest

In [21]:
parameters = [{
    "criterion": ["gini",  "entropy"],
    "n_estimators": [200,400,600,800,1000],
    "max_features": ['auto', 'sqrt'],
    "max_depth": [5, 50, 100],
    "min_samples_split": [5,10,15,20,25,30], 
    "min_samples_leaf": [1,5,10,15,20],
    "bootstrap": [True, False]
             }]

rf = GridSearchCV(RandomForestClassifier(), 
                  parameters, 
                  cv=5, 
                  n_jobs=-1,
                  scoring='f1', 
                  verbose=True)

rf.fit(X_train,y_train)

Fitting 5 folds for each of 3600 candidates, totalling 18000 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  26 tasks      | elapsed:    7.0s
[Parallel(n_jobs=-1)]: Done 176 tasks      | elapsed:   47.5s
[Parallel(n_jobs=-1)]: Done 426 tasks      | elapsed:  2.1min
[Parallel(n_jobs=-1)]: Done 776 tasks      | elapsed:  3.7min
[Parallel(n_jobs=-1)]: Done 1226 tasks      | elapsed:  6.0min
[Parallel(n_jobs=-1)]: Done 1776 tasks      | elapsed:  9.1min
[Parallel(n_jobs=-1)]: Done 2426 tasks      | elapsed: 13.1min
[Parallel(n_jobs=-1)]: Done 3176 tasks      | elapsed: 17.5min
[Parallel(n_jobs=-1)]: Done 4026 tasks      | elapsed: 23.2min
[Parallel(n_jobs=-1)]: Done 4976 tasks      | elapsed: 28.4min
[Parallel(n_jobs=-1)]: Done 6026 tasks      | elapsed: 34.0min
[Parallel(n_jobs=-1)]: Done 7176 tasks      | elapsed: 41.6min
[Parallel(n_jobs=-1)]: Done 8426 tasks      | elapsed: 49.7min
[Parallel(n_jobs=-1)]: Done 9776 tasks      | elapsed: 57.3min
[Parallel(n_jobs=-1)]: Done 11226 tasks      

GridSearchCV(cv=5, estimator=RandomForestClassifier(), n_jobs=-1,
             param_grid=[{'bootstrap': [True, False],
                          'criterion': ['gini', 'entropy'],
                          'max_depth': [5, 50, 100],
                          'max_features': ['auto', 'sqrt'],
                          'min_samples_leaf': [1, 5, 10, 15, 20],
                          'min_samples_split': [5, 10, 15, 20, 25, 30],
                          'n_estimators': [200, 400, 600, 800, 1000]}],
             scoring='f1', verbose=True)

In [40]:
print(rf.best_params_)

{'bootstrap': False, 'criterion': 'entropy', 'max_depth': 100, 'max_features': 'sqrt', 'min_samples_leaf': 1, 'min_samples_split': 25, 'n_estimators': 800}


In [20]:
# Random forest with best parameters
gs_rf = RandomForestClassifier(bootstrap=False,
                               criterion='entropy',
                               max_depth= 100, 
                               max_features='sqrt', 
                               min_samples_leaf=1, 
                               min_samples_split=25, 
                               n_estimators=800)
gs_rf.fit(X_train, y_train)
gs_rf.score(X_test,y_test)

0.9

## Decision Tree

In [24]:
parameters = [{
    "criterion": ['gini','entropy'],
    "max_depth": range(1,10),
    "min_samples_split": range(1,10), 
    "min_samples_leaf": [1,2,5,10],
             }]

dt = GridSearchCV(DecisionTreeClassifier(),
                  parameters, 
                  cv=5, n_jobs=-1,
                  scoring='f1', 
                  verbose=True)

dt.fit(X_train,y_train)

Fitting 5 folds for each of 648 candidates, totalling 3240 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  28 tasks      | elapsed:    0.2s
[Parallel(n_jobs=-1)]: Done 1640 tasks      | elapsed:    3.2s
[Parallel(n_jobs=-1)]: Done 3240 out of 3240 | elapsed:    6.2s finished


GridSearchCV(cv=5, estimator=DecisionTreeClassifier(), n_jobs=-1,
             param_grid=[{'criterion': ['gini', 'entropy'],
                          'max_depth': range(1, 10),
                          'min_samples_leaf': [1, 2, 5, 10],
                          'min_samples_split': range(1, 10)}],
             scoring='f1', verbose=True)

In [43]:
print(dt.best_params_)

{'criterion': 'gini', 'max_depth': 1, 'min_samples_leaf': 1, 'min_samples_split': 2}


In [21]:
# Decision Tree with best parameters
gs_dt = DecisionTreeClassifier(criterion='gini', 
                               max_depth= 6, 
                               min_samples_leaf=1, 
                               min_samples_split=2,)
gs_dt.fit(X_train, y_train)
gs_dt.score(X_test,y_test)

0.8913333333333333