# All Techniques Of Hyper Parameter Optimization

#### 1. RandomizedSearchCV
#### 2. GridSearchCV
#### 3. Bayesian Optimization -Automate Hyperparameter Tuning (Hyperopt)
#### 4. Optuna- Automate Hyperparameter Tuning

In [1]:
import warnings
warnings.filterwarnings('ignore')

In [2]:
import pandas as pd
df=pd.read_csv('diabetes.csv')
df.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [3]:
import numpy as np
df['Glucose']=np.where(df['Glucose']==0,df['Glucose'].median(),df['Glucose'])
df['Insulin']=np.where(df['Insulin']==0,df['Insulin'].median(),df['Insulin'])
df['SkinThickness']=np.where(df['SkinThickness']==0,df['SkinThickness'].median(),df['SkinThickness'])
df.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148.0,72,35.0,30.5,33.6,0.627,50,1
1,1,85.0,66,29.0,30.5,26.6,0.351,31,0
2,8,183.0,64,23.0,30.5,23.3,0.672,32,1
3,1,89.0,66,23.0,94.0,28.1,0.167,21,0
4,0,137.0,40,35.0,168.0,43.1,2.288,33,1


In [4]:
#### Independent And Dependent features

X=df.drop('Outcome',axis=1)
y=df['Outcome']

In [5]:
pd.DataFrame(X,columns=df.columns[:-1])

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age
0,6,148.0,72,35.0,30.5,33.6,0.627,50
1,1,85.0,66,29.0,30.5,26.6,0.351,31
2,8,183.0,64,23.0,30.5,23.3,0.672,32
3,1,89.0,66,23.0,94.0,28.1,0.167,21
4,0,137.0,40,35.0,168.0,43.1,2.288,33
...,...,...,...,...,...,...,...,...
763,10,101.0,76,48.0,180.0,32.9,0.171,63
764,2,122.0,70,27.0,30.5,36.8,0.340,27
765,5,121.0,72,23.0,112.0,26.2,0.245,30
766,1,126.0,60,23.0,30.5,30.1,0.349,47


In [6]:
#### Train Test Split

from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2,random_state=0)

In [7]:
from sklearn.ensemble import RandomForestClassifier

In [8]:
rf_classifier=RandomForestClassifier(n_estimators=10).fit(X_train,y_train)
prediction=rf_classifier.predict(X_test)

In [9]:
y.value_counts()

0    500
1    268
Name: Outcome, dtype: int64

In [10]:
from sklearn.metrics import accuracy_score,confusion_matrix,classification_report
print('Accuracy score:',accuracy_score(y_test,prediction))
print(confusion_matrix(y_test,prediction))
print(classification_report(y_test,prediction))



Accuracy score: 0.7857142857142857
[[95 12]
 [21 26]]
              precision    recall  f1-score   support

           0       0.82      0.89      0.85       107
           1       0.68      0.55      0.61        47

    accuracy                           0.79       154
   macro avg       0.75      0.72      0.73       154
weighted avg       0.78      0.79      0.78       154



### The main parameters used by a Random Forest Classifier are:

##### 1. criterion = the function used to evaluate the quality of a split.
##### 2. max_depth = maximum number of levels allowed in each tree.
##### 3. max_features = maximum number of features considered when splitting a node.
##### 4. min_samples_leaf = minimum number of samples which can be stored in a tree leaf.
##### 5. min_samples_split = minimum number of samples necessary in a node to cause node splitting.
##### 6. n_estimators = number of trees in the ensemble.

### Manual Hyperparameter Tuning

In [11]:
model=RandomForestClassifier(n_estimators=300,criterion='entropy',max_features='sqrt',min_samples_leaf=10,random_state=100).fit(X_train,y_train)
prediction=model.predict(X_test)
print('Accuracy score:',accuracy_score(y_test,prediction))
print(confusion_matrix(y_test,prediction))
print(classification_report(y_test,prediction))


Accuracy score: 0.8246753246753247
[[97 10]
 [17 30]]
              precision    recall  f1-score   support

           0       0.85      0.91      0.88       107
           1       0.75      0.64      0.69        47

    accuracy                           0.82       154
   macro avg       0.80      0.77      0.78       154
weighted avg       0.82      0.82      0.82       154



## Randomized Search CV

In [12]:
import numpy as np
from sklearn.model_selection import RandomizedSearchCV

# Number of trees in random forest
n_estimators=[int(x) for x in np.linspace(start=200,stop=2000,num=10)]

# Number of features to consider at every split
max_features=['auto','sqrt', 'log2']

# Maximum number of levels in tree
max_depth=[int(x) for x in np.linspace(10,1000,10)]

# Minimum number of samples required to split a node
min_samples_split=[2,5,10,14]

# Minimum number of samples required at each leaf node
min_samples_leaf=[1,2,4,6,8]

# Create the random grid
random_grid={'n_estimators':n_estimators,
            'max_features':max_features,
             'max_depth':max_depth,
             'min_samples_split':min_samples_split,
             'min_samples_leaf':min_samples_leaf,
             'criterion':['entropy','gini']}
print(random_grid)

{'n_estimators': [200, 400, 600, 800, 1000, 1200, 1400, 1600, 1800, 2000], 'max_features': ['auto', 'sqrt', 'log2'], 'max_depth': [10, 120, 230, 340, 450, 560, 670, 780, 890, 1000], 'min_samples_split': [2, 5, 10, 14], 'min_samples_leaf': [1, 2, 4, 6, 8], 'criterion': ['entropy', 'gini']}


In [13]:
rf=RandomForestClassifier()
rf_randomCV=RandomizedSearchCV(estimator=rf,param_distributions=random_grid,n_iter=100,cv=3,verbose=2,random_state=100,n_jobs=-1)

### fit the randomized model
rf_randomCV.fit(X_train,y_train)

Fitting 3 folds for each of 100 candidates, totalling 300 fits


RandomizedSearchCV(cv=3, estimator=RandomForestClassifier(), n_iter=100,
                   n_jobs=-1,
                   param_distributions={'criterion': ['entropy', 'gini'],
                                        'max_depth': [10, 120, 230, 340, 450,
                                                      560, 670, 780, 890,
                                                      1000],
                                        'max_features': ['auto', 'sqrt',
                                                         'log2'],
                                        'min_samples_leaf': [1, 2, 4, 6, 8],
                                        'min_samples_split': [2, 5, 10, 14],
                                        'n_estimators': [200, 400, 600, 800,
                                                         1000, 1200, 1400, 1600,
                                                         1800, 2000]},
                   random_state=100, verbose=2)

In [14]:
rf_randomCV.best_params_

{'n_estimators': 200,
 'min_samples_split': 2,
 'min_samples_leaf': 2,
 'max_features': 'log2',
 'max_depth': 340,
 'criterion': 'entropy'}

In [15]:
rf_randomCV

RandomizedSearchCV(cv=3, estimator=RandomForestClassifier(), n_iter=100,
                   n_jobs=-1,
                   param_distributions={'criterion': ['entropy', 'gini'],
                                        'max_depth': [10, 120, 230, 340, 450,
                                                      560, 670, 780, 890,
                                                      1000],
                                        'max_features': ['auto', 'sqrt',
                                                         'log2'],
                                        'min_samples_leaf': [1, 2, 4, 6, 8],
                                        'min_samples_split': [2, 5, 10, 14],
                                        'n_estimators': [200, 400, 600, 800,
                                                         1000, 1200, 1400, 1600,
                                                         1800, 2000]},
                   random_state=100, verbose=2)

In [16]:
best_random_grid=rf_randomCV.best_estimator_

In [17]:
from sklearn.metrics import accuracy_score
y_pred=best_random_grid.predict(X_test)
print(confusion_matrix(y_test,y_pred))
print('Accuracy score {}'.format(accuracy_score(y_test,y_pred)))
print('Classification report {}'.format(classification_report(y_test,y_pred)))

[[95 12]
 [13 34]]
Accuracy score 0.8376623376623377
Classification report               precision    recall  f1-score   support

           0       0.88      0.89      0.88       107
           1       0.74      0.72      0.73        47

    accuracy                           0.84       154
   macro avg       0.81      0.81      0.81       154
weighted avg       0.84      0.84      0.84       154



## GridSearch CV

In [18]:
rf_randomCV.best_params_

{'n_estimators': 200,
 'min_samples_split': 2,
 'min_samples_leaf': 2,
 'max_features': 'log2',
 'max_depth': 340,
 'criterion': 'entropy'}

In [19]:
from sklearn.model_selection import GridSearchCV

In [20]:
from sklearn.model_selection import GridSearchCV

param_grid = {
    'criterion': [rf_randomCV.best_params_['criterion']],
    'max_depth': [rf_randomCV.best_params_['max_depth']],
    'max_features': [rf_randomCV.best_params_['max_features']],
    'min_samples_leaf': [rf_randomCV.best_params_['min_samples_leaf'], 
                         rf_randomCV.best_params_['min_samples_leaf']+2, 
                         rf_randomCV.best_params_['min_samples_leaf'] + 4],
    'min_samples_split': [rf_randomCV.best_params_['min_samples_split'] - 2,
                          rf_randomCV.best_params_['min_samples_split'] - 1,
                          rf_randomCV.best_params_['min_samples_split'], 
                          rf_randomCV.best_params_['min_samples_split'] +1,
                          rf_randomCV.best_params_['min_samples_split'] + 2],
    'n_estimators': [rf_randomCV.best_params_['n_estimators'] - 200, rf_randomCV.best_params_['n_estimators'] - 100, 
                     rf_randomCV.best_params_['n_estimators'], 
                     rf_randomCV.best_params_['n_estimators'] + 100, rf_randomCV.best_params_['n_estimators'] + 200]
}

print(param_grid)

{'criterion': ['entropy'], 'max_depth': [340], 'max_features': ['log2'], 'min_samples_leaf': [2, 4, 6], 'min_samples_split': [0, 1, 2, 3, 4], 'n_estimators': [0, 100, 200, 300, 400]}


In [21]:
#### Fit the grid_search to the data

rf=RandomForestClassifier()
grid_search=GridSearchCV(estimator=rf,param_grid=param_grid,n_jobs=-1,cv=10,verbose=2)
grid_search.fit(X_train,y_train)

Fitting 10 folds for each of 75 candidates, totalling 750 fits


GridSearchCV(cv=10, estimator=RandomForestClassifier(), n_jobs=-1,
             param_grid={'criterion': ['entropy'], 'max_depth': [340],
                         'max_features': ['log2'],
                         'min_samples_leaf': [2, 4, 6],
                         'min_samples_split': [0, 1, 2, 3, 4],
                         'n_estimators': [0, 100, 200, 300, 400]},
             verbose=2)

In [22]:
grid_search.best_estimator_

RandomForestClassifier(criterion='entropy', max_depth=340, max_features='log2',
                       min_samples_leaf=6, min_samples_split=3,
                       n_estimators=300)

In [23]:
best_grid=grid_search.best_estimator_

In [24]:
best_grid

RandomForestClassifier(criterion='entropy', max_depth=340, max_features='log2',
                       min_samples_leaf=6, min_samples_split=3,
                       n_estimators=300)

In [26]:
y_pred=best_grid.predict(X_test)
print(confusion_matrix(y_test,y_pred))
print('Accuracy score: {}'.format(accuracy_score(y_test,y_pred)))
print(classification_report(y_test,y_pred))

[[97 10]
 [14 33]]
Accuracy score: 0.8441558441558441
              precision    recall  f1-score   support

           0       0.87      0.91      0.89       107
           1       0.77      0.70      0.73        47

    accuracy                           0.84       154
   macro avg       0.82      0.80      0.81       154
weighted avg       0.84      0.84      0.84       154



## Automated Hyperparameter Tuning

Automated Hyperparameter Tuning can be done by using techniques such as:

##### 1.Bayesian Optimization
##### 2.Gradient Descent
##### 3.Evolutionary Algorithms


### Bayesian Optimization

Bayesian optimization uses probability to find the minimum of a function. The final aim is to find the input value to a function which can gives us the lowest possible output value.It usually performs better than random,grid and manual search providing better performance in the testing phase and reduced optimization time. In Hyperopt, Bayesian Optimization can be implemented giving 3 three main parameters to the function fmin.

##### 1.Objective Function = defines the loss function to minimize.
##### 2.Domain Space = defines the range of input values to test (in Bayesian Optimization this space creates a probability distribution for each of the used Hyperparameters).
##### 3.Optimization Algorithm = defines the search algorithm to use to select the best input values to use in each new iteration.

In [28]:
from hyperopt import hp,fmin,tpe,STATUS_OK,Trials

In [29]:
space={'criterion':hp.choice('criterion',['entropy','gini']),                     ##choice--selection
      'max_depth':hp.quniform('max_depth',10,1200,10),                            ##quniform--selects integer values
      'max_features':hp.choice('max_features',['auto','sqrt','log2',None]),
      'min_samples_leaf':hp.uniform('min_samples_leaf',0,0.5),                    ##uniform--selects floats
      'min_samples_split':hp.uniform('min_samples_split',0,1),
      'n_estimators':hp.choice('n_estimators',[10, 50, 300, 750, 1200,1300,1500])}

In [30]:
space

{'criterion': <hyperopt.pyll.base.Apply at 0x15f1b59e430>,
 'max_depth': <hyperopt.pyll.base.Apply at 0x15f1b59e130>,
 'max_features': <hyperopt.pyll.base.Apply at 0x15f1b4698b0>,
 'min_samples_leaf': <hyperopt.pyll.base.Apply at 0x15f1b469a60>,
 'min_samples_split': <hyperopt.pyll.base.Apply at 0x15f1b469e50>,
 'n_estimators': <hyperopt.pyll.base.Apply at 0x15f1b469d60>}

In [31]:
def objective(space):
    model=RandomForestClassifier(criterion=space['criterion'],
                                max_depth=space['max_depth'],
                                max_features=space['max_features'],
                                min_samples_leaf=space['min_samples_leaf'],
                                min_samples_split=space['min_samples_split'],
                                n_estimators=space['n_estimators'])
    
    accuracy=cross_val_score(model,X_train,y_train,cv=5).mean()
    
    # We aim to maximize accuracy, therefore we return it as a negative value
    return {'loss':-accuracy,'status':STATUS_OK}

In [32]:
from sklearn.model_selection import cross_val_score
trials=Trials()
best=fmin(fn=objective,
         space=space,
         algo=tpe.suggest,
         max_evals=80,
         trials=trials)
best

100%|███████████████████████████████████████████████| 80/80 [29:24<00:00, 22.06s/trial, best loss: -0.7703585232573638]


{'criterion': 0,
 'max_depth': 1140.0,
 'max_features': 2,
 'min_samples_leaf': 0.08894422036326523,
 'min_samples_split': 0.10644335206994177,
 'n_estimators': 4}

In [33]:
crit={0:'entropy',1:'gini'}
feat={0:'auto',1:'sqrt',2:'log2',3:None}
est = {0: 10, 1: 50, 2: 300, 3: 750, 4: 1200,5:1300,6:1500}

print(crit[best['criterion']])
print(feat[best['max_features']])
print(est[best['n_estimators']])

entropy
log2
1200


In [34]:
best['min_samples_leaf']

0.08894422036326523

In [38]:
trainedforest=RandomForestClassifier(criterion=crit[best['criterion']],
                                     max_depth=best['max_depth'],
                                     max_features=feat[best['max_features']],
                                     min_samples_leaf=best['min_samples_leaf'],
                                     min_samples_split=best['min_samples_split'],
                                     n_estimators=est[best['n_estimators']]).fit(X_train,y_train)

predictionforest = trainedforest.predict(X_test)
print(confusion_matrix(y_test,predictionforest))
print(accuracy_score(y_test,predictionforest))
print(classification_report(y_test,predictionforest))
acc5 = accuracy_score(y_test,predictionforest)

[[96 11]
 [24 23]]
0.7727272727272727
              precision    recall  f1-score   support

           0       0.80      0.90      0.85       107
           1       0.68      0.49      0.57        47

    accuracy                           0.77       154
   macro avg       0.74      0.69      0.71       154
weighted avg       0.76      0.77      0.76       154



## Optimize hyperparameters of the model using Optuna

The hyperparameters of the above algorithm are n_estimators and max_depth for which we can try different values to see if the model accuracy can be improved. The objective function is modified to accept a trial object. This trial has several methods for sampling hyperparameters. We create a study to run the hyperparameter optimization and finally read the best hyperparameters.

In [42]:

import optuna
import sklearn.svm
def objective(trial):

    classifier = trial.suggest_categorical('classifier', ['RandomForest', 'SVC'])
    
    if classifier == 'RandomForest':
        n_estimators = trial.suggest_int('n_estimators', 200, 2000,10)
        max_depth = int(trial.suggest_float('max_depth', 10, 100, log=True))

        clf = sklearn.ensemble.RandomForestClassifier(
            n_estimators=n_estimators, max_depth=max_depth)
    else:
        c = trial.suggest_float('svc_c', 1e-10, 1e10, log=True)
        
        clf = sklearn.svm.SVC(C=c, gamma='auto')

    return sklearn.model_selection.cross_val_score(
        clf,X_train,y_train, n_jobs=-1, cv=3).mean()

In [43]:
study=optuna.create_study(direction='maximize')
study.optimize(objective,n_trials=100)

trial=study.best_trial

print('Accuracy: {}'.format(trial.value))
print("Best hyperparameters: {}".format(trial.params))

[32m[I 2021-06-21 03:04:28,901][0m A new study created in memory with name: no-name-8f9f2fec-d960-4464-a274-5bcff6228323[0m
[32m[I 2021-06-21 03:05:12,150][0m Trial 0 finished with value: 0.7491790212019768 and parameters: {'classifier': 'RandomForest', 'n_estimators': 950, 'max_depth': 90.84840052666893}. Best is trial 0 with value: 0.7491790212019768.[0m
[32m[I 2021-06-21 03:05:22,425][0m Trial 1 finished with value: 0.7491630798660928 and parameters: {'classifier': 'RandomForest', 'n_estimators': 920, 'max_depth': 14.245797627872903}. Best is trial 0 with value: 0.7491790212019768.[0m
[32m[I 2021-06-21 03:05:34,772][0m Trial 2 finished with value: 0.7540650406504065 and parameters: {'classifier': 'RandomForest', 'n_estimators': 1770, 'max_depth': 23.39991257944089}. Best is trial 2 with value: 0.7540650406504065.[0m
[32m[I 2021-06-21 03:05:36,875][0m Trial 3 finished with value: 0.640068547744301 and parameters: {'classifier': 'SVC', 'svc_c': 1554671939.415974}. Best i

[32m[I 2021-06-21 03:09:02,557][0m Trial 35 finished with value: 0.7507970667941973 and parameters: {'classifier': 'RandomForest', 'n_estimators': 1370, 'max_depth': 16.83733965425094}. Best is trial 2 with value: 0.7540650406504065.[0m
[32m[I 2021-06-21 03:09:07,632][0m Trial 36 finished with value: 0.7508050374621393 and parameters: {'classifier': 'RandomForest', 'n_estimators': 820, 'max_depth': 22.735434254996317}. Best is trial 2 with value: 0.7540650406504065.[0m
[32m[I 2021-06-21 03:09:07,855][0m Trial 37 finished with value: 0.640068547744301 and parameters: {'classifier': 'SVC', 'svc_c': 308.81430654568607}. Best is trial 2 with value: 0.7540650406504065.[0m
[32m[I 2021-06-21 03:09:13,923][0m Trial 38 finished with value: 0.7475290929379882 and parameters: {'classifier': 'RandomForest', 'n_estimators': 1030, 'max_depth': 18.953685132952348}. Best is trial 2 with value: 0.7540650406504065.[0m
[32m[I 2021-06-21 03:09:14,164][0m Trial 39 finished with value: 0.64006

[32m[I 2021-06-21 03:15:12,597][0m Trial 70 finished with value: 0.7524390243902439 and parameters: {'classifier': 'RandomForest', 'n_estimators': 1740, 'max_depth': 32.182895103888676}. Best is trial 68 with value: 0.7573170731707317.[0m
[32m[I 2021-06-21 03:15:23,919][0m Trial 71 finished with value: 0.7524390243902439 and parameters: {'classifier': 'RandomForest', 'n_estimators': 1890, 'max_depth': 20.8224345957276}. Best is trial 68 with value: 0.7573170731707317.[0m
[32m[I 2021-06-21 03:15:35,718][0m Trial 72 finished with value: 0.7475450342738722 and parameters: {'classifier': 'RandomForest', 'n_estimators': 1990, 'max_depth': 25.700930052100485}. Best is trial 68 with value: 0.7573170731707317.[0m
[32m[I 2021-06-21 03:15:47,703][0m Trial 73 finished with value: 0.7475450342738722 and parameters: {'classifier': 'RandomForest', 'n_estimators': 1800, 'max_depth': 46.59140536063879}. Best is trial 68 with value: 0.7573170731707317.[0m
[32m[I 2021-06-21 03:16:00,219][0

Accuracy: 0.7573170731707317
Best hyperparameters: {'classifier': 'RandomForest', 'n_estimators': 1960, 'max_depth': 36.19982225815653}


In [44]:
trial

FrozenTrial(number=68, values=[0.7573170731707317], datetime_start=datetime.datetime(2021, 6, 21, 3, 14, 35, 323991), datetime_complete=datetime.datetime(2021, 6, 21, 3, 14, 49, 804605), params={'classifier': 'RandomForest', 'n_estimators': 1960, 'max_depth': 36.19982225815653}, distributions={'classifier': CategoricalDistribution(choices=('RandomForest', 'SVC')), 'n_estimators': IntUniformDistribution(high=2000, low=200, step=10), 'max_depth': LogUniformDistribution(high=100.0, low=10.0)}, user_attrs={}, system_attrs={}, intermediate_values={}, trial_id=68, state=TrialState.COMPLETE, value=None)

In [45]:
study.best_params

{'classifier': 'RandomForest',
 'n_estimators': 1960,
 'max_depth': 36.19982225815653}

In [46]:
rf=RandomForestClassifier(n_estimators=330,max_depth=30)
rf.fit(X_train,y_train)

RandomForestClassifier(max_depth=30, n_estimators=330)

In [47]:
y_pred=rf.predict(X_test)
print(confusion_matrix(y_test,y_pred))
print(accuracy_score(y_test,y_pred))
print(classification_report(y_test,y_pred))

[[95 12]
 [15 32]]
0.8246753246753247
              precision    recall  f1-score   support

           0       0.86      0.89      0.88       107
           1       0.73      0.68      0.70        47

    accuracy                           0.82       154
   macro avg       0.80      0.78      0.79       154
weighted avg       0.82      0.82      0.82       154

