# All Techniques Of Hyper Parameter Optimization

1. GridSearchCV
2. RandomizedSearchCV
3. Bayesian Optimization -Automate Hyperparameter Tuning (Hyperopt)
4. Sequential Model Based Optimization(Tuning a scikit-learn estimator with skopt)
4. Optuna- Automate Hyperparameter Tuning
5. Genetic Algorithms (TPOT Classifier)

In [1]:
import pandas as pd
import numpy as np

In [2]:
df=pd.read_csv('diabetes.csv')
df.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [3]:
df.isnull().sum().sum()

0

In [4]:
df['Glucose']=np.where(df['Glucose']==0,df['Glucose'].median(),df['Glucose'])
df['BloodPressure']=np.where(df['BloodPressure']==0,df['BloodPressure'].median(),df['BloodPressure'])
df['SkinThickness']=np.where(df['SkinThickness']==0,df['SkinThickness'].median(),df['SkinThickness'])
df['Insulin']=np.where(df['Insulin']==0,df['Insulin'].median(),df['Insulin'])
df['BMI']=np.where(df['BMI']==0,df['BMI'].median(),df['BMI'])
df['Age']=np.where(df['Age']==0,df['Age'].median(),df['Age'])
df.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148.0,72.0,35.0,30.5,33.6,0.627,50.0,1
1,1,85.0,66.0,29.0,30.5,26.6,0.351,31.0,0
2,8,183.0,64.0,23.0,30.5,23.3,0.672,32.0,1
3,1,89.0,66.0,23.0,94.0,28.1,0.167,21.0,0
4,0,137.0,40.0,35.0,168.0,43.1,2.288,33.0,1


In [5]:
x=df.drop(labels=['Outcome'],axis=1)
y=df['Outcome']
x.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age
0,6,148.0,72.0,35.0,30.5,33.6,0.627,50.0
1,1,85.0,66.0,29.0,30.5,26.6,0.351,31.0
2,8,183.0,64.0,23.0,30.5,23.3,0.672,32.0
3,1,89.0,66.0,23.0,94.0,28.1,0.167,21.0
4,0,137.0,40.0,35.0,168.0,43.1,2.288,33.0


In [6]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2,random_state=42,stratify=y)

In [7]:
from sklearn.metrics import accuracy_score,confusion_matrix,classification_report
rf=RandomForestClassifier(n_estimators=10).fit(x_train,y_train)
y_pred=rf.predict(x_test)
print('accuracy_score:',accuracy_score(y_test,y_pred))
print('confusion_matrix:',confusion_matrix(y_test,y_pred))
print('classification_report:',classification_report(y_test,y_pred))

accuracy_score: 0.7532467532467533
confusion_matrix: [[87 13]
 [25 29]]
classification_report:               precision    recall  f1-score   support

           0       0.78      0.87      0.82       100
           1       0.69      0.54      0.60        54

    accuracy                           0.75       154
   macro avg       0.73      0.70      0.71       154
weighted avg       0.75      0.75      0.74       154



#### Randomized Search Cv

RandomizedSearchCV randomly selects few parameters combinations defined and finds the best among them

In [10]:
from sklearn.model_selection import RandomizedSearchCV
# Number of trees in random forest
n_estimators = [int(x) for x in np.linspace(start = 10, stop = 100, num = 10)]
# Number of features to consider at every split
max_features = ['auto', 'sqrt','log2']
# Maximum number of levels in tree
max_depth = [int(x) for x in np.linspace(2, 9,4)]
# Minimum number of samples required to split a node
min_samples_split = [2,3,5,7]
# Minimum number of samples required at each leaf node
min_samples_leaf = [1, 2, 3,5,7]
# Create the random grid
random_grid = {'n_estimators': n_estimators,
               'max_features': max_features,
               'max_depth': max_depth,
               'min_samples_split': min_samples_split,
               'min_samples_leaf': min_samples_leaf,
              'criterion':['entropy','gini']}
print(random_grid)

{'n_estimators': [10, 20, 30, 40, 50, 60, 70, 80, 90, 100], 'max_features': ['auto', 'sqrt', 'log2'], 'max_depth': [2, 4, 6, 9], 'min_samples_split': [2, 3, 5, 7], 'min_samples_leaf': [1, 2, 3, 5, 7], 'criterion': ['entropy', 'gini']}


In [11]:
rf=RandomForestClassifier()
random_rf=RandomizedSearchCV(rf,param_distributions=random_grid,n_iter=100,verbose=2,random_state=42,n_jobs=-1)
random_rf.fit(x_train,y_train)
y_pred=random_rf.predict(x_test)

Fitting 5 folds for each of 100 candidates, totalling 500 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done  33 tasks      | elapsed:   28.0s
[Parallel(n_jobs=-1)]: Done 154 tasks      | elapsed:   42.6s
[Parallel(n_jobs=-1)]: Done 357 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 500 out of 500 | elapsed:  1.4min finished


In [12]:
random_rf.best_estimator_

RandomForestClassifier(criterion='entropy', max_depth=6, max_features='log2',
                       min_samples_split=5, n_estimators=80)

In [13]:
print('accuracy_score:',accuracy_score(y_test,y_pred))
print('confusion_matrix:',confusion_matrix(y_test,y_pred))
print('classification_report:',classification_report(y_test,y_pred))

accuracy_score: 0.7337662337662337
confusion_matrix: [[83 17]
 [24 30]]
classification_report:               precision    recall  f1-score   support

           0       0.78      0.83      0.80       100
           1       0.64      0.56      0.59        54

    accuracy                           0.73       154
   macro avg       0.71      0.69      0.70       154
weighted avg       0.73      0.73      0.73       154



#### GridSearch CV

In GridSearch CV , all combinations of paramters given are tried and the best one is selected. This gives better results than Randomized cv but it takes more time.

We can select params range for grid search using the results obtained from Random search

In [15]:
from sklearn.model_selection import GridSearchCV

n_estimators = [70,80,100]

max_features = ['sqrt','log2']

max_depth = [5,6,7]

min_samples_split = [4,5,6]

#min_samples_leaf = [1, 2, 3,5,7]

random_grid = {'n_estimators': n_estimators,
               'max_features': max_features,
               'max_depth': max_depth,
               'min_samples_split': min_samples_split,
               #'min_samples_leaf': min_samples_leaf,
              'criterion':['entropy','gini']}
print(random_grid)

{'n_estimators': [70, 80, 100], 'max_features': ['sqrt', 'log2'], 'max_depth': [5, 6, 7], 'min_samples_split': [4, 5, 6], 'criterion': ['entropy', 'gini']}


In [18]:
rf=RandomForestClassifier()
grid_rf=GridSearchCV(rf,param_grid=random_grid,verbose=2,cv=3,n_jobs=-1)
grid_rf.fit(x_train,y_train)
y_pred=grid_rf.predict(x_test)

Fitting 3 folds for each of 108 candidates, totalling 324 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done  33 tasks      | elapsed:    9.9s
[Parallel(n_jobs=-1)]: Done 154 tasks      | elapsed:   29.8s
[Parallel(n_jobs=-1)]: Done 324 out of 324 | elapsed:   56.3s finished


In [19]:
grid_rf.best_estimator_

RandomForestClassifier(max_depth=6, max_features='log2', min_samples_split=6)

In [20]:
print('accuracy_score:',accuracy_score(y_test,y_pred))
print('confusion_matrix:',confusion_matrix(y_test,y_pred))
print('classification_report:',classification_report(y_test,y_pred))

accuracy_score: 0.7272727272727273
confusion_matrix: [[83 17]
 [25 29]]
classification_report:               precision    recall  f1-score   support

           0       0.77      0.83      0.80       100
           1       0.63      0.54      0.58        54

    accuracy                           0.73       154
   macro avg       0.70      0.68      0.69       154
weighted avg       0.72      0.73      0.72       154



# Automated Hyperparameter Tuning
Automated Hyperparameter Tuning can be done by using techniques such as 
- Bayesian Optimization
- Gradient Descent
- Evolutionary Algorithms

# Bayesian Optimization
Bayesian optimization uses probability to find the minimum of a function. The final aim is to find the input value to a function which can gives us the lowest possible output value.It usually performs better than random,grid and manual search providing better performance in the testing phase and reduced optimization time.
In Hyperopt, Bayesian Optimization can be implemented giving 3 three main parameters to the function fmin.

- Objective Function = defines the loss function to minimize.
- Domain Space = defines the range of input values to test (in Bayesian Optimization this space creates a probability distribution for each of the used Hyperparameters).
- Optimization Algorithm = defines the search algorithm to use to select the best input values to use in each new iteration.

#credits to Krish Naik for best material and explanation about Hyper param tuning

In [23]:
from hyperopt import hp,fmin,tpe,STATUS_OK,Trials

In [34]:
space = {'criterion': hp.choice('criterion', ['entropy', 'gini']),
        'max_depth': hp.quniform('max_depth', 10, 100, 10),
        'max_features': hp.choice('max_features', ['auto', 'sqrt','log2', None]),
        'min_samples_leaf': hp.uniform('min_samples_leaf', 0, 0.5),
        'min_samples_split' : hp.uniform ('min_samples_split', 0, 1),
        'n_estimators' : hp.choice('n_estimators', [10, 50,100,200])
    }
space

{'criterion': <hyperopt.pyll.base.Apply at 0x2382da66308>,
 'max_depth': <hyperopt.pyll.base.Apply at 0x2382c7d31c8>,
 'max_features': <hyperopt.pyll.base.Apply at 0x2382da95208>,
 'min_samples_leaf': <hyperopt.pyll.base.Apply at 0x2382c4d1ac8>,
 'min_samples_split': <hyperopt.pyll.base.Apply at 0x2382d5f7c48>,
 'n_estimators': <hyperopt.pyll.base.Apply at 0x2382db4d988>}

In [35]:
from sklearn.model_selection import cross_val_score
def objective(space):
    model = RandomForestClassifier(criterion = space['criterion'], max_depth = space['max_depth'],
                                 max_features = space['max_features'],
                                 min_samples_leaf = space['min_samples_leaf'],
                                 min_samples_split = space['min_samples_split'],
                                 n_estimators = space['n_estimators']
                                 )
    
    accuracy = cross_val_score(model, x_train, y_train, cv = 5)
    print(accuracy)
    accuracy=accuracy.mean()
    return {'loss': -accuracy, 'status': STATUS_OK }

In [36]:
#print(space['criterion'],space['max_depth'],space['max_features'],space['min_samples_leaf'],space['min_samples_split'],space['n_estimators'])

In [37]:
trials = Trials()
best = fmin(fn= objective,
            space= space,
            algo= tpe.suggest,
            max_evals = 50,
            trials= trials)
print(best)

[0.6504065 0.6504065 0.6504065 0.6504065 0.6557377]                                                                    
[0.81300813 0.74796748 0.68292683 0.73170732 0.7704918 ]                                                               
[0.6504065 0.6504065 0.6504065 0.6504065 0.6557377]                                                                    
[0.6504065 0.6504065 0.6504065 0.6504065 0.6557377]                                                                    
[0.6504065 0.6504065 0.6504065 0.6504065 0.6557377]                                                                    
[0.6504065 0.6504065 0.6504065 0.6504065 0.6557377]                                                                    
[0.6504065 0.6504065 0.6504065 0.6504065 0.6557377]                                                                    
[0.6504065 0.6504065 0.6504065 0.6504065 0.6557377]                                                                    
[0.77235772 0.74796748 0.72357724 0.7073

In [38]:
crit = {0: 'entropy', 1: 'gini'}
feat = {0: 'auto', 1: 'sqrt', 2: 'log2', 3: None}
est = {0: 10, 1: 50, 2: 100, 3: 200}


print(crit[best['criterion']])
print(feat[best['max_features']])
print(est[best['n_estimators']])

gini
sqrt
10


In [40]:
forest = RandomForestClassifier(criterion = crit[best['criterion']], max_depth = best['max_depth'], 
                                       max_features = feat[best['max_features']], 
                                       min_samples_leaf = best['min_samples_leaf'], 
                                       min_samples_split = best['min_samples_split'], 
                                       n_estimators = est[best['n_estimators']]).fit(x_train,y_train)
predictionforest = forest.predict(x_test)
print(confusion_matrix(y_test,predictionforest))
print(accuracy_score(y_test,predictionforest))
print(classification_report(y_test,predictionforest))
acc = accuracy_score(y_test,predictionforest)

[[89 11]
 [30 24]]
0.7337662337662337
              precision    recall  f1-score   support

           0       0.75      0.89      0.81       100
           1       0.69      0.44      0.54        54

    accuracy                           0.73       154
   macro avg       0.72      0.67      0.68       154
weighted avg       0.73      0.73      0.72       154



If we tune the paramters further, we can get better results by using the above hyper parameter tuning.