## All Techniques of Hyper Parameter Optimization

In [1]:
# https://www.youtube.com/watch?v=355u2bDqB7c

# https://github.com/krishnaik06/All-Hyperparamter-Optimization/blob/master/Hyper%20Parameter%20Optimization.ipynb

In [2]:
# 1. GridSearchCV
# 2. Randomized SearchCV
# 3. bayesian Optimization _Automate Hyperparameter Tunning (Hyperopt)
# Sequential Model Based Optimization (Tuning a scikit-learn estimator with skopt)
# 5. Optuna- Automate Hyoperparameter Tuning
# 6. Genetic Algorithms (TPOT Classifier)


# References
- https://github.com/fmfn/BayesianOptimization
- https://github.com/hyperopt/hyperopt
- https://www.jeremyjordan.me/hyperparameter-tuning/
- https://optuna.org/
- https://towardsdatascience.com/hyperparameters-optimization-526348bb8e2d(By Pier Paolo Ippolito )
- https://scikit-optimize.github.io/stable/auto_examples/hyperparameter-optimization.html

In [3]:
import warnings
warnings.filterwarnings("ignore")

In [4]:
import pandas as pd

In [5]:
df = pd.read_csv("diabetes.csv")
df.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [6]:
df.describe()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
count,768.0,768.0,768.0,768.0,768.0,768.0,768.0,768.0,768.0
mean,3.845052,120.894531,69.105469,20.536458,79.799479,31.992578,0.471876,33.240885,0.348958
std,3.369578,31.972618,19.355807,15.952218,115.244002,7.88416,0.331329,11.760232,0.476951
min,0.0,0.0,0.0,0.0,0.0,0.0,0.078,21.0,0.0
25%,1.0,99.0,62.0,0.0,0.0,27.3,0.24375,24.0,0.0
50%,3.0,117.0,72.0,23.0,30.5,32.0,0.3725,29.0,0.0
75%,6.0,140.25,80.0,32.0,127.25,36.6,0.62625,41.0,1.0
max,17.0,199.0,122.0,99.0,846.0,67.1,2.42,81.0,1.0


In [7]:
# The Min value is 0 in different column 
# We need to check the meaning 
df.Pregnancies.value_counts().head()
# in the case of 0 its Man

1    135
0    111
2    103
3     75
4     68
Name: Pregnancies, dtype: int64

In [8]:
# diabetes datas was from kaggle
# there are 8 features dependend features
print(df.shape)

(768, 9)


In [9]:
import numpy as np
df["Glucose"] = np.where(df["Glucose"] == 0, df["Glucose"].median(), df["Glucose"])

In [10]:
df.Insulin.value_counts().head()

0      374
105     11
140      9
130      9
120      8
Name: Insulin, dtype: int64

In [11]:
# Insulin 0 is not use ful so lets replace this with mean value
df["Insulin"]  = np.where(df["Insulin"] == 0, df["Insulin"].median(), df["Insulin"])

In [12]:
df["SkinThickness"] = np.where(df["SkinThickness"] == 0, df["SkinThickness"].median(), df["SkinThickness"])

In [13]:
# dependent and independent features
x = df.drop("Outcome", axis = 1)
y = df["Outcome"]

In [14]:
x.head(2)

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age
0,6,148.0,72,35.0,30.5,33.6,0.627,50
1,1,85.0,66,29.0,30.5,26.6,0.351,31


In [15]:
pd.DataFrame(x,columns=df.columns[:-1]).head(2)

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age
0,6,148.0,72,35.0,30.5,33.6,0.627,50
1,1,85.0,66,29.0,30.5,26.6,0.351,31


In [16]:
x.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age
0,6,148.0,72,35.0,30.5,33.6,0.627,50
1,1,85.0,66,29.0,30.5,26.6,0.351,31
2,8,183.0,64,23.0,30.5,23.3,0.672,32
3,1,89.0,66,23.0,94.0,28.1,0.167,21
4,0,137.0,40,35.0,168.0,43.1,2.288,33


In [17]:
y.head()

0    1
1    0
2    1
3    0
4    1
Name: Outcome, dtype: int64

In [18]:
y.value_counts()

0    500
1    268
Name: Outcome, dtype: int64

In [19]:
## We can do Binary Classification

In [20]:
# Train Test Split data
from sklearn.model_selection import train_test_split

x_train,x_test, y_train, y_test = train_test_split(x,y, test_size = 0.2, random_state = 0) # no shuffled

In [21]:
train_ = y_train.value_counts()
display(train_)
print("Percent of 0 in Training data is ", train_[0]/ len(y_train))
print("Percent of 1 in Training data is ", train_[1]/ len(y_train))

0    393
1    221
Name: Outcome, dtype: int64

Percent of 0 in Training data is  0.6400651465798045
Percent of 1 in Training data is  0.35993485342019543


In [22]:
test_ = y_test.value_counts()
display(test_)
print("Percent of 0 in test data set is ", test_[0]/ len(y_test))
print("Percent of 1 in test data set is ", test_[1]/ len(y_test))

0    107
1     47
Name: Outcome, dtype: int64

Percent of 0 in test data set is  0.6948051948051948
Percent of 1 in test data set is  0.3051948051948052


In [23]:
# We can see in the Both case 1 is little less than 0
# Which may effect the overall results

## Imbalanced data set

In [24]:
from sklearn.ensemble import RandomForestClassifier
rf_classifier = RandomForestClassifier(n_estimators=10).fit(x_train, y_train)
prediction = rf_classifier.predict(x_test)

In [25]:
# Lets check Confusion Matrix
from sklearn.metrics import confusion_matrix,classification_report, accuracy_score

In [26]:
print(confusion_matrix(y_test, prediction))

[[94 13]
 [19 28]]


In [27]:
print(accuracy_score(y_test, prediction))

0.7922077922077922


In [28]:
print(classification_report(y_test, prediction))

              precision    recall  f1-score   support

           0       0.83      0.88      0.85       107
           1       0.68      0.60      0.64        47

    accuracy                           0.79       154
   macro avg       0.76      0.74      0.75       154
weighted avg       0.79      0.79      0.79       154



In [29]:
# The main parameters used by a Random Forest Classifier are:

**criterion** = the function usetd to evaluate the quality of a split <br>
**max_depth** = maximum number of levels allowed in each tree<br>
**max_feature** = maximum number of features considered when splitting a node<br>
**min_samples_leaf** = minimum number of samples which can be stored in a tree leaf<br>
**min_samples_split** = minimum number of samples necessary in a node to cause node splitting<br>
**n_estimators** = number of trees in the ensemble<br>

## Randomized Search CV 

In [30]:
import numpy as np
from sklearn.model_selection import RandomizedSearchCV

# Number of trees in random forest
n_estimators = [ int(x) for x in np.linspace(start = 200, stop = 2000, num = 10)]

# Number of features to consider at every split
max_features = ["auto", "sqrt", "log2"]

# Maximum number of lebels in tree
max_depth = [int(x) for x in np.linspace(10, 1000, 10)]

# Maximum Number of samples required to split a node
min_samples_split = [2,5,10, 14]

# Minimum number of samples required at each leaf node
min_samples_leaf = [1,2, 4,6,8]

# Create the random grid
random_grid = {
    'n_estimators' : n_estimators,
    "max_features" : max_features,
    "max_depth" : max_depth,
    "min_samples_split" : min_samples_split,
    "min_samples_leaf" : min_samples_leaf,
    "criterion" :["entropy", "gini"]
}

print(random_grid)

{'n_estimators': [200, 400, 600, 800, 1000, 1200, 1400, 1600, 1800, 2000], 'max_features': ['auto', 'sqrt', 'log2'], 'max_depth': [10, 120, 230, 340, 450, 560, 670, 780, 890, 1000], 'min_samples_split': [2, 5, 10, 14], 'min_samples_leaf': [1, 2, 4, 6, 8], 'criterion': ['entropy', 'gini']}


### apply Randomized Search CV

In [31]:
rf = RandomForestClassifier()

rf_randomcv = RandomizedSearchCV(estimator = rf, param_distributions = random_grid, n_iter = 100, 
                                      cv = 3, verbose = 2, random_state = 100, n_jobs = -1)

# cv = cross validation
# n_jobs = Applying all cores (16 cores)
# fit the randomized model
rf_randomcv.fit(x_train, y_train)

Fitting 3 folds for each of 100 candidates, totalling 300 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done   9 tasks      | elapsed:    4.2s
[Parallel(n_jobs=-1)]: Done 130 tasks      | elapsed:   24.3s
[Parallel(n_jobs=-1)]: Done 300 out of 300 | elapsed:   56.8s finished


RandomizedSearchCV(cv=3, estimator=RandomForestClassifier(), n_iter=100,
                   n_jobs=-1,
                   param_distributions={'criterion': ['entropy', 'gini'],
                                        'max_depth': [10, 120, 230, 340, 450,
                                                      560, 670, 780, 890,
                                                      1000],
                                        'max_features': ['auto', 'sqrt',
                                                         'log2'],
                                        'min_samples_leaf': [1, 2, 4, 6, 8],
                                        'min_samples_split': [2, 5, 10, 14],
                                        'n_estimators': [200, 400, 600, 800,
                                                         1000, 1200, 1400, 1600,
                                                         1800, 2000]},
                   random_state=100, verbose=2)

In [32]:
rf_randomcv.best_params_

{'n_estimators': 1800,
 'min_samples_split': 2,
 'min_samples_leaf': 1,
 'max_features': 'log2',
 'max_depth': 560,
 'criterion': 'entropy'}

In [33]:
rf_randomcv

RandomizedSearchCV(cv=3, estimator=RandomForestClassifier(), n_iter=100,
                   n_jobs=-1,
                   param_distributions={'criterion': ['entropy', 'gini'],
                                        'max_depth': [10, 120, 230, 340, 450,
                                                      560, 670, 780, 890,
                                                      1000],
                                        'max_features': ['auto', 'sqrt',
                                                         'log2'],
                                        'min_samples_leaf': [1, 2, 4, 6, 8],
                                        'min_samples_split': [2, 5, 10, 14],
                                        'n_estimators': [200, 400, 600, 800,
                                                         1000, 1200, 1400, 1600,
                                                         1800, 2000]},
                   random_state=100, verbose=2)

In [34]:
best_random_grid = rf_randomcv.best_estimator_

In [35]:
from sklearn.metrics import accuracy_score
y_pred = best_random_grid.predict(x_test)
print(confusion_matrix(y_test, y_pred))

[[94 13]
 [13 34]]


In [36]:
print("Accuracy Score {}",format(accuracy_score(y_test, y_pred)))

Accuracy Score {} 0.8311688311688312


In [37]:
# Accuracy Score is little bit increased , We can see

print("Classification Report : {}".format(classification_report(y_test, y_pred)))

Classification Report :               precision    recall  f1-score   support

           0       0.88      0.88      0.88       107
           1       0.72      0.72      0.72        47

    accuracy                           0.83       154
   macro avg       0.80      0.80      0.80       154
weighted avg       0.83      0.83      0.83       154



### Grid Search CV

In [38]:
# check Randomized Search parameter
rf_randomcv.best_params_

{'n_estimators': 1800,
 'min_samples_split': 2,
 'min_samples_leaf': 1,
 'max_features': 'log2',
 'max_depth': 560,
 'criterion': 'entropy'}

In [39]:
from sklearn.model_selection import GridSearchCV

param_grid = {
    "criterion" : [rf_randomcv.best_params_["criterion"]],
    
    "max_depth" : [rf_randomcv.best_params_["max_depth"]],
    
    "max_features" : [rf_randomcv.best_params_["max_features"]],
    
    "min_samples_leaf" : [rf_randomcv.best_params_["min_samples_leaf"],
                         rf_randomcv.best_params_["min_samples_leaf"]+2,
                          rf_randomcv.best_params_["min_samples_leaf"]+4],
                          
    "min_samples_split":[rf_randomcv.best_params_["min_samples_split"] -2,
                          rf_randomcv.best_params_["min_samples_split"] -1,
                          rf_randomcv.best_params_["min_samples_split"],
                          rf_randomcv.best_params_["min_samples_split"] +1 ,
                          rf_randomcv.best_params_["min_samples_split"] +2],
                           
    "n_estimators" : [rf_randomcv.best_params_["n_estimators"] - 200, 
                      rf_randomcv.best_params_["n_estimators"] - 100,
                      rf_randomcv.best_params_["n_estimators"],
                      rf_randomcv.best_params_["n_estimators"] +100,
                      rf_randomcv.best_params_["n_estimators"] + 200]
}

print(param_grid)

{'criterion': ['entropy'], 'max_depth': [560], 'max_features': ['log2'], 'min_samples_leaf': [1, 3, 5], 'min_samples_split': [0, 1, 2, 3, 4], 'n_estimators': [1600, 1700, 1800, 1900, 2000]}


In [40]:
## Fid the grid_search to the data

rf = RandomForestClassifier()
grid_search = GridSearchCV(estimator=rf, param_grid=param_grid, cv = 10, n_jobs= -1, verbose = 2)

In [41]:
grid_search.fit(x_train, y_train)

Fitting 10 folds for each of 75 candidates, totalling 750 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done   9 tasks      | elapsed:    0.8s
[Parallel(n_jobs=-1)]: Done 130 tasks      | elapsed:   20.7s
[Parallel(n_jobs=-1)]: Done 333 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 616 tasks      | elapsed:  2.4min
[Parallel(n_jobs=-1)]: Done 750 out of 750 | elapsed:  3.1min finished


GridSearchCV(cv=10, estimator=RandomForestClassifier(), n_jobs=-1,
             param_grid={'criterion': ['entropy'], 'max_depth': [560],
                         'max_features': ['log2'],
                         'min_samples_leaf': [1, 3, 5],
                         'min_samples_split': [0, 1, 2, 3, 4],
                         'n_estimators': [1600, 1700, 1800, 1900, 2000]},
             verbose=2)

In [42]:
grid_search.best_estimator_

RandomForestClassifier(criterion='entropy', max_depth=560, max_features='log2',
                       min_samples_split=4, n_estimators=1800)

In [43]:
best_grid = grid_search.best_estimator_

In [44]:
y_pred = best_grid.predict(x_test)

print(confusion_matrix(y_test, y_pred))
print("Accuracy Score {}".format(accuracy_score(y_test, y_pred)))

[[94 13]
 [13 34]]
Accuracy Score 0.8311688311688312


In [45]:
print("Classification Report : {}".format(classification_report(y_test, y_pred)))

Classification Report :               precision    recall  f1-score   support

           0       0.88      0.88      0.88       107
           1       0.72      0.72      0.72        47

    accuracy                           0.83       154
   macro avg       0.80      0.80      0.80       154
weighted avg       0.83      0.83      0.83       154



### Automated Hyperparameter Tuning

In [46]:
#https://github.com/krishnaik06/All-Hyperparamter-Optimization/blob/master/Hyper%20Parameter%20Optimization.ipynb

In [47]:
# Bayesian Optimization
# Gradient Descent
# Evolutionary Algorithms

### Bayesian Optimization
Bayesian optimization uses probability to find the minimum of a function. The final aim is to find the input value to a function which can gives usthe lowest possible output value. It usually performs better than random than random, grid and manual search providing better performance in the testing phase and reduced optimization time. In Hyperopt, Bayesian Optimization can be impolemented giving 3 three main parameters to the function fmin.

- Objective Function = defines the loss function to minimize
- Domain Space = defines the range of the input values to test (in Bayesian Optimization  this space creates a propability distribution for each of the used Hyperparameters)
- Optimization Algorithm = defines the search algorithm to use to select the best input values to use in each new iteration

In [48]:
# pip install hyperopt

In [49]:
from hyperopt import hp, fmin, tpe, STATUS_OK, Trials

In [50]:
space = {"criterion" : hp.choice("criterion", ["entropy", "gini"]),
        "max_depth" : hp.quniform("max_depth", 10,1200, 10),
        "max_features": hp.choice("max_features", ["auto", "sqrt", "log2", None]),
        "min_samples_leaf": hp.uniform("min_samples_leaf", 0, 0.5),
         "min_samples_split" : hp.uniform("min_samples_split")
        }

# hp.choise selection mechanism inside list
# hp.quniform integer number between numbers (select integer value between number)
# hp.uniform to select floating number
#

In [51]:
space

{'criterion': <hyperopt.pyll.base.Apply at 0x1d19290ce20>,
 'max_depth': <hyperopt.pyll.base.Apply at 0x1d190366dc0>,
 'max_features': <hyperopt.pyll.base.Apply at 0x1d1929150a0>,
 'min_samples_leaf': <hyperopt.pyll.base.Apply at 0x1d1929152b0>,
 'min_samples_split': <hyperopt.pyll.base.Apply at 0x1d192915370>}

In [54]:

def objective(space):
    model = RandomForestClassifier(criterion = space['criterion'], max_depth = space['max_depth'],
                                 max_features = space['max_features'],
                                 min_samples_leaf = space['min_samples_leaf'],
                                 min_samples_split = space['min_samples_split'],
                                 n_estimators = space['n_estimators'], 
                                 )
    
    accuracy = cross_val_score(model, X_train, y_train, cv = 5).mean()

    # We aim to maximize accuracy, therefore we return it as a negative value
    return {'loss': -accuracy, 'status': STATUS_OK }

In [55]:

from sklearn.model_selection import cross_val_score
trials = Trials()
best = fmin(fn= objective,
            space= space,
            algo= tpe.suggest,
            max_evals = 80,
            trials= trials)
best

  0%|          | 0/80 [00:00<?, ?trial/s, best loss=?]


TypeError: ap_uniform_sampler() missing 2 required positional arguments: 'low' and 'high'