# Hyper-Parameter Tuning
Hyper-parameter tuning is the process of selecting the best combination of hyper-parameters for a model. This is typically done using a `grid search` or `random search`. The goal is to find the combination of hyper-parameters that results in the best performance on the validation set.

In [2]:
# import libraries
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [3]:
# load dataset
# `load_iris()` returns a dict-like container; the cells below read its
# `.data` (feature matrix) and `.target` (class labels) attributes.
from sklearn.datasets import load_iris
iris = load_iris()
# NOTE(review): displaying the bare object prints every array in full;
# consider showing only a summary (e.g. `iris.data.shape`) when sharing.
iris

{'data': array([[5.1, 3.5, 1.4, 0.2],
        [4.9, 3. , 1.4, 0.2],
        [4.7, 3.2, 1.3, 0.2],
        [4.6, 3.1, 1.5, 0.2],
        [5. , 3.6, 1.4, 0.2],
        [5.4, 3.9, 1.7, 0.4],
        [4.6, 3.4, 1.4, 0.3],
        [5. , 3.4, 1.5, 0.2],
        [4.4, 2.9, 1.4, 0.2],
        [4.9, 3.1, 1.5, 0.1],
        [5.4, 3.7, 1.5, 0.2],
        [4.8, 3.4, 1.6, 0.2],
        [4.8, 3. , 1.4, 0.1],
        [4.3, 3. , 1.1, 0.1],
        [5.8, 4. , 1.2, 0.2],
        [5.7, 4.4, 1.5, 0.4],
        [5.4, 3.9, 1.3, 0.4],
        [5.1, 3.5, 1.4, 0.3],
        [5.7, 3.8, 1.7, 0.3],
        [5.1, 3.8, 1.5, 0.3],
        [5.4, 3.4, 1.7, 0.2],
        [5.1, 3.7, 1.5, 0.4],
        [4.6, 3.6, 1. , 0.2],
        [5.1, 3.3, 1.7, 0.5],
        [4.8, 3.4, 1.9, 0.2],
        [5. , 3. , 1.6, 0.2],
        [5. , 3.4, 1.6, 0.4],
        [5.2, 3.5, 1.5, 0.2],
        [5.2, 3.4, 1.4, 0.2],
        [4.7, 3.2, 1.6, 0.2],
        [4.8, 3.1, 1.6, 0.2],
        [5.4, 3.4, 1.5, 0.4],
        [5.2, 4.1, 1.5, 0.1],
  

In [4]:
# split the loaded dataset into the feature matrix (X) and the labels (y)
X, y = iris.data, iris.target

## Grid Search CV

In [8]:
# base estimator; a fixed seed makes the forest (and therefore the
# cross-validation scores and the selected parameters) reproducible
# after a kernel restart
model = RandomForestClassifier(random_state=42)

# candidate values to try: 6 * 7 * 2 = 84 combinations
param_grid = {
    'n_estimators': [50, 100, 200, 300, 400, 500],
    'max_depth': [4, 5, 6, 7, 8, 9, 10],
    'criterion' : ['gini', 'entropy']
}

# exhaustive search: every combination in the grid is scored by accuracy
# with 5-fold cross-validation; n_jobs=-1 runs the fits in parallel and
# verbose=1 prints the fit-count summary
grid_search = GridSearchCV(estimator=model,
                           param_grid=param_grid,
                           cv=5,
                           scoring='accuracy',
                           n_jobs=-1,
                           verbose=1)

# fit the model
grid_search.fit(X, y)

# print the best parameters
print(f'Best Parameters: \n{grid_search.best_params_}')

Fitting 5 folds for each of 84 candidates, totalling 420 fits
Best Parameters: 
{'criterion': 'gini', 'max_depth': 4, 'n_estimators': 50}


## Random Search CV

In [11]:
%%time
# call the model
model = RandomForestClassifier()
from sklearn.model_selection import RandomizedSearchCV
# create the parameter grid
param_grid = {
    'n_estimators': [50, 100, 200, 300, 400, 500],
    'max_depth': [4, 5, 6, 7, 8, 9, 10],
    'criterion' : ['gini', 'entropy']
}

# create the grid search object
grid_search = RandomizedSearchCV(estimator=model, 
                           param_distributions=param_grid, 
                           cv=5,
                           scoring='accuracy',
                           n_jobs=-1, 
                           verbose=1,
                           n_iter=10)
# fit the model
grid_search.fit(X, y)

# print the best parameters
print(f'Best Parameters: \n{grid_search.best_params_}')

Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best Parameters: 
{'n_estimators': 50, 'max_depth': 8, 'criterion': 'entropy'}
CPU times: total: 188 ms
Wall time: 18.2 s
