# Hypertuning the model
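Hypertuning (hyperparameter tuning) is a technique used to find the hyperparameter values that give the best performance for a model. We do this by training and evaluating the model with different hyperparameter values and keeping the combination that scores best.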

**Types of hyperparameter tuning methods**
- grid search 
- random search
- bayesian optimization
- gradient based optimization


# Cross validation
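Cross validation is a technique used to estimate how well a model generalizes to unseen data. We do this by splitting the data into several folds, training the model on all but one fold, validating it on the held-out fold, and repeating until every fold has been used for validation once.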

**Types of cross validation**
- K-fold
- Stratified K-fold

In [2]:
#importing libraries
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [3]:
# Load the iris dataset bundled with scikit-learn.
from sklearn.datasets import load_iris

iris = load_iris()
# Features go in X and the target labels in y; both are reused by the search cells below.
X, y = iris.data, iris.target

In [None]:
# Exhaustive grid search over random forest hyperparameters.
# Base estimator whose hyperparameters are being tuned.
rfc = RandomForestClassifier()

# Parameter grid: every combination of the values below is evaluated
# (10 * 2 * 10 * 2 = 400 candidates, i.e. 2000 fits with 5-fold CV).
param_grid = {
    # number of trees in the forest
    'n_estimators': [100, 200, 300, 400, 500, 600, 700, 800, 900, 1000],
    # number of features to consider when looking for the best split.
    # NOTE: 'auto' is intentionally not listed. For classifiers it was an
    # alias of 'sqrt'; it was deprecated in scikit-learn 1.1 and removed in
    # 1.3, where it makes every fit that uses it fail.
    'max_features': ['sqrt', 'log2'],
    # maximum depth of each tree
    'max_depth': [10, 20, 30, 40, 50, 60, 70, 80, 90, 100],
    # function used to measure the quality of a split
    'criterion': ['gini', 'entropy'],
}

# Create the grid search object.
grid_search = GridSearchCV(
    estimator=rfc,
    param_grid=param_grid,  # combinations to try
    cv=5,  # number of folds in the cross-validation
    scoring='accuracy',  # metric used to rank the candidates
    verbose=1,  # amount of progress information to print
    n_jobs=-1,  # run the fits in parallel on all available cores
)

# Fit the grid search object on the full dataset.
grid_search.fit(X, y)

# Print the best parameters found by the search.
print(f'best parameters: {grid_search.best_params_}')

# Randomized Search
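Randomized Search is a technique used to find good parameters for a model without trying every combination. Instead of evaluating the whole grid like grid search does, it samples a fixed number of parameter combinations at random from the given values (10 by default) and keeps the one that scores best.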

In [None]:
# Randomized search: same estimator family as the grid search, but only a
# random sample of the candidate combinations is evaluated.
from sklearn.model_selection import RandomizedSearchCV

# Fresh, untuned random forest to search over.
rfc = RandomForestClassifier()

# Candidate values to sample from.
param_grid = {
    'n_estimators': list(range(100, 1001, 100)),  # 100, 200, ..., 1000 trees
    'max_depth': list(range(10, 101, 10)),  # 10, 20, ..., 100 levels
    'criterion': ['gini', 'entropy'],  # split-quality measure
}

# Build the search; n_iter is not set, so the library default number of
# candidates is sampled.
random_search = RandomizedSearchCV(
    estimator=rfc,
    param_distributions=param_grid,
    scoring='accuracy',  # metric used to rank the candidates
    cv=5,  # number of cross-validation folds
    n_jobs=-1,  # run the fits in parallel
    verbose=1,  # print progress information
)

# Run the search on the full dataset.
random_search.fit(X, y)

# Report the best combination that was sampled.
print(f'best parameters: {random_search.best_params_}')

Fitting 5 folds for each of 10 candidates, totalling 50 fits
best parameters: {'n_estimators': 400, 'max_depth': 60, 'criterion': 'entropy'}
