<a href="https://colab.research.google.com/github/pb-roshith/Hyperparameter-Tuning-Techniques/blob/main/Hyperparameter_Tuning_Techniques.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
df = pd.read_csv('/content/breast_cancer.csv')
x = df.iloc[:, :-1].values
y = df.iloc[:, -1].values

In [3]:
y = np.where(y == 2, 0, np.where(y == 4, 1, y))

In [4]:
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=0)

In [5]:
from sklearn.ensemble import RandomForestClassifier
classifier = RandomForestClassifier()
classifier.fit(x_train, y_train)

In [6]:
from sklearn.metrics import mean_absolute_error
y_pred = classifier.predict(x_test)
mae = mean_absolute_error(y_test, y_pred)
print(mae)

0.029197080291970802


Grid Search

In [7]:
from sklearn.model_selection import GridSearchCV
parameters = {
    'n_estimators': [100, 300, 500],
    'max_depth': [1, 3, 5]
}
rfr = RandomForestClassifier()
grid_search = GridSearchCV(estimator=rfr, param_grid=parameters, cv=5)
grid_search.fit(x_train, y_train)

In [8]:
grid_search.best_score_

np.float64(0.9744120100083403)

In [9]:
grid_search.best_params_

{'max_depth': 5, 'n_estimators': 100}

In [12]:
model = RandomForestClassifier(max_depth=5, n_estimators=100)
model.fit(x_train, y_train)

In [13]:
from sklearn.metrics import mean_absolute_error
y_pred = classifier.predict(x_test)
mae = mean_absolute_error(y_test, y_pred)
print(mae)

0.029197080291970802


Random Search

In [14]:
from sklearn.model_selection import RandomizedSearchCV
parameters = {
    'n_estimators': [100, 300, 500],
    'max_depth': [1, 3, 5]
}
rfr = RandomForestClassifier()
random_search = RandomizedSearchCV(estimator=rfr, param_distributions=parameters, cv=5, n_iter=15)
random_search.fit(x_train, y_train)



In [15]:
random_search.best_score_

np.float64(0.9725771476230192)

In [16]:
random_search.best_params_

{'n_estimators': 100, 'max_depth': 5}

Bayesian Search

In [17]:
!pip install hyperopt



In [19]:
from hyperopt import hp, fmin, tpe, STATUS_OK, Trials
from sklearn.model_selection import cross_val_score

def model_train(params):
  model = RandomForestClassifier(**params)
  return cross_val_score(model, x_train, y_train, cv=5).mean()

param_space = {
    'n_estimators': hp.choice('n_estimators', range(100, 600)),
    'max_depth': hp.choice('max_depth', range(1, 7))
}

best = 99999999999

def f(params):
  global best
  error = model_train(params)
  if error > best:
    best = error
    print('new best:', best, params)
  return {'loss': -error, 'status': STATUS_OK}

In [20]:
trails = Trials()
best = fmin(
    f,
    param_space,
    algo=tpe.suggest,
    max_evals=15,
    trials=trails
)
print(best)

100%|██████████| 15/15 [00:40<00:00,  2.70s/trial, best loss: -0.9762301918265222]
{'max_depth': np.int64(5), 'n_estimators': np.int64(81)}


In [21]:
model = RandomForestClassifier(max_depth=5, n_estimators=81)
model.fit(x_train, y_train)

In [22]:
y_pred = classifier.predict(x_test)
mae = mean_absolute_error(y_test, y_pred)
print(mae)

0.029197080291970802
