<a href="https://colab.research.google.com/github/matsu641/HyperParameterTuning/blob/main/HyperParameterTuning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [34]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV

In [3]:
dataset = load_breast_cancer()

In [4]:
t = dataset.target
x = dataset.data

In [5]:
x.shape, t.shape

((569, 30), (569,))

In [10]:
train_x_val, test_x, train_t_val, test_t = train_test_split(x, t, test_size=0.2, random_state=1)

In [12]:
train_x, x_val, train_t, t_val = train_test_split(train_x_val, train_t_val, test_size=0.3, random_state=1)

In [9]:
train_x.shape, test_x.shape, train_t.shape, test_t.shape

((455, 30), (114, 30), (455,), (114,))

In [22]:
dtree = DecisionTreeClassifier(max_depth=10, min_samples_split=30, random_state=0)

In [23]:
dtree.fit(train_x, train_t)

In [24]:
DecisionTreeClassifier(random_state=0)

In [25]:
print(f'train accuracy: {accuracy_score(train_t, dtree.predict(train_x))}')
print(f'validation accuracy: {accuracy_score(t_val, dtree.predict(x_val))}')

train accuracy: 0.9308176100628931
validation accuracy: 0.9562043795620438


In [27]:
print(f'test accuracy: {accuracy_score(test_t, dtree.predict(test_x))}')

test accuracy: 0.9298245614035088


**Grid search**

In [28]:
estimator = DecisionTreeClassifier(random_state=0)

In [29]:
params_grid = [{
    'max_depth': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
    'min_samples_split': [2, 3,30]
}]

In [30]:
cv = 5

In [35]:
tuned_model = GridSearchCV(estimator=estimator, param_grid=params_grid, cv=cv, return_train_score=False)

In [36]:
tuned_model.fit(train_x, train_t)

In [42]:
pd.DataFrame(tuned_model.cv_results_).T

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,20,21,22,23,24,25,26,27,28,29
mean_fit_time,0.008447,0.010754,0.006851,0.005541,0.00563,0.005512,0.006844,0.006767,0.009274,0.008171,...,0.00807,0.00788,0.008941,0.007107,0.008292,0.007442,0.007522,0.00742,0.012007,0.00837
std_fit_time,0.003753,0.001835,0.003671,0.000137,0.000273,0.000056,0.000354,0.000373,0.002946,0.001926,...,0.001169,0.001264,0.002904,0.000763,0.001329,0.00076,0.000941,0.00071,0.004205,0.002675
mean_score_time,0.00534,0.00142,0.001262,0.001275,0.001319,0.001322,0.001381,0.001296,0.001431,0.001413,...,0.001313,0.001301,0.00141,0.001335,0.001377,0.001385,0.001308,0.001341,0.001457,0.001368
std_score_time,0.004928,0.000076,0.000051,0.000012,0.000079,0.000062,0.000071,0.000052,0.000185,0.000108,...,0.000043,0.000038,0.000117,0.000035,0.000057,0.000067,0.000057,0.000048,0.000125,0.000065
param_max_depth,1,1,1,2,2,2,3,3,3,4,...,7,8,8,8,9,9,9,10,10,10
param_min_samples_split,2,3,30,2,3,30,2,3,30,2,...,30,2,3,30,2,3,30,2,3,30
params,"{'max_depth': 1, 'min_samples_split': 2}","{'max_depth': 1, 'min_samples_split': 3}","{'max_depth': 1, 'min_samples_split': 30}","{'max_depth': 2, 'min_samples_split': 2}","{'max_depth': 2, 'min_samples_split': 3}","{'max_depth': 2, 'min_samples_split': 30}","{'max_depth': 3, 'min_samples_split': 2}","{'max_depth': 3, 'min_samples_split': 3}","{'max_depth': 3, 'min_samples_split': 30}","{'max_depth': 4, 'min_samples_split': 2}",...,"{'max_depth': 7, 'min_samples_split': 30}","{'max_depth': 8, 'min_samples_split': 2}","{'max_depth': 8, 'min_samples_split': 3}","{'max_depth': 8, 'min_samples_split': 30}","{'max_depth': 9, 'min_samples_split': 2}","{'max_depth': 9, 'min_samples_split': 3}","{'max_depth': 9, 'min_samples_split': 30}","{'max_depth': 10, 'min_samples_split': 2}","{'max_depth': 10, 'min_samples_split': 3}","{'max_depth': 10, 'min_samples_split': 30}"
split0_test_score,0.921875,0.921875,0.921875,0.890625,0.890625,0.890625,0.875,0.875,0.84375,0.90625,...,0.84375,0.859375,0.90625,0.84375,0.859375,0.90625,0.84375,0.859375,0.90625,0.84375
split1_test_score,0.875,0.875,0.875,0.90625,0.90625,0.90625,0.921875,0.921875,0.921875,0.921875,...,0.921875,0.90625,0.90625,0.921875,0.90625,0.90625,0.921875,0.90625,0.90625,0.921875
split2_test_score,0.921875,0.921875,0.921875,0.921875,0.921875,0.921875,0.953125,0.953125,0.921875,0.953125,...,0.921875,0.953125,0.953125,0.921875,0.953125,0.953125,0.921875,0.953125,0.953125,0.921875


In [43]:
tuned_model.best_params_ # {'max_depth': 4, 'min_samples_split': 2} is the best

{'max_depth': 4, 'min_samples_split': 2}

In [44]:
best_model = tuned_model.best_estimator_

In [45]:
print(f'train accuracy: {accuracy_score(train_t, best_model.predict(train_x))}')
print(f'validation accuracy: {accuracy_score(t_val, best_model.predict(x_val))}')

train accuracy: 0.9937106918238994
validation accuracy: 0.9343065693430657
