In [2]:
import pandas as pd
import numpy as np
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.datasets import load_iris

In [9]:
# Load the iris dataset and hold out 20% of the samples as a test set.
# The fixed random_state makes the split repeatable across runs.
iris = load_iris()
x_train, x_test, y_train, y_test = train_test_split(
    iris.data, iris.target, test_size=0.2, random_state=121
)

# Seed the tree as well: per the scikit-learn docs, DecisionTreeClassifier
# randomly permutes the features at each split, so without a random_state,
# ties between equally good splits may be broken differently from run to run
# and the grid-search scores below would not be reproducible.
dtree = DecisionTreeClassifier(random_state=121)

In [10]:
# Hyper-parameter grid to search: every combination of the values below
# (3 depths x 2 split thresholds = 6 candidates) is evaluated.
parameters = {
    'max_depth': [1, 2, 3],
    'min_samples_split': [2, 3],
}

In [11]:
# Exhaustive search over `parameters` using 3-fold cross-validation.
# refit=True asks the search to retrain the best configuration afterwards.
grid_dtree = GridSearchCV(
    estimator=dtree,
    param_grid=parameters,
    cv=3,
    refit=True,
)

In [12]:
# Run the grid search on the training split only; the test split is kept
# aside for the final evaluation.
grid_dtree.fit(X=x_train, y=y_train)

GridSearchCV(cv=3, estimator=DecisionTreeClassifier(),
             param_grid={'max_depth': [1, 2, 3], 'min_samples_split': [2, 3]})

In [14]:
# Columns worth inspecting: the parameter combination, its mean score and
# rank, and the individual test score of each of the 3 folds.
summary_columns = [
    'params',
    'mean_test_score',
    'rank_test_score',
    'split0_test_score',
    'split1_test_score',
    'split2_test_score',
]

# Convert the raw cross-validation results into a DataFrame and display
# only the summary columns.
scores_df = pd.DataFrame(grid_dtree.cv_results_)
scores_df[summary_columns]

Unnamed: 0,params,mean_test_score,rank_test_score,split0_test_score,split1_test_score,split2_test_score
0,"{'max_depth': 1, 'min_samples_split': 2}",0.7,5,0.7,0.7,0.7
1,"{'max_depth': 1, 'min_samples_split': 3}",0.7,5,0.7,0.7,0.7
2,"{'max_depth': 2, 'min_samples_split': 2}",0.958333,3,0.925,1.0,0.95
3,"{'max_depth': 2, 'min_samples_split': 3}",0.958333,3,0.925,1.0,0.95
4,"{'max_depth': 3, 'min_samples_split': 2}",0.975,1,0.975,1.0,0.95
5,"{'max_depth': 3, 'min_samples_split': 3}",0.975,1,0.975,1.0,0.95


In [15]:
# Display the raw cross-validation results dictionary (the same data that
# `scores_df` was built from), including the timing and parameter entries
# that the summary table above leaves out.
grid_dtree.cv_results_

{'mean_fit_time': array([0.        , 0.        , 0.00033323, 0.00066694, 0.00066638,
        0.00033307]),
 'std_fit_time': array([0.        , 0.        , 0.00047126, 0.00047163, 0.00047121,
        0.00047103]),
 'mean_score_time': array([0.00066145, 0.00033259, 0.00033347, 0.00033482, 0.00033379,
        0.00033307]),
 'std_score_time': array([0.0004678 , 0.00047036, 0.0004716 , 0.00047351, 0.00047204,
        0.00047103]),
 'param_max_depth': masked_array(data=[1, 1, 2, 2, 3, 3],
              mask=[False, False, False, False, False, False],
        fill_value='?',
             dtype=object),
 'param_min_samples_split': masked_array(data=[2, 3, 2, 3, 2, 3],
              mask=[False, False, False, False, False, False],
        fill_value='?',
             dtype=object),
 'params': [{'max_depth': 1, 'min_samples_split': 2},
  {'max_depth': 1, 'min_samples_split': 3},
  {'max_depth': 2, 'min_samples_split': 2},
  {'max_depth': 2, 'min_samples_split': 3},
  {'max_depth': 3, 'min_sample

In [17]:
# Pull out the winning parameter combination and its mean cross-validated
# score, then report them (score formatted to four decimal places).
best_params = grid_dtree.best_params_
best_score = grid_dtree.best_score_

print("최적 파라미터 : ", best_params)
print("최고 정확도 : {0:.4f}".format(best_score))

최적 파라미터 :  {'max_depth': 3, 'min_samples_split': 2}
최고 정확도 : 0.9750


In [18]:
# Keep a handle to the best model found by the search. The search was built
# with refit=True, which per the scikit-learn docs refits the estimator with
# the best parameters on the whole data passed to fit() and exposes it here.
estimator = grid_dtree.best_estimator_

In [21]:
# Evaluate the selected model on the held-out test split and report its
# accuracy to four decimal places.
pred = estimator.predict(x_test)
test_accuracy = accuracy_score(y_test, pred)
print("테스트 정확도 : {0:.4f}".format(test_accuracy))

테스트 정확도 : 0.9667
