In [1]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import cross_val_score, cross_validate
from sklearn.datasets import load_iris
import numpy as np

# Load the iris dataset and pull out the feature matrix and the class labels.
iris_data = load_iris()
data, label = iris_data.data, iris_data.target

# The estimator is seeded so repeated runs build the same tree.
dt_clf = DecisionTreeClassifier(random_state=156)

# One accuracy value per fold of a 3-fold cross-validation.
score = cross_val_score(dt_clf, X=data, y=label, cv=3, scoring="accuracy")

# Report the per-fold accuracies and their mean, both rounded to 4 decimals.
print(f"교차 검증별 정확도 : {np.round(score,4)}")
print(f"평균 검증 정확도 : {np.round(np.mean(score),4)}")

교차 검증별 정확도 : [0.98 0.94 0.98]
평균 검증 정확도 : 0.9667


In [3]:
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.metrics import accuracy_score

# Hold out 20% of the iris samples as a test set; the split is seeded.
iris = load_iris()
X_train, X_test, y_train, y_test = train_test_split(
    iris.data, iris.target, test_size=0.2, random_state=121
)

# Seed the estimator too (same seed as the cross_val_score cell). An unseeded
# tree is not guaranteed to resolve equally good splits the same way on every
# run, which would make the grid-search scores below non-reproducible.
dt_clf = DecisionTreeClassifier(random_state=156)

# Hyperparameter grid: 3 values of max_depth x 2 values of min_samples_split,
# i.e. 6 candidate combinations.
parameters = {"max_depth": [1, 2, 3], "min_samples_split": [2, 3]}

In [4]:
import pandas as pd
 
# Search every combination in `parameters` with 3-fold cross-validation.
# refit=True retrains the best combination once the search is done, so the
# fitted search object can be used for prediction afterwards.
grid_dt_clf = GridSearchCV(
    dt_clf,
    param_grid=parameters,
    cv=3,
    refit=True,
    return_train_score=True,
)
grid_dt_clf.fit(X_train, y_train)

# Turn the raw results dict into a table and display only the parameter
# combination, its mean/rank, and the three per-split test scores.
scores_df = pd.DataFrame(grid_dt_clf.cv_results_)
scores_df.loc[
    :,
    [
        "params",
        "mean_test_score",
        "rank_test_score",
        "split0_test_score",
        "split1_test_score",
        "split2_test_score",
    ],
]

Unnamed: 0,params,mean_test_score,rank_test_score,split0_test_score,split1_test_score,split2_test_score
0,"{'max_depth': 1, 'min_samples_split': 2}",0.7,5,0.7,0.7,0.7
1,"{'max_depth': 1, 'min_samples_split': 3}",0.7,5,0.7,0.7,0.7
2,"{'max_depth': 2, 'min_samples_split': 2}",0.958333,3,0.925,1.0,0.95
3,"{'max_depth': 2, 'min_samples_split': 3}",0.958333,3,0.925,1.0,0.95
4,"{'max_depth': 3, 'min_samples_split': 2}",0.975,1,0.975,1.0,0.95
5,"{'max_depth': 3, 'min_samples_split': 3}",0.975,1,0.975,1.0,0.95


In [5]:
# Dump the complete raw cv_results_ dict (timings, parameters, scores) under
# a one-line header.
print(f"grid_dt_clf.cv_results_ :\n{grid_dt_clf.cv_results_}")

grid_dt_clf.cv_results_ :
{'mean_fit_time': array([0.00033196, 0.00033196, 0.        , 0.00066479, 0.00033243,
       0.        ]), 'std_fit_time': array([0.00046946, 0.00046946, 0.        , 0.00047008, 0.00047013,
       0.        ]), 'mean_score_time': array([0.        , 0.        , 0.        , 0.00033267, 0.        ,
       0.00033236]), 'std_score_time': array([0.        , 0.        , 0.        , 0.00047047, 0.        ,
       0.00047002]), 'param_max_depth': masked_array(data=[1, 1, 2, 2, 3, 3],
             mask=[False, False, False, False, False, False],
       fill_value='?',
            dtype=object), 'param_min_samples_split': masked_array(data=[2, 3, 2, 3, 2, 3],
             mask=[False, False, False, False, False, False],
       fill_value='?',
            dtype=object), 'params': [{'max_depth': 1, 'min_samples_split': 2}, {'max_depth': 1, 'min_samples_split': 3}, {'max_depth': 2, 'min_samples_split': 2}, {'max_depth': 2, 'min_samples_split': 3}, {'max_depth': 3, 'min_samp

In [6]:
# Best hyperparameter combination found by the search. A plain string is
# enough here: the header has no placeholders to interpolate.
print("grid_dt_clf.cv 최적파라미터 \n", grid_dt_clf.best_params_)

# Best mean cross-validated score, shown with 4 decimal places like the test
# accuracy prints. Note that `.4f` is a precision; `4f` (no dot) would only set
# a minimum field width and fall back to the default 6 decimals.
print(f"gridsearch 최고정확도 : {grid_dt_clf.best_score_:.4f}")

grid_dt_clf.cv 최적파라미터 
 {'max_depth': 3, 'min_samples_split': 2}
gridsearch 최고정확도 :,0.975000


In [8]:
# Evaluate on the held-out test set through the search object itself; with
# refit=True it was retrained on the training data after the search.
print("--- 테스트 데이터 세트의 정확도 구하기 ---")
pred = grid_dt_clf.predict(X=X_test)

# Accuracy of those predictions against the true test labels, to 4 decimals.
print(f"테스트 데이터 세트의 정확도 : {accuracy_score(y_test,pred):.4f}")

--- 테스트 데이터 세트의 정확도 구하기 ---
테스트 데이터 세트의 정확도 : 0.9667


In [9]:
# Same evaluation, this time using the refit best model taken out of the
# search object; the printed accuracy matches the one obtained via
# grid_dt_clf.predict.
estimator = grid_dt_clf.best_estimator_
pred = estimator.predict(X=X_test)

# Accuracy against the true test labels, to 4 decimals.
print(f"테스트 데이터 세트의 정확도 : {accuracy_score(y_test,pred):.4f}")

테스트 데이터 세트의 정확도 : 0.9667
