In [2]:
# Load data: the wine dataset that ships with scikit-learn.
from sklearn import datasets
raw_wine = datasets.load_wine()

In [3]:
# Separate the loaded dataset into the feature matrix (X) and the target labels (y).
X, y = raw_wine.data, raw_wine.target

In [4]:
# Split into training and test sets.
# No test_size is given, so scikit-learn's default split ratio applies;
# random_state is fixed so the split is repeatable.
from sklearn.model_selection import train_test_split
X_tn, X_te, y_tn, y_te = train_test_split(
    X,
    y,
    random_state=0,
)

In [5]:
# Standardize the features.
# The scaler learns its statistics from the training set only and the
# same fitted scaler is then applied to the test set.
from sklearn.preprocessing import StandardScaler
std_scale = StandardScaler()
X_tn_std = std_scale.fit_transform(X_tn)
X_te_std = std_scale.transform(X_te)

In [6]:
# Grid search over the SVM kernel and the regularization strength C.
from sklearn import svm
from sklearn.model_selection import GridSearchCV, StratifiedKFold

# 2 kernels x 4 values of C = 8 candidate configurations.
param_grid = {
    'kernel': ('linear', 'rbf'),
    'C': [0.5, 1, 10, 100],
}

# Shuffled, stratified 5-fold splitter; the fixed seed keeps the folds repeatable.
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)
svc = svm.SVC(random_state=0)
grid_cv = GridSearchCV(
    estimator=svc,
    param_grid=param_grid,
    scoring='accuracy',
    cv=kfold,
)

# NOTE(review): X_tn_std was scaled with statistics from the whole training
# set, so each fold's validation part influenced the scaling. Wrapping the
# scaler and SVC in a Pipeline would avoid that -- confirm whether it matters here.
grid_cv.fit(X_tn_std, y_tn)

In [7]:
# Show the raw grid-search results dict (timings, parameters and per-fold scores per candidate).
grid_cv.cv_results_

{'mean_fit_time': array([0.00411615, 0.00454178, 0.00295863, 0.00420103, 0.00369387,
        0.00334506, 0.00107894, 0.00342703]),
 'std_fit_time': array([0.00076088, 0.00397349, 0.00087252, 0.00084967, 0.00140318,
        0.00107502, 0.00133585, 0.00457958]),
 'mean_score_time': array([0.00162044, 0.00150285, 0.00222249, 0.00214024, 0.00118594,
        0.00210075, 0.00055971, 0.        ]),
 'std_score_time': array([0.00084214, 0.00147199, 0.00124621, 0.00094742, 0.00045649,
        0.00068573, 0.00078102, 0.        ]),
 'param_C': masked_array(data=[0.5, 0.5, 1, 1, 10, 10, 100, 100],
              mask=[False, False, False, False, False, False, False, False],
        fill_value='?',
             dtype=object),
 'param_kernel': masked_array(data=['linear', 'rbf', 'linear', 'rbf', 'linear', 'rbf',
                    'linear', 'rbf'],
              mask=[False, False, False, False, False, False, False, False],
        fill_value='?',
             dtype=object),
 'params': [{'C': 0.5, 'k

In [8]:
import numpy as np
import pandas as pd

# Tabulate the grid-search results and transpose them so that each column
# is one candidate and each row is one reported quantity.
pd.DataFrame(grid_cv.cv_results_).T

Unnamed: 0,0,1,2,3,4,5,6,7
mean_fit_time,0.004116,0.004542,0.002959,0.004201,0.003694,0.003345,0.001079,0.003427
std_fit_time,0.000761,0.003973,0.000873,0.00085,0.001403,0.001075,0.001336,0.00458
mean_score_time,0.00162,0.001503,0.002222,0.00214,0.001186,0.002101,0.00056,0.0
std_score_time,0.000842,0.001472,0.001246,0.000947,0.000456,0.000686,0.000781,0.0
param_C,0.5,0.5,1,1,10,10,100,100
param_kernel,linear,rbf,linear,rbf,linear,rbf,linear,rbf
params,"{'C': 0.5, 'kernel': 'linear'}","{'C': 0.5, 'kernel': 'rbf'}","{'C': 1, 'kernel': 'linear'}","{'C': 1, 'kernel': 'rbf'}","{'C': 10, 'kernel': 'linear'}","{'C': 10, 'kernel': 'rbf'}","{'C': 100, 'kernel': 'linear'}","{'C': 100, 'kernel': 'rbf'}"
split0_test_score,0.888889,0.962963,0.888889,0.925926,0.888889,0.925926,0.888889,0.925926
split1_test_score,0.962963,1.0,0.962963,0.962963,0.962963,0.962963,0.962963,0.962963
split2_test_score,0.925926,0.962963,0.925926,0.962963,0.925926,0.962963,0.925926,0.962963


In [9]:
# Mean cross-validated score (accuracy, per the search's scoring setting) of the best candidate.
grid_cv.best_score_

0.9774928774928775

In [10]:
# Parameter combination that achieved the best cross-validated score.
grid_cv.best_params_

{'C': 0.5, 'kernel': 'rbf'}

In [11]:
# Final model: the estimator GridSearchCV kept for the best parameter combination.
clf = grid_cv.best_estimator_
print(clf)

SVC(C=0.5, random_state=0)


In [12]:
# Check cross-validation scores (1): several metrics at once.
# Uses the same splitter (kfold) as the grid search on the standardized training data.
from sklearn.model_selection import cross_validate
metrics = ['accuracy', 'precision_macro', 'recall_macro', 'f1_macro']
cv_scores = cross_validate(
    estimator=clf,
    X=X_tn_std,
    y=y_tn,
    scoring=metrics,
    cv=kfold,
)
cv_scores

{'fit_time': array([0.        , 0.        , 0.00568652, 0.00874186, 0.00199485]),
 'score_time': array([0.01385522, 0.02677941, 0.0063808 , 0.00797701, 0.00786281]),
 'test_accuracy': array([0.96296296, 1.        , 0.96296296, 0.96153846, 1.        ]),
 'test_precision_macro': array([0.96296296, 1.        , 0.96969697, 0.96969697, 1.        ]),
 'test_recall_macro': array([0.96666667, 1.        , 0.96296296, 0.95833333, 1.        ]),
 'test_f1_macro': array([0.9628483 , 1.        , 0.96451914, 0.96190476, 1.        ])}

In [13]:
# Check cross-validation scores (2): accuracy only, summarized as mean and
# standard deviation over the folds.
from sklearn.model_selection import cross_val_score
cv_score = cross_val_score(
    estimator=clf,
    X=X_tn_std,
    y=y_tn,
    scoring='accuracy',
    cv=kfold,
)
print(cv_score.mean(), cv_score.std(), sep='\n')

0.9774928774928775
0.01838434849561446


In [14]:
# Prediction: classify the standardized test set with the final model.
pred_svm = clf.predict(X_te_std)
print(pred_svm)

[0 2 1 0 1 1 0 2 1 1 2 2 0 1 2 1 0 0 1 0 1 0 0 1 1 1 1 1 1 2 0 0 1 0 0 0 2
 1 1 2 0 0 1 1 1]


In [16]:
# Accuracy of the predictions on the held-out test labels.
from sklearn.metrics import accuracy_score
accuracy = accuracy_score(y_true=y_te, y_pred=pred_svm)
print(accuracy)

1.0


In [17]:
# Confusion matrix of true test labels versus predicted labels.
from sklearn.metrics import confusion_matrix
conf_matrix = confusion_matrix(y_true=y_te, y_pred=pred_svm)
print(conf_matrix)

[[16  0  0]
 [ 0 21  0]
 [ 0  0  8]]


In [18]:
# Classification report (per-class precision, recall, f1-score, support) on the test set.
from sklearn.metrics import classification_report
class_report = classification_report(y_true=y_te, y_pred=pred_svm)
print(class_report)

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        16
           1       1.00      1.00      1.00        21
           2       1.00      1.00      1.00         8

    accuracy                           1.00        45
   macro avg       1.00      1.00      1.00        45
weighted avg       1.00      1.00      1.00        45

