# 개별코드 

In [1]:
# Load the wine dataset that ships with scikit-learn.
from sklearn import datasets

raw_wine = datasets.load_wine()

In [2]:
# Pull the feature matrix and the target vector out of the loaded dataset.
X, y = raw_wine.data, raw_wine.target

In [3]:
# Split features and labels into training and test subsets.
# random_state is fixed so the split is reproducible across runs.
from sklearn.model_selection import train_test_split

X_tn, X_te, y_tn, y_te = train_test_split(X, y, random_state=0)

In [4]:
# Standardize the features.
# The scaler is fitted on the training split only and then applied to both
# splits, so no statistics from the test split are used for scaling.
from sklearn.preprocessing import StandardScaler

std_scale = StandardScaler().fit(X_tn)

X_tn_std = std_scale.transform(X_tn)
X_te_std = std_scale.transform(X_te)

In [5]:
# Grid search: try every (kernel, C) combination for an SVC, scoring each
# candidate by accuracy under shuffled, stratified 5-fold cross-validation.
from sklearn import svm
from sklearn.model_selection import GridSearchCV, StratifiedKFold

param_grid = {
    'kernel': ('linear', 'rbf'),
    'C': [0.5, 1, 10, 100],
}
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)
svc = svm.SVC(random_state=0)
grid_cv = GridSearchCV(
    estimator=svc,
    param_grid=param_grid,
    scoring='accuracy',
    cv=kfold,
)
# Kept as the last expression so the fitted search object is displayed.
grid_cv.fit(X_tn_std, y_tn)

GridSearchCV(cv=StratifiedKFold(n_splits=5, random_state=0, shuffle=True),
             estimator=SVC(random_state=0),
             param_grid={'C': [0.5, 1, 10, 100], 'kernel': ('linear', 'rbf')},
             scoring='accuracy')

In [6]:
# Inspect the raw grid-search results (a dict of arrays, one entry per
# parameter combination).
grid_cv.cv_results_

{'mean_fit_time': array([0.00160146, 0.00100083, 0.00060053, 0.00100093, 0.00060053,
        0.00060043, 0.00100088, 0.00080066]),
 'std_fit_time': array([1.74518899e-03, 9.53674316e-08, 4.90329667e-04, 1.78416128e-07,
        4.90329667e-04, 4.90251785e-04, 2.13248060e-07, 4.00328647e-04]),
 'mean_score_time': array([0.00020018, 0.00020018, 0.00040035, 0.00020018, 0.00020018,
        0.00040045, 0.        , 0.00040045]),
 'std_score_time': array([0.00040035, 0.00040035, 0.00049033, 0.00040035, 0.00040035,
        0.00049045, 0.        , 0.00049045]),
 'param_C': masked_array(data=[0.5, 0.5, 1, 1, 10, 10, 100, 100],
              mask=[False, False, False, False, False, False, False, False],
        fill_value='?',
             dtype=object),
 'param_kernel': masked_array(data=['linear', 'rbf', 'linear', 'rbf', 'linear', 'rbf',
                    'linear', 'rbf'],
              mask=[False, False, False, False, False, False, False, False],
        fill_value='?',
             dtype=ob

In [7]:
# Inspect the grid-search results as a DataFrame, transposed so that each
# column corresponds to one parameter combination.
import numpy as np
import pandas as pd

pd.DataFrame(grid_cv.cv_results_).T

Unnamed: 0,0,1,2,3,4,5,6,7
mean_fit_time,0.001601,0.001001,0.000601,0.001001,0.000601,0.0006,0.001001,0.000801
std_fit_time,0.001745,0.0,0.00049,0.0,0.00049,0.00049,0.0,0.0004
mean_score_time,0.0002,0.0002,0.0004,0.0002,0.0002,0.0004,0.0,0.0004
std_score_time,0.0004,0.0004,0.00049,0.0004,0.0004,0.00049,0.0,0.00049
param_C,0.5,0.5,1,1,10,10,100,100
param_kernel,linear,rbf,linear,rbf,linear,rbf,linear,rbf
params,"{'C': 0.5, 'kernel': 'linear'}","{'C': 0.5, 'kernel': 'rbf'}","{'C': 1, 'kernel': 'linear'}","{'C': 1, 'kernel': 'rbf'}","{'C': 10, 'kernel': 'linear'}","{'C': 10, 'kernel': 'rbf'}","{'C': 100, 'kernel': 'linear'}","{'C': 100, 'kernel': 'rbf'}"
split0_test_score,0.888889,0.962963,0.888889,0.925926,0.888889,0.925926,0.888889,0.925926
split1_test_score,0.962963,1.0,0.962963,0.962963,0.962963,0.962963,0.962963,0.962963
split2_test_score,0.925926,0.962963,0.925926,0.962963,0.925926,0.962963,0.925926,0.962963


In [8]:
# Best score found by the grid search.
grid_cv.best_score_

0.9774928774928775

In [9]:
# Hyperparameter combination that achieved the best score.
grid_cv.best_params_

{'C': 0.5, 'kernel': 'rbf'}

In [10]:
# Final model: the best estimator selected by the grid search.
clf = grid_cv.best_estimator_
print(clf)

SVC(C=0.5, random_state=0)


In [11]:
# Cross-validation scores (1): evaluate the selected model on several
# metrics at once, reusing the same stratified folds as the grid search.
from sklearn.model_selection import cross_validate

metrics = ['accuracy', 'precision_macro', 'recall_macro', 'f1_macro']
cv_scores = cross_validate(
    estimator=clf,
    X=X_tn_std,
    y=y_tn,
    scoring=metrics,
    cv=kfold,
)
cv_scores

{'fit_time': array([0.00100136, 0.00100136, 0.002002  , 0.00100112, 0.00100064]),
 'score_time': array([0.00200129, 0.00100064, 0.00200152, 0.002002  , 0.00200152]),
 'test_accuracy': array([0.96296296, 1.        , 0.96296296, 0.96153846, 1.        ]),
 'test_precision_macro': array([0.96296296, 1.        , 0.96969697, 0.96969697, 1.        ]),
 'test_recall_macro': array([0.96666667, 1.        , 0.96296296, 0.95833333, 1.        ]),
 'test_f1_macro': array([0.9628483 , 1.        , 0.96451914, 0.96190476, 1.        ])}

In [12]:
# Cross-validation scores (2): per-fold accuracy of the selected model,
# followed by its mean and standard deviation across folds.
from sklearn.model_selection import cross_val_score

cv_score = cross_val_score(
    estimator=clf,
    X=X_tn_std,
    y=y_tn,
    scoring='accuracy',
    cv=kfold,
)
print(cv_score, cv_score.mean(), cv_score.std(), sep='\n')

[0.96296296 1.         0.96296296 0.96153846 1.        ]
0.9774928774928775
0.01838434849561446


In [13]:
# Predict class labels for the standardized test split.
pred_svm = clf.predict(X_te_std)
print(pred_svm)

[0 2 1 0 1 1 0 2 1 1 2 2 0 1 2 1 0 0 1 0 1 0 0 1 1 1 1 1 1 2 0 0 1 0 0 0 2
 1 1 2 0 0 1 1 1]


In [14]:
# Accuracy of the predictions against the true test labels.
from sklearn.metrics import accuracy_score

accuracy = accuracy_score(y_true=y_te, y_pred=pred_svm)
print(accuracy)

1.0


In [15]:
# Confusion matrix of true test labels versus predicted labels.
from sklearn.metrics import confusion_matrix

conf_matrix = confusion_matrix(y_true=y_te, y_pred=pred_svm)
print(conf_matrix)

[[16  0  0]
 [ 0 21  0]
 [ 0  0  8]]


In [16]:
# Classification report (per-class precision, recall, f1-score, support).
from sklearn.metrics import classification_report

class_report = classification_report(y_true=y_te, y_pred=pred_svm)
print(class_report)

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        16
           1       1.00      1.00      1.00        21
           2       1.00      1.00      1.00         8

    accuracy                           1.00        45
   macro avg       1.00      1.00      1.00        45
weighted avg       1.00      1.00      1.00        45



# 통합코드

In [17]:
# End-to-end version of the workflow in a single cell: load the wine data,
# split, standardize, grid-search an SVC, cross-validate the selected model
# and evaluate it on the held-out test split.
#
# Because everything runs in one cell, only the last bare expression would be
# displayed; every intermediate result that should be visible is therefore
# printed explicitly.
from sklearn import datasets
from sklearn import svm
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_validate
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import numpy as np
import pandas as pd

# Load the data
raw_wine = datasets.load_wine()

# Select features and target
X = raw_wine.data
y = raw_wine.target

# Split into training and test data (fixed seed for reproducibility)
X_tn, X_te, y_tn, y_te = train_test_split(X, y, random_state=0)

# Standardize: fit the scaler on the training split only, apply to both
std_scale = StandardScaler()
std_scale.fit(X_tn)
X_tn_std = std_scale.transform(X_tn)
X_te_std = std_scale.transform(X_te)

# Fit the grid search (stratified, shuffled 5-fold CV scored by accuracy)
param_grid = {'kernel': ('linear', 'rbf'),
              'C': [0.5, 1, 10, 100]}
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)
svc = svm.SVC(random_state=0)
grid_cv = GridSearchCV(svc, param_grid, cv=kfold, scoring='accuracy')
grid_cv.fit(X_tn_std, y_tn)

# Grid search results: raw dict, then as a transposed DataFrame
print(grid_cv.cv_results_)
print(np.transpose(pd.DataFrame(grid_cv.cv_results_)))

# Best score
print(grid_cv.best_score_)

# Best hyperparameters
print(grid_cv.best_params_)

# Final model
clf = grid_cv.best_estimator_
print(clf)

# Cross-validation scores (1): several metrics at once
metrics = ['accuracy', 'precision_macro', 'recall_macro', 'f1_macro']
cv_scores = cross_validate(clf, X_tn_std, y_tn,
                           cv=kfold, scoring=metrics)
print(cv_scores)

# Cross-validation scores (2): per-fold accuracy, its mean and std
cv_score = cross_val_score(clf, X_tn_std, y_tn,
                           cv=kfold, scoring='accuracy')
print(cv_score)
print(cv_score.mean())
print(cv_score.std())

# Predict on the standardized test split
pred_svm = clf.predict(X_te_std)
print(pred_svm)

# Accuracy
accuracy = accuracy_score(y_te, pred_svm)
print(accuracy)

# Confusion matrix
conf_matrix = confusion_matrix(y_te, pred_svm)
print(conf_matrix)

# Classification report
class_report = classification_report(y_te, pred_svm)
print(class_report)

{'mean_fit_time': array([0.0008008 , 0.00080075, 0.00060062, 0.00100088, 0.00040035,
       0.00060058, 0.00060048, 0.00100098]), 'std_fit_time': array([4.00400233e-04, 4.00376359e-04, 4.90407542e-04, 2.61174468e-07,
       4.90329667e-04, 4.90368586e-04, 4.90290718e-04, 2.43140197e-07]), 'mean_score_time': array([0.0004005 , 0.00060043, 0.00020013, 0.00040035, 0.0004004 ,
       0.00040026, 0.00020022, 0.00020013]), 'std_score_time': array([0.0004905 , 0.00049025, 0.00040026, 0.00049033, 0.00049039,
       0.00049021, 0.00040045, 0.00040026]), 'param_C': masked_array(data=[0.5, 0.5, 1, 1, 10, 10, 100, 100],
             mask=[False, False, False, False, False, False, False, False],
       fill_value='?',
            dtype=object), 'param_kernel': masked_array(data=['linear', 'rbf', 'linear', 'rbf', 'linear', 'rbf',
                   'linear', 'rbf'],
             mask=[False, False, False, False, False, False, False, False],
       fill_value='?',
            dtype=object), 'params':