와인 데이터셋에서 SVM 분류기를 훈련하기
- 목표: 와인의 화학 성분을 기반으로 재배자를 예측할 수 있는 분류 모델을 훈련
- SVM은 기본적으로 이진 분류기이므로 세 가지 클래스를 분류하려면 다중 분류 전략이 필요함: 사이킷런의 LinearSVC는 OvR(OvA) 전략을, SVC는 내부적으로 OvO 전략을 자동으로 적용

In [17]:
from sklearn.datasets import load_wine
import pandas as pd

# Load the wine dataset as pandas objects. With as_frame=True the Bunch already
# carries a combined DataFrame (all feature columns plus a "target" column) in
# its `frame` attribute, so there is no need to rebuild it by hand.
wine = load_wine(as_frame=True)
# Copy so later edits to wine_df never leak back into the Bunch's own frame.
wine_df = wine.frame.copy()

In [28]:
# Preview the first five rows to sanity-check the assembled table.
wine_df.head(n=5)

Unnamed: 0,alcohol,malic_acid,ash,alcalinity_of_ash,magnesium,total_phenols,flavanoids,nonflavanoid_phenols,proanthocyanins,color_intensity,hue,od280/od315_of_diluted_wines,proline,target
0,14.23,1.71,2.43,15.6,127.0,2.8,3.06,0.28,2.29,5.64,1.04,3.92,1065.0,0
1,13.2,1.78,2.14,11.2,100.0,2.65,2.76,0.26,1.28,4.38,1.05,3.4,1050.0,0
2,13.16,2.36,2.67,18.6,101.0,2.8,3.24,0.3,2.81,5.68,1.03,3.17,1185.0,0
3,14.37,1.95,2.5,16.8,113.0,3.85,3.49,0.24,2.18,7.8,0.86,3.45,1480.0,0
4,13.24,2.59,2.87,21.0,118.0,2.8,2.69,0.39,1.82,4.32,1.04,2.93,735.0,0


In [30]:
# Print a summary of wine_df: index range, column names, non-null counts, dtypes.
wine_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 178 entries, 0 to 177
Data columns (total 14 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   alcohol                       178 non-null    float64
 1   malic_acid                    178 non-null    float64
 2   ash                           178 non-null    float64
 3   alcalinity_of_ash             178 non-null    float64
 4   magnesium                     178 non-null    float64
 5   total_phenols                 178 non-null    float64
 6   flavanoids                    178 non-null    float64
 7   nonflavanoid_phenols          178 non-null    float64
 8   proanthocyanins               178 non-null    float64
 9   color_intensity               178 non-null    float64
 10  hue                           178 non-null    float64
 11  od280/od315_of_diluted_wines  178 non-null    float64
 12  proline                       178 non-null    float64
 13  target                        178 non-null    int64  

In [33]:
# Pull plain NumPy arrays out of the Bunch: the feature matrix and the labels.
X = wine["data"].to_numpy()
y = wine["target"].to_numpy()

In [40]:
# Split the data into training and test sets
from sklearn.model_selection import train_test_split

# The fixed seed keeps the shuffle, and therefore the split, reproducible.
split_parts = train_test_split(X, y, random_state=42)
X_train, X_test, y_train, y_test = split_parts

In [50]:
from sklearn.svm import LinearSVC
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline

# Standardize the features first, then fit a linear SVM on the scaled values.
linear_svm_step = LinearSVC(dual=True, random_state=42)
linearsvc = make_pipeline(StandardScaler(), linear_svm_step)

In [51]:
# Fit the StandardScaler + LinearSVC pipeline on the training split.
linearsvc.fit(X_train, y_train)

In [52]:
# Measure model performance with cross-validation on the training set
from sklearn.model_selection import cross_val_score

linearsvc_cv_scores = cross_val_score(linearsvc, X_train, y_train)
linearsvc_cv_scores.mean()

0.9774928774928775

In [53]:
from sklearn.svm import SVC

# Same scaling step, but with a kernel SVC as the final estimator.
svc = make_pipeline(StandardScaler(), SVC(random_state=42))
svc_cv_scores = cross_val_score(svc, X_train, y_train)
svc_cv_scores.mean()

0.9698005698005698

In [57]:
# Tune the hyperparameters with a randomized search
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import loguniform, uniform

# Keys follow the "<step>__<param>" form; make_pipeline names the SVC step "svc".
# NOTE: scipy's uniform(loc, scale) spans [loc, loc + scale], so C is drawn
# from [1, 11] here, not [1, 10].
param_distrib = {
    "svc__gamma": loguniform(0.001, 0.1),
    "svc__C": uniform(1, 10),
}

rnd_search_cv = RandomizedSearchCV(
    estimator=svc,
    param_distributions=param_distrib,
    n_iter=100,
    cv=5,
    random_state=42,
)
rnd_search_cv.fit(X_train, y_train)
rnd_search_cv.best_estimator_

In [58]:
# Mean cross-validated score of the best parameter setting found by the search.
rnd_search_cv.best_score_

0.9925925925925926

In [59]:
# Evaluate the model selected by the search on the held-out test split.
rnd_search_cv.score(X_test, y_test)

0.9777777777777777