## 1. Classification with SVM:

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
from sklearn.model_selection import train_test_split,GridSearchCV
from sklearn.svm import SVC      #SVM에서 SVC(분류예측)가져옴 SVR(회귀)
from sklearn import metrics
from sklearn.datasets import load_iris
warnings.filterwarnings(action='ignore')                  # Turn off the warnings.
%matplotlib inline

### 1.1. Read in data:

In [2]:
# Load the iris dataset bundled with scikit-learn.
data = load_iris()

In [3]:
# Explanatory variables: the feature matrix and the names of its columns.
X = data.data
columns = [name for name in data.feature_names]
print(columns)

['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']


In [4]:
# Response variable: the targets and the class names that go with them.
Y = data.target
labels = [name for name in data.target_names]
print(labels)

['setosa', 'versicolor', 'virginica']


In [5]:
# Hold out 30% of the samples as a test set; the fixed seed keeps the split reproducible.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.3,
    random_state=1234,
)

### 1.2. SVM hyperparameter optimization (RBF kernel): 
#### SVM 하이퍼 파라미터 최적화 (RBF kernel 사용)

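C     : Penalty (regularization) parameter; a larger C penalizes misclassified training points more heavily. <br>
gamma : Kernel coefficient ($\gamma$) of the RBF kernel $\exp(-\gamma \lVert x - x' \rVert^2)$.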

In [6]:
# Candidate values for the grid search.
C_grid = np.arange(1, 20) * 0.02       # penalty parameter C: 0.02, 0.04, ..., 0.38
gamma_grid = np.arange(1, 50) * 0.02   # kernel parameter gamma: 0.02, 0.04, ..., 0.98

# Every (C, gamma) pair drawn from the two grids is evaluated.
parameters = dict(C=C_grid, gamma=gamma_grid)

# 10-fold cross-validated search over the grid with an RBF-kernel SVC;
# n_jobs=-1 lets the fits run in parallel on all available processors.
gridCV = GridSearchCV(estimator=SVC(kernel='rbf'), param_grid=parameters, cv=10, n_jobs=-1)
gridCV.fit(X_train, Y_train)

# Pull the winning combination out of the best_params_ dictionary.
best_C, best_gamma = (gridCV.best_params_[key] for key in ('C', 'gamma'))

In [7]:
# Report the hyperparameters selected by the grid search.
print("SVM best C : ", best_C, sep="")
print("SVM best gamma : ", best_gamma, sep="")

SVM best C : 0.22
SVM best gamma : 0.58


In [8]:
# Train an RBF-kernel SVC with the selected hyperparameters, then score it on the held-out test set.
SVM_best = SVC(C=best_C, gamma=best_gamma, kernel='rbf').fit(X_train, Y_train)
Y_pred = SVM_best.predict(X_test)
print("SVM best accuracy : ", np.round(metrics.accuracy_score(Y_test, Y_pred), 3), sep="")

# Test accuracy comes out at 97.8%.

SVM best accuracy : 0.978


### 1.3. SVM hyperparameter optimization (Polynomial kernel):
#### SVM 하이퍼 파라미터 최적화 (Polynomial kernel 사용)

In [9]:
# Candidate values for the polynomial-kernel search.
C_grid = np.arange(1, 30) * 0.0001     # penalty parameter C: 0.0001, 0.0002, ..., 0.0029
gamma_grid = np.arange(1, 30) * 0.01   # kernel parameter gamma: 0.01, 0.02, ..., 0.29
parameters = dict(C=C_grid, gamma=gamma_grid)

# 10-fold cross-validated search over the grid with a polynomial-kernel SVC;
# n_jobs=-1 lets the fits run in parallel on all available processors.
gridCV = GridSearchCV(estimator=SVC(kernel='poly'), param_grid=parameters, cv=10, n_jobs=-1)
gridCV.fit(X_train, Y_train)

# Pull the winning combination out of the best_params_ dictionary.
best_C, best_gamma = (gridCV.best_params_[key] for key in ('C', 'gamma'))

In [10]:
# Report the hyperparameters selected by the grid search.
print("SVM best C : ", best_C, sep="")
print("SVM best gamma : ", best_gamma, sep="")

SVM best C : 0.0011
SVM best gamma : 0.29


In [11]:
# Train a polynomial-kernel SVC with the selected hyperparameters, then score it on the held-out test set.
SVM_best = SVC(C=best_C, gamma=best_gamma, kernel='poly').fit(X_train, Y_train)
Y_pred = SVM_best.predict(X_test)
print("SVM best accuracy : ", np.round(metrics.accuracy_score(Y_test, Y_pred), 3), sep="")

# Test accuracy comes out at 100%.

SVM best accuracy : 1.0
