### 필요한 라이브러리 Import

In [1]:
import mglearn
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import warnings
# Installs a blanket "ignore" filter: warnings of every category raised after
# this point are hidden, including deprecation/conversion warnings from libraries.
warnings.filterwarnings("ignore")

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.svm import LinearSVC
from sklearn.svm import SVC
from sklearn import cluster
from sklearn import metrics
from sklearn import ensemble

### Data load & Preprocess

In [2]:
# Read the FER2013 CSV (path is relative to the notebook's working directory)
# and preview the first five rows as the cell output.
data = pd.read_csv(filepath_or_buffer='fer2013/fer2013.csv')
data.head(n=5)

Unnamed: 0,emotion,pixels,Usage
0,0,70 80 82 72 58 58 60 63 54 58 60 48 89 115 121...,Training
1,0,151 150 147 155 148 133 111 140 170 174 182 15...,Training
2,2,231 212 156 164 174 138 161 173 182 200 106 38...,Training
3,4,24 32 36 30 32 23 19 20 30 41 21 22 32 34 21 1...,Training
4,6,4 0 0 0 0 0 0 0 0 0 0 0 3 15 23 28 48 50 58 84...,Training


In [3]:
# Work on an independent (deep) copy so the loaded frame `data` is left untouched.
pixels_data = data.copy(deep=True)

In [4]:
# Remove the 'Usage' column from the working copy, mutating it in place.
pixels_data.drop(columns=['Usage'], inplace=True)

In [5]:
# Preview the first five rows of the working copy.
pixels_data.head(n=5)

Unnamed: 0,emotion,pixels
0,0,70 80 82 72 58 58 60 63 54 58 60 48 89 115 121...
1,0,151 150 147 155 148 133 111 140 170 174 182 15...
2,2,231 212 156 164 174 138 161 173 182 200 106 38...
3,4,24 32 36 30 32 23 19 20 30 41 21 22 32 34 21 1...
4,6,4 0 0 0 0 0 0 0 0 0 0 0 3 15 23 28 48 50 58 84...


In [6]:
# Keep the labels as a one-column DataFrame (list selector -> DataFrame, not Series),
# copied so it is independent of pixels_data, then preview the first five rows.
emotion = pixels_data.loc[:, ['emotion']].copy()
emotion.head(n=5)

Unnamed: 0,emotion
0,0
1,0
2,2
3,4
4,6


In [7]:
# The labels now live in `emotion`; remove that column from the working copy
# in place and preview the first five remaining rows.
pixels_data.drop(columns=['emotion'], inplace=True)
pixels_data.head(n=5)

Unnamed: 0,pixels
0,70 80 82 72 58 58 60 63 54 58 60 48 89 115 121...
1,151 150 147 155 148 133 111 140 170 174 182 15...
2,231 212 156 164 174 138 161 173 182 200 106 38...
3,24 32 36 30 32 23 19 20 30 41 21 22 32 34 21 1...
4,4 0 0 0 0 0 0 0 0 0 0 0 3 15 23 28 48 50 58 84...


### Data split

In [8]:
from sklearn import model_selection

In [9]:
# Hold out 30% of the rows for testing; random_state=0 pins the shuffle so the
# split is reproducible. Features and labels are split with the same row order.
train_data, test_data, train_label, test_label = train_test_split(
    pixels_data,
    emotion,
    test_size=0.3,
    random_state=0,
)

In [10]:
# Each entry of the 'pixels' column is one whitespace-separated string of values;
# split every string and convert the resulting rows to a float32 matrix.
# Note: this rebinds train_data/test_data from DataFrames to ndarrays.
train_data = np.array([row.split() for row in train_data['pixels']], dtype=np.float32)
test_data = np.array([row.split() for row in test_data['pixels']], dtype=np.float32)

In [11]:
# Normalize train_data and test_data by 255, the largest possible pixel value.
train_data = np.divide(train_data, 255.0)
test_data = np.divide(test_data, 255.0)

In [12]:
# Echo the normalized training matrix as the cell output (NumPy abbreviates large arrays).
train_data

array([[0.01176471, 0.00784314, 0.00784314, ..., 0.36862746, 0.4       ,
        0.42745098],
       [0.7921569 , 0.7764706 , 0.7411765 , ..., 0.6901961 , 0.68235296,
        0.69803923],
       [1.        , 1.        , 1.        , ..., 0.49411765, 0.45490196,
        0.4509804 ],
       ...,
       [0.23529412, 0.23921569, 0.2509804 , ..., 0.23921569, 0.22745098,
        0.25490198],
       [0.43137255, 0.49019608, 0.52156866, ..., 0.5921569 , 0.61960787,
        0.654902  ],
       [0.10588235, 0.11764706, 0.12941177, ..., 0.9647059 , 0.29803923,
        0.        ]], dtype=float32)

In [13]:
# Feature matrices handed to the estimators below.
X_train = np.array(train_data)
X_test = np.array(test_data)

# train_label / test_label are one-column DataFrames, so np.array() alone gives
# an (n_samples, 1) column vector. scikit-learn estimators expect a 1-D target of
# shape (n_samples,) and otherwise emit a DataConversionWarning, so flatten here.
Y_train = np.array(train_label).ravel()
Y_test = np.array(test_label).ravel()

### Make a Model

In [None]:
from sklearn.svm import LinearSVC
from sklearn.svm import SVC
from sklearn import preprocessing
from sklearn import utils

In [None]:
# Baseline: an SVC with all-default hyperparameters.
svc = SVC()
print(svc)
svc.fit(X_train, Y_train)

print('디폴트 값')
# score() returns mean accuracy on the given data and labels.
train_acc = svc.score(X_train, Y_train)
print("Accuracy on Training set: {:.3f}".format(train_acc))
test_acc = svc.score(X_test, Y_test)
print("Accuracy on Test set: {:.3f}".format(test_acc))

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
  kernel='rbf', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False)


### GridSearchCV for SVM

In [None]:
from sklearn.model_selection import GridSearchCV

# Candidate values searched exhaustively: 6 values of C x 6 values of gamma.
param_grid = {
    'C': [0.1, 1, 10, 100, 1000, 10000],
    'gamma': [1, 0.1, 0.01, 0.001, 0.0001, 0.00001],
}

# refit: whether to refit the estimator on the best parameters found
#        (needed so the search object can be used for predict right away).
# verbose: how detailed the progress log is (e.g. 3 prints the result of
#          every parameter combination).
grid = GridSearchCV(
    estimator=SVC(),
    param_grid=param_grid,
    refit=True,
    verbose=1,
)
grid.fit(X_train, Y_train)
print('The best parameters are ', grid.best_params_)

In [None]:
# Retrain an RBF-kernel SVC with fixed hyperparameters
# (presumably the values reported by the grid search above; confirm against grid.best_params_).
svc_g = SVC(kernel='rbf', C=10, gamma=0.01)
# Fit on the same arrays that are scored below. The notebook never defines
# X_train_scaled or y_train, so fitting on those names raised NameError.
svc_g.fit(X_train, Y_train)

print('GridSearchCV 적용 결과')
print("Accuracy on Training set: {:.3f}".format(svc_g.score(X_train, Y_train)))
print("Accuracy on Test set: {:.3f}".format(svc_g.score(X_test, Y_test)))