In [76]:
from google.colab import drive
# Mount Google Drive at /content/drive; the CSV files loaded below live under MyDrive.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [77]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [78]:
# Read the comma-separated train and test splits from the mounted Drive folder.
# Later cells treat the last column as the label and every other column as a feature.
train = np.loadtxt("/content/drive/MyDrive/ML_Lab/report/5/lab5_train.csv", delimiter = ',')
test = np.loadtxt("/content/drive/MyDrive/ML_Lab/report/5/lab5_test.csv", delimiter = ',')

In [79]:
# Dump both arrays under a header each. With sep='\n' the lone '\n' item
# yields the same two blank lines between the sections as a separate
# print('\n') call would.
print("---Train---", train, '\n', "---Test---", test, sep = '\n')

---Train---
[[  0.     0.     0.27 ...   6.    55.     0.  ]
 [  0.74   0.     0.   ...   7.    44.     1.  ]
 [  0.9    0.     0.9  ...  54.   162.     1.  ]
 ...
 [  0.     0.     0.   ...  39.    91.     1.  ]
 [  0.     0.     0.   ...   2.     5.     0.  ]
 [  0.     0.     0.   ...   6.     8.     0.  ]]


---Test---
[[1.70e-01 0.00e+00 2.60e-01 ... 2.00e+01 6.46e+02 0.00e+00]
 [0.00e+00 0.00e+00 0.00e+00 ... 1.90e+01 2.03e+02 0.00e+00]
 [6.80e-01 1.10e-01 1.10e-01 ... 1.64e+02 1.59e+03 1.00e+00]
 ...
 [0.00e+00 0.00e+00 5.40e-01 ... 1.20e+01 8.90e+01 0.00e+00]
 [9.00e-02 0.00e+00 9.00e-02 ... 4.94e+02 1.46e+03 1.00e+00]
 [6.30e-01 6.30e-01 6.30e-01 ... 1.90e+01 1.26e+02 1.00e+00]]


In [80]:
# Last column is the label; every preceding column is a feature.
x_train, y_train = train[:, :-1], train[:, -1]
x_test, y_test = test[:, :-1], test[:, -1]

In [81]:
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC

In [82]:
# Find hyperparameters using a single hold-out validation set

def SVM_best_param_Val(x_train, y_train, C_list = None, kernel_list = None) :
  """Pick the best (C, kernel) pair for an SVC using one hold-out split.

  20% of the given training data is held out (``random_state = 0``) as a
  validation set. One SVC is fitted per (C, kernel) combination on the
  remaining 80%, and the combination with the highest validation score wins.
  Ties keep the earliest candidate in iteration order (C outer, kernel inner).

  Args:
    x_train: feature matrix, passed to ``train_test_split`` / ``SVC.fit``.
    y_train: labels aligned with ``x_train``.
    C_list: candidate values for ``C``; defaults to [0.1, 0.5, 1, 5, 10].
    kernel_list: candidate kernels; defaults to
      ['linear', 'rbf', 'poly', 'sigmoid'].

  Returns:
    Tuple ``(best_score, best_C, best_kernel)``.

  Raises:
    ValueError: if ``C_list`` or ``kernel_list`` is empty.
  """
  C_list = [0.1, 0.5, 1, 5, 10] if C_list is None else list(C_list)
  kernel_list = (['linear', 'rbf', 'poly', 'sigmoid']
                 if kernel_list is None else list(kernel_list))
  if not C_list or not kernel_list :
    raise ValueError("C_list and kernel_list must each contain at least one value")

  # Carve out the validation set; fresh names keep the caller's full
  # training data distinguishable from the 80% used for fitting.
  x_fit, x_val, y_fit, y_val = train_test_split(x_train, y_train, test_size = 0.2, random_state = 0)

  # None means "no candidate evaluated yet", so the first candidate is always
  # accepted, even if its validation score is 0.
  best_score = None
  best_C = None
  best_kernel = None

  for C in C_list :
    for kernel in kernel_list :

      svm = SVC(C = C, kernel = kernel)
      svm.fit(x_fit, y_fit)

      score = svm.score(x_val, y_val)

      if best_score is None or score > best_score :
        best_score = score
        best_C = C
        best_kernel = kernel

  # Return only after the whole grid has been evaluated. Returning from inside
  # the loop would stop at the first candidate and skip the search.
  return best_score, best_C, best_kernel

In [83]:
# Run the hold-out search on the training data; the cell displays the
# returned (best_score, best_C, best_kernel) tuple.
SVM_best_param_Val(x_train, y_train)

(0.9310344827586207, 0.1, 'linear')

In [84]:
# Refit on the full training set with the pair reported by the hold-out search
# (C = 0.1, linear kernel), then print accuracy on the test set.
# NOTE: these values are hard-coded; update them if the search result changes.
clf = SVC(C = 0.1, kernel = 'linear').fit(x_train, y_train)
test_accuracy = clf.score(x_test, y_test)
print(test_accuracy)

0.9027777777777778


In [85]:
from sklearn.model_selection import KFold
from sklearn.model_selection import GridSearchCV

# Find hyperparameters using cross-validation and grid search

def SVM_best_param_crossVal(x_train, y_train, n_splits):
  """Grid-search C and kernel for an SVC with shuffled K-fold cross-validation.

  Args:
    x_train: feature matrix handed to ``GridSearchCV.fit``.
    y_train: labels aligned with ``x_train``.
    n_splits: number of folds for ``KFold`` (shuffled, ``random_state = 0``).

  Returns:
    The ``best_params_`` attribute of the fitted ``GridSearchCV``.
  """
  search_space = {
    'C' : [0.1, 0.5, 1, 5, 10],
    'kernel' : ['linear', 'rbf', 'poly', 'sigmoid'],
  }
  folds = KFold(n_splits = n_splits, shuffle = True, random_state = 0)

  searcher = GridSearchCV(estimator = SVC(), param_grid = search_space, cv = folds)
  searcher.fit(x_train, y_train)

  return searcher.best_params_

In [86]:
# Run the 5-fold cross-validated grid search on the training data; the cell
# displays the best parameter dict.
SVM_best_param_crossVal(x_train, y_train, 5)

{'C': 1, 'kernel': 'linear'}

In [87]:
# Refit on the full training set with the pair reported by the cross-validated
# grid search (C = 1, linear kernel), then print accuracy on the test set.
# NOTE: these values are hard-coded; update them if the search result changes.
clf = SVC(C = 1, kernel = 'linear').fit(x_train, y_train)
test_accuracy = clf.score(x_test, y_test)
print(test_accuracy)

0.9444444444444444


In [88]:
# Train the final model on the whole training set with the chosen hyperparameters

def SVM_best(x_train, y_train, best_C, best_kernel):
  """Build an SVC with the given hyperparameters and fit it.

  Args:
    x_train: feature matrix passed to ``SVC.fit``.
    y_train: labels aligned with ``x_train``.
    best_C: value for the SVC ``C`` parameter.
    best_kernel: value for the SVC ``kernel`` parameter.

  Returns:
    The fitted SVC instance.
  """
  model = SVC(kernel = best_kernel, C = best_C)
  model.fit(x_train, y_train)
  return model

In [89]:
# Fit the final model on the full training set with C = 1 and the linear
# kernel, the pair reported by the cross-validated grid search above.
svm_final_model = SVM_best(x_train, y_train, 1, 'linear')

In [90]:
# Print the model's accuracy on the test set

def SVM_score(model, x_test, y_test):
  """Print ``model.score(x_test, y_test)`` prefixed with "final score: ".

  Args:
    model: any object exposing ``score(x, y)``.
    x_test: features forwarded to ``model.score``.
    y_test: labels forwarded to ``model.score``.

  Returns:
    None; the result is only printed.
  """
  accuracy = model.score(x_test, y_test)
  print("final score: " + str(accuracy))

In [91]:
# Report the final model's score on the held-out test set.
SVM_score(svm_final_model, x_test, y_test)

final score: 0.9444444444444444
