# Multiclass SVM 구현

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

# Load the iris dataset through seaborn.
iris = sns.load_dataset('iris')
X = iris.iloc[:, :4]  # first four columns: the data to train on
y = iris.iloc[:, -1]  # last column: the target
print(y)

0         setosa
1         setosa
2         setosa
3         setosa
4         setosa
         ...    
145    virginica
146    virginica
147    virginica
148    virginica
149    virginica
Name: species, Length: 150, dtype: object


In [None]:
# Hold out 20% of the samples as the test split; the fixed random_state
# keeps the split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=48,
)

In [None]:
def standardization(train, test):
    """Scale both splits with a StandardScaler fitted on the training split.

    The scaler is fitted on ``train`` only and then applied to ``train`` and
    ``test``, so the test split does not influence the scaling statistics.

    Args:
        train: Training features used to fit the scaler.
        test: Test features transformed with the already-fitted scaler.

    Returns:
        A ``(train, test)`` tuple holding the transformed splits.
    """
    fitted_scaler = StandardScaler().fit(train)
    return fitted_scaler.transform(train), fitted_scaler.transform(test)


# Replace the raw feature splits with their standardized versions.
X_train, X_test = standardization(X_train, X_test)

In [None]:
# Notebook display: the training features after standardization.
X_train

array([[ 0.78522493,  0.32015325,  0.77221097,  1.04726529],
       [-0.26563371, -1.29989934,  0.0982814 , -0.11996537],
       [ 0.43493872,  0.78302542,  0.94069336,  1.43634218],
       [-0.84944407,  0.78302542, -1.24957775, -1.28719604],
       [-0.38239578, -1.7627715 ,  0.15444219,  0.13941922],
       [ 0.55170079, -0.374155  ,  1.05301496,  0.7878807 ],
       [ 0.31817664, -0.14271892,  0.65988937,  0.7878807 ],
       [ 0.20141457, -0.374155  ,  0.43524618,  0.39880381],
       [-1.66677857, -0.14271892, -1.36189934, -1.28719604],
       [-0.14887164, -0.60559109,  0.21060299,  0.13941922],
       [-0.14887164, -1.06846325, -0.12636179, -0.24965767],
       [ 0.31817664, -0.60559109,  0.15444219,  0.13941922],
       [ 0.66846286, -0.83702717,  0.88453256,  0.91757299],
       [ 0.0846525 , -0.14271892,  0.77221097,  0.7878807 ],
       [-0.49915786, -0.14271892,  0.43524618,  0.39880381],
       [-0.26563371, -0.60559109,  0.65988937,  1.04726529],
       [ 2.18636979,  1.

In [None]:
# Notebook display: the test features after standardization.
X_test

array([[-0.14887164, -0.374155  ,  0.26676379,  0.13941922],
       [ 0.31817664, -0.60559109,  0.54756778,  0.00972692],
       [ 0.31817664, -1.06846325,  1.05301496,  0.26911151],
       [-1.5500165 , -1.7627715 , -1.36189934, -1.15750374],
       [ 0.0846525 ,  0.32015325,  0.60372857,  0.7878807 ],
       [ 0.78522493, -0.14271892,  0.99685416,  0.7878807 ],
       [-0.84944407,  1.70876975, -1.24957775, -1.15750374],
       [ 0.20141457, -0.14271892,  0.60372857,  0.7878807 ],
       [-0.38239578,  2.63451409, -1.30573855, -1.28719604],
       [-0.38239578, -1.29989934,  0.15444219,  0.13941922],
       [ 0.66846286,  0.08871717,  0.99685416,  0.7878807 ],
       [-0.38239578,  1.0144615 , -1.36189934, -1.28719604],
       [-0.49915786,  0.78302542, -1.13725615, -1.28719604],
       [ 0.43493872, -0.60559109,  0.60372857,  0.7878807 ],
       [ 0.55170079, -1.7627715 ,  0.37908538,  0.13941922],
       [ 0.55170079,  0.55158933,  0.54756778,  0.52849611],
       [-1.19973028,  0.

### 세 개의 클래스를 one-hot encoding한 뒤, 클래스별로 binary SVM을 학습시키고 그 결과를 조합하여 multiclass SVM 구현

In [None]:
from sklearn.preprocessing import OneHotEncoder

# One-hot encode the class labels as a dense array (sparse_output=False).
# The encoder is fitted on the training labels and reused as-is for the test
# labels, so both encoded arrays share the same column order.
# OneHotEncoder expects 2-D input, hence the added trailing axis.
encoder = OneHotEncoder(sparse_output=False)
y_train_en = encoder.fit_transform(y_train.values[:, np.newaxis])
y_test_en = encoder.transform(y_test.values[:, np.newaxis])

In [None]:
# Notebook display: first five one-hot encoded training labels.
y_train_en[:5]

array([[0., 0., 1.],
       [0., 1., 0.],
       [0., 0., 1.],
       [1., 0., 0.],
       [0., 1., 0.]])

### One-vs-Rest 방법으로 클래스별 binary SVM 학습 및 예측

In [None]:
# One-vs-rest: train one binary linear SVM per one-hot column, so that
# classifier_list[i] separates class i from all remaining classes.
classifier_list = []

for i in range(y_train_en.shape[1]):
    y_train_binary = y_train_en[:, i]  # 1.0 where the sample belongs to class i
    classifier = SVC(kernel='linear', random_state=0)
    classifier.fit(X_train, y_train_binary)  # train the model for the i-th class
    classifier_list.append(classifier)

# Collect each classifier's hard 0/1 prediction in its own column. The width
# follows the number of classifiers instead of a hard-coded 3.
# The classifiers vote independently, so a row may contain no 1 at all or
# several 1s; these raw votes are not yet a valid multiclass prediction.
y_pred = np.zeros((X_test.shape[0], len(classifier_list)))
for i, classifier in enumerate(classifier_list):
    y_pred[:, i] = classifier.predict(X_test)

y_pred[:5]

array([[0., 0., 0.],
       [0., 1., 0.],
       [0., 1., 1.],
       [1., 1., 0.],
       [0., 0., 1.]])

### decision function 활용

In [None]:
# Buffer for the decision-function values: one row per test sample,
# one column per one-vs-rest classifier.
y_pred_decision_function = np.zeros((X_test.shape[0], len(classifier_list)))

for i, classifier in enumerate(classifier_list):
    y_pred_decision_function[:, i] = classifier.decision_function(X_test)

# Resolve the one-vs-rest votes: pick the classifier with the largest
# decision value for each sample. The result is a column index 0, 1, 2, ...
y_pred_final = np.argmax(y_pred_decision_function, axis=1)

# Convert the predicted indices back to one-hot rows.
# The width must equal the number of classifiers (the columns argmax ranges
# over), not the number of distinct labels in y_test: if the test split lacks
# a class, y_test.nunique() is too small and the lookup below goes out of
# bounds.
num_classes = len(classifier_list)
identity_matrix = np.eye(num_classes)  # num_classes x num_classes identity matrix

# Row k of the identity matrix is the one-hot vector for class index k.
y_pred_dummy = identity_matrix[y_pred_final]

y_pred_dummy[:5]

[[-1.22321399 -0.44100861 -1.7637364 ]
 [-1.5052002   0.20713428 -1.57509733]
 [-2.25464867  0.79626159  0.05664667]
 [ 0.22876001  1.21178199 -6.93268365]
 [-1.57785413 -1.62934045  0.06571176]
 [-2.19327676 -0.71483036  0.79107858]
 [ 1.92124422 -2.70179713 -7.98745569]
 [-1.85097977 -1.05492184  0.19994587]
 [ 2.47035057 -3.61403502 -8.76301944]
 [-1.61667369  0.54730996 -1.61795729]
 [-2.04602096 -1.01435766  0.73294601]
 [ 1.62288553 -1.72078318 -8.32759115]
 [ 1.38390608 -1.34657837 -7.84439055]
 [-2.14549137 -0.45586725  0.316211  ]
 [-2.17399657  1.41812991 -1.22037599]
 [-1.30881043 -1.59713359 -0.78354649]
 [ 1.54584569 -1.52538163 -7.83385918]
 [-1.80857234  1.85447625 -2.38549496]
 [-2.28547009  0.78374249 -0.28216823]
 [-2.58206309 -1.7029484   2.27156025]
 [ 1.06928804 -0.3114437  -7.57604056]
 [-2.28749887 -1.04733409  1.10322929]
 [-1.97038233 -0.70072443 -0.0332711 ]
 [-1.54854705 -0.82903622 -1.00521846]
 [-2.24875632 -0.20987477  0.35314499]
 [ 1.82206125 -2.08786703

array([[0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [None]:
# Compute accuracy as the fraction of test samples whose predicted one-hot
# row matches the true one-hot row in every position.
# Averaging the element-wise comparison instead would overstate accuracy:
# a misclassified sample still agrees with the truth in the columns where
# both rows are 0, so it would be counted as partially correct.
accuracy = (y_pred_dummy == y_test_en).all(axis=1).mean()
print('정확도', round(accuracy, 2))

정확도 0.89
