# Multiclass SVM 구현

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

#IRIS 데이터 로드
iris =  sns.load_dataset('iris') 
X= iris.iloc[:,:4] #학습할데이터
y = iris.iloc[:,-1] #타겟
print(y)

0         setosa
1         setosa
2         setosa
3         setosa
4         setosa
         ...    
145    virginica
146    virginica
147    virginica
148    virginica
149    virginica
Name: species, Length: 150, dtype: object


In [6]:
y.value_counts()

setosa        50
versicolor    50
virginica     50
Name: species, dtype: int64

In [40]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=48)

In [9]:
y_train = pd.get_dummies(y_train) # 원핫인코딩 진행
print(y_train)

     setosa  versicolor  virginica
110       0           0          1
69        0           1          0
148       0           0          1
39        1           0          0
53        0           1          0
..      ...         ...        ...
64        0           1          0
91        0           1          0
81        0           1          0
51        0           1          0
0         1           0          0

[120 rows x 3 columns]


In [10]:
y_test = pd.get_dummies(y_test) 

In [3]:
def standardization(train, test):
    scaler = StandardScaler()
    train = scaler.fit_transform(train)
    test = scaler.transform(test)
    return train, test

X_train, X_test = standardization(X_train, X_test)

In [4]:
X_train

array([[ 0.78522493,  0.32015325,  0.77221097,  1.04726529],
       [-0.26563371, -1.29989934,  0.0982814 , -0.11996537],
       [ 0.43493872,  0.78302542,  0.94069336,  1.43634218],
       [-0.84944407,  0.78302542, -1.24957775, -1.28719604],
       [-0.38239578, -1.7627715 ,  0.15444219,  0.13941922],
       [ 0.55170079, -0.374155  ,  1.05301496,  0.7878807 ],
       [ 0.31817664, -0.14271892,  0.65988937,  0.7878807 ],
       [ 0.20141457, -0.374155  ,  0.43524618,  0.39880381],
       [-1.66677857, -0.14271892, -1.36189934, -1.28719604],
       [-0.14887164, -0.60559109,  0.21060299,  0.13941922],
       [-0.14887164, -1.06846325, -0.12636179, -0.24965767],
       [ 0.31817664, -0.60559109,  0.15444219,  0.13941922],
       [ 0.66846286, -0.83702717,  0.88453256,  0.91757299],
       [ 0.0846525 , -0.14271892,  0.77221097,  0.7878807 ],
       [-0.49915786, -0.14271892,  0.43524618,  0.39880381],
       [-0.26563371, -0.60559109,  0.65988937,  1.04726529],
       [ 2.18636979,  1.

In [5]:
X_test

array([[-0.14887164, -0.374155  ,  0.26676379,  0.13941922],
       [ 0.31817664, -0.60559109,  0.54756778,  0.00972692],
       [ 0.31817664, -1.06846325,  1.05301496,  0.26911151],
       [-1.5500165 , -1.7627715 , -1.36189934, -1.15750374],
       [ 0.0846525 ,  0.32015325,  0.60372857,  0.7878807 ],
       [ 0.78522493, -0.14271892,  0.99685416,  0.7878807 ],
       [-0.84944407,  1.70876975, -1.24957775, -1.15750374],
       [ 0.20141457, -0.14271892,  0.60372857,  0.7878807 ],
       [-0.38239578,  2.63451409, -1.30573855, -1.28719604],
       [-0.38239578, -1.29989934,  0.15444219,  0.13941922],
       [ 0.66846286,  0.08871717,  0.99685416,  0.7878807 ],
       [-0.38239578,  1.0144615 , -1.36189934, -1.28719604],
       [-0.49915786,  0.78302542, -1.13725615, -1.28719604],
       [ 0.43493872, -0.60559109,  0.60372857,  0.7878807 ],
       [ 0.55170079, -1.7627715 ,  0.37908538,  0.13941922],
       [ 0.55170079,  0.55158933,  0.54756778,  0.52849611],
       [-1.19973028,  0.

### one vs rest

In [36]:
def onevsrest(X_train,y_train,X_test,y_test):
    clfs = []
    for i in range(len(y_train.value_counts())):
        clf = SVC(kernel='rbf', C=5, gamma = 5)
        clf.fit(X_train,y_train.iloc[:,i])
        clfs.append(clf)
        
    vote = np.zeros((len(X_test), 3), dtype=int)
    size = X_test.shape[0]
    y_pred = []
    
    for i in range(size):
        y_pred.append(np.argmax([clfs[0].decision_function(X_test)[i], clfs[1].decision_function(X_test)[i], clfs[2].decision_function(X_test)[i]]))
        
        # test를 진행하기 위해 0,1,2로 되어있던 데이터를 다시 문자 label로 변환
    y_pred = pd.DataFrame(y_pred).replace({0:'setosa', 1:'versicolor', 2:'virginica'})
    y_pred = pd.get_dummies(y_pred)
    print(y_pred)
    print('Accuacy : {: .5f}'.format(accuracy_score(y_test, y_pred)))
    

In [35]:
onevsrest(X_train,y_train,X_test,y_test)

    0_setosa  0_versicolor  0_virginica
0          0             1            0
1          0             1            0
2          0             1            0
3          0             0            1
4          0             0            1
5          0             0            1
6          1             0            0
7          0             0            1
8          1             0            0
9          0             1            0
10         0             0            1
11         1             0            0
12         1             0            0
13         0             0            1
14         0             1            0
15         0             1            0
16         1             0            0
17         0             1            0
18         0             0            1
19         0             0            1
20         1             0            0
21         0             0            1
22         0             1            0
23         0             1            0


### one vs one

In [41]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=48)

In [93]:
X_train.shape

(120, 4)

In [94]:
y_train.shape

(120,)

In [95]:
y.value_counts()

setosa        50
versicolor    50
virginica     50
Name: species, dtype: int64

In [96]:
ver_vir = y_train[y_train != "setosa"] # versicolor인지 versicolor인지
set_vir = y_train[y_train != "versicolor"] # setosa인지 virginica인지
set_ver = y_train[y_train != "virginica"] # setosa인지 versicolor인지 

In [90]:
y_ver_vir = pd.get_dummies(ver_vir, drop_first=True) # 2번 꽃인지 1번 꽃인지 파악
y_set_vir = pd.get_dummies(set_vir, drop_first=True) # 2번 꽃인지 0번 꽃인지 파악
y_set_ver = pd.get_dummies(set_ver, drop_first=True) # 1번 꽃인지 0번 꽃인지 파악

In [99]:
x_ver_vir = X_train[y_train != "setosa"] # versicolor인지 versicolor인지
x_set_vir = X_train[y_train != "versicolor"] # setosa인지 virginica인지
x_set_ver = X_train[y_train != "virginica"] # setosa인지 versicolor인지 

In [123]:
def onevsone(X_test, labels):
    clfs = []
    results = []
    
    x_ver_vir = X_train[y_train != "setosa"] # versicolor인지 versicolor인지
    x_set_vir = X_train[y_train != "versicolor"] # setosa인지 virginica인지
    x_set_ver = X_train[y_train != "virginica"] # setosa인지 versicolor인지 
    
    ver_vir = y_train[y_train != "setosa"] # versicolor인지 versicolor인지
    set_vir = y_train[y_train != "versicolor"] # setosa인지 virginica인지
    set_ver = y_train[y_train != "virginica"] # setosa인지 versicolor인지 
    
    y_ver_vir = pd.get_dummies(ver_vir, drop_first=True) 
    y_set_vir = pd.get_dummies(set_vir, drop_first=True) 
    y_set_ver = pd.get_dummies(set_ver, drop_first=True) 
    
    x_list = [x_ver_vir,x_set_vir,x_set_ver]
    y_list = [y_ver_vir,y_set_vir,y_set_ver]

    for i in range(len(y_list)):
        clf = SVC(kernel='rbf', C=5, gamma = 5)
        clf.fit(x_list[i],y_list[i])
        pred = clf.predict(X_test)
        results.append(pred)
    
    results = np.array(results).T #[d,d,d] 이렇게 나타나도록 표현 
    pred = []
    for result in range(len(results)):
        vote = np.array([0, 0, 0])
        
        # 0번부터 순회함
        if results[result][0] == 1: # 1번 꽃이다.
            vote[1] += 1
        elif results[result][0] == 0: # 0번 꽃이다.
            vote[0] += 1
            
        if results[result][1] == 1: # 2번 꽃이다.
            vote[2] += 1
        elif results[result][1] == 0: # 0번 꽃이다.
            vote[0] += 1
            
        if results[result][2] == 1: # 2번 꽃이다.
            vote[2] += 1
        elif results[result][2] == 0: # 1번 꽃이다.
            vote[1] += 1
            
        pred.append(labels[vote.argmax()]) # voting 된 점수를 꽃 이름으로 바꿔서 append
    
    
    print(f"prediction : \n {results}")
    
    return pred

In [124]:
labels = ["setosa", "versicolor", "virginica"]

prediction = onevsone( X_test, labels)
accuracy_score(y_test,prediction)

prediction : 
 [[0 1 1]
 [0 1 1]
 [1 1 1]
 [1 1 1]
 [1 1 1]
 [1 1 1]
 [1 0 0]
 [1 1 1]
 [1 0 0]
 [0 1 1]
 [1 1 1]
 [1 0 0]
 [1 0 0]
 [1 1 1]
 [0 1 1]
 [0 1 1]
 [1 0 0]
 [0 1 1]
 [1 1 1]
 [1 1 1]
 [1 0 0]
 [1 1 1]
 [1 1 1]
 [0 1 1]
 [1 1 1]
 [1 0 0]
 [1 0 0]
 [1 1 1]
 [1 1 1]
 [0 1 1]]


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


0.3333333333333333