# 以Scikit-learn SVM進行鳶尾花(Iris)品種的辨識

## 載入相關套件

In [1]:
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import numpy as np

## 載入資料集

In [2]:
X, y = datasets.load_iris(return_X_y=True) # 載入鳶尾花資料集

## 資料分割

In [3]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2)

## 特徵縮放

In [4]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
X_train_std = scaler.fit_transform(X_train)
X_test_std = scaler.transform(X_test)

## 模型訓練

In [None]:
from sklearn.svm import SVC # sklearn.svm是路徑 , 指定載入 SVC 類別, 可以縮少記憶體佔用空間; 也可以避免與其他類別名稱衝突

# code svm01.ipynb 使用 from sklearn import svm, 占用記憶體空間較多

clf = SVC(probability=True, kernel='rbf') # svc(probability = True, kernel = 'rbf')
clf.fit(X_train_std, y_train)

0,1,2
,C,1.0
,kernel,'rbf'
,degree,3
,gamma,'scale'
,coef0,0.0
,shrinking,True
,probability,True
,tol,0.001
,cache_size,200
,class_weight,


## 模型評分

In [6]:
# 計算準確率
y_pred = clf.predict(X_test_std) # clf.predict()
print(clf.score(X_test_std, y_test))
print(f'{accuracy_score(y_test, y_pred)*100:.2f}%') 

1.0
100.00%


In [None]:
clf.support_vectors_ # 碰到margin的樣本點

array([[-1.7755952 , -0.29953818, -1.39930521, -1.36766658],
       [-0.21419243,  3.3190439 , -1.34128288, -1.09993413],
       [-0.57451615,  0.90665585, -1.22523822, -1.36766658],
       [-1.05494777,  1.14789465, -1.28326055, -0.83220169],
       [-1.6554873 , -1.74697102, -1.45732755, -1.23380036],
       [-0.21419243,  1.87161107, -1.22523822, -1.23380036],
       [-0.57451615,  0.90665585, -1.34128288, -1.09993413],
       [-1.7755952 ,  0.42417823, -1.45732755, -1.36766658],
       [-0.93483986,  0.66541704, -1.22523822, -0.96606791],
       [-1.89570311, -0.05829938, -1.57337221, -1.5015328 ],
       [-1.05494777, -0.05829938, -1.28326055, -1.36766658],
       [ 0.506455  ,  0.66541704,  0.51543171,  0.50646054],
       [ 0.26623919, -0.29953818,  0.51543171,  0.2387281 ],
       [-0.93483986, -1.26449341, -0.47094792, -0.16287057],
       [ 0.98688662,  0.18293943,  0.51543171,  0.37259432],
       [ 1.34721034,  0.42417823,  0.51543171,  0.2387281 ],
       [ 0.14613128, -0.

In [None]:
len(clf.support_vectors_) # len(), 支持向量的數量,在margin 線上的樣本數量

48

In [None]:
clf.support_ # margin 線上樣本的索引值  

array([  6,  15,  41,  54,  61,  64,  69,  74,  84, 104, 113,   0,  11,
        20,  32,  43,  45,  46,  50,  56,  77,  78,  79,  80,  98, 101,
       103, 105, 118, 119,   2,  19,  22,  27,  28,  35,  51,  53,  58,
        71,  76,  83,  87,  88,  89,  97, 107, 111], dtype=int32)

In [None]:
#clf.predict_proba(X_test)
clf.predict(X_test) # 預測新點

array([0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 2, 2, 0, 0, 0, 2])

In [None]:
clf.predict_log_proba(X_test) # 預測新點的對數機率

array([[-1.17863623, -1.0989398 , -1.02421638],
       [-1.20309704, -1.11538454, -0.98900135],
       [-1.1780679 , -1.09855918, -1.02505716],
       [-1.17813473, -1.09860475, -1.02495748],
       [-1.37350817, -1.25616521, -0.77210584],
       [-1.25078145, -1.14938484, -0.92410268],
       [-1.18091437, -1.10051777, -1.02080904],
       [-1.20727267, -1.11858301, -0.98284229],
       [-1.2793391 , -1.16942124, -0.88859711],
       [-1.19763313, -1.11190316, -0.99652593],
       [-1.17962522, -1.0996187 , -1.0227406 ],
       [-1.22177953, -1.12877906, -0.96268333],
       [-1.22995872, -1.13437011, -0.95173473],
       [-1.179539  , -1.09954261, -1.02288477],
       [-1.17809129, -1.09857504, -1.02502235],
       [-1.28339807, -1.17222837, -0.88375145],
       [-1.17837157, -1.09876723, -1.02460343],
       [-1.19185174, -1.10803722, -1.00475287],
       [-1.17808265, -1.09856925, -1.02503515],
       [-1.17810428, -1.09858397, -1.02500292],
       [-1.18025694, -1.10005039, -1.021

In [None]:
clf.predict_proba(X_test) # 預測新點的機率

array([[0.30769808, 0.33322418, 0.35907774],
       [0.30026284, 0.32778921, 0.37194795],
       [0.307873  , 0.33335104, 0.35877596],
       [0.30785243, 0.33333585, 0.35881172],
       [0.25321707, 0.28474387, 0.46203906],
       [0.28628099, 0.31683161, 0.39688739],
       [0.3069979 , 0.33269878, 0.36030332],
       [0.29901167, 0.32674246, 0.37424587],
       [0.27822112, 0.31054662, 0.41123226],
       [0.30190794, 0.32893235, 0.3691597 ],
       [0.30739392, 0.33299803, 0.35960805],
       [0.29470526, 0.3234279 , 0.38186684],
       [0.29230464, 0.32162464, 0.38607071],
       [0.30742043, 0.33302337, 0.3595562 ],
       [0.30786581, 0.33334575, 0.35878845],
       [0.27709411, 0.3096761 , 0.41322979],
       [0.30777953, 0.33328169, 0.35893878],
       [0.30365845, 0.33020645, 0.3661351 ],
       [0.30786847, 0.33334768, 0.35878386],
       [0.30786181, 0.33334277, 0.35879542],
       [0.3071998 , 0.33285431, 0.35994589],
       [0.30723215, 0.33288466, 0.35988319],
       [0.