# Mašina potpornih vektora

In [1]:
import pandas as pd

In [2]:
import numpy as np

## Podaci

In [3]:
df = pd.read_csv("../data/iris.csv")
df.head()

Unnamed: 0,Sepal_Length,Sepal_Width,Petal_Length,Petal_Width,Species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [4]:
features = df.columns[:-1]
features

Index(['Sepal_Length', 'Sepal_Width', 'Petal_Length', 'Petal_Width'], dtype='object')

In [5]:
X = df[features]
y = df["Species"]
print(X.shape)
print(y.shape)

(150, 4)
(150,)


## Preprocesiranje

In [6]:
from sklearn.model_selection import train_test_split

In [7]:
from sklearn.preprocessing import StandardScaler

In [8]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)

In [9]:
scaler = StandardScaler()
scaler.fit(X_train)

StandardScaler()

In [10]:
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

## Treniranje modela

In [11]:
from sklearn.svm import SVC

In [12]:
from sklearn.model_selection import GridSearchCV

In [13]:
params = [
    {
        "C": [2**i for i in range(-3, 3)],
        "kernel": ["poly"], 
        "gamma": np.arange(0, 1, 0.1)
    }, 
    {
        "C": [2**i for i in range(-3, 3)],
        "kernel": ["linear"]
    }
]

In [14]:
model = GridSearchCV(SVC(), params, cv=5, scoring="accuracy", verbose=4)
model.fit(X_train, y_train)

Fitting 5 folds for each of 66 candidates, totalling 330 fits
[CV 1/5] END ...C=0.125, gamma=0.0, kernel=poly;, score=0.333 total time=   0.0s
[CV 2/5] END ...C=0.125, gamma=0.0, kernel=poly;, score=0.333 total time=   0.0s
[CV 3/5] END ...C=0.125, gamma=0.0, kernel=poly;, score=0.333 total time=   0.0s
[CV 4/5] END ...C=0.125, gamma=0.0, kernel=poly;, score=0.333 total time=   0.0s
[CV 5/5] END ...C=0.125, gamma=0.0, kernel=poly;, score=0.333 total time=   0.0s
[CV 1/5] END ...C=0.125, gamma=0.1, kernel=poly;, score=0.429 total time=   0.0s
[CV 2/5] END ...C=0.125, gamma=0.1, kernel=poly;, score=0.762 total time=   0.0s
[CV 3/5] END ...C=0.125, gamma=0.1, kernel=poly;, score=0.429 total time=   0.0s
[CV 4/5] END ...C=0.125, gamma=0.1, kernel=poly;, score=0.619 total time=   0.0s
[CV 5/5] END ...C=0.125, gamma=0.1, kernel=poly;, score=0.619 total time=   0.0s
[CV 1/5] END ...C=0.125, gamma=0.2, kernel=poly;, score=0.810 total time=   0.0s
[CV 2/5] END ...C=0.125, gamma=0.2, kernel=poly

[CV 1/5] END C=0.5, gamma=0.30000000000000004, kernel=poly;, score=0.905 total time=   0.0s
[CV 2/5] END C=0.5, gamma=0.30000000000000004, kernel=poly;, score=0.905 total time=   0.0s
[CV 3/5] END C=0.5, gamma=0.30000000000000004, kernel=poly;, score=0.857 total time=   0.0s
[CV 4/5] END C=0.5, gamma=0.30000000000000004, kernel=poly;, score=0.905 total time=   0.0s
[CV 5/5] END C=0.5, gamma=0.30000000000000004, kernel=poly;, score=0.905 total time=   0.0s
[CV 1/5] END .....C=0.5, gamma=0.4, kernel=poly;, score=0.905 total time=   0.0s
[CV 2/5] END .....C=0.5, gamma=0.4, kernel=poly;, score=0.952 total time=   0.0s
[CV 3/5] END .....C=0.5, gamma=0.4, kernel=poly;, score=0.905 total time=   0.0s
[CV 4/5] END .....C=0.5, gamma=0.4, kernel=poly;, score=0.905 total time=   0.0s
[CV 5/5] END .....C=0.5, gamma=0.4, kernel=poly;, score=1.000 total time=   0.0s
[CV 1/5] END .....C=0.5, gamma=0.5, kernel=poly;, score=1.000 total time=   0.0s
[CV 2/5] END .....C=0.5, gamma=0.5, kernel=poly;, scor

[CV 5/5] END C=4, gamma=0.30000000000000004, kernel=poly;, score=0.952 total time=   0.0s
[CV 1/5] END .......C=4, gamma=0.4, kernel=poly;, score=0.952 total time=   0.0s
[CV 2/5] END .......C=4, gamma=0.4, kernel=poly;, score=0.952 total time=   0.0s
[CV 3/5] END .......C=4, gamma=0.4, kernel=poly;, score=1.000 total time=   0.0s
[CV 4/5] END .......C=4, gamma=0.4, kernel=poly;, score=0.905 total time=   0.0s
[CV 5/5] END .......C=4, gamma=0.4, kernel=poly;, score=1.000 total time=   0.0s
[CV 1/5] END .......C=4, gamma=0.5, kernel=poly;, score=0.952 total time=   0.0s
[CV 2/5] END .......C=4, gamma=0.5, kernel=poly;, score=0.952 total time=   0.0s
[CV 3/5] END .......C=4, gamma=0.5, kernel=poly;, score=1.000 total time=   0.0s
[CV 4/5] END .......C=4, gamma=0.5, kernel=poly;, score=0.857 total time=   0.0s
[CV 5/5] END .......C=4, gamma=0.5, kernel=poly;, score=1.000 total time=   0.0s
[CV 1/5] END C=4, gamma=0.6000000000000001, kernel=poly;, score=0.905 total time=   0.0s
[CV 2/5] EN

GridSearchCV(cv=5, estimator=SVC(),
             param_grid=[{'C': [0.125, 0.25, 0.5, 1, 2, 4],
                          'gamma': array([0. , 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]),
                          'kernel': ['poly']},
                         {'C': [0.125, 0.25, 0.5, 1, 2, 4],
                          'kernel': ['linear']}],
             scoring='accuracy', verbose=4)

In [15]:
model.best_estimator_

SVC(C=0.125, kernel='linear')

In [16]:
model.best_estimator_.n_support_

array([ 7, 25, 18], dtype=int32)

In [17]:
model.best_estimator_.support_

array([ 11,  12,  20,  35,  51,  79,  84,   0,   1,  13,  19,  24,  28,
        33,  36,  52,  54,  57,  59,  60,  63,  64,  69,  70,  75,  86,
        90,  92,  93,  94, 101, 103,  10,  15,  21,  37,  40,  42,  49,
        53,  55,  66,  74,  78,  81,  82,  85,  88,  96,  98], dtype=int32)

## Evaluacija modela

In [18]:
from sklearn.metrics import accuracy_score

In [19]:
y_pred = model.predict(X_test)

In [20]:
accuracy_score(y_pred, y_test)

0.9111111111111111