# Support Vector Machines Using the Iris Toy Dataset
------------

## Step 1: import required modules

In [1]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score,confusion_matrix

## Step 2: import data

In [2]:
iris = load_iris()

In [3]:
iris.feature_names

['sepal length (cm)',
 'sepal width (cm)',
 'petal length (cm)',
 'petal width (cm)']

In [4]:
# Keep only the first two feature columns:
# 'sepal length (cm)' and 'sepal width (cm)'.
X = iris.data[:, :2]
y = iris.target

## Step 3: Split Data

In [5]:
# Hold out 30% of the samples as a test set; the fixed random_state makes
# the split (and therefore every result below) reproducible.
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y, 
                                                    test_size=0.3, 
                                                    random_state=42)

## Step 4: Create model using train data

In [6]:
# Linear-kernel SVC with regularization parameter C=1.
# NOTE: per the scikit-learn SVC documentation, gamma is the kernel coefficient
# for the 'rbf', 'poly' and 'sigmoid' kernels, so gamma=1 should have no effect
# while kernel='linear'.
# kernel, C and gamma are the main hyperparameters; they are tuned in Step 7.
model = SVC(kernel='linear', C=1, gamma=1) 
model.fit(X_train, y_train)

SVC(C=1, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma=1, kernel='linear',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False)

In [7]:
len(X_train)

105

In [8]:
# get support vectors
model.support_vectors_

array([[5.2, 3.4],
       [5. , 3. ],
       [5. , 3.2],
       [4.9, 3.1],
       [4.5, 2.3],
       [4.9, 3. ],
       [5.4, 3.4],
       [5.8, 4. ],
       [6.6, 3. ],
       [5.7, 2.9],
       [5.6, 3. ],
       [5.8, 2.7],
       [5.7, 3. ],
       [6.2, 2.9],
       [6. , 2.2],
       [5.4, 3. ],
       [6.7, 3. ],
       [5.9, 3.2],
       [5.1, 2.5],
       [6. , 2.7],
       [6.3, 2.5],
       [6.1, 2.9],
       [6.5, 2.8],
       [7. , 3.2],
       [5.9, 3. ],
       [5.2, 2.7],
       [6.1, 3. ],
       [6.6, 2.9],
       [5.6, 3. ],
       [4.9, 2.4],
       [6.9, 3.1],
       [5.7, 2.8],
       [6.3, 2.3],
       [6.4, 2.9],
       [6.1, 2.8],
       [5.8, 2.6],
       [6.3, 2.8],
       [6.4, 3.1],
       [6.3, 2.7],
       [5.7, 2.5],
       [6. , 3. ],
       [5.8, 2.7],
       [6.2, 3.4],
       [6.4, 2.7],
       [5.8, 2.8],
       [6.1, 2.6],
       [6. , 2.2],
       [6.3, 3.4],
       [6.8, 3. ],
       [6.3, 3.3],
       [6.2, 2.8],
       [6.4, 3.2],
       [6.5,

In [9]:
len(model.support_vectors_)

61

In [10]:
model.n_support_

array([ 8, 28, 25])

In [11]:
model.support_

array([ 11,  17,  37,  40,  76,  89,  99, 102,   3,   5,   7,  10,  21,
        24,  29,  30,  39,  44,  45,  47,  58,  64,  65,  67,  71,  74,
        75,  77,  80,  84,  90,  93,  95,  96, 100, 103,   1,   2,  15,
        25,  27,  28,  31,  35,  41,  48,  53,  59,  61,  62,  66,  68,
        72,  82,  85,  88,  92,  94,  97,  98, 101])

## Step 5: Predict Output using test data

In [12]:
y_pred= model.predict(X_test)

In [13]:
y_test

array([1, 0, 2, 1, 1, 0, 1, 2, 1, 1, 2, 0, 0, 0, 0, 1, 2, 1, 1, 2, 0, 2,
       0, 2, 2, 2, 2, 2, 0, 0, 0, 0, 1, 0, 0, 2, 1, 0, 0, 0, 2, 1, 1, 0,
       0])

In [14]:
y_pred

array([1, 0, 2, 1, 2, 0, 1, 2, 1, 1, 2, 0, 0, 0, 0, 2, 2, 1, 1, 2, 0, 1,
       0, 2, 2, 2, 2, 2, 0, 0, 0, 0, 2, 0, 0, 1, 2, 0, 0, 0, 1, 2, 2, 0,
       0])

In [15]:
y_test-y_pred

array([ 0,  0,  0,  0, -1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, -1,  0,
        0,  0,  0,  0,  1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, -1,  0,
        0,  1, -1,  0,  0,  0,  1, -1, -1,  0,  0])

In [16]:
# Manual accuracy check: fraction of test samples whose predicted label
# matches the true label. Computed from the arrays instead of hand-counted
# constants so it stays correct if the split or the model changes.
float((y_test == y_pred).mean())

0.8

## Step 6: Evaluate performance / accuracy

In [17]:
accuracy_score(y_test,y_pred)

0.8

In [18]:
confusion_matrix(y_test,y_pred)

array([[19,  0,  0],
       [ 0,  7,  6],
       [ 0,  3, 10]], dtype=int64)

## Step 7: Hyperparameter Tuning of SVM (SVC)
Tuning hyperparameters effectively can improve model performance. Let's look at the main parameters available with SVM — `kernel`, `C` and `gamma` — and search over them with cross-validation.



In [21]:
from sklearn.model_selection import GridSearchCV
def svc_param_selection(X, y, nfolds):
    """Grid-search SVC hyperparameters with cross-validation.

    Every combination of kernel, C and gamma listed below is evaluated
    with ``GridSearchCV``.

    Parameters
    ----------
    X : array-like
        Training features, passed to ``GridSearchCV.fit``.
    y : array-like
        Training labels, passed to ``GridSearchCV.fit``.
    nfolds : int
        Forwarded unchanged as the ``cv`` argument of ``GridSearchCV``.

    Returns
    -------
    tuple
        ``(best_params_, best_score_)`` of the fitted grid search.
    """
    Cs = [0.001, 0.01, 0.1, 1, 10]
    gammas = [0.001, 0.01, 0.1, 1]
    kernels = ['linear', 'poly', 'rbf']
    param_grid = {'kernel': kernels, 'C': Cs, 'gamma': gammas}
    # Use a fresh estimator instead of the notebook-level `model` so the
    # function does not depend on hidden global state. kernel, C and gamma
    # all come from the grid, so the searched configurations are unchanged.
    grid_search = GridSearchCV(SVC(), param_grid, cv=nfolds)
    grid_search.fit(X, y)
    return (grid_search.best_params_, grid_search.best_score_)

In [22]:
svc_param_selection(X_train,y_train,5)



({'C': 0.1, 'gamma': 1, 'kernel': 'rbf'}, 0.819047619047619)

In [None]:
import matplotlib.pyplot as plt

# Scatter the two features used for training, coloured by class label.
fig, ax = plt.subplots()
ax.scatter(X[:, 0], X[:, 1], c=y)
ax.set_xlabel(iris.feature_names[0])
ax.set_ylabel(iris.feature_names[1])
ax.set_title('Iris: first two features coloured by class')
plt.show()