# Setup

First, let's make sure this notebook works well in both Python 2 and Python 3, import a few common modules, ensure Matplotlib plots figures inline, and prepare a function to save the figures:

In [None]:
# To support both python 2 and python 3
from __future__ import division, print_function, unicode_literals

# Common imports
import numpy as np
import os

# to make this notebook's output stable across runs
np.random.seed(42)

# To plot pretty figures
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
mpl.rc('axes', labelsize=14)
mpl.rc('xtick', labelsize=12)
mpl.rc('ytick', labelsize=12)

# Where to save the figures
PROJECT_ROOT_DIR = "."
CHAPTER_ID = "svm"

# Feature Description of the Iris Dataset

 1. sepal length in cm
 2. sepal width in cm
 3. petal length in cm
 4. petal width in cm
 5. class:
    - Iris Setosa
    - Iris Versicolour
    - Iris Virginica
      

The next cell loads the Iris dataset and previews its first ten samples (one row per flower, one column per feature listed above):

In [None]:
from sklearn import datasets

# Load the bundled Iris dataset and display the first ten rows of its
# feature matrix as the cell output.
iris = datasets.load_iris()
iris.data[:10]

array([[5.1, 3.5, 1.4, 0.2],
       [4.9, 3. , 1.4, 0.2],
       [4.7, 3.2, 1.3, 0.2],
       [4.6, 3.1, 1.5, 0.2],
       [5. , 3.6, 1.4, 0.2],
       [5.4, 3.9, 1.7, 0.4],
       [4.6, 3.4, 1.4, 0.3],
       [5. , 3.4, 1.5, 0.2],
       [4.4, 2.9, 1.4, 0.2],
       [4.9, 3.1, 1.5, 0.1]])

# Large margin classification

In [None]:
from sklearn.model_selection import train_test_split

# Use only two of the four features: columns 2 and 3 of the data matrix.
X = iris["data"][:, (2, 3)]  # petal length, petal width
y = iris["target"]

# Reduce the task to binary classification by keeping only the samples
# whose target is 0 or 1.
setosa_or_versicolor = (y == 0) | (y == 1)
X = X[setosa_or_versicolor]
y = y[setosa_or_versicolor]

# 80/20 train/test split. random_state pins the shuffle so that re-running
# this cell always yields the same partition, instead of drawing fresh
# numbers from the global NumPy RNG each time the cell is executed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Sanity check: shapes of the four resulting arrays
X_train.shape, X_test.shape, y_train.shape, y_test.shape



((80, 2), (20, 2), (80,), (20,))

# Build Model


In [None]:
# SVM classifier with a degree-3 polynomial kernel
from sklearn.svm import SVC

# A very large *finite* C approximates a hard margin. float("inf") is
# rejected by the parameter validation of recent scikit-learn releases,
# which require C to be a finite, strictly positive number.
HARD_MARGIN_C = 1e100

# gamma is set explicitly because the library default changed between
# releases ("auto" -> "scale"); pinning it keeps the fitted model the same
# across versions and avoids the deprecation warning on older ones.
svm_clf = SVC(kernel="poly", degree=3, C=HARD_MARGIN_C, coef0=1, gamma="auto")
svm_clf.fit(X_train, y_train)





SVC(C=inf, cache_size=200, class_weight=None, coef0=1,
    decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
    kernel='poly', max_iter=-1, probability=False, random_state=None,
    shrinking=True, tol=0.001, verbose=False)

# Predict

In [None]:
# Predict labels for the training split and for the held-out test split
y_train_pred = svm_clf.predict(X_train)
y_test_pred = svm_clf.predict(X_test)

# Show the training predictions first, then the test predictions
print(y_train_pred)
print(y_test_pred)

[1 1 0 0 1 0 0 1 0 1 0 0 1 0 1 0 1 1 0 0 0 0 0 1 0 0 1 0 1 0 0 0 0 0 0 1 0
 1 0 1 1 1 1 0 1 1 0 1 1 1 1 0 1 0 1 1 1 0 1 1 1 1 0 0 0 1 0 0 0 1 1 1 1 1
 0 1 1 0 1 1]
[1 1 1 0 0 0 0 1 0 0 0 0 1 0 1 0 1 1 0 0]


# Evaluate


In [None]:
from sklearn.metrics import precision_score, recall_score

# Training-set precision, then recall, for the SVM predictions (one per line)
print(
    precision_score(y_train, y_train_pred),
    recall_score(y_train, y_train_pred),
    sep="\n",
)



1.0
1.0


In [None]:
from sklearn.metrics import precision_score, recall_score

# Report both precision and recall on the held-out test set. recall_score
# was imported but never used here, so the test evaluation was less
# complete than the training-set evaluation.
print(precision_score(y_test, y_test_pred))
print(recall_score(y_test, y_test_pred))


1.0

For comparison, the next cells train a linear classifier with stochastic gradient descent (SGD) on the same train/test split:

In [None]:
# Try SGD with a high alpha (alpha multiplies the regularization term)

from sklearn import linear_model

# random_state fixes the shuffling of the training data between epochs, so
# the fitted model no longer depends on how much of the global NumPy RNG
# state earlier cells (or repeated runs of this cell) have consumed.
clf = linear_model.SGDClassifier(alpha=0.1, max_iter=10, tol=1e-3, random_state=42)
clf.fit(X_train, y_train)



SGDClassifier(alpha=0.1, average=False, class_weight=None, early_stopping=False,
              epsilon=0.1, eta0=0.0, fit_intercept=True, l1_ratio=0.15,
              learning_rate='optimal', loss='hinge', max_iter=10,
              n_iter_no_change=5, n_jobs=None, penalty='l2', power_t=0.5,
              random_state=None, shuffle=True, tol=0.001,
              validation_fraction=0.1, verbose=0, warm_start=False)

# Predict

In [None]:
# SGD classifier predictions on the training split and on the test split
y_train_pred_sgd, y_test_pred_sgd = clf.predict(X_train), clf.predict(X_test)

# Training predictions are printed before the test predictions
print(y_train_pred_sgd)
print(y_test_pred_sgd)

[1 1 0 0 1 0 0 1 0 1 0 0 1 0 1 0 1 1 0 0 0 0 0 1 0 0 1 0 1 0 0 0 0 0 0 1 0
 1 0 1 1 1 1 0 1 1 0 1 1 1 1 0 1 0 1 1 1 0 1 1 1 1 0 0 0 1 0 0 0 1 1 1 1 1
 0 1 1 0 1 1]
[1 1 1 0 0 0 0 1 0 0 0 0 1 0 1 0 1 1 0 0]


# Evaluate


In [None]:
from sklearn.metrics import precision_score, recall_score

# Report both precision and recall on the training set. recall_score was
# imported but never used here, so this cell was inconsistent with the
# test-set evaluation of the same classifier.
print(precision_score(y_train, y_train_pred_sgd))
print(recall_score(y_train, y_train_pred_sgd))


1.0

In [None]:
from sklearn.metrics import precision_score, recall_score

# Test-set precision and recall of the SGD classifier, printed in that order
print(precision_score(y_true=y_test, y_pred=y_test_pred_sgd))
print(recall_score(y_true=y_test, y_pred=y_test_pred_sgd))


1.0
1.0


This is the first code example in chapter 5: