# Objective: Build a classification model using the Iris dataset

Initial Setup and loading packages

In [0]:
# Common imports
import numpy as np
import os

# Seed NumPy's global random number generator so that code drawing from it
# produces the same values on every run of this notebook.
np.random.seed(42)

# Plotting setup: the IPython magic renders figures inline in the notebook;
# the rc calls set the font size of axis labels (14) and of x/y tick labels (12).
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
mpl.rc('axes', labelsize=14)
mpl.rc('xtick', labelsize=12)
mpl.rc('ytick', labelsize=12)

# Look at the feature descriptions for the Iris dataset

 1. sepal length in cm
 2. sepal width in cm
 3. petal length in cm
 4. petal width in cm
 5. class: 
      -- Iris Setosa
      -- Iris Versicolour
      -- Iris Virginica
      

The next code cell loads the Iris dataset from scikit-learn and previews its first ten rows:

In [0]:
from sklearn import datasets

# Fetch the Iris dataset that ships with scikit-learn.
iris = datasets.load_iris()

# Show the measurements of the first ten samples (one row per sample).
iris["data"][:10]

array([[5.1, 3.5, 1.4, 0.2],
       [4.9, 3. , 1.4, 0.2],
       [4.7, 3.2, 1.3, 0.2],
       [4.6, 3.1, 1.5, 0.2],
       [5. , 3.6, 1.4, 0.2],
       [5.4, 3.9, 1.7, 0.4],
       [4.6, 3.4, 1.4, 0.3],
       [5. , 3.4, 1.5, 0.2],
       [4.4, 2.9, 1.4, 0.2],
       [4.9, 3.1, 1.5, 0.1]])

# Large margin classification

In [0]:
# Features: keep only columns 2 and 3 (petal length, petal width).
X = iris["data"][:, [2, 3]]
# Labels: the class index of every sample.
y = iris["target"]

# Restrict the problem to two classes: keep only the rows labelled 0 or 1.
setosa_or_versicolor = np.isin(y, (0, 1))
X, y = X[setosa_or_versicolor], y[setosa_or_versicolor]

# Exercise: write the code to split the data into an 80% training set and a
# 20% test set.

X_train, X_test, y_train, y_test = ...

# Exercise: verify that the split is correct by looking at the shape of each
# of the four arrays.
...

# Your result should be the same as below:
# ((80, 2), (20, 2), (80,), (20,))


# Build Model


In [0]:
# Exercise: import the support vector machine classifier from scikit-learn.


# Exercise: define the model object. Use a polynomial kernel of degree 3 and
# try different values of C to find the best one.
# Try these values and measure precision and recall for each:
# C=-1, C=0, C=0.0000001, C=0.00005, C=0.001, C=0.1
# NOTE(review): scikit-learn documents C as strictly positive, so C=-1 and C=0
# are expected to raise an error instead of producing a model -- presumably
# that is part of the exercise; confirm.
# NOTE(review): the sample output shown after this cell was produced with
# C=0.0002, which is not one of the values listed above.
svm_clf = ...

# Exercise: write the code to train the model.





SVC(C=0.0002, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
    kernel='poly', max_iter=-1, probability=False, random_state=None,
    shrinking=True, tol=0.001, verbose=False)

# Predict

In [0]:
# Exercise: write the code to predict y for the training set.
# `...` is a placeholder to replace with your prediction code; until it is
# replaced, the print below simply shows Ellipsis.
y_train_pred = ...

print(y_train_pred)


# Exercise: write the code to predict y for the test set.
y_test_pred = ...

print(y_test_pred)

[1 1 0 0 1 0 0 1 0 1 0 0 1 0 0 0 1 1 0 0 0 0 0 1 0 0 1 0 1 0 0 0 0 0 0 1 0
 1 0 1 1 1 1 0 1 1 0 1 1 1 1 0 1 0 0 0 1 0 1 1 1 1 0 0 0 1 0 0 0 1 1 1 1 1
 0 1 1 0 1 1]
[1 1 1 0 0 0 0 1 0 0 0 0 1 0 1 0 1 1 0 0]


# Evaluate


In [0]:
# Exercise: write the code to measure precision and recall on the training set
# (compare the true training labels with the training predictions).
...



1.0
0.9285714285714286


In [0]:
# Exercise: write the code to measure precision and recall on the test set
# (compare the true test labels with the test predictions).
...



1.0

# Now, let's build a model using an SGD classifier and compare the results.

In [0]:
# Exercise: try an SGD classifier with a high alpha.
# Configure the model with alpha=39, a stopping tolerance of 0.01, and run the
# optimization for 2 iterations (presumably SGDClassifier's `alpha`, `tol` and
# `max_iter` parameters -- confirm against the scikit-learn documentation).
clf = ...
# Fit the model on the training set.
clf.fit(X_train, y_train)

# Predict

In [0]:
# Let's predict. You don't need to change anything in this cell ;-)
# Run the fitted SGD model on the training set and on the test set.
y_train_pred_sgd, y_test_pred_sgd = clf.predict(X_train), clf.predict(X_test)

# Print the training predictions first, then the test predictions.
print(y_train_pred_sgd, y_test_pred_sgd, sep="\n")

[1 1 0 0 1 0 0 1 0 1 0 0 1 0 1 0 1 1 0 0 0 0 0 1 0 0 1 0 1 0 0 0 0 0 0 1 0
 1 0 1 1 1 1 0 1 1 0 1 1 1 1 0 1 0 1 1 1 0 1 1 1 1 0 0 0 1 0 0 0 1 1 1 1 1
 0 1 1 0 1 1]
[1 1 1 0 0 0 0 1 0 0 0 0 1 0 1 0 1 1 0 0]


# Evaluate


In [0]:
# Evaluate precision and recall for the training dataset


1.0

In [0]:
# Evaluate precision and recall for the test dataset



1.0
1.0


# Do you get a good model? Why? What changes can you make to improve it? Can you tune the hyperparameters?