# Iris Classifier

## RBF SVM

In [8]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
sns.set(color_codes=True)
%matplotlib inline
from sklearn import svm
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import cross_val_score,cross_val_predict, GridSearchCV
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_score, recall_score, f1_score
from sklearn.pipeline import Pipeline

In [19]:
df = sns.load_dataset('iris')

col = ['petal_length', 'petal_width', 'species']
df.loc[:, col].head()
col = ['petal_length', 'petal_width']
X = df.loc[:,col]

species_to_num = { 'setosa':0,'versicolor':1,'virginica':2 }
df['tmp'] = df['species'].map(species_to_num)
y = df['tmp']

C = 0.5
X_train,X_test,y_train,y_test = train_test_split(X,y,train_size=0.8,random_state=0)

sc_x = StandardScaler()
X_std_train = sc_x.fit_transform(X_train)
C = 0.5
clf = svm.SVC(kernel = 'rbf', C=C, gamma=0.5)
clf.fit(X_std_train,y_train)
res = cross_val_score(clf,X_std_train,y_train, cv=10, scoring = 'accuracy')

In [20]:
y_train_pred = cross_val_predict(clf, X_std_train, y_train, cv=3)
confusion_matrix(y_train, y_train_pred)

print("Precision Score: \t {0:.4f}".format(precision_score(y_train, 
                                                           y_train_pred, 
                                                           average='weighted')))
print("Recall Score: \t\t {0:.4f}".format(recall_score(y_train,
                                                     y_train_pred, 
                                                     average='weighted')))
print("F1 Score: \t\t {0:.4f}".format(f1_score(y_train,
                                             y_train_pred, 
                                             average='weighted')))

Precision Score: 	 0.9500
Recall Score: 		 0.9500
F1 Score: 		 0.9500


In [21]:
y_test_pred = cross_val_predict(clf, sc_x.transform(X_test), y_test, cv=3)

confusion_matrix(y_test, y_test_pred)

print("Precision Score: \t {0:.4f}".format(precision_score(y_test, 
                                                           y_test_pred, 
                                                           average='weighted')))
print("Recall Score: \t\t {0:.4f}".format(recall_score(y_test,
                                                     y_test_pred, 
                                                     average='weighted')))
print("F1 Score: \t\t {0:.4f}".format(f1_score(y_test,
                                             y_test_pred, 
                                             average='weighted')))

Precision Score: 	 0.8980
Recall Score: 		 0.8667
F1 Score: 		 0.8422


In [22]:
pipeline = Pipeline([('clf',svm.SVC(kernel = 'rbf', C=C, gamma=0.7))])
params = {'clf__C':(0.1,0.25,0.5,1,2,3),'clf__gamma':(0.001,0.01,0.1,0.25,0.5,0.75,1)}
svm_grid_rbf  = GridSearchCV(pipeline,params,n_jobs=-1,cv=3,verbose=1,scoring='accuracy')
svm_grid_rbf.fit(X_train,y_train)

Fitting 3 folds for each of 42 candidates, totalling 126 fits


GridSearchCV(cv=3, estimator=Pipeline(steps=[('clf', SVC(C=0.5, gamma=0.7))]),
             n_jobs=-1,
             param_grid={'clf__C': (0.1, 0.25, 0.5, 1, 2, 3),
                         'clf__gamma': (0.001, 0.01, 0.1, 0.25, 0.5, 0.75, 1)},
             scoring='accuracy', verbose=1)

In [23]:
svm_grid_rbf.best_score_
best  = svm_grid_rbf.best_estimator_.get_params()
for k in sorted(params.keys()):
    print(k,best[k])

clf__C 0.5
clf__gamma 0.5
