# Support Vector Machine using IRIS Data Set

Classification practice using the IRIS dataset (which maps features to species of the Iris flower)

### Definitions
**Separating line** or **Hyperplane** - divides the data set so as to maximize the minimum distance between the two classes of data.

**Support Vectors** - points in the data set that are closest to the hyperplane

In [None]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
sns.set_style('whitegrid')
import pandas as pd
from sklearn import svm, datasets

In [None]:
# Load the iris dataset via seaborn's load_dataset and preview the first rows.
df = sns.load_dataset('iris')
df.head()

In [None]:
# Show the distinct values of the species column (the class labels).
df.species.unique()

In [None]:
# Preview the two petal measurements side by side with the species label.
col = ['petal_length', 'petal_width', 'species']
df[col].head()

In [None]:
# Feature matrix: only the two petal measurements are used as predictors.
col = ['petal_length', 'petal_width']
X = df[col]

In [None]:
# Encode each species name as an integer class code (0, 1, 2), store the codes
# in a helper column of df, and use that column as the target vector.
species_to_num = {
    name: code
    for code, name in enumerate(('setosa', 'versicolor', 'virginica'))
}
df['tmp'] = df['species'].map(species_to_num)
y = df['tmp']

In [None]:
# Fit a linear-kernel SVC on the raw (unscaled) petal features.
# C is the regularisation parameter passed to the classifier.
C = 0.1
svc = svm.SVC(kernel='linear',C=C)
# Alternative classifiers to try in place of the linear SVC above:
#svc = svm.LinearSVC(C=C,loss='hinge')
#svc = svm.SVC(kernel='poly',degree=3,C=C)
#svc = svm.SVC(kernel='rbf',gamma=0.7,C=C)
svc.fit(X,y)

In [None]:
# Classify one new flower with petal_length=6 and petal_width=2.
# The sample is wrapped in a DataFrame carrying X's column names because svc
# was fitted on a DataFrame; a bare list makes scikit-learn warn that the
# input "does not have valid feature names".
svc.predict(pd.DataFrame([[6, 2]], columns=X.columns))

In [None]:
# Plot the decision regions of the fitted classifier over the 2-D feature plane.
#
# The mesh limits are taken per feature: column 0 (petal_length) spans the
# horizontal axis and column 1 (petal_width) the vertical axis. Previously the
# vertical range was derived from the class labels `y` and the horizontal range
# from both features flattened together, which only matched the data by
# coincidence.
Xv = X.values
h = 0.02  # mesh step size
x_min, x_max = Xv[:, 0].min() - 1, Xv[:, 0].max() + 1
y_min, y_max = Xv[:, 1].min() - 1, Xv[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))

# Predict a class for every mesh point. The points are wrapped in a DataFrame
# with X's column names so they match the feature names svc was fitted with.
grid = pd.DataFrame(np.c_[xx.ravel(), yy.ravel()], columns=X.columns)
z = svc.predict(grid).reshape(xx.shape)

fig = plt.figure(figsize=(12, 8))
ax = plt.contourf(xx, yy, z, cmap='afmhot', alpha=0.3)
plt.xlabel(X.columns[0])
plt.ylabel(X.columns[1])
# Overlay the actual samples, coloured by their class code.
plt.scatter(Xv[:, 0], Xv[:, 1], c=y, s=80, alpha=0.5, edgecolors='g')

## Linear SVM Implementation

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
# Keep 80% of the rows for training and hold out the rest for testing;
# random_state=0 fixes the shuffle so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(X,y,train_size=0.8,random_state=0)

#### Scale features

In [None]:
# Fit the scaler on the training split only, then standardise the training
# features with it. The fitted scaler (sc_x) is reused for the test split.
sc_x = StandardScaler().fit(X_train)
X_std_train = sc_x.transform(X_train)

In [None]:
# Fit a linear-kernel SVC (regularisation parameter C=1.0) on the
# standardised training features.
C = 1.0
svc = svm.SVC(kernel='linear', C=C)
svc.fit(X_std_train, y_train)

In [None]:
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_val_predict
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_score, recall_score, f1_score


#### Cross validation within training dataset

In [None]:
# 10-fold cross-validated accuracy of svc on the standardised training data;
# report the mean and standard deviation across the folds.
res = cross_val_score(svc, X_std_train, y_train, scoring='accuracy', cv=10)
for template, value in (
    ("Accuracy MEAN: \t {0:.4f}", res.mean()),
    ("Accuracy SD: \t {0:.4f}", res.std()),
):
    print(template.format(value))

In [None]:
# Out-of-fold predictions for every training sample (3-fold cross-validation).
y_train_pred = cross_val_predict(svc, X_std_train, y_train,cv=3)

In [None]:
# Confusion matrix of true training labels vs. the cross-validated predictions.
confusion_matrix(y_train, y_train_pred)

In [None]:
# Weighted-average precision, recall and F1 of the cross-validated
# training predictions.
for template, metric in (
    ("Precision Score: \t {0:.4f}", precision_score),
    ("Recall Score: \t {0:.4f}", recall_score),
    ("f1 Score: \t {0:.4f}", f1_score),
):
    print(template.format(metric(y_train, y_train_pred, average='weighted')))

#### Cross validation within test data set

In [None]:
# Out-of-fold predictions on the test split, scaled with the scaler that was
# fitted on the training split.
# NOTE(review): cross_val_predict clones `svc` and refits it on folds of the
# test split, so these predictions do not come from the already-fitted model.
# Use svc.predict(sc_x.transform(X_test)) if a hold-out evaluation is wanted.
y_test_pred = cross_val_predict(svc, sc_x.transform(X_test), y_test, cv=3)

In [None]:
# Confusion matrix of true test labels vs. the predictions computed above.
confusion_matrix(y_test, y_test_pred)

In [None]:
# Weighted-average precision, recall and F1 of the test-split predictions.
for template, metric in (
    ("Precision Score: \t {0:.4f}", precision_score),
    ("Recall Score: \t {0:.4f}", recall_score),
    ("f1 Score: \t {0:.4f}", f1_score),
):
    print(template.format(metric(y_test, y_test_pred, average='weighted')))

### Linear SVM Implementation with training set = 60%

In [None]:
# (These imports repeat ones made in an earlier cell; they are harmless.)
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
# Re-split with only 60% of the rows used for training; random_state=0 keeps
# the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X,y,train_size=0.6,random_state=0)

In [None]:
# Standardise the training features; the fitted scaler is kept in sc_x so the
# test split can be transformed with the same statistics.
sc_x = StandardScaler().fit(X_train)
X_std_train = sc_x.transform(X_train)

# Linear-kernel SVC with C=1.0, fitted on the standardised training data.
C = 1.0
svc = svm.SVC(kernel='linear', C=C).fit(X_std_train, y_train)

# 10-fold cross-validated accuracy: mean and standard deviation over folds.
res = cross_val_score(svc, X_std_train, y_train, scoring='accuracy', cv=10)
for template, value in (
    ("Accuracy MEAN: \t {0:.4f}", res.mean()),
    ("Accuracy SD: \t {0:.4f}", res.std()),
):
    print(template.format(value))

# Out-of-fold training predictions (3-fold) and their confusion matrix.
y_train_pred = cross_val_predict(svc, X_std_train, y_train, cv=3)
confusion_matrix(y_train, y_train_pred)

In [None]:
# Weighted-average precision, recall and F1 of the cross-validated
# training predictions.
for template, metric in (
    ("Precision Score: \t {0:.4f}", precision_score),
    ("Recall Score: \t {0:.4f}", recall_score),
    ("f1 Score: \t {0:.4f}", f1_score),
):
    print(template.format(metric(y_train, y_train_pred, average='weighted')))

In [None]:
# Out-of-fold predictions on the test split (scaled with the training-fitted
# scaler), followed by their confusion matrix.
# NOTE(review): cross_val_predict clones `svc` and refits it on folds of the
# test split, so these predictions do not come from the already-fitted model.
y_test_pred = cross_val_predict(svc, sc_x.transform(X_test), y_test, cv=3)
confusion_matrix(y_test, y_test_pred)

In [None]:
# Weighted-average precision, recall and F1 of the test-split predictions.
for template, metric in (
    ("Precision Score: \t {0:.4f}", precision_score),
    ("Recall Score: \t {0:.4f}", recall_score),
    ("f1 Score: \t {0:.4f}", f1_score),
):
    print(template.format(metric(y_test, y_test_pred, average='weighted')))

### Linear SVM Implementation with C = 0.5

In [None]:
# (These imports repeat ones made in an earlier cell; they are harmless.)
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
# Back to an 80% training split; random_state=0 keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X,y,train_size=0.8,random_state=0)

In [None]:
# Standardise the training features; the fitted scaler is kept in sc_x so the
# test split can be transformed with the same statistics.
sc_x = StandardScaler().fit(X_train)
X_std_train = sc_x.transform(X_train)

# Linear-kernel SVC with a smaller regularisation parameter, C=0.5.
C = 0.5
svc = svm.SVC(kernel='linear', C=C).fit(X_std_train, y_train)

# 10-fold cross-validated accuracy: mean and standard deviation over folds.
res = cross_val_score(svc, X_std_train, y_train, scoring='accuracy', cv=10)
for template, value in (
    ("Accuracy MEAN: \t {0:.4f}", res.mean()),
    ("Accuracy SD: \t {0:.4f}", res.std()),
):
    print(template.format(value))

# Out-of-fold training predictions (3-fold) and their confusion matrix.
y_train_pred = cross_val_predict(svc, X_std_train, y_train, cv=3)
confusion_matrix(y_train, y_train_pred)

In [None]:
# Weighted-average precision, recall and F1 of the cross-validated
# training predictions.
for template, metric in (
    ("Precision Score: \t {0:.4f}", precision_score),
    ("Recall Score: \t {0:.4f}", recall_score),
    ("f1 Score: \t {0:.4f}", f1_score),
):
    print(template.format(metric(y_train, y_train_pred, average='weighted')))

In [None]:
# Out-of-fold predictions on the test split (scaled with the training-fitted
# scaler), followed by their confusion matrix.
# NOTE(review): cross_val_predict clones `svc` and refits it on folds of the
# test split, so these predictions do not come from the already-fitted model.
y_test_pred = cross_val_predict(svc, sc_x.transform(X_test), y_test, cv=3)
confusion_matrix(y_test, y_test_pred)

In [None]:
# Weighted-average precision, recall and F1 of the test-split predictions.
for template, metric in (
    ("Precision Score: \t {0:.4f}", precision_score),
    ("Recall Score: \t {0:.4f}", recall_score),
    ("f1 Score: \t {0:.4f}", f1_score),
):
    print(template.format(metric(y_test, y_test_pred, average='weighted')))

## Polynomial SVC

In [None]:
# Fit a degree-5 polynomial-kernel SVC on the full, unscaled feature matrix;
# this fit is only used to draw the decision regions in the next cell.
C = 1
svc = svm.SVC(kernel='poly', degree=5, C=C)
svc.fit(X, y)

In [None]:
# Plot the decision regions of the fitted classifier over the 2-D feature plane.
#
# The mesh limits are taken per feature: column 0 (petal_length) spans the
# horizontal axis and column 1 (petal_width) the vertical axis. Previously the
# vertical range was derived from the class labels `y` and the horizontal range
# from both features flattened together, which only matched the data by
# coincidence.
Xv = X.values
h = 0.02  # mesh step size
x_min, x_max = Xv[:, 0].min() - 1, Xv[:, 0].max() + 1
y_min, y_max = Xv[:, 1].min() - 1, Xv[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))

# Predict a class for every mesh point. The points are wrapped in a DataFrame
# with X's column names so they match the feature names svc was fitted with.
grid = pd.DataFrame(np.c_[xx.ravel(), yy.ravel()], columns=X.columns)
z = svc.predict(grid).reshape(xx.shape)

fig = plt.figure(figsize=(12, 8))
ax = plt.contourf(xx, yy, z, cmap='afmhot', alpha=0.3)
plt.xlabel(X.columns[0])
plt.ylabel(X.columns[1])
# Overlay the actual samples, coloured by their class code.
plt.scatter(Xv[:, 0], Xv[:, 1], c=y, s=80, alpha=0.5, edgecolors='g')

### Polynomial SVM Implementation

In [None]:
# Refit the same estimator on the standardised training split.
# X_std_train / y_train are whatever the most recently executed split and
# scaling cells left in scope.
svc.fit(X_std_train, y_train)

In [None]:
# 10-fold cross-validated accuracy of the current svc on the standardised
# training data: mean and standard deviation over folds.
res = cross_val_score(svc, X_std_train, y_train, scoring='accuracy', cv=10)
for template, value in (
    ("Accuracy MEAN: \t {0:.4f}", res.mean()),
    ("Accuracy SD: \t {0:.4f}", res.std()),
):
    print(template.format(value))

# Out-of-fold training predictions (3-fold) and their confusion matrix.
y_train_pred = cross_val_predict(svc, X_std_train, y_train, cv=3)
confusion_matrix(y_train, y_train_pred)

In [None]:
# Weighted-average precision, recall and F1 of the cross-validated
# training predictions.
for template, metric in (
    ("Precision Score: \t {0:.4f}", precision_score),
    ("Recall Score: \t {0:.4f}", recall_score),
    ("f1 Score: \t {0:.4f}", f1_score),
):
    print(template.format(metric(y_train, y_train_pred, average='weighted')))

In [None]:
# Out-of-fold predictions on the test split (scaled with the training-fitted
# scaler), followed by their confusion matrix.
# NOTE(review): cross_val_predict clones `svc` and refits it on folds of the
# test split, so these predictions do not come from the already-fitted model.
y_test_pred = cross_val_predict(svc, sc_x.transform(X_test), y_test, cv=3)
confusion_matrix(y_test, y_test_pred)

In [None]:
# Weighted-average precision, recall and F1 of the test-split predictions.
for template, metric in (
    ("Precision Score: \t {0:.4f}", precision_score),
    ("Recall Score: \t {0:.4f}", recall_score),
    ("f1 Score: \t {0:.4f}", f1_score),
):
    print(template.format(metric(y_test, y_test_pred, average='weighted')))

## Gaussian Radial Basis Function (rbf)

In [None]:
# Fit an RBF-kernel SVC (gamma=0.25, C=0.1) on the full, unscaled feature
# matrix; this fit is only used to draw the decision regions in the next cell.
C = 0.1
svc = svm.SVC(kernel='rbf', gamma=0.25, C=C)
svc.fit(X, y)

In [None]:
# Plot the decision regions of the fitted classifier over the 2-D feature plane.
#
# The mesh limits are taken per feature: column 0 (petal_length) spans the
# horizontal axis and column 1 (petal_width) the vertical axis. Previously the
# vertical range was derived from the class labels `y` and the horizontal range
# from both features flattened together, which only matched the data by
# coincidence.
Xv = X.values
h = 0.02  # mesh step size
x_min, x_max = Xv[:, 0].min() - 1, Xv[:, 0].max() + 1
y_min, y_max = Xv[:, 1].min() - 1, Xv[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))

# Predict a class for every mesh point. The points are wrapped in a DataFrame
# with X's column names so they match the feature names svc was fitted with.
grid = pd.DataFrame(np.c_[xx.ravel(), yy.ravel()], columns=X.columns)
z = svc.predict(grid).reshape(xx.shape)

fig = plt.figure(figsize=(12, 8))
ax = plt.contourf(xx, yy, z, cmap='afmhot', alpha=0.3)
plt.xlabel(X.columns[0])
plt.ylabel(X.columns[1])
# Overlay the actual samples, coloured by their class code.
plt.scatter(Xv[:, 0], Xv[:, 1], c=y, s=80, alpha=0.5, edgecolors='g')

In [None]:
# Refit the same estimator on the standardised training split.
# X_std_train / y_train are whatever the most recently executed split and
# scaling cells left in scope.
svc.fit(X_std_train, y_train)

In [None]:
# 10-fold cross-validated accuracy of the current svc on the standardised
# training data: mean and standard deviation over folds.
res = cross_val_score(svc, X_std_train, y_train, scoring='accuracy', cv=10)
for template, value in (
    ("Accuracy MEAN: \t {0:.4f}", res.mean()),
    ("Accuracy SD: \t {0:.4f}", res.std()),
):
    print(template.format(value))

# Out-of-fold training predictions (3-fold) and their confusion matrix.
y_train_pred = cross_val_predict(svc, X_std_train, y_train, cv=3)
confusion_matrix(y_train, y_train_pred)

In [None]:
# Weighted-average precision, recall and F1 of the cross-validated
# training predictions.
for template, metric in (
    ("Precision Score: \t {0:.4f}", precision_score),
    ("Recall Score: \t {0:.4f}", recall_score),
    ("f1 Score: \t {0:.4f}", f1_score),
):
    print(template.format(metric(y_train, y_train_pred, average='weighted')))

In [None]:
# Out-of-fold predictions on the test split (scaled with the training-fitted
# scaler), followed by their confusion matrix.
# NOTE(review): cross_val_predict clones `svc` and refits it on folds of the
# test split, so these predictions do not come from the already-fitted model.
y_test_pred = cross_val_predict(svc, sc_x.transform(X_test), y_test, cv=3)
confusion_matrix(y_test, y_test_pred)

In [None]:
# Weighted-average precision, recall and F1 of the test-split predictions.
for template, metric in (
    ("Precision Score: \t {0:.4f}", precision_score),
    ("Recall Score: \t {0:.4f}", recall_score),
    ("f1 Score: \t {0:.4f}", f1_score),
):
    print(template.format(metric(y_test, y_test_pred, average='weighted')))

## Grid Search

In [None]:
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV

In [None]:
# Fresh 80/20 split for the hyperparameter search (reproducible via random_state).
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8, random_state=0)
# Kept so that sc_x / X_std_train stay consistent with this split for any cell
# that still reads them; the grid search itself does not use them.
X_std_train = sc_x.fit_transform(X_train)

# Standardisation lives inside the pipeline so that the grid search, which is
# fitted on the raw X_train, actually trains on scaled features. The scaler is
# then re-fitted on the training part of every CV fold, and predict() on raw
# test data is scaled with the training statistics automatically.
pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('clf', svm.SVC(kernel='rbf', C=1, gamma=0.1)),
])

# Search grid for the SVC step. The gamma grid previously listed 0.1 twice;
# the duplicate is replaced by 0.01, which fills the gap between 0.001 and 0.1.
params = {
    'clf__C': (0.1, 0.5, 1, 2, 5, 10, 20),
    'clf__gamma': (0.001, 0.01, 0.1, 0.25, 0.5, 0.75, 1),
}

In [None]:
# Exhaustive search over `params` with 3-fold cross-validation, scored by
# accuracy; n_jobs=-1 requests parallel execution and verbose=1 prints progress.
svm_grid_rbf = GridSearchCV(pipeline, params, n_jobs=-1,cv=3,verbose=1,scoring='accuracy')

In [None]:
# Run the search. X_train is passed unscaled (X_std_train is not used here),
# so any scaling has to happen inside `pipeline`.
svm_grid_rbf.fit(X_train, y_train)

In [None]:
# Mean cross-validated accuracy of the best parameter combination.
svm_grid_rbf.best_score_

In [None]:
# All parameters of the best pipeline found by the search, as a dict.
best = svm_grid_rbf.best_estimator_.get_params()

In [None]:
# Display the full parameter dict of the best pipeline.
best

In [None]:
# Print the selected value of every hyperparameter that was tuned.
# The general format 'g' is used instead of '.2f' so that small values such
# as gamma=0.001 are not rounded to 0.00 in the output.
for k in sorted(params):
    print('\t{0}: \t {1:g}'.format(k, best[k]))

In [None]:
# Scale the test split with the statistics learned from the training split.
# transform() is used rather than fit_transform(): re-fitting the scaler here
# would estimate mean/variance from the test data itself.
X_std_test = sc_x.transform(X_test)
# svm_grid_rbf was fitted with X_train (not X_std_train) as its input, so it
# must be given X_test in the same form.
y_test_pred = svm_grid_rbf.predict(X_test)

In [None]:
# Confusion matrix of true test labels vs. the grid-search model's predictions.
confusion_matrix(y_test, y_test_pred)

In [None]:
# Weighted-average precision, recall and F1 of the test-split predictions.
for template, metric in (
    ("Precision Score: \t {0:.4f}", precision_score),
    ("Recall Score: \t {0:.4f}", recall_score),
    ("f1 Score: \t {0:.4f}", f1_score),
):
    print(template.format(metric(y_test, y_test_pred, average='weighted')))

## Support Vector Regression

In [None]:
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, r2_score

In [None]:
# Support vector regression with default settings: the integer class codes are
# regressed on the two unscaled petal features, then scored on both splits.
svr = SVR().fit(X_train, y_train)
y_train_pred = svr.predict(X_train)
y_test_pred = svr.predict(X_test)

# Mean squared error and R^2, each reported as (train, test).
mse = (mean_squared_error(y_train, y_train_pred),
       mean_squared_error(y_test, y_test_pred))
r2 = (r2_score(y_train, y_train_pred),
      r2_score(y_test, y_test_pred))
print("MSE train: {0:.4f}, test: {1:.4f}".format(*mse))
print("R^2 train: {0:.4f}, test: {1:.4f}".format(*r2))

### Linear

In [None]:
# Support vector regression with a linear kernel on the unscaled petal
# features, scored on both splits.
svr = SVR(kernel='linear').fit(X_train, y_train)
y_train_pred = svr.predict(X_train)
y_test_pred = svr.predict(X_test)

# Mean squared error and R^2, each reported as (train, test).
mse = (mean_squared_error(y_train, y_train_pred),
       mean_squared_error(y_test, y_test_pred))
r2 = (r2_score(y_train, y_train_pred),
      r2_score(y_test, y_test_pred))
print("MSE train: {0:.4f}, test: {1:.4f}".format(*mse))
print("R^2 train: {0:.4f}, test: {1:.4f}".format(*r2))

### Poly

In [None]:
# Support vector regression with a degree-2 polynomial kernel and C=1e3 on the
# unscaled petal features, scored on both splits.
svr = SVR(kernel='poly', degree=2, C=1e3).fit(X_train, y_train)
y_train_pred = svr.predict(X_train)
y_test_pred = svr.predict(X_test)

# Mean squared error and R^2, each reported as (train, test).
mse = (mean_squared_error(y_train, y_train_pred),
       mean_squared_error(y_test, y_test_pred))
r2 = (r2_score(y_train, y_train_pred),
      r2_score(y_test, y_test_pred))
print("MSE train: {0:.4f}, test: {1:.4f}".format(*mse))
print("R^2 train: {0:.4f}, test: {1:.4f}".format(*r2))

### RBF

In [None]:
# Support vector regression with an RBF kernel (gamma=0.1, C=1e3) on the
# unscaled petal features, scored on both splits.
svr = SVR(kernel='rbf', gamma=0.1, C=1e3).fit(X_train, y_train)
y_train_pred = svr.predict(X_train)
y_test_pred = svr.predict(X_test)

# Mean squared error and R^2, each reported as (train, test).
mse = (mean_squared_error(y_train, y_train_pred),
       mean_squared_error(y_test, y_test_pred))
r2 = (r2_score(y_train, y_train_pred),
      r2_score(y_test, y_test_pred))
print("MSE train: {0:.4f}, test: {1:.4f}".format(*mse))
print("R^2 train: {0:.4f}, test: {1:.4f}".format(*r2))