In [1]:
import pandas as pd
from sklearn.svm import SVC
from matplotlib import pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix

np.random.seed(1)

# Loading The Data

In [3]:
# Uncomment the following snippet of code to debug problems with findi
#ng the .csv file path
# This snippet of code will exit the program and print the current working directory.
#import os
#print(os.getcwd())

In [4]:
df = pd.read_csv('RidingMowers.csv')
df.head(3)

Unnamed: 0,Income,Lot_Size,Ownership
0,60.0,18.4,Owner
1,85.5,16.8,Owner
2,64.8,21.6,Owner


In [5]:
from sklearn import preprocessing
label_encoder = preprocessing.LabelEncoder()
df['Ownership'] = label_encoder.fit_transform(df['Ownership'])
df

Unnamed: 0,Income,Lot_Size,Ownership
0,60.0,18.4,1
1,85.5,16.8,1
2,64.8,21.6,1
3,61.5,20.8,1
4,87.0,23.6,1
5,110.1,19.2,1
6,108.0,17.6,1
7,82.8,22.4,1
8,69.0,20.0,1
9,93.0,20.8,1


# Partition the data into a training and validation set

In [6]:
# partition data
#X= df['Income','Lot_Size']
X= df.drop(['Ownership'],axis=1)
y=df['Ownership']
train_X, valid_X, train_y, valid_y = train_test_split(X, y, test_size=0.3, random_state=1) # develop a two partition split with 70% for training, and 30% for validation testing.

In [8]:
valid_y

13    0
18    0
3     1
14    0
20    0
17    0
10    1
4     1
Name: Ownership, dtype: int32

# Model the data

In [9]:
performance = pd.DataFrame({"model": [], "Accuracy": [], "Precision": [], "Recall": [], "F1": []})

# Fit a SVM classification model using linear kernal

In [10]:
svm_lin_model = SVC(kernel="linear",probability=True)
_ = svm_lin_model.fit(train_X, np.ravel(train_y))

In [11]:
model_preds = svm_lin_model.predict(train_X)
c_matrix = confusion_matrix(train_y, model_preds)
TP = c_matrix[1][1]
TN = c_matrix[0][0]
FP = c_matrix[0][1]
FN = c_matrix[1][0]
performance = pd.concat([performance, pd.DataFrame({'model':"linear svm", 
                                                    'Accuracy': [(TP+TN)/(TP+TN+FP+FN)], 
                                                    'Precision': [TP/(TP+FP)], 
                                                    'Recall': [TP/(TP+FN)], 
                                                    'F1': [2*TP/(2*TP+FP+FN)]
                                                     }, index=[0])])

# Fit a SVM classification model using rbf kernal

In [12]:
svm_rbf_model = SVC(kernel="rbf", C=10, gamma='scale',probability=True)
_ = svm_rbf_model.fit(train_X, np.ravel(train_y))

In [13]:
model_preds = svm_rbf_model.predict(train_X)
c_matrix = confusion_matrix(train_y, model_preds)
TP = c_matrix[1][1]
TN = c_matrix[0][0]
FP = c_matrix[0][1]
FN = c_matrix[1][0]
performance = pd.concat([performance, pd.DataFrame({'model':"rbf svm", 
                                                    'Accuracy': [(TP+TN)/(TP+TN+FP+FN)], 
                                                    'Precision': [TP/(TP+FP)], 
                                                    'Recall': [TP/(TP+FN)], 
                                                    'F1': [2*TP/(2*TP+FP+FN)]
                                                     }, index=[0])])

# Fit a SVM classification model using polynomial kernal

In [14]:
svm_poly_model = SVC(kernel="poly", degree=3, coef0=1, C=10, probability=True)
_ = svm_poly_model.fit(train_X, np.ravel(train_y))

In [15]:
model_preds = svm_poly_model.predict(train_X)
c_matrix = confusion_matrix(train_y, model_preds)
TP = c_matrix[1][1]
TN = c_matrix[0][0]
FP = c_matrix[0][1]
FN = c_matrix[1][0]
performance = pd.concat([performance, pd.DataFrame({'model':"poly svm", 
                                                    'Accuracy': [(TP+TN)/(TP+TN+FP+FN)], 
                                                    'Precision': [TP/(TP+FP)], 
                                                    'Recall': [TP/(TP+FN)], 
                                                    'F1': [2*TP/(2*TP+FP+FN)]
                                                     }, index=[0])])

In [16]:
## 4.0 Summary

performance

Unnamed: 0,model,Accuracy,Precision,Recall,F1
0,linear svm,0.75,0.727273,0.888889,0.8
0,rbf svm,0.75,0.727273,0.888889,0.8
0,poly svm,0.75,0.727273,0.888889,0.8


# performance summary 

# Sorted by accuracy

In [19]:
performance.sort_values(by=['Accuracy'])

Unnamed: 0,model,Accuracy,Precision,Recall,F1
0,linear svm,0.75,0.727273,0.888889,0.8
0,rbf svm,0.75,0.727273,0.888889,0.8
0,poly svm,0.75,0.727273,0.888889,0.8


# Sorted by Precision

In [20]:
performance.sort_values(by=['Precision'])

Unnamed: 0,model,Accuracy,Precision,Recall,F1
0,linear svm,0.75,0.727273,0.888889,0.8
0,rbf svm,0.75,0.727273,0.888889,0.8
0,poly svm,0.75,0.727273,0.888889,0.8


# Sorted by Recall

In [21]:
performance.sort_values(by=['Recall'])

Unnamed: 0,model,Accuracy,Precision,Recall,F1
0,linear svm,0.75,0.727273,0.888889,0.8
0,rbf svm,0.75,0.727273,0.888889,0.8
0,poly svm,0.75,0.727273,0.888889,0.8


# Sorted by F1

In [22]:
performance.sort_values(by=['F1'])

Unnamed: 0,model,Accuracy,Precision,Recall,F1
0,linear svm,0.75,0.727273,0.888889,0.8
0,rbf svm,0.75,0.727273,0.888889,0.8
0,poly svm,0.75,0.727273,0.888889,0.8


# Save the model to disk

In [17]:
from sklearn.svm import SVC
import pickle

# save model
pickle.dump(svm_poly_model, open('we03 rana.csv', "wb"))

# If you wish to load this model later, simply use pickle.load method
#loaded_model = pickle.load(open('logistic_model_example01.pkl', "rb"))

# We have split the SVM data into 30% test and 70% train. according to me all the SVM models have the same values. so I'm considering the poly SVM is the best model