In [92]:
#importing libraries

from sklearn.svm import SVC
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix

from sklearn.model_selection import train_test_split

# Seed NumPy's global random generator so that steps drawing from it give
# repeatable results when the notebook is run top to bottom.
np.random.seed(seed=1)

In [93]:
# Load the RidingMowers CSV file into a DataFrame.
# NOTE(review): this is an absolute path on one user's machine; the notebook
# will not run elsewhere until it is pointed at a local copy of the file.

df = pd.read_csv(filepath_or_buffer="C:/Users/ssand/Downloads/RidingMowers.csv")

In [94]:
# View the loaded data (the bare expression is rendered as the cell output).

df

Unnamed: 0,Income,Lot_Size,Ownership
0,60.0,18.4,Owner
1,85.5,16.8,Owner
2,64.8,21.6,Owner
3,61.5,20.8,Owner
4,87.0,23.6,Owner
5,110.1,19.2,Owner
6,108.0,17.6,Owner
7,82.8,22.4,Owner
8,69.0,20.0,Owner
9,93.0,20.8,Owner


In [95]:
# Label-encode the target column (this is integer label encoding, not one-hot
# encoding): each distinct Ownership string is replaced by an integer code.

from sklearn.preprocessing import LabelEncoder

# Learn the set of class labels first, then map the column onto their codes.
# The fitted encoder stays available as `lab` (e.g. for inverse_transform).
lab = LabelEncoder().fit(df['Ownership'])

# Overwrite the text labels with their integer codes; in the output below
# 'Owner' is shown as 1.
df['Ownership'] = lab.transform(df['Ownership'])

In [96]:
# Display the frame again to check that Ownership now holds integer codes.
df

Unnamed: 0,Income,Lot_Size,Ownership
0,60.0,18.4,1
1,85.5,16.8,1
2,64.8,21.6,1
3,61.5,20.8,1
4,87.0,23.6,1
5,110.1,19.2,1
6,108.0,17.6,1
7,82.8,22.4,1
8,69.0,20.0,1
9,93.0,20.8,1


In [97]:
# Separate features from the target and hold out 30% of the rows for testing.

X = df.iloc[:,:-1]   # every column except the last one -> features
y = df.iloc[:,-1]    # last column (Ownership, already label-encoded) -> target

# random_state pins the shuffle to this cell, so re-running the cell on its own
# yields the same split instead of depending on how far the global NumPy RNG
# has advanced. The value matches the notebook-level np.random.seed(1);
# presumably this reproduces the split of a fresh top-to-bottom run - TODO confirm.
X_train,X_test,y_train,y_test = train_test_split(X,y, test_size=0.30, random_state=1)

In [98]:

# Empty results table; each model cell below appends one row of test metrics.
performance = pd.DataFrame(
    {column: [] for column in ("model", "Accuracy", "Precision", "Recall", "F1")}
)

In [99]:
# SVM with a linear kernel: fit on the training split, then score on the test split.

svm_lin_model = SVC(kernel="linear", probability = True)
_ = svm_lin_model.fit(X_train, np.ravel(y_train))

model_preds = svm_lin_model.predict(X_test)

# Pin the label set so the matrix is always 2x2. Without `labels`, a test split
# (or set of predictions) containing only one class produces a 1x1 matrix and
# the [1][1] lookups raise IndexError - a real risk with a test set this small.
# Assumes Ownership has exactly two classes, encoded as 0 and 1.
c_matrix = confusion_matrix(y_test, model_preds, labels=[0, 1])

# Rows are actual classes, columns are predicted classes, so the flattened
# order is TN, FP, FN, TP (class 1 is treated as the positive class).
TN, FP, FN, TP = c_matrix.ravel()

# The counts are NumPy integers, so a zero denominator yields nan (with a
# RuntimeWarning) rather than raising.
performance = pd.concat([performance, pd.DataFrame({'model':"svm linear", 
                                                    'Accuracy': [(TP+TN)/(TP+TN+FP+FN)], 
                                                    'Precision': [TP/(TP+FP)], 
                                                    'Recall': [TP/(TP+FN)], 
                                                    'F1': [2*TP/(2*TP+FP+FN)]
                                                     }, index=[0])])

In [100]:
# SVM with an RBF kernel (C=10, gamma='scale'): fit on the training split,
# then score on the test split.

svm_rbf_model = SVC(kernel="rbf", C=10, gamma='scale',  probability = True)
_ = svm_rbf_model.fit(X_train, np.ravel(y_train))

model_preds = svm_rbf_model.predict(X_test)

# Pin the label set so the matrix is always 2x2. Without `labels`, a test split
# (or set of predictions) containing only one class produces a 1x1 matrix and
# the [1][1] lookups raise IndexError - a real risk with a test set this small.
# Assumes Ownership has exactly two classes, encoded as 0 and 1.
c_matrix = confusion_matrix(y_test, model_preds, labels=[0, 1])

# Rows are actual classes, columns are predicted classes, so the flattened
# order is TN, FP, FN, TP (class 1 is treated as the positive class).
TN, FP, FN, TP = c_matrix.ravel()

# The counts are NumPy integers, so a zero denominator yields nan (with a
# RuntimeWarning) rather than raising.
performance = pd.concat([performance, pd.DataFrame({'model':"rbf svm", 
                                                    'Accuracy': [(TP+TN)/(TP+TN+FP+FN)], 
                                                    'Precision': [TP/(TP+FP)], 
                                                    'Recall': [TP/(TP+FN)], 
                                                    'F1': [2*TP/(2*TP+FP+FN)]
                                                     }, index=[0])])

In [101]:
# SVM with a degree-3 polynomial kernel (coef0=1, C=10): fit on the training
# split, then score on the test split.

svm_poly_model = SVC(kernel="poly", degree=3, coef0=1, C=10,  probability = True)
_ = svm_poly_model.fit(X_train, np.ravel(y_train))

model_preds = svm_poly_model.predict(X_test)

# Pin the label set so the matrix is always 2x2. Without `labels`, a test split
# (or set of predictions) containing only one class produces a 1x1 matrix and
# the [1][1] lookups raise IndexError - a real risk with a test set this small.
# Assumes Ownership has exactly two classes, encoded as 0 and 1.
c_matrix = confusion_matrix(y_test, model_preds, labels=[0, 1])

# Rows are actual classes, columns are predicted classes, so the flattened
# order is TN, FP, FN, TP (class 1 is treated as the positive class).
TN, FP, FN, TP = c_matrix.ravel()

# The counts are NumPy integers, so a zero denominator yields nan (with a
# RuntimeWarning) rather than raising.
performance = pd.concat([performance, pd.DataFrame({'model':"poly svm", 
                                                    'Accuracy': [(TP+TN)/(TP+TN+FP+FN)], 
                                                    'Precision': [TP/(TP+FP)], 
                                                    'Recall': [TP/(TP+FN)], 
                                                    'F1': [2*TP/(2*TP+FP+FN)]
                                                     }, index=[0])])

In [102]:
# Rank the models by test accuracy, lowest first.
performance.sort_values(by='Accuracy')

Unnamed: 0,model,Accuracy,Precision,Recall,F1
0,rbf svm,0.75,0.666667,0.666667,0.666667
0,poly svm,0.875,1.0,0.666667,0.8
0,svm linear,1.0,1.0,1.0,1.0


In [103]:
# Rank the models by test precision, lowest first.
performance.sort_values(by='Precision')

Unnamed: 0,model,Accuracy,Precision,Recall,F1
0,rbf svm,0.75,0.666667,0.666667,0.666667
0,svm linear,1.0,1.0,1.0,1.0
0,poly svm,0.875,1.0,0.666667,0.8


In [104]:
# Rank the models by test recall, lowest first.
performance.sort_values(by='Recall')

Unnamed: 0,model,Accuracy,Precision,Recall,F1
0,rbf svm,0.75,0.666667,0.666667,0.666667
0,poly svm,0.875,1.0,0.666667,0.8
0,svm linear,1.0,1.0,1.0,1.0


In [105]:
# Rank the models by test F1 score, lowest first.
performance.sort_values(by='F1')

Unnamed: 0,model,Accuracy,Precision,Recall,F1
0,rbf svm,0.75,0.666667,0.666667,0.666667
0,poly svm,0.875,1.0,0.666667,0.8
0,svm linear,1.0,1.0,1.0,1.0


In [106]:
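# Based on the results above, the poly SVM is judged to be the best-performing model and the linear SVM is suspected of over-fitting, so the poly SVM is expected to give better predictions for ownership.
# NOTE(review): the linear SVM scores 1.0 on every metric on the held-out test set, which by itself is not evidence of over-fitting (over-fitting shows up as high training scores but low test scores). The test set is also very small (the scores above are multiples of 1/8), so these rankings are noisy - confirm with cross-validation before preferring the poly SVM.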

In [107]:
import pickle

# Persist the fitted polynomial-kernel SVM to disk. The context manager makes
# sure the file handle is flushed and closed as soon as the dump finishes; the
# earlier bare open() call never closed it explicitly.
# NOTE(review): the file holds a binary pickle even though its name ends in
# .csv - consider a .pkl name once anything that loads this file is updated.
# Only unpickle files from trusted sources.
with open('C:/Users/ssand/OneDrive/Desktop/WE03/Pickletest.csv', 'wb') as model_file:
    pickle.dump(svm_poly_model, model_file)