# LawnMower Assignment


## 1. Data Setup

In [268]:
# Common imports
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split

np.random.seed(1)

# 2. Load the data

In [269]:
# load data
# load data
RidMow = pd.read_csv(r"C:\DSP\WE03\RidingMowers.csv")

RidMow.head

<bound method NDFrame.head of     Income  Lot_Size Ownership
0     60.0      18.4     Owner
1     85.5      16.8     Owner
2     64.8      21.6     Owner
3     61.5      20.8     Owner
4     87.0      23.6     Owner
5    110.1      19.2     Owner
6    108.0      17.6     Owner
7     82.8      22.4     Owner
8     69.0      20.0     Owner
9     93.0      20.8     Owner
10    51.0      22.0     Owner
11    81.0      20.0     Owner
12    75.0      19.6  Nonowner
13    52.8      20.8  Nonowner
14    64.8      17.2  Nonowner
15    43.2      20.4  Nonowner
16    84.0      17.6  Nonowner
17    49.2      17.6  Nonowner
18    59.4      16.0  Nonowner
19    66.0      18.4  Nonowner
20    47.4      16.4  Nonowner
21    33.0      18.8  Nonowner
22    51.0      14.0  Nonowner
23    63.0      14.8  Nonowner>

In [270]:
# split the data into validation and training set
X = RidMow.drop('Ownership', axis=1)
y = RidMow['Ownership']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

3. Model the data

In [271]:
#First, we will create a dataframe to hold all the results of our models.
performance = pd.DataFrame({"model": [], "Accuracy": [], "Precision": [], "Recall": [], "F1": []})

### 3.1 Fit a SVM classification model using linear kernal

In [272]:
svm_lin_model = SVC(kernel="linear", probability=True)
_ = svm_lin_model.fit(X_train, np.ravel(y_train))

In [273]:
model_preds = svm_lin_model.predict(X_test)
c_matrix = confusion_matrix(y_test, model_preds)
TP = c_matrix[1][1]
TN = c_matrix[0][0]
FP = c_matrix[0][1]
FN = c_matrix[1][0]
performance = pd.concat([performance, pd.DataFrame({'model':"linear svm", 
                                                    'Accuracy': [(TP+TN)/(TP+TN+FP+FN)], 
                                                    'Precision': [TP/(TP+FP)], 
                                                    'Recall': [TP/(TP+FN)], 
                                                    'F1': [2*TP/(2*TP+FP+FN)]
                                                     }, index=[0])])

### 3.2 Fit a SVM classification model using rbf kernal

In [274]:
svm_rbf_model = SVC(kernel="rbf", C=10, gamma='scale',probability=True)
_ = svm_rbf_model.fit(X_train, np.ravel(y_train))
model_preds = svm_rbf_model.predict(X_test)
c_matrix = confusion_matrix(y_test, model_preds)
TP = c_matrix[1][1]
TN = c_matrix[0][0]
FP = c_matrix[0][1]
FN = c_matrix[1][0]
performance = pd.concat([performance, pd.DataFrame({'model':"rbf svm", 
                                                    'Accuracy': [(TP+TN)/(TP+TN+FP+FN)], 
                                                    'Precision': [TP/(TP+FP)], 
                                                    'Recall': [TP/(TP+FN)], 
                                                    'F1': [2*TP/(2*TP+FP+FN)]
                                                     }, index=[0])])

### 3.3 Fit a SVM classification model using polynomial kernal

In [275]:
svm_poly_model = SVC(kernel="poly", degree=3, coef0=1, C=10, probability=True)
_ = svm_poly_model.fit(X_train, np.ravel(y_train))

In [276]:
model_preds = svm_poly_model.predict(X_test)
c_matrix = confusion_matrix(y_test, model_preds)
TP = c_matrix[1][1]
TN = c_matrix[0][0]
FP = c_matrix[0][1]
FN = c_matrix[1][0]
performance = pd.concat([performance, pd.DataFrame({'model':"poly svm", 
                                                    'Accuracy': [(TP+TN)/(TP+TN+FP+FN)], 
                                                    'Precision': [TP/(TP+FP)], 
                                                    'Recall': [TP/(TP+FN)], 
                                                    'F1': [2*TP/(2*TP+FP+FN)]
                                                     }, index=[0])])

## 4.0 Summary

Sorted by accuracy, the best models are:

In [277]:
performance.sort_values(by=['Accuracy'])

Unnamed: 0,model,Accuracy,Precision,Recall,F1
0,linear svm,0.625,0.75,0.6,0.666667
0,rbf svm,0.625,0.75,0.6,0.666667
0,poly svm,0.75,1.0,0.6,0.75


Sorted by Precision, the best models are:

In [278]:
performance.sort_values(by=['Precision'])

Unnamed: 0,model,Accuracy,Precision,Recall,F1
0,linear svm,0.625,0.75,0.6,0.666667
0,rbf svm,0.625,0.75,0.6,0.666667
0,poly svm,0.75,1.0,0.6,0.75


Sorted by Recall, the best models are:

In [279]:
performance.sort_values(by=['Recall'])

Unnamed: 0,model,Accuracy,Precision,Recall,F1
0,linear svm,0.625,0.75,0.6,0.666667
0,rbf svm,0.625,0.75,0.6,0.666667
0,poly svm,0.75,1.0,0.6,0.75


Sorted by F1, the best models are:

In [280]:
performance.sort_values(by=['F1'])

Unnamed: 0,model,Accuracy,Precision,Recall,F1
0,linear svm,0.625,0.75,0.6,0.666667
0,rbf svm,0.625,0.75,0.6,0.666667
0,poly svm,0.75,1.0,0.6,0.75


### So which model is the 'Winning model'?

The model which is having the highest F1, Recall, Precision and accuracy values can be considered as winning model. As per the generated results above that are sorted according to Accuracy, precision, Recall and F1 looks like 'Poly SVM' is the best winning model. However for best analysis and output TP,TN,FP,FN values should be considered. 

In [281]:
#save the 'winning' model to a pickle file
import pickle

pickle.dump(svm_poly_model, open('C:\DSP\WE03\RidMow_assignment_WE05_pickle.csv', 'wb'))