Import the required Python modules

In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split 
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
import pickle

In [3]:
# Read the Social Network Ads CSV (relative path) into a DataFrame.
dataset = pd.read_csv(
    filepath_or_buffer="../../Dataset/Social_Network_Ads.csv",
)
# A bare name as the last expression renders the frame as the cell output.
dataset

Unnamed: 0,User ID,Gender,Age,EstimatedSalary,Purchased
0,15624510,Male,19,19000,0
1,15810944,Male,35,20000,0
2,15668575,Female,26,43000,0
3,15603246,Female,27,57000,0
4,15804002,Male,19,76000,0
...,...,...,...,...,...
395,15691863,Female,46,41000,1
396,15706071,Male,51,23000,1
397,15654296,Female,50,20000,1
398,15755018,Male,36,33000,0


In [5]:
# Drop the 'User ID' column before modelling.
# errors='ignore' keeps this cell idempotent: `dataset` is rebound to the result,
# so re-running the cell would otherwise raise KeyError because the column is
# already gone.
dataset = dataset.drop(columns='User ID', errors='ignore')
# Show the remaining column labels as the cell output.
dataset.columns

Index(['Gender', 'Age', 'EstimatedSalary', 'Purchased'], dtype='object')

In [7]:
# Keep exactly these four columns, in this order, then display the result.
dataset = dataset.loc[:, ['Gender', 'Age', 'EstimatedSalary', 'Purchased']]
dataset

Unnamed: 0,Gender,Age,EstimatedSalary,Purchased
0,Male,19,19000,0
1,Male,35,20000,0
2,Female,26,43000,0
3,Female,27,57000,0
4,Male,19,76000,0
...,...,...,...,...
395,Female,46,41000,1
396,Male,51,23000,1
397,Female,50,20000,1
398,Male,36,33000,0


One-hot encoding of the categorical `Gender` column

In [10]:
# One-hot encode the non-numeric columns as 0/1 integers. drop_first=True drops
# the first level of each encoded column, so Gender becomes the single
# indicator column Gender_Male.
dataset = pd.get_dummies(
    data=dataset,
    drop_first=True,
    dtype=int,
)
dataset

Unnamed: 0,Age,EstimatedSalary,Purchased,Gender_Male
0,19,19000,0,1
1,35,20000,0,1
2,26,43000,0,0
3,27,57000,0,0
4,19,76000,0,1
...,...,...,...,...
395,46,41000,1,0
396,51,23000,1,1
397,50,20000,1,0
398,36,33000,0,1


In [12]:
# Only the last expression of a cell is displayed, so a bare `dataset.shape`
# placed before `describe()` is silently discarded. Print it explicitly.
print(dataset.shape)
# Summary statistics (count, mean, std, quartiles) for every numeric column.
dataset.describe()

Unnamed: 0,Age,EstimatedSalary,Purchased,Gender_Male
count,400.0,400.0,400.0,400.0
mean,37.655,69742.5,0.3575,0.49
std,10.482877,34096.960282,0.479864,0.500526
min,18.0,15000.0,0.0,0.0
25%,29.75,43000.0,0.0,0.0
50%,37.0,70000.0,0.0,0.0
75%,46.0,88000.0,1.0,1.0
max,60.0,150000.0,1.0,1.0


In [14]:
# Predictors (features) used by the classifier.
independent = dataset.loc[:, ['Age', 'EstimatedSalary', 'Gender_Male']]
# Target; the list selector keeps it as a single-column DataFrame, not a Series.
dependent = dataset.loc[:, ['Purchased']]

In [16]:
# Class balance of the target: number of rows per Purchased label.
dataset.Purchased.value_counts()

Purchased
0    257
1    143
Name: count, dtype: int64

In [18]:
# Hold out 30% of the rows for evaluation; random_state fixes the shuffle so the
# split is reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    independent,
    dependent,
    test_size=0.30,
    random_state=0,
)

In [20]:
# Echo the four split pieces as one tuple to eyeball their contents and sizes.
(x_train, y_train, x_test, y_test)

(     Age  EstimatedSalary  Gender_Male
 92    26            15000            1
 223   60           102000            1
 234   38           112000            0
 232   40           107000            1
 377   42            53000            0
 ..   ...              ...          ...
 323   48            30000            0
 192   29            43000            1
 117   36            52000            1
 47    27            54000            0
 172   26           118000            0
 
 [280 rows x 3 columns],
      Purchased
 92           0
 223          1
 234          0
 232          1
 377          0
 ..         ...
 323          1
 192          0
 117          0
 47           0
 172          0
 
 [280 rows x 1 columns],
      Age  EstimatedSalary  Gender_Male
 132   30            87000            1
 309   38            50000            0
 341   35            75000            1
 196   30            79000            0
 246   35            50000            0
 ..   ...              ...        

In [24]:
# RBF-kernel support vector classifier with regularisation strength C=10.
# NOTE(review): the features are fed in unscaled (Age vs. EstimatedSalary differ
# by orders of magnitude); RBF SVMs are usually scale-sensitive — consider
# standardising the features and re-evaluating.
classifier = SVC(C=10, gamma='scale', kernel='rbf')
# y_train is a single-column DataFrame (shape (n, 1)). scikit-learn expects a 1-D
# target and otherwise emits a DataConversionWarning from column_or_1d, so
# flatten it explicitly. The fitted model is unchanged; only the warning goes away.
classifier.fit(x_train, y_train.to_numpy().ravel())

  y = column_or_1d(y, warn=True)


In [26]:
# Predict the Purchased label for every held-out row.
y_pred = classifier.predict(X=x_test)

In [28]:
# Sanity check: one prediction per test row is expected.
np.shape(y_pred)

(120,)

In [30]:
# Confusion matrix: rows are the actual classes, columns the predicted classes.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
cm

array([[77,  2],
       [23, 18]], dtype=int64)

In [32]:
# Per-class precision / recall / F1 plus the averaged rows, as formatted text.
clf_report = classification_report(y_true=y_test, y_pred=y_pred)

In [34]:
# Print (rather than echo) the report so its embedded newlines render as a table.
print(clf_report)

              precision    recall  f1-score   support

           0       0.77      0.97      0.86        79
           1       0.90      0.44      0.59        41

    accuracy                           0.79       120
   macro avg       0.83      0.71      0.73       120
weighted avg       0.81      0.79      0.77       120



In [None]:
#Overall accuracy : 79% ((77 + 18) / 120 test samples classified correctly)
#T : True ; F: False ; NP : Not Purchased ; P : Purchased


#                Predicted
#                NP      P
#              ----------------  
#Actual   NP  | T(NP) | F(P) |    Recall
#             -----------------   
#          P  | F(NP)  | T(P)  |
#             ----------------
#               Precision


#Precision : Of all samples predicted as a class, the percentage that actually belong to it : T(NP)/(T(NP)+F(NP)) ; T(P)/(T(P)+F(P))
#Recall : Of all samples that actually belong to a class, the percentage that were predicted correctly : T(NP)/(T(NP)+F(P)) ; T(P)/(T(P)+F(NP))

#(Precision) Percentage of 'Not Purchased' predictions that were correct : 77
#(Precision) Percentage of 'Purchased' predictions that were correct : 90
#(Recall) Percentage of actual 'Not Purchased' values that were predicted correctly : 97
#(Recall) Percentage of actual 'Purchased' values that were predicted correctly : 44
#(F1-score) Harmonic mean of precision and recall for 'Not Purchased' : 86
#(F1-score) Harmonic mean of precision and recall for 'Purchased' : 59
#Macro average (unweighted mean over both classes) of Precision : 83
#Macro average (unweighted mean over both classes) of Recall : 71
#Macro average (unweighted mean over both classes) of F1-score : 73
#Weighted average (weighted by each class's proportion of the test samples) of Precision : 81
#Weighted average (weighted by each class's proportion of the test samples) of Recall : 79
#Weighted average (weighted by each class's proportion of the test samples) of F1-score : 77