# Support Vector Machine - Model

In [11]:
import pandas as pd
import numpy as np

from sklearn.svm import SVC

from sklearn.preprocessing import StandardScaler, MinMaxScaler

from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV

from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

import seaborn as sns
import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings("ignore")

In [3]:
df = pd.read_csv("Social_Network_Ads.csv")
df

Unnamed: 0,User ID,Gender,Age,EstimatedSalary,Purchased
0,15624510,Male,19,19000,0
1,15810944,Male,35,20000,0
2,15668575,Female,26,43000,0
3,15603246,Female,27,57000,0
4,15804002,Male,19,76000,0
...,...,...,...,...,...
395,15691863,Female,46,41000,1
396,15706071,Male,51,23000,1
397,15654296,Female,50,20000,1
398,15755018,Male,36,33000,0


In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 400 entries, 0 to 399
Data columns (total 5 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   User ID          400 non-null    int64 
 1   Gender           400 non-null    object
 2   Age              400 non-null    int64 
 3   EstimatedSalary  400 non-null    int64 
 4   Purchased        400 non-null    int64 
dtypes: int64(4), object(1)
memory usage: 15.8+ KB


In [5]:
df['Gender'].value_counts()

Female    204
Male      196
Name: Gender, dtype: int64

In [6]:
df['Gender'].replace({"Female":0, "Male":1}, inplace=True)

In [14]:
df['Gender']

0      1
1      1
2      0
3      0
4      1
      ..
395    0
396    1
397    0
398    1
399    0
Name: Gender, Length: 400, dtype: int64

In [7]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 400 entries, 0 to 399
Data columns (total 5 columns):
 #   Column           Non-Null Count  Dtype
---  ------           --------------  -----
 0   User ID          400 non-null    int64
 1   Gender           400 non-null    int64
 2   Age              400 non-null    int64
 3   EstimatedSalary  400 non-null    int64
 4   Purchased        400 non-null    int64
dtypes: int64(5)
memory usage: 15.8 KB


## Scaling

In [8]:
df.columns

Index(['User ID', 'Gender', 'Age', 'EstimatedSalary', 'Purchased'], dtype='object')

In [10]:
x = df.drop(['User ID', 'Gender', 'Purchased'], axis=1)
x

Unnamed: 0,Age,EstimatedSalary
0,19,19000
1,35,20000
2,26,43000
3,27,57000
4,19,76000
...,...,...
395,46,41000
396,51,23000
397,50,20000
398,36,33000


In [13]:
std_scalar = StandardScaler()

array = std_scalar.fit_transform(x)

x = pd.DataFrame(array, columns=x.columns)
x

Unnamed: 0,Age,EstimatedSalary
0,-1.781797,-1.490046
1,-0.253587,-1.460681
2,-1.113206,-0.785290
3,-1.017692,-0.374182
4,-1.781797,0.183751
...,...,...
395,0.797057,-0.844019
396,1.274623,-1.372587
397,1.179110,-1.460681
398,-0.158074,-1.078938


In [16]:
x['Gender'] = df['Gender']
x

y = df['Purchased']
y

0      0
1      0
2      0
3      0
4      0
      ..
395    1
396    1
397    1
398    0
399    1
Name: Purchased, Length: 400, dtype: int64

### Train Test Split

In [17]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=10, stratify=y)

### Model Training

In [19]:
svm_clf = SVC()
svm_clf.fit(x_train, y_train)

### Model Evaluation

In [23]:
# Testing Data
y_pred = svm_clf.predict(x_test)

cnf_matrix = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:\n", cnf_matrix)
print("*" *50)

accuracy = accuracy_score(y_test, y_pred)
print("Accuracy :", accuracy)
print("*" *50)

clf_report = classification_report(y_test, y_pred)
print("Classification report:\n",clf_report)

Confusion Matrix:
 [[46  5]
 [ 2 27]]
**************************************************
Accuracy : 0.9125
**************************************************
Classification report:
               precision    recall  f1-score   support

           0       0.96      0.90      0.93        51
           1       0.84      0.93      0.89        29

    accuracy                           0.91        80
   macro avg       0.90      0.92      0.91        80
weighted avg       0.92      0.91      0.91        80



In [24]:
# Training Data
y_pred_train = svm_clf.predict(x_train)

cnf_matrix = confusion_matrix(y_train, y_pred_train)
print("Confusion Matrix:\n", cnf_matrix)
print("*" *50)

accuracy = accuracy_score(y_train, y_pred_train)
print("Accuracy :", accuracy)
print("*" *50)

clf_report = classification_report(y_train, y_pred_train)
print("Classification report:\n",clf_report)

Confusion Matrix:
 [[188  18]
 [ 11 103]]
**************************************************
Accuracy : 0.909375
**************************************************
Classification report:
               precision    recall  f1-score   support

           0       0.94      0.91      0.93       206
           1       0.85      0.90      0.88       114

    accuracy                           0.91       320
   macro avg       0.90      0.91      0.90       320
weighted avg       0.91      0.91      0.91       320



## Hyperparameter Tunning

In [30]:
# GridSearchCV(estimator, param_grid)

svm_clf = SVC()

param_grid = {"C": np.arange(1,1000), "kernel":['linear', 'poly', 'rbf', 'sigmoid']}  # bydefault kernel = rbf

gscv_svm = GridSearchCV(svm_clf, param_grid, cv = 5, n_jobs=-1)
gscv_svm.fit(x_train, y_train)

gscv_svm.best_estimator_

In [28]:
svm_clf = gscv_svm.best_estimator_
svm_clf.fit(x_train, y_train)

# Testing Data
y_pred = svm_clf.predict(x_test)

cnf_matrix = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:\n", cnf_matrix)
print("*" *50)

accuracy = accuracy_score(y_test, y_pred)
print("Accuracy :", accuracy)
print("*" *50)

clf_report = classification_report(y_test, y_pred)
print("Classification report:\n",clf_report)

Confusion Matrix:
 [[45  6]
 [ 2 27]]
**************************************************
Accuracy : 0.9
**************************************************
Classification report:
               precision    recall  f1-score   support

           0       0.96      0.88      0.92        51
           1       0.82      0.93      0.87        29

    accuracy                           0.90        80
   macro avg       0.89      0.91      0.89        80
weighted avg       0.91      0.90      0.90        80



In [29]:
# Training Data
y_pred_train = svm_clf.predict(x_train)

cnf_matrix = confusion_matrix(y_train, y_pred_train)
print("Confusion Matrix:\n", cnf_matrix)
print("*" *50)

accuracy = accuracy_score(y_train, y_pred_train)
print("Accuracy :", accuracy)
print("*" *50)

clf_report = classification_report(y_train, y_pred_train)
print("Classification report:\n",clf_report)

Confusion Matrix:
 [[192  14]
 [  7 107]]
**************************************************
Accuracy : 0.934375
**************************************************
Classification report:
               precision    recall  f1-score   support

           0       0.96      0.93      0.95       206
           1       0.88      0.94      0.91       114

    accuracy                           0.93       320
   macro avg       0.92      0.94      0.93       320
weighted avg       0.94      0.93      0.93       320

