In [67]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [68]:
# Load the raw ads data from the working directory and preview the first rows.
csv_path = 'Social_network_Ads.csv'
dataset = pd.read_csv(csv_path)
dataset.head()

Unnamed: 0,User ID,Gender,Age,EstimatedSalary,Purchased
0,15624510,Male,19,19000,0
1,15810944,Male,35,20000,0
2,15668575,Female,26,43000,0
3,15603246,Female,27,57000,0
4,15804002,Male,19,76000,0


### Missing Value Analysis

In [69]:
# Count missing entries per column (isna is the alias of isnull).
dataset.isna().sum()

User ID            0
Gender             0
Age                0
EstimatedSalary    0
Purchased          0
dtype: int64

### Encoding Categorical Values

In [70]:
from sklearn.preprocessing import OneHotEncoder
# Encoder for the nominal Gender column; categories not seen during fit
# raise an error at transform time instead of being silently ignored.
oh = OneHotEncoder(
    handle_unknown='error',
)

In [71]:
# Fit the encoder on Gender, then densify the encoded matrix into a DataFrame.
gender_encoded = oh.fit_transform(dataset[['Gender']])
enc_cols = pd.DataFrame(gender_encoded.toarray())

In [72]:
# Label the encoded columns from the encoder's own fitted categories rather
# than a hard-coded list, so the names cannot drift out of sync with the
# column order (or count) that OneHotEncoder actually produced.
enc_cols.columns = list(oh.categories_[0])
enc_cols.head(2)

Unnamed: 0,Female,Male
0,0.0,1.0
1,0.0,1.0


In [73]:
# Peek at the original frame before the Gender column is removed.
dataset.head(n=2)

Unnamed: 0,User ID,Gender,Age,EstimatedSalary,Purchased
0,15624510,Male,19,19000,0
1,15810944,Male,35,20000,0


##### Dropping Categorical column from main dataset

In [74]:
# Remove the raw categorical Gender column from the main frame.
# Rebinding (instead of inplace=True) together with errors='ignore' keeps
# this cell safe to re-run: the in-place version raised KeyError on a second
# execution because the column was already gone.
dataset = dataset.drop(columns='Gender', errors='ignore')

In [75]:
# Confirm the Gender column is no longer present.
dataset.head(n=2)

Unnamed: 0,User ID,Age,EstimatedSalary,Purchased
0,15624510,19,19000,0
1,15810944,35,20000,0


In [76]:
# Attach the encoded gender indicator columns to the main frame
# (column assignment aligns on the row index).
for gender_col in ('Male', 'Female'):
    dataset[gender_col] = enc_cols[gender_col]

In [77]:
# Preview the frame with the new indicator columns appended.
dataset.head(n=5)

Unnamed: 0,User ID,Age,EstimatedSalary,Purchased,Male,Female
0,15624510,19,19000,0,1.0,0.0
1,15810944,35,20000,0,1.0,0.0
2,15668575,26,43000,0,0.0,1.0
3,15603246,27,57000,0,0.0,1.0
4,15804002,19,76000,0,1.0,0.0


**Rearranging columns**

In [78]:
# Reorder so the identifier and predictors come first and the target is last.
column_order = ['User ID', 'Age', 'Male', 'Female', 'EstimatedSalary', 'Purchased']
dataset = dataset.loc[:, column_order]

In [79]:
# Check the new column order.
dataset.head(n=5)

Unnamed: 0,User ID,Age,Male,Female,EstimatedSalary,Purchased
0,15624510,19,1.0,0.0,19000,0
1,15810944,35,1.0,0.0,20000,0
2,15668575,26,0.0,1.0,43000,0
3,15603246,27,0.0,1.0,57000,0
4,15804002,19,1.0,0.0,76000,0


##### Separating Feature Variables and Target Variable

In [80]:
# Features: every predictor except the identifier; target: the purchase flag.
# Columns are selected by name so the split does not depend on column
# position, and .copy() gives x its own data so that later modifications to
# x neither trigger SettingWithCopyWarning nor risk touching `dataset`.
feature_cols = ['Age', 'Male', 'Female', 'EstimatedSalary']
x = dataset[feature_cols].copy()
y = dataset['Purchased']

### Feature Scaling (standardization)

In [81]:
# Display the row index of the feature frame.
x.index

RangeIndex(start=0, stop=400, step=1)

In [82]:
# Capture the feature column labels for use in later cells.
col_names = x.columns

In [83]:
# Standardize every feature column to zero mean and unit variance in one
# vectorized step. ddof=0 reproduces the population standard deviation that
# np.std used in the per-column loop. Building a new frame avoids assigning
# into a slice of `dataset` (SettingWithCopyWarning) and avoids np.mean/np.std
# on DataFrames, whose reduction semantics differ between pandas versions.
# NOTE(review): these statistics are computed over all rows, before the
# train/test split further down, so held-out rows influence the scaling.
# Consider computing mean/std on the training portion only.
x = (x - x.mean()) / x.std(ddof=0)

In [88]:
# Preview the standardized features.
x.head(n=3)

Unnamed: 0,Age,Male,Female,EstimatedSalary
0,-1.781797,1.020204,-1.020204,-1.490046
1,-0.253587,1.020204,-1.020204,-1.460681
2,-1.113206,-0.980196,0.980196,-0.78529


### Train Test Split

In [95]:
from sklearn.model_selection import train_test_split

In [96]:
# Hold out 33% of the rows for evaluation; the fixed seed makes the split repeatable.
split_parts = train_test_split(x, y, random_state=42, test_size=0.33)
x_train, x_test, y_train, y_test = split_parts

## Modelling

In [89]:
from sklearn.svm import SVC

In [98]:
# Linear-kernel support vector classifier, fitted on the training split.
# fit() returns the estimator itself, so construction and training chain.
classifier = SVC(kernel='linear', random_state=0).fit(x_train, y_train)
classifier

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
    kernel='linear', max_iter=-1, probability=False, random_state=0,
    shrinking=True, tol=0.001, verbose=False)

In [99]:
# Predict the class label for every held-out row.
y_pred = classifier.predict(X=x_test)

### Confusion Matrix

In [100]:
from sklearn.metrics import confusion_matrix

In [102]:
# Rows are the true classes, columns the predicted classes.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)

In [103]:
# Display the confusion matrix computed in the previous cell.
cm

array([[79,  1],
       [22, 30]], dtype=int64)

In [106]:
from sklearn.metrics import classification_report

In [108]:
# Per-class precision, recall and F1 on the held-out rows.
report_text = classification_report(y_true=y_test, y_pred=y_pred)
print(report_text)

              precision    recall  f1-score   support

           0       0.78      0.99      0.87        80
           1       0.97      0.58      0.72        52

    accuracy                           0.83       132
   macro avg       0.87      0.78      0.80       132
weighted avg       0.86      0.83      0.81       132

