## Importing Libraries

In [67]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

## Loading Dataset

In [68]:
df = pd.read_csv("Social_Network_Ads.csv")

In [69]:
df.head()

Unnamed: 0,Age,EstimatedSalary,Purchased
0,19,19000,0
1,35,20000,0
2,26,43000,0
3,27,57000,0
4,19,76000,0


## Train-Test Split

In [70]:
X = df[['Age','EstimatedSalary']]
y = df['Purchased']

In [71]:
X_train, X_test, y_train, y_test=train_test_split(X,y,test_size=0.25)

In [72]:
X_train.shape

(300, 2)

## Modelling - LogisticRegression (Original Data)

In [73]:
from sklearn.linear_model import LogisticRegression
model = LogisticRegression()
model.fit(X_train, y_train)

## Model Score

In [74]:
model.score(X_test, y_test)

0.58

**The score (58%) is not that much impressive, so we will use MinMaxScaler to Scale our original dataset.**

## MinMaxScaler

In [75]:
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
scaler.fit(X_train)

In [76]:
MinMaxScaler()

In [77]:
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

## Modelling - LogisticRegression (Scaled Data)

In [78]:
model = LogisticRegression()
model.fit(X_train_scaled, y_train)

In [79]:
model.score(X_test_scaled, y_test)

0.8

**The new accuracy is 80%, which is really impressive.**

In [80]:
# This will show how our scaled data looks like

X_train_scaled[0:10]

array([[0.78571429, 0.05925926],
       [0.0952381 , 0.35555556],
       [0.42857143, 0.62222222],
       [0.66666667, 0.43703704],
       [0.30952381, 0.02222222],
       [0.47619048, 0.37037037],
       [0.19047619, 0.01481481],
       [0.35714286, 0.4       ],
       [0.45238095, 0.40740741],
       [0.23809524, 0.47407407]])

## Confusion Matrix

In [81]:
from sklearn.metrics import confusion_matrix

In [82]:
y_p = model.predict(X_test_scaled)
y_p

array([0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
       1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1,
       0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1,
       1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0], dtype=int64)

In [83]:
confusion_matrix(y_test,y_p)

array([[55,  3],
       [17, 25]], dtype=int64)

**True Positive:** 56<br>
**False Positive:** 3<br>
**False Negative:** 15<br>
**True Negative:** 26<br>

## Accuracy

In [84]:
from sklearn.metrics import accuracy_score

acc = accuracy_score(y_test,y_p)
acc

0.8

## Error Rate

In [85]:
# Error rate is given as 1 - Accuracy

err = 1 - acc
err

0.19999999999999996

## Precision

In [86]:
from sklearn.metrics import precision_score

prec = precision_score(y_test, y_p)
prec

0.8928571428571429

## Recall

In [87]:
from sklearn.metrics import recall_score

rec = recall_score(y_test, y_p)
rec

0.5952380952380952

## Conclusion
**Accuracy:** 80%<br>
**Error Rate:** 20%<br>
**Precision:** 89.2%<br>
**Recall:** 59.6%<br>