# Social Network Ads DATASET

## (Predicting if user buys the product based from ads)

In [286]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as scs

In [287]:
df = pd.read_csv("../ml_datasets/Social_Network_Ads.csv")

In [288]:
df.describe()

Unnamed: 0,User ID,Age,EstimatedSalary,Purchased
count,400.0,400.0,400.0,400.0
mean,15691540.0,37.655,69742.5,0.3575
std,71658.32,10.482877,34096.960282,0.479864
min,15566690.0,18.0,15000.0,0.0
25%,15626760.0,29.75,43000.0,0.0
50%,15694340.0,37.0,70000.0,0.0
75%,15750360.0,46.0,88000.0,1.0
max,15815240.0,60.0,150000.0,1.0


In [289]:
df.count()

User ID            400
Gender             400
Age                400
EstimatedSalary    400
Purchased          400
dtype: int64

In [290]:
df.head()

Unnamed: 0,User ID,Gender,Age,EstimatedSalary,Purchased
0,15624510,Male,19,19000,0
1,15810944,Male,35,20000,0
2,15668575,Female,26,43000,0
3,15603246,Female,27,57000,0
4,15804002,Male,19,76000,0


In [291]:
del df['User ID']

In [292]:
df.head()

Unnamed: 0,Gender,Age,EstimatedSalary,Purchased
0,Male,19,19000,0
1,Male,35,20000,0
2,Female,26,43000,0
3,Female,27,57000,0
4,Male,19,76000,0


In [293]:
df.isna().sum()

Gender             0
Age                0
EstimatedSalary    0
Purchased          0
dtype: int64

In [294]:
df['Purchased'].value_counts()

Purchased
0    257
1    143
Name: count, dtype: int64

In [295]:
df.shape

(400, 4)

#### Convert gender to numerical value

In [296]:
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
df['Gender'] = le.fit_transform(df['Gender'])

### Copying for future use

In [297]:
df1 = df.copy()


In [298]:
df.head()

Unnamed: 0,Gender,Age,EstimatedSalary,Purchased
0,1,19,19000,0
1,1,35,20000,0
2,0,26,43000,0
3,0,27,57000,0
4,1,19,76000,0


In [299]:
df['Gender'].value_counts()

Gender
0    204
1    196
Name: count, dtype: int64

### Feauture Scaling of data 

In [300]:
from sklearn.preprocessing import MinMaxScaler
mm = MinMaxScaler()
df['Age'] = mm.fit_transform(df[['Age']])
df['EstimatedSalary'] = mm.fit_transform(df[['EstimatedSalary']])

In [301]:
df.head()

Unnamed: 0,Gender,Age,EstimatedSalary,Purchased
0,1,0.02381,0.02963,0
1,1,0.404762,0.037037,0
2,0,0.190476,0.207407,0
3,0,0.214286,0.311111,0
4,1,0.02381,0.451852,0


In [302]:
df.columns

Index(['Gender', 'Age', 'EstimatedSalary', 'Purchased'], dtype='object')

### Separting x and y values

In [303]:
x = df.iloc[:, :3]
y = df['Purchased']

In [304]:
x.head()

Unnamed: 0,Gender,Age,EstimatedSalary
0,1,0.02381,0.02963
1,1,0.404762,0.037037
2,0,0.190476,0.207407
3,0,0.214286,0.311111
4,1,0.02381,0.451852


In [305]:
x.shape, y.shape

((400, 3), (400,))

### splitting into train test for preparing model

In [306]:
from sklearn.model_selection import train_test_split
xtrain, xtest, ytrain, ytest = train_test_split(x,y, test_size=0.20)

In [307]:
from sklearn.linear_model import LogisticRegression
lr = LogisticRegression()
lr.fit(xtrain, ytrain)

In [308]:
lr.coef_, lr.intercept_

(array([[0.10088016, 5.0452363 , 2.60031519]]), array([-4.26280823]))

#### Model =  e^(0.026 Gender + 4.97 Age + 2.537 Salary - 4.24) / (1 + e^(0.026 Gender + 4.97 Age + 2.537 Salary - 4.24))

In [309]:
lr.classes_

array([0, 1])

In [310]:
ypred = lr.predict(xtest)

### Draw confusion matrix for evaluating the predicated values

In [311]:
from sklearn.metrics import confusion_matrix
confusion_matrix(ytest, ypred)

array([[52,  1],
       [12, 15]])

In [312]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
print("Accuracy: ", accuracy_score(ytest, ypred))
print("Precision: ", precision_score(ytest, ypred))
print("Recall_score: ", recall_score(ytest, ypred))
print("F1_score: ", f1_score(ytest, ypred))
print("Roc Auc Score: ", roc_auc_score(ytest, ypred))

Accuracy:  0.8375
Precision:  0.9375
Recall_score:  0.5555555555555556
F1_score:  0.6976744186046512
Roc Auc Score:  0.7683438155136268


## TASK TO DO: Take input from user and predict the value

In [313]:
df.count()

Gender             400
Age                400
EstimatedSalary    400
Purchased          400
dtype: int64

In [314]:
df1.head()

Unnamed: 0,Gender,Age,EstimatedSalary,Purchased
0,1,19,19000,0
1,1,35,20000,0
2,0,26,43000,0
3,0,27,57000,0
4,1,19,76000,0


In [317]:
while True:
    # Input Data to predict
    gender = int(input("Choose : 0. Female \t 1. Male"))
    age = int(input("Enter age: "))
    salary = float(input("Enter estimated salary: "))
    
    # Prepare to scale down
    data = {"Gender" : [gender], "Age" : [age], "EstimatedSalary" : [salary]}
    print("Used Input: ", data)
    new_df = pd.DataFrame(data)
    df1 = pd.concat([df1,new_df], ignore_index=True)
    df1.reset_index()
    
    # Scale the values for predition
    df1['Age'] = mm.fit_transform(df1[['Age']])
    df1['EstimatedSalary'] = mm.fit_transform(df1[['EstimatedSalary']])
    
    # Predict if user purchased
    x = df1.iloc[-1:,:3]
    ypred = lr.predict(x)
    print("User Purchased: ",end="") 
    if ypred[0] == 0:  
        print("No") 
    else: 
        print("Yes")
        
    ch = input("Check for new data(y,n): ")
    if(ch == 'n' or ch =='x'):
        break


Used Input:  {'Gender': [1], 'Age': [32], 'EstimatedSalary': [65333.0]}
User Purchased: Yes


In [316]:

df1.tail()

Unnamed: 0,Gender,Age,EstimatedSalary,Purchased
396,1,0.785714,0.059259,1.0
397,0,0.761905,0.037037,1.0
398,1,0.428571,0.133333,0.0
399,0,0.738095,0.155556,1.0
400,1,0.333333,0.22403,
