In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
df = pd.read_csv("Social_Network_Ads (1).csv")
df.head()

Unnamed: 0,User ID,Gender,Age,EstimatedSalary,Purchased
0,15624510,Male,19.0,19000.0,0
1,15810944,Male,35.0,20000.0,0
2,15668575,Female,26.0,43000.0,0
3,15603246,Female,27.0,57000.0,0
4,15804002,Male,19.0,76000.0,0


In [3]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 400 entries, 0 to 399
Data columns (total 5 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   User ID          400 non-null    int64  
 1   Gender           400 non-null    object 
 2   Age              400 non-null    float64
 3   EstimatedSalary  400 non-null    float64
 4   Purchased        400 non-null    int64  
dtypes: float64(2), int64(2), object(1)
memory usage: 15.8+ KB


In [4]:
df.describe()

Unnamed: 0,User ID,Age,EstimatedSalary,Purchased
count,400.0,400.0,400.0,400.0
mean,15691540.0,37.655,69742.5,0.3575
std,71658.32,10.482877,34096.960282,0.479864
min,15566690.0,18.0,15000.0,0.0
25%,15626760.0,29.75,43000.0,0.0
50%,15694340.0,37.0,70000.0,0.0
75%,15750360.0,46.0,88000.0,1.0
max,15815240.0,60.0,150000.0,1.0


In [5]:
df.isnull().sum()

User ID            0
Gender             0
Age                0
EstimatedSalary    0
Purchased          0
dtype: int64

In [6]:
x = df[["Age","EstimatedSalary"]]
y = df["Purchased"]


In [7]:
x = np.array(x)
y = np.array(y)


In [8]:
print(x.shape)
print(y.shape)


(400, 2)
(400,)


In [9]:
n = len(x)
split = int(n*0.8)

In [10]:
x_train = x[:split]
y_train = y[:split]

In [11]:
x_test = x[split:]
y_test = y[split:]

In [12]:
mean = np.mean(x_train,axis=0)
std = np.std(x_train,axis=0)
x_train_norm = (x_train-mean)/std
x_test_norm = (x_test-mean)/std

In [13]:
def pred(x_train_norm,w,b):
    z = np.dot(x_train_norm,w)+b
    sigmoid_z = 1/(1+(np.exp(-z)))
    return sigmoid_z

In [14]:
def loss(sigmoid_z,y_train):
    loss = -((y_train * np.log(sigmoid_z))+((1-y_train)*(np.log(1-sigmoid_z))))
    loss = (np.sum(loss))/len(y_train)
    return loss

In [15]:
def gradient(x_train_norm,y_train,sigmoid_z):
    err = sigmoid_z-y_train
    dw = np.mean(x_train_norm*err[:,np.newaxis],axis=0)
    db = np.mean(err)
    return dw,db



In [16]:
def gradient_descent(dw,db,a,w,b):
    w = w - a*dw
    b = b - a*db
    return w,b

In [17]:
def logistic_regression(x_train_norm,y_train,a=0.1,i=10000):
    w = np.zeros(x_train_norm.shape[1])
    b = 0
    
    for k in range(i):
        y_pred = pred(x_train_norm,w,b)
        losss = loss(y_pred,y_train)
        dw,db = gradient(x_train_norm, y_train, y_pred)
        w,b = gradient_descent(dw,db,a,w,b)
        if k % 100 == 0:
            print(f"Epoch {k}, Loss {losss}")
    return w,b


In [18]:
def predict_class(x_train_norm,w,b,threshold=0.5):
    probs = pred(x_train_norm,w,b)
    y_pred_class = (probs >= threshold).astype(int)
    return y_pred_class

In [19]:
def accuracy(y_pred_class,y_test):
    accuracy = np.mean(y_pred_class == y_test)
    return accuracy

In [20]:
def confusion_matrix(y_pred_class,y_test):
    tp =0
    tn =0
    fp =0
    fn =0
    for k in range(len(y_test)):
        if(y_pred_class[k]==1 and y_test[k]==1):
            tp+=1
        elif(y_pred_class[k]==0 and y_test[k]==0):
            tn+=1
        elif(y_pred_class[k]==1 and y_test[k]==0):
            fp+=1
        elif(y_pred_class[k]==0 and y_test[k]==1):
            fn+=1
    return tp,tn,fp,fn

In [21]:
def precision(tp,fp):
    return tp / (tp+fp) if (tp+fp) != 0 else 0

In [22]:
def recall(tp,fn):
    return tp / (tp+fn) if (tp+fn) != 0 else 0

In [24]:
w, b =logistic_regression(x_train_norm,y_train,a=0.1,i=1500)
y_pred_class = predict_class(x_test_norm,w,b,threshold=0.5)
tp, tn, fp, fn = confusion_matrix(y_pred_class, y_test)

Accuracy =accuracy(y_pred_class,y_test)
prec = precision(tp,fp)
rec = recall(tp,fn)

print("Accuracy: ",Accuracy)
print("precision ",precision(tp, fp))
print("recall: ",recall(tp,fn))

Epoch 0, Loss 0.6931471805599453
Epoch 100, Loss 0.3662166827914784
Epoch 200, Loss 0.3421984926134011
Epoch 300, Loss 0.33592480762741883
Epoch 400, Loss 0.33368227368562087
Epoch 500, Loss 0.33276029318842826
Epoch 600, Loss 0.3323512063879252
Epoch 700, Loss 0.33216115912115846
Epoch 800, Loss 0.3320702409087127
Epoch 900, Loss 0.33202589248013314
Epoch 1000, Loss 0.33200397322499425
Epoch 1100, Loss 0.3319930408992585
Epoch 1200, Loss 0.33198755381825035
Epoch 1300, Loss 0.33198478755741845
Epoch 1400, Loss 0.3319833886130564
Accuracy:  0.7625
precision  0.9705882352941176
recall:  0.6470588235294118
