In [93]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from sklearn.preprocessing import StandardScaler

In [95]:
df = pd.read_csv('../insurance_data.csv')
df.head()

Unnamed: 0,age,affordibility,bought_insurance
0,22,1,0
1,25,0,0
2,47,1,1
3,52,0,0
4,46,1,1


In [97]:
from sklearn.model_selection import train_test_split

X = df.drop('bought_insurance', axis=1)
y = df['bought_insurance']

X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.25, random_state=12)

In [99]:
scaler = StandardScaler()

X_train_scaled = pd.DataFrame(scaler.fit_transform(X_train), columns=X.columns)
X_test_scaled = pd.DataFrame(scaler.transform(X_test), columns=X.columns)

### Implementing Gradient Descent From Scratch

In [102]:
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

In [104]:
def log_loss(y_actual, y_predicted):
    total_error = 0
    for y, y_hat in zip(y_actual, y_predicted):
        factor = 1e-15
        match(y_hat):
            case 0:
                y_hat += factor
            case 1:
                y_hat -= factor
            case _:
                pass
        total_error += y * np.log(y_hat) + (1-y) * np.log(1-y_hat)
    return -(total_error / len(y_actual))

In [106]:
class ANN:
    def __init__(self):
        self.w1 = 1
        self.w2 = 1
        self.bias = 0
        
    def fit(self,X,y,epochs):
        self.w1, self.w2, self.bias = self.gradient_descent(X["age"],X["affordibility"] , y, epochs)

    def gradient_descent(self, age, aff, y_actual, epochs):
        w1 = 1
        w2 = 1
        bias = 0
        learning_rate = 0.5
        n = len(y_actual)

        age=np.array(age)
        aff=np.array(aff)
        y_actual=np.array(y_actual)
    
        for i in range(epochs):
            weighted_sum = w1 * age + w2 * aff + bias
            y_pred = sigmoid(weighted_sum)
            loss = log_loss(y_actual, y_pred)
            
            w1_d = (1/n) * np.dot(age, (y_pred - y_actual))
            w2_d = (1/n) * np.dot(aff, (y_pred - y_actual))
            bias_d = np.mean(y_pred - y_actual)
            
            w1 = w1 - learning_rate * w1_d
            w2 = w2 - learning_rate * w2_d
            bias = bias - learning_rate * bias_d
    
            print("Epoch",i,"w1",w1,"w2",w2,"bias",bias,"loss",loss)
        return w1, w2, bias

In [108]:
model = ANN()
model.fit(X_train_scaled, y_train, epochs=1000)

Epoch 0 w1 1.0368127271358094 w2 0.9876826976696026 bias -0.01931503994054433 loss 0.4526119588934752
Epoch 1 w1 1.070630398255854 w2 0.976898166745142 bias -0.03682337276235298 loss 0.4490165648905629
Epoch 2 w1 1.1017544761069957 w2 0.9674772438343382 bias -0.0527385276494288 loss 0.44601769975818795
Epoch 3 w1 1.1304510171129172 w2 0.9592688650577545 bias -0.0672455028863004 loss 0.44350591207183543
Epoch 4 w1 1.1569549253431453 w2 0.9521384973551537 bias -0.08050455161703862 loss 0.4413934088504154
Epoch 5 w1 1.1814738599016534 w2 0.945966508806042 bias -0.09265460375689512 loss 0.439609565824381
Epoch 6 w1 1.2041917285313632 w2 0.9406465866856261 bias -0.10381628355058181 loss 0.43809737320077435
Epoch 7 w1 1.225271759321197 w2 0.9360842601744453 bias -0.11409452897638654 loss 0.43681063587932734
Epoch 8 w1 1.2448591734552463 w2 0.932195553154127 bias -0.12358084276085764 loss 0.435711772278362
Epoch 9 w1 1.2630834962734268 w2 0.9289057740235159 bias -0.13235521469199063 loss 0.43

In [110]:
X_train_scaled.shape, y_train.shape

((21, 2), (21,))

### Types of Gradient Descent
<li>Batch Gradient Descent (Batch size = n)</li>
<li>Stochastic Gradient Descent (Batch size = 1)</li>
<li>Mini Batch Gradient Descent (Batch size = 1 < batch < n)</li>

In [113]:
class ANN1:
    def __init__(self):
        self.w1 = 1
        self.w2 = 1
        self.bias = 0
        
    def fit(self,X,y,epochs):
        self.w1, self.w2, self.bias = self.stochastic_gradient_descent(X["age"],X["affordibility"] , y , epochs)

    def sigmoid(self, z):
        return 1 / (1 + np.exp(-z))

    def log_loss(self, y, y_hat):
        factor = 1e-15
        match(y_hat):
            case 0:
                y_hat += factor
            case 1:
                y_hat -= factor
            case _:
                pass
        total_error = y * np.log(y_hat) + (1-y) * np.log(1-y_hat)
        print("Total Error:",total_error)
        return -(total_error / 1)

    def stochastic_gradient_descent(self, age, aff, y_actual, epochs):
        "take one record randomly in one epoch and calculate loss and adjust weight. repeat for each epoch"
        w1 = 1
        w2 = 1
        bias = 0
        learning_rate = 0.5
        n = len(y_actual)
        age = np.array(age)
        aff = np.array(aff)
        y_actual = np.array(y_actual)
    
        for i in range(epochs):
            from random import randint

            r = randint(0, n-1)
            
            weighted_sum = w1 * age[r] + w2 * aff[r] + bias
            y_pred = self.sigmoid(weighted_sum)
            loss = self.log_loss(y_actual[r], y_pred)
            
            w1_d = (1/n) * np.dot(age[r], (y_pred - y_actual[r]))
            w2_d = (1/n) * np.dot(aff[r], (y_pred - y_actual[r]))
            bias_d = y_pred - y_actual[r]
            
            w1 = w1 - learning_rate * w1_d
            w2 = w2 - learning_rate * w2_d
            bias = bias - learning_rate * bias_d
    
            print("Epoch",i,"w1",w1,"w2",w2,"bias",bias,"loss",loss)
        return w1, w2, bias

In [115]:
model = ANN1()
model.fit(X_train_scaled, y_train, epochs=1000)

Total Error: -1.3794450664297975
Epoch 0 w1 0.9918577337549773 w2 0.9887320246862279 bias -0.374140899405737 loss 1.3794450664297975
Total Error: -0.15406154824066165
Epoch 1 w1 0.9971401019105093 w2 0.9908820834834033 bias -0.3027505289309947 loss 0.15406154824066165
Total Error: -0.5253635664004652
Epoch 2 w1 0.9975847760708401 w2 0.9970358707835207 bias -0.0984206958328091 loss 0.5253635664004652
Total Error: -2.723012230782661
Epoch 3 w1 0.9757265057390829 w2 0.9618621809931522 bias 0.3687409926562459 loss 2.723012230782661
Total Error: -0.8086365037422042
Epoch 4 w1 0.9859843941017206 w2 0.9535117357136291 bias 0.09147351377735335 loss 0.8086365037422042
Total Error: -0.10022689177402198
Epoch 5 w1 0.987904184778734 w2 0.9571019696364287 bias 0.043789584356192415 loss 0.10022689177402198
Total Error: -0.15615268510787353
Epoch 6 w1 0.9918369915197911 w2 0.9592789934741917 bias 0.11607529975821625 loss 0.15615268510787353
Total Error: -0.5394174595281647
Epoch 7 w1 0.98117612373753

### Mini Batch Gradient Descent

In [130]:
import random as rd

class ANN2:
    def __init__(self):
        self.w1 = 1
        self.w2 = 1
        self.bias = 0
        
    def fit(self,X,y,epochs, batch_size=10):
        self.w1, self.w2, self.bias = self.gradient_descent(X["age"],X["affordibility"] , y, epochs, batch_size)

    def gradient_descent(self, age, aff, y_actual, epochs, batch_size):
        "take one batch randomly in one epoch and calculate loss on that batch and adjust weight. repeat for each epoch"
        w1 = 1
        w2 = 1
        bias = 0
        learning_rate = 0.5
        
        age = np.array(age)
        aff = np.array(aff)
        y_actual = np.array(y_actual)
        
        for i in range(epochs):
            idx = np.random.randint(0,len(y_actual), batch_size)
        
            weighted_sum = w1 * age[idx]+ w2 * aff[idx]+ bias
            y_pred = sigmoid(weighted_sum)
            loss = log_loss(y_actual[idx], y_pred)
            
            w1_d = (1/batch_size) * np.dot(age[idx], (y_pred - y_actual[idx]))
            w2_d = (1/batch_size) * np.dot(aff[idx], (y_pred - y_actual[idx]))
            bias_d = np.mean(y_pred - y_actual[idx])
            
            w1 = w1 - learning_rate * w1_d
            w2 = w2 - learning_rate * w2_d
            bias = bias - learning_rate * bias_d
    
            print("Epoch",i,"w1",w1,"w2",w2,"bias",bias,"loss",loss)
        return w1, w2, bias

In [132]:
model = ANN2()
model.fit(X_train_scaled, y_train, epochs=1000)

Epoch 0 w1 0.9965870174449794 w2 0.9771917665669527 bias -0.06299421730983369 loss 0.6157640400779388
Epoch 1 w1 0.9494682668054336 w2 0.8938277853300738 bias -0.18448268920916305 loss 0.7609903012997116
Epoch 2 w1 0.9166392530537538 w2 0.8813340686733472 bias -0.1050329938774445 loss 0.6986271877477271
Epoch 3 w1 0.9156469642819953 w2 0.9826166022287166 bias -0.23199931850816344 loss 0.3767405293556861
Epoch 4 w1 1.0131687879793598 w2 1.0238179020575622 bias -0.22102410900133893 loss 0.31790981001170265
Epoch 5 w1 1.028088199959695 w2 0.9445848422733532 bias -0.19359175708088536 loss 0.6175095878801715
Epoch 6 w1 1.0996442859954267 w2 0.9857256354340908 bias -0.2531005059927344 loss 0.26924985058800016
Epoch 7 w1 1.1803081475656634 w2 1.0239227528805679 bias -0.24485631112169468 loss 0.2844639841679045
Epoch 8 w1 1.2122450453903317 w2 0.9969246151639757 bias -0.34035223605376963 loss 0.46782497596850925
Epoch 9 w1 1.273097729407477 w2 1.0470892973475556 bias -0.3231904279692438 loss 0