In batch gradient descent, we use the entire dataset for 1 epoch.

In [22]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import math

In [3]:
df = pd.read_csv('insurance_data.csv')
df.head()

Unnamed: 0,age,affordibility,bought_insurance
0,22,1,0
1,25,0,0
2,47,1,1
3,52,0,0
4,46,1,1


In [7]:
X_train,X_test,y_train,y_test = train_test_split(df[['age','affordibility']],df['bought_insurance'],test_size=0.2, random_state=2024)

In [12]:
X_train['age'] = X_train['age']/100
X_test['age'] = X_test['age']/100

In [14]:
X_test.head()

Unnamed: 0,age,affordibility
20,0.21,1
17,0.58,1
22,0.4,1
16,0.25,0
23,0.45,1


In [18]:
model = keras.Sequential([
    keras.layers.Dense(1,input_shape=(2,),activation='sigmoid',kernel_initializer='ones',bias_initializer='zeros')
])
model.compile(
    optimizer='Adam',
    loss='binary_crossentropy',
    metrics=['accuracy']
)
model.fit(X_train,y_train,epochs=2000)

Epoch 1/2000


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 213ms/step - accuracy: 0.4545 - loss: 0.7142
Epoch 2/2000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.4545 - loss: 0.7138
Epoch 3/2000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.4545 - loss: 0.7134
Epoch 4/2000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.4545 - loss: 0.7129
Epoch 5/2000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.4545 - loss: 0.7125
Epoch 6/2000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.4545 - loss: 0.7121
Epoch 7/2000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.4545 - loss: 0.7117
Epoch 8/2000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.4545 - loss: 0.7112
Epoch 9/2000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m

<keras.src.callbacks.history.History at 0x28dde2420>

In [19]:
model.evaluate(X_test,y_test)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 60ms/step - accuracy: 0.5000 - loss: 0.7207


[0.7206504940986633, 0.5]

In [30]:
y_pred = model.predict(X_test)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step


In [31]:
y_pred

array([[0.5318349 ],
       [0.709596  ],
       [0.6273389 ],
       [0.28367743],
       [0.6512001 ],
       [0.6558871 ]], dtype=float32)

In [21]:
coef,intercepts = model.get_weights()
coef,intercepts

(array([[2.0700285],
        [1.136606 ]], dtype=float32),
 array([-1.4438], dtype=float32))

Now, we will try to get these weights and biases from scratch without using any modules

In [23]:
def sigmoid(x):
    return 1/(1+math.exp(-x))

In [34]:
#normal sigmoid takes an individual value, however numpy_sigmoid can take an array of values
def numpy_sigmoid(X):
    return 1+(1/np.exp(-X))

In [24]:
sigmoid(2)

0.8807970779778823

In [26]:
def prediction_function(age,affordibility):
    weighted_sum =  coef[0]*age+coef[1]*affordibility+intercepts[0]
    return sigmoid(weighted_sum)

In [27]:
prediction_function(0.47,1)

  return 1/(1+math.exp(-x))


0.6605440240873299

In [51]:
#log-loss/binary-crossentropy = -1/n(Summation of (y_true*log(y_pred))+((1-y_true)*log(1-y_pred)))
def log_loss(y_true, y_predicted):
    epsilon = 1e-15
    y_predicted_new = [max(i,epsilon) for i in y_predicted]
    y_predicted_new = [min(i,1-epsilon) for i in y_predicted_new]
    y_predicted_new = np.array(y_predicted_new)
    return -np.mean(y_true*np.log(y_predicted_new)+(1-y_true)*np.log(1-y_predicted_new))

In [52]:
def gradient_descent(age, affordability, y_true, epochs, loss_thresold):
    w1 = w2 = 1
    bias = 0
    rate = 0.5
    n = len(age)
    for i in range(epochs):
        weighted_sum = w1 * age + w2 * affordability + bias
        y_predicted = numpy_sigmoid(weighted_sum)
        loss = log_loss(y_true, y_predicted)

        w1d = (1/n)*np.dot(np.transpose(age),(y_predicted-y_true)) 
        w2d = (1/n)*np.dot(np.transpose(affordability),(y_predicted-y_true)) 

        bias_d = np.mean(y_predicted-y_true)
        w1 = w1 - rate * w1d
        w2 = w2 - rate * w2d
        bias = bias - rate * bias_d

        print (f'Epoch:{i}, w1:{w1}, w2:{w2}, bias:{bias}, loss:{loss}')

        if loss<=loss_thresold:
            break

    return w1, w2, bias

In [53]:
gradient_descent(X_train['age'],X_train['affordibility'],y_train,1000, 0.4631)

Epoch:0, w1:0.22543649885372807, w2:-0.4472401796164427, bias:-1.871649191155676, loss:18.839768723095027
Epoch:1, w1:0.1217327535366667, w2:-0.5953164479458668, bias:-2.2092183677171517, loss:18.839768723095027
Epoch:2, w1:0.027708833816226575, w2:-0.7292298540734815, bias:-2.5230974977892386, loss:18.839768723095027
Epoch:3, w1:-0.06067148363808317, w2:-0.8553192332621833, bias:-2.823010446628086, loss:18.839768723095027
Epoch:4, w1:-0.14553958162233654, w2:-0.9767922703717441, bias:-3.114142258402597, loss:18.839768723095027
Epoch:5, w1:-0.22813419117299338, w2:-1.095435078448573, bias:-3.3995343707457737, loss:18.839768723095027
Epoch:6, w1:-0.3092189103919887, w2:-1.2123009639762077, bias:-3.6810785459154594, loss:18.839768723095027
Epoch:7, w1:-0.38928293469948827, w2:-1.328034028265191, bias:-3.9599975017147964, loss:18.839768723095027
Epoch:8, w1:-0.46864803629031015, w2:-1.4430375609228168, bias:-4.2371025789345795, loss:18.839768723095027
Epoch:9, w1:-0.5475298669707273, w2:-

(-77.5004169073626, -114.05927811397088, -274.52094708961675)