In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns


In [4]:
df = pd.read_csv("insurance_data.csv")

In [5]:
df.head()

Unnamed: 0,age,affordibility,bought_insurance
0,22,1,0
1,25,0,0
2,47,1,1
3,52,0,0
4,46,1,1


In [8]:
X = df.drop(["bought_insurance"], axis=1).copy()

In [10]:
y = df["bought_insurance"].copy()

In [7]:
from sklearn.model_selection import train_test_split

In [15]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [16]:
print("X_train size", len(X_train))
print("X_test size", len(X_test))
print("y_train size", len(y_train))
print("y_test size", len(y_test))


X_train size 22
X_test size 6
y_train size 22
y_test size 6


In [24]:
# Bring `age` onto roughly the same 0-1 scale as the 0/1 `affordibility` flag by
# dividing it by 100. The same transformation is applied to the train and the
# test split, and the unscaled frames are left untouched.

X_train_scaled = X_train.assign(age=X_train["age"] / 100)


X_test_scaled = X_test.assign(age=X_test["age"] / 100)


In [19]:
import tensorflow as tf
from tensorflow import keras

In [25]:
# Single-neuron network: 2 inputs (age, affordibility) -> 1 sigmoid output.
# The kernel starts at ones and the bias at zero so that the starting point is
# deterministic.
model = keras.Sequential([
    # Declare the input shape with an explicit Input object as the first entry.
    # Passing `input_shape` to a layer makes Keras emit a UserWarning when used
    # inside a Sequential model.
    keras.Input(shape=(2,)),
    keras.layers.Dense(1,
                       activation="sigmoid",
                       kernel_initializer="ones",
                       bias_initializer="zeros")
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [26]:
model.compile(optimizer="adam",
             loss="binary_crossentropy",
             metrics=['accuracy'])

In [None]:
# Train the single-neuron model on the scaled training split for 5000 epochs.
# NOTE(review): with the default verbosity Keras prints a progress line for every
# epoch, which floods the notebook output; consider passing verbose=0 and
# inspecting the returned History object instead.
model.fit(X_train_scaled,y_train,epochs=5000)

Epoch 1/5000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 71ms/step - accuracy: 0.8636 - loss: 0.5433
Epoch 2/5000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 111ms/step - accuracy: 0.8636 - loss: 0.5433
Epoch 3/5000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 121ms/step - accuracy: 0.8636 - loss: 0.5433
Epoch 4/5000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 110ms/step - accuracy: 0.8636 - loss: 0.5432
Epoch 5/5000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 129ms/step - accuracy: 0.8636 - loss: 0.5432
Epoch 6/5000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 120ms/step - accuracy: 0.8636 - loss: 0.5432
Epoch 7/5000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 122ms/step - accuracy: 0.8636 - loss: 0.5431
Epoch 8/5000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 137ms/step - accuracy: 0.8636 - loss: 0.5431
Epoch 9/5000
[1m1/1[0m [32m━━━━━━━━━━━

In [30]:
# Now lets evaluate the model with the test dataset

model.evaluate(X_test_scaled,y_test)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step - accuracy: 1.0000 - loss: 0.1689


[0.16885077953338623, 1.0]

In [31]:
# It's giving an accuracy of 1.0, which means the model classified every sample in our test dataset correctly (note that the test set has only 6 rows)

In [32]:
model.predict(X_test_scaled)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 71ms/step


array([[0.8880836 ],
       [0.82330436],
       [0.89542305],
       [0.11223527],
       [0.2900774 ],
       [0.12004019]], dtype=float32)

In [40]:
# Each number in the array is the predicted probability that the person will buy the insurance:
# a value below 0.5 means "will not purchase", a value of 0.5 or above means "will purchase"

In [34]:
arr = model.predict(X_test_scaled)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 114ms/step


In [41]:
# Creating a function to show the prediction in 1 or 0

def showans(predictions=None, threshold=0.5):
    """Print one hard class label (0 or 1) per predicted probability.

    Args:
        predictions: Array-like of predicted probabilities of any shape (for
            example the (n, 1) array returned by ``model.predict``). When
            omitted, the notebook-level ``arr`` is used, so the original
            ``showans()`` call keeps working.
        threshold: Probabilities strictly below this value are printed as 0,
            everything else as 1.
    """
    if predictions is None:
        # Backward-compatible fallback to the notebook global.
        predictions = arr
    # Flatten so that each element is a scalar, whatever the input shape is.
    for probability in np.ravel(predictions):
        print(0 if probability < threshold else 1)

In [39]:
showans()

1
1
1
0
0
0


In [33]:
y_test

9     1
25    1
8     1
21    0
0     0
12    0
Name: bought_insurance, dtype: int64

In [42]:
# As we can see, our predictions match the actual labels (y_test) perfectly

In [43]:
# But how do we know which weights and bias the model learned? We can read them with model.get_weights()

In [45]:
weights, bias = model.get_weights()

In [59]:
weights, bias

(array([[7.605953 ],
        [1.4773393]], dtype=float32),
 array([-4.045657], dtype=float32))

In [60]:
print("Weight1 was",weights[0][0])
print("Weight2 was",weights[1][0])
print("Bias was", bias[0])

Weight1 was 7.605953
Weight2 was 1.4773393
Bias was -4.045657


## NOW WE WILL IMPLEMENT THE SAME NEURAL NETWORK WITH PLAIN PYTHON

In [61]:
# Lets start with the basics and create a sigmoid function

import math

# sigmoid squashes any real number into the range 0..1: large positive inputs
# approach 1, large negative inputs approach 0, and sigmoid(0) is exactly 0.5
def sigmoid(x):
    """Return the logistic sigmoid 1 / (1 + e^-x) of a scalar ``x``.

    Args:
        x: A real number.

    Returns:
        A float between 0 and 1.
    """
    try:
        return 1/(1+math.exp(-x))
    except OverflowError:
        # math.exp(-x) overflows once x is below roughly -709. In that regime
        # 1 + e^-x is indistinguishable from e^-x, so sigmoid(x) equals e^x,
        # which underflows gracefully towards 0.0 instead of raising.
        return math.exp(x)

In [65]:
sigmoid(-10)

4.5397868702434395e-05

In [66]:
# Now lets create the prediction function which is basically a combination of weights and biases

def prediction_fun(age, affor):
    """Reproduce the trained neuron's forward pass by hand.

    Uses the notebook-level ``weights`` and ``bias`` obtained from
    ``model.get_weights()`` together with the scalar ``sigmoid`` helper.

    Args:
        age: Age feature (presumably already divided by 100, like the
            training data -- confirm at the call site).
        affor: Affordability flag.

    Returns:
        The sigmoid of the weighted sum, i.e. the predicted probability.
    """
    w_age = weights[0][0]
    w_afford = weights[1][0]

    # z = w1*age + w2*affordability + bias
    z = w_age*age + w_afford*affor + bias[0]

    return sigmoid(z)

    

In [71]:
X_test_scaled

Unnamed: 0,age,affordibility
9,0.61,1
25,0.54,1
8,0.62,1
21,0.26,0
0,0.22,1
12,0.27,0


In [72]:
# Now lets check the 1st case where age = 0.61 and affordability = 1

prediction_fun(	0.61,1)

0.8880836138485964

In [70]:
model.predict(X_test_scaled)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 168ms/step


array([[0.8880836 ],
       [0.82330436],
       [0.89542305],
       [0.11223527],
       [0.2900774 ],
       [0.12004019]], dtype=float32)

In [None]:
# As we can see, our function predicted 0.88808, which is the same as the value predicted by the model

## NOW LET'S IMPLEMENT GRADIENT DESCENT IN PLAIN PYTHON (USING NUMPY)

In [86]:
# To implement gradient descent we need some helper function

# log-loss function

def log_loss(y_true, y_predicted):
    """Mean binary cross-entropy (log loss) between labels and probabilities.

    Args:
        y_true: Array-like of ground-truth labels (0 or 1).
        y_predicted: Array-like of predicted probabilities, matched to
            ``y_true`` by position.

    Returns:
        The mean of -(y*log(p) + (1-y)*log(1-p)) as a NumPy float.
    """
    epsilon = 1e-15
    labels = np.asarray(y_true, dtype=float)
    # Clip on BOTH sides so that neither log(p) nor log(1-p) ever sees 0.
    # (The previous implementation applied the upper clip to the raw
    # predictions, which silently discarded the lower clip.)
    probs = np.clip(np.asarray(y_predicted, dtype=float), epsilon, 1 - epsilon)

    ans = -np.mean(labels*np.log(probs) + (1 - labels)*np.log(1 - probs))

    return ans
    

In [87]:
# numpy sigmoid function

def sigmoid_numpy(x):
    """Element-wise logistic sigmoid 1 / (1 + e^-x) computed with NumPy.

    Works on scalars as well as on array-like objects that support unary
    minus and NumPy ufuncs.
    """
    denominator = 1 + np.exp(-x)
    return 1 / denominator

In [88]:
def gradient_descent(age, afford, y_true, epoch, rate=0.5, verbose=True):
    """Fit a single sigmoid neuron with full-batch gradient descent.

    The model is ``sigmoid(w1*age + w2*afford + bias)`` and the loss being
    minimised is the binary cross-entropy computed by ``log_loss``.

    Args:
        age: Array-like holding the (scaled) age feature.
        afford: Array-like holding the affordability feature.
        y_true: Array-like of 0/1 labels.
        epoch: Number of gradient-descent iterations to run.
        rate: Learning rate (step size). Defaults to 0.5, the value that was
            previously hard-coded.
        verbose: When True (the default), print the parameters and the loss
            after every epoch, as before. Pass False to keep the output short.

    Returns:
        Tuple ``(w1, w2, bias)`` with the learned parameters.

    Note:
        The three inputs are converted to NumPy arrays and matched by
        position, so plain lists work as well as pandas Series.
    """
    age = np.asarray(age, dtype=float)
    afford = np.asarray(afford, dtype=float)
    y_true = np.asarray(y_true, dtype=float)

    # Start from w1 = w2 = bias = 0.
    w1 = 0
    w2 = 0
    bias = 0

    n = len(age)

    for i in range(epoch):
        # Forward pass.
        weighted_sum = w1*age + w2*afford + bias
        y_predicted = sigmoid_numpy(weighted_sum)

        # Gradients of the mean log loss with respect to w1, w2 and bias.
        error = y_predicted - y_true
        w1d = (1/n)*np.dot(age, error)
        w2d = (1/n)*np.dot(afford, error)
        bias_d = np.mean(error)

        # Step against the gradient.
        w1 = w1 - rate*w1d
        w2 = w2 - rate*w2d
        bias = bias - rate*bias_d

        if verbose:
            # The loss belongs to the predictions made BEFORE this update; it
            # is only needed for reporting, so skip the work when silent.
            loss = log_loss(y_true, y_predicted)
            print(f'Epoch:{i}, w1:{w1}, w2:{w2}, bias:{bias}, loss:{loss}')

    return w1,w2,bias
        
        
        
    

In [89]:
gradient_descent(X_train_scaled["age"], X_train_scaled["affordibility"], y_train, 1000)

Epoch:0, w1:0.02238636363636364, w2:0.03409090909090909, bias:0.0, loss:0.6931471805599453
Epoch:1, w1:0.04311885778854925, w2:0.06451374661806406, bias:-0.004005135860758641, loss:0.6899825864397562
Epoch:2, w1:0.06255668848163669, w2:0.09198110624229573, bias:-0.011118610800223245, loss:0.6873440584215054
Epoch:3, w1:0.08098286398677615, w2:0.11705630055410784, bias:-0.020633993183957255, loss:0.6850465852814565
Epoch:4, w1:0.09861997489786925, w2:0.14018411647404488, bias:-0.03199573898457511, loss:0.6829747337687819
Epoch:5, w1:0.11564294405550568, w2:0.16171576329138368, bias:-0.04476732987404629, loss:0.6810563644665686
Epoch:6, w1:0.13218917631355345, w2:0.18192877171972238, bias:-0.05860592462494203, loss:0.6792462360010799
Epoch:7, w1:0.14836657585844368, w2:0.20104272516794433, bias:-0.07324234271720938, loss:0.6775158158370572
Epoch:8, w1:0.16425985490123932, w2:0.21923164061597425, bias:-0.08846531611439126, loss:0.6758469578702851
Epoch:9, w1:0.17993548980169932, w2:0.2366

(np.float64(8.245643034501636),
 np.float64(1.4852103553124953),
 np.float64(-4.306193508533563))

In [91]:
weights, bias

(array([[7.605953 ],
        [1.4773393]], dtype=float32),
 array([-4.045657], dtype=float32))

In [92]:
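# As we can see, our Python code produced weights (w1, w2) and a bias that are close to the ones produced by TensorFlow.
# They are not identical because the two runs use different optimizers (plain gradient descent vs. Adam) and different starting weights.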