In [376]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
%matplotlib inline

In [377]:
# Load the insurance dataset from the working directory and preview the first rows.
df = pd.read_csv("insurance_data.csv")
df.head()

Unnamed: 0,age,affordibility,bought_insurance
0,22,1,0
1,25,0,0
2,47,1,1
3,52,0,0
4,46,1,1


In [378]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; random_state is fixed so the split is reproducible.
# The feature columns ("age", "affordibility") are selected by name; "bought_insurance" is the target.
X_train, X_test, y_train, y_test = train_test_split(df[["age","affordibility"]], df["bought_insurance"], test_size=0.2, random_state=42)

In [379]:
# Preview the (still unscaled) training features.
X_train.head()

Unnamed: 0,age,affordibility
17,58,1
22,40,1
11,28,1
13,29,0
15,55,1


In [380]:
# Build the scaled training features as a new frame: age is divided by 100,
# affordibility is carried over unchanged. X_train itself is not modified.
X_train_scaled = X_train.assign(age=X_train["age"] / 100)
X_train_scaled

Unnamed: 0,age,affordibility
17,0.58,1
22,0.4,1
11,0.28,1
13,0.29,0
15,0.55,1
1,0.25,0
4,0.46,1
5,0.56,1
2,0.47,1
16,0.25,0


In [381]:
# Apply the same age / 100 scaling to the held-out features, again as a new frame
# so that X_test stays untouched.
X_test_scaled = X_test.assign(age=X_test["age"] / 100)
X_test_scaled.head()

Unnamed: 0,age,affordibility
9,0.61,1
25,0.54,1
8,0.62,1
21,0.26,0
0,0.22,1


In [382]:
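# Alternative (disabled): min-max scaling with scikit-learn instead of dividing age by 100.
# The scaler is fitted on the training set only and then reused to transform the test set;
# the original row index is kept so the frames stay aligned with y_train / y_test.
# from sklearn.preprocessing import MinMaxScaler
# scaler = MinMaxScaler()
# X_train_scaled = pd.DataFrame(scaler.fit_transform(X_train), columns=X_train.columns, index=X_train.index)
# X_test_scaled = pd.DataFrame(scaler.transform(X_test), columns=X_test.columns, index=X_test.index)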

In [383]:
# Display the scaled training features.
X_train_scaled

Unnamed: 0,age,affordibility
17,0.58,1
22,0.4,1
11,0.28,1
13,0.29,0
15,0.55,1
1,0.25,0
4,0.46,1
5,0.56,1
2,0.47,1
16,0.25,0


In [384]:
# Display the scaled test features.
X_test_scaled

Unnamed: 0,age,affordibility
9,0.61,1
25,0.54,1
8,0.62,1
21,0.26,0
0,0.22,1
12,0.27,0


In [385]:
# Display the training labels (bought_insurance).
y_train

17    1
22    1
11    0
13    0
15    1
1     0
4     1
5     1
2     1
16    1
23    1
3     0
26    0
24    1
18    0
27    0
20    0
7     1
10    0
14    1
19    0
6     0
Name: bought_insurance, dtype: int64

In [386]:
# Display the held-out labels (bought_insurance).
y_test

9     1
25    1
8     1
21    0
0     0
12    0
Name: bought_insurance, dtype: int64

In [387]:
class myNN():
    """Single-neuron binary classifier (logistic regression) trained by batch gradient descent.

    The model predicts ``sigmoid(w1 * age + w2 * affordibility + bias)`` and is
    trained by minimising the binary cross-entropy (log loss).

    Parameters
    ----------
    learning_rate : float, default 0.1
        Step size applied to every gradient update.
    threshold : float, default 1e-6
        Training stops early once the loss changes by less than this amount
        between two consecutive epochs.
    verbose : bool, default True
        When true, progress is printed every 100 epochs and on convergence.

    Attributes
    ----------
    w1, w2, bias : float
        Current parameters. They start at 1.0, 1.0 and 0.0 and are replaced by
        the learned values when :meth:`fit` is called.
    """

    def __init__(self, learning_rate=0.1, threshold=1e-6, verbose=True):
        self.w1 = 1.0
        self.w2 = 1.0
        self.bias = 0.0
        self.learning_rate = learning_rate
        self.threshold = threshold
        self.verbose = verbose

    def fit(self, X, y, epochs):
        """Train on ``X`` / ``y`` and store the learned parameters on the instance.

        Parameters
        ----------
        X : mapping or DataFrame
            Must provide the columns ``"age"`` and ``"affordibility"``.
        y : array-like
            Binary targets (0 or 1), one per row of ``X``.
        epochs : int
            Maximum number of gradient-descent iterations.

        Returns
        -------
        tuple
            ``(w1, w2, bias)`` after training.
        """
        age = np.asarray(X["age"], dtype=float)
        affordibility = np.asarray(X["affordibility"], dtype=float)
        y = np.asarray(y, dtype=float)
        self.w1, self.w2, self.bias = self.gradient_descent(age, affordibility, y, epochs)
        return self.w1, self.w2, self.bias

    def predict(self, X_test):
        """Return the predicted probabilities for ``X_test`` as a NumPy array.

        ``X_test`` must provide the columns ``"age"`` and ``"affordibility"``;
        it is only read, never modified.
        """
        age = np.asarray(X_test["age"], dtype=float)
        affordibility = np.asarray(X_test["affordibility"], dtype=float)
        weighted_sum = self.w1 * age + self.w2 * affordibility + self.bias
        return self.sigmoid_numpy(weighted_sum)

    def sigmoid_numpy(self, x):
        """Element-wise logistic function ``1 / (1 + exp(-x))``."""
        # exp(-x) overflows to inf for very negative x. 1 / (1 + inf) is still
        # the correct limit 0.0, so only the spurious overflow warning is muted.
        with np.errstate(over="ignore"):
            return 1 / (1 + np.exp(-x))

    def log_loss(self, y_truth, y_predicted):
        """Mean binary cross-entropy between ``y_truth`` and ``y_predicted``.

        Predictions are clipped to ``[1e-15, 1 - 1e-15]`` so that ``log`` never
        receives exactly 0.
        """
        epsilon = 1e-15
        y_truth = np.asarray(y_truth, dtype=float)
        clipped = np.clip(np.asarray(y_predicted, dtype=float), epsilon, 1 - epsilon)
        return -np.mean(y_truth * np.log(clipped) + (1 - y_truth) * np.log(1 - clipped))

    def gradient_descent(self, age, affordibility, y_truth, epochs):
        """Run batch gradient descent starting from the instance's current parameters.

        The instance attributes are not modified here; :meth:`fit` is the method
        that stores the result.

        Parameters
        ----------
        age, affordibility, y_truth : array-like
            Equally shaped 1-D inputs. They are converted to NumPy arrays, so
            pandas objects are combined by position rather than by index label.
        epochs : int
            Maximum number of iterations.

        Returns
        -------
        tuple
            ``(w1, w2, bias)`` after the last executed epoch.

        Raises
        ------
        ValueError
            If the inputs are empty or do not share the same shape.
        """
        age = np.asarray(age, dtype=float)
        affordibility = np.asarray(affordibility, dtype=float)
        y_truth = np.asarray(y_truth, dtype=float)

        if not (age.shape == affordibility.shape == y_truth.shape):
            raise ValueError("age, affordibility and y_truth must have the same shape")
        n = age.size
        if n == 0:
            raise ValueError("gradient_descent requires at least one training sample")

        w1, w2, bias = self.w1, self.w2, self.bias
        learning_rate = self.learning_rate
        threshold = self.threshold
        inv_n = 1 / n
        previous_loss = float('inf')

        for i in range(epochs):
            weighted_sum = w1 * age + w2 * affordibility + bias
            y_predicted = self.sigmoid_numpy(weighted_sum)
            loss = self.log_loss(y_truth, y_predicted)

            # Gradient of the mean log loss with respect to each parameter.
            error = y_predicted - y_truth
            w1d = inv_n * np.dot(age, error)
            w2d = inv_n * np.dot(affordibility, error)
            bias_d = np.mean(error)

            w1 = w1 - learning_rate * w1d
            w2 = w2 - learning_rate * w2d
            bias = bias - learning_rate * bias_d

            # The printed loss was computed before this epoch's update.
            if self.verbose and i % 100 == 0:
                print(f'Epochs:{i}, w1:{w1}, w2:{w2}, bias:{bias}, loss:{loss}')

            if abs(previous_loss - loss) < threshold:
                if self.verbose:
                    print(f"Converged at epoch {i} with loss: {loss}")
                break
            previous_loss = loss
        return w1, w2, bias
                           
                           

In [388]:
# Scratch check (disabled): log loss at the initial parameters (w1 = w2 = 1, bias = 0).
# model = myNN()
# # model.fit()
# weighted_sum = X_train_scaled["age"] + X_train_scaled["affordibility"] + 0
# y_predicted = model.sigmoid_numpy(weighted_sum)
# loss = model.log_loss(y_train, y_predicted)
# print(loss)

In [389]:
# Run gradient descent directly on the scaled training columns. The learned
# parameters are only returned here; they are not stored on `model`.
model = myNN()
w1, w2, bias = model.gradient_descent(
    age=X_train_scaled["age"],
    affordibility=X_train_scaled["affordibility"],
    y_truth=y_train,
    epochs=5000,
)
w1, w2, bias

inf
Epochs:0, w1:0.9947379863769457, w2:0.9862777621955318, bias:-0.023497903333540897, loss:0.7428288579142563
Epochs:100, w1:1.07488474037868, w2:0.7273263168770392, bias:-0.8131926464694498, loss:0.6151041770515625
Epochs:200, w1:1.3664246684802364, w2:0.8335379341213822, bias:-1.0293486742247717, loss:0.6007467518783048
Epochs:300, w1:1.6568315818082888, w2:0.9147609736680136, bias:-1.2096278640721683, loss:0.5883808660568577
Epochs:400, w1:1.9405729243946885, w2:0.9741496163493341, bias:-1.369861990746796, loss:0.5773951673642531
Epochs:500, w1:2.2163721698920695, w2:1.0184369895567658, bias:-1.515020762549334, loss:0.5674749005918829
Epochs:600, w1:2.4835356127954133, w2:1.0522916303476186, bias:-1.6484840614138876, loss:0.5584331741336404
Epochs:700, w1:2.7417281817704513, w2:1.0789219887076804, bias:-1.7726440190079296, loss:0.5501474803918115
Epochs:800, w1:2.9908574231344245, w2:1.1005430972692078, bias:-1.8892166658646743, loss:0.542529328259477
Epochs:900, w1:3.230994345673

(np.float64(8.399177791979584),
 np.float64(1.5006652252655839),
 np.float64(-4.381000223096987))

In [390]:
# Train through fit(), which also stores the learned parameters on `model`
# so that predict() can use them.
w1, w2, bias = model.fit(X=X_train_scaled, y=y_train, epochs=5000)
w1, w2, bias

inf
Epochs:0, w1:0.9947379863769457, w2:0.9862777621955318, bias:-0.023497903333540897, loss:0.7428288579142563
Epochs:100, w1:1.07488474037868, w2:0.7273263168770392, bias:-0.8131926464694498, loss:0.6151041770515625
Epochs:200, w1:1.3664246684802364, w2:0.8335379341213822, bias:-1.0293486742247717, loss:0.6007467518783048
Epochs:300, w1:1.6568315818082888, w2:0.9147609736680136, bias:-1.2096278640721683, loss:0.5883808660568577
Epochs:400, w1:1.9405729243946885, w2:0.9741496163493341, bias:-1.369861990746796, loss:0.5773951673642531
Epochs:500, w1:2.2163721698920695, w2:1.0184369895567658, bias:-1.515020762549334, loss:0.5674749005918829
Epochs:600, w1:2.4835356127954133, w2:1.0522916303476186, bias:-1.6484840614138876, loss:0.5584331741336404
Epochs:700, w1:2.7417281817704513, w2:1.0789219887076804, bias:-1.7726440190079296, loss:0.5501474803918115
Epochs:800, w1:2.9908574231344245, w2:1.1005430972692078, bias:-1.8892166658646743, loss:0.542529328259477
Epochs:900, w1:3.230994345673

(np.float64(8.399177791979584),
 np.float64(1.5006652252655839),
 np.float64(-4.381000223096987))

In [218]:
# Predicted purchase probabilities for the held-out rows.
# NOTE(review): this cell's execution count is out of sequence and its saved output looks
# like a pandas Series, while predict() builds NumPy arrays — re-run after Restart & Run All.
model.predict(X_test_scaled)

9     0.941048
25    0.882516
8     0.946746
21    0.058896
0     0.193184
12    0.065156
dtype: float64