Predicting whether a person would buy life insurance based on their age and affordability, using logistic regression

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
import keras

%matplotlib inline

In [None]:
# Read the insurance records from the CSV (path is relative to the working
# directory) and preview the first five rows.
df = pd.read_csv(filepath_or_buffer="Insurance_Data.csv")
df.head(n=5)

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Features are the age and affordibility columns; the target is bought_insurance.
X = df[['age', 'affordibility']]
y = df['bought_insurance']

# Hold out 33% of the rows for testing; the fixed random_state keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)

### Scaling (age is divided by 100 to bring it to roughly the 0–1 range)

In [None]:
# Divide age by 100 so it lands on a roughly 0-1 scale. `assign` returns a new
# frame, so the unscaled X_train / X_test are left untouched.
X_train_scaled = X_train.assign(age=X_train['age'] / 100)
X_test_scaled = X_test.assign(age=X_test['age'] / 100)

In [None]:
# A single sigmoid unit over the two input features is logistic regression.
# The kernel starts at ones and the bias at zero, the same starting point the
# from-scratch implementation further down uses.
model = keras.Sequential()
model.add(
    keras.layers.Dense(
        units=1,
        input_shape=(2,),
        activation='sigmoid',
        kernel_initializer='ones',
        bias_initializer='zeros',
    )
)

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

model.fit(x=X_train_scaled, y=y_train, epochs=500)

In [None]:
# Loss and accuracy of the trained model on the held-out test split.
model.evaluate(x=X_test_scaled, y=y_test)

In [None]:
# The model has a single Dense layer, so get_weights() yields two arrays here:
# the layer's kernel (unpacked into `coef`) and its bias (unpacked into `intercept`).
coef, intercept = model.get_weights()

In [None]:
# weights: learned coefficients of the Dense layer, one per input feature
# (age, affordibility)
coef

In [None]:
# bias: learned intercept of the single sigmoid unit
intercept

# Implementation from Scratch

In [6]:
# to calculate the log loss(Binary cross entropy)
def log_loss(y_real, y_predicted):
    """Return the mean binary cross-entropy (log loss) of a set of predictions.

    Parameters
    ----------
    y_real : array-like
        True labels (0 or 1). Lists, NumPy arrays and pandas Series are accepted.
    y_predicted : array-like
        Predicted probabilities, same length as ``y_real``. Values are paired
        with ``y_real`` by position.

    Returns
    -------
    float
        ``-mean(y * log(p) + (1 - y) * log(1 - p))``.
    """
    epsilon = 1e-15
    labels = np.asarray(y_real, dtype=float)
    # Clamp probabilities into [epsilon, 1 - epsilon] so log() never receives
    # an exact 0 (which would produce -inf and poison the mean).
    probabilities = np.clip(np.asarray(y_predicted, dtype=float), epsilon, 1 - epsilon)
    return -np.mean(
        labels * np.log(probabilities) + (1 - labels) * np.log(1 - probabilities)
    )

In [7]:
# apply the sigmoid function element-wise on a whole array
def sigmoid_numpy(x):
    """Logistic sigmoid ``1 / (1 + e**-x)``, evaluated element-wise.

    ``x`` may be a scalar, a NumPy array or a pandas Series; the result has the
    same container type and shape, with every value between 0 and 1.
    """
    exp_of_negated = np.exp(-x)
    return 1 / (1 + exp_of_negated)

In [21]:
class myNN:
    """Two-feature logistic regression trained from scratch with batch gradient descent.

    The model is ``sigmoid(w1 * age + w2 * affordibility + bias)``. Inputs to
    ``fit`` and ``predict`` must expose ``'age'`` and ``'affordibility'``
    columns (for example a pandas DataFrame).
    """

    def __init__(self):
        # Weights start at 1 and the bias at 0, mirroring the 'ones' / 'zeros'
        # initializers of the Keras model earlier in the notebook.
        self.w1 = 1
        self.w2 = 1
        self.bias = 0

    def fit(self, X, y, epochs, loss_threshold, rate=0.5):
        """Train on ``X`` / ``y`` and store the learned ``w1``, ``w2`` and ``bias``.

        Parameters
        ----------
        X : table with 'age' and 'affordibility' columns.
        y : true 0/1 labels, one per row of ``X``.
        epochs : maximum number of gradient-descent iterations.
        loss_threshold : stop early once the log loss is <= this value.
        rate : learning rate (defaults to 0.5, the previously hard-coded value).
        """
        self.w1, self.w2, self.bias = self.gradient_descent(
            X['age'], X['affordibility'], y, epochs, loss_threshold, rate
        )

    def predict(self, X_test):
        """Return the predicted probability (sigmoid output) for each row of ``X_test``."""
        weighted_sum = self.w1*X_test['age'] + self.w2*X_test['affordibility'] + self.bias
        return sigmoid_numpy(weighted_sum)

    @staticmethod
    def _report(epoch, w1, w2, bias, loss):
        """Print one progress line for the given epoch."""
        print(f'Epoch: {epoch}, w1: {w1}, w2: {w2}, bias: {bias}, loss: {loss}')

    def gradient_descent(self, age, affordibility, y_true, epochs, loss_threshold, rate=0.5):
        """Run batch gradient descent on the log loss and return ``(w1, w2, bias)``.

        Every run starts from ``w1 = w2 = 1`` and ``bias = 0``. A progress line
        is printed every 25 epochs and once more for the last epoch executed.
        The printed loss is measured before that epoch's update, while the
        printed weights already include the update.

        Parameters
        ----------
        age, affordibility : 1-D feature columns of equal length.
        y_true : true 0/1 labels, same length as the features.
        epochs : maximum number of iterations; with ``epochs <= 0`` the
            initial parameters are returned unchanged.
        loss_threshold : stop as soon as the loss is <= this value.
        rate : learning rate (step size) applied to each gradient.
        """
        w1 = w2 = 1
        bias = 0
        n = len(age)

        # Nothing to train: without this guard the final report below would
        # reference loop variables that were never assigned.
        if epochs <= 0:
            return w1, w2, bias

        last_reported = None
        for i in range(epochs):
            weighted_sum = w1*age + w2*affordibility + bias
            y_predicted = sigmoid_numpy(weighted_sum)
            loss = log_loss(y_true, y_predicted)

            # finding the derivatives of the log loss w.r.t. each parameter
            error = y_predicted - y_true
            w1_d = (1/n)*np.dot(np.transpose(age), error)
            w2_d = (1/n)*np.dot(np.transpose(affordibility), error)
            bias_d = np.mean(error)

            # updating the weights and bias
            w1 = w1 - rate*w1_d
            w2 = w2 - rate*w2_d
            bias = bias - rate*bias_d

            if i % 25 == 0:
                self._report(i, w1, w2, bias, loss)
                last_reported = i

            if loss <= loss_threshold:
                break

        # Report the final epoch exactly once (skip if it was just printed above).
        if last_reported != i:
            self._report(i, w1, w2, bias, loss)
        return w1, w2, bias


In [23]:
# Train the from-scratch model on the scaled training split: at most 500 epochs,
# stopping early once the log loss is at or below 0.515.
customModel = myNN()
customModel.fit(
    X=X_train_scaled,
    y=y_train,
    epochs=500,
    loss_threshold=0.515,
)

Epoch: 0, w1: 0.9735870554090863, w2: 0.9276373851660089, bias: -0.11621401159689329, loss: 0.7533655791117272
Epoch: 25, w1: 1.1626593722282215, w2: 0.6743131347272098, bias: -0.8354207861038951, loss: 0.6229659931083777
Epoch: 50, w1: 1.5302272080895147, w2: 0.7467614156091524, bias: -1.0412437858276409, loss: 0.6082731052450916
Epoch: 75, w1: 1.8835637469266067, w2: 0.7979939232650832, bias: -1.2219301317556364, loss: 0.5954089542992953
Epoch: 100, w1: 2.2207626631778417, w2: 0.8354187456545137, bias: -1.3855501801817134, loss: 0.5840147278521037
Epoch: 125, w1: 2.541543394768044, w2: 0.8643650451290783, bias: -1.5361180415319018, loss: 0.5738645001606684
Epoch: 150, w1: 2.846163007780166, w2: 0.8880773551425616, bias: -1.67625053116322, loss: 0.5647925375477527
Epoch: 175, w1: 3.135181465527246, w2: 0.9085300968765314, bias: -1.807704707270465, loss: 0.5566655711125256
Epoch: 200, w1: 3.4093186848209664, w2: 0.9269229884226572, bias: -1.9317005092755386, loss: 0.549371253786572
Epo

In [24]:
# Predicted purchase probabilities (sigmoid outputs, not class labels) for the
# held-out rows.
customModel.predict(X_test=X_test_scaled)

9     0.806168
25    0.745901
8     0.813828
21    0.205786
0     0.373797
12    0.214041
17    0.781764
22    0.593875
11    0.445886
13    0.231264
dtype: float64