In [1]:
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt 
import matplotlib as mpl 
import seaborn as sns 
import tensorflow as tf
from tensorflow import keras
import os 

In [2]:
# set locations 
data_location = "data/video12"
output_location = "output/video12"

In [3]:
# read the insurance dataset 
df = pd.read_csv(os.path.join(data_location, "insurance_data.csv"))
df.head()

Unnamed: 0,age,affordibility,bought_insurance
0,22,1,0
1,25,0,0
2,47,1,1
3,52,0,0
4,46,1,1


In [4]:
df.shape

(28, 3)

In [5]:
# get names of the columns which are features and labels 
features = ["age", "affordibility"]

# import sklearn to split train and test dataset 
from sklearn.model_selection import train_test_split    
X_train_full, X_test, y_train_full, y_test = train_test_split(df[features], df["bought_insurance"], random_state=42, test_size=0.2)

In [6]:
X_train_full.shape, X_test.shape, y_train_full.shape, y_test.shape

((22, 2), (6, 2), (22,), (6,))

In [7]:
# create validation data 
X_train, X_val, y_train, y_val = train_test_split(X_train_full, y_train_full, test_size=0.1, )

In [8]:
X_train.shape, X_val.shape, y_train.shape, y_val.shape

((19, 2), (3, 2), (19,), (3,))

In [9]:
X_train.head()

Unnamed: 0,age,affordibility
18,19,0
4,46,1
23,45,1
7,60,0
24,50,1


In [10]:
type(X_train["age"])

pandas.core.series.Series

In [11]:
type(X_train.iloc[:, 0:1])

pandas.core.frame.DataFrame

In [12]:
# # scale the input age 
# from sklearn.preprocessing import StandardScaler

# std_scaler = StandardScaler()

# # standard scaler takes entire column as input 
# X_train["age"] = std_scaler.fit_transform(X_train.iloc[:, 0:1])
# X_val["age"] = std_scaler.transform(X_val.iloc[:, 0:1])

# # using same scale perform transformation for test dataset 
# X_test["age"] = std_scaler.transform(X_test.iloc[:, 0:1])

In [13]:
# using standard scaler it is giving me the accuracy of 76% on training set and validation set is on 100% 
# let's divide 100 from the age 
X_train["age"] = X_train["age"]/100.0
X_test["age"] = X_test["age"]/100.0
X_val["age"] = X_val["age"]/100.0
X_train_full["age"] = X_train_full["age"]/100.0

In [14]:
X_train.head()

Unnamed: 0,age,affordibility
18,0.19,0
4,0.46,1
23,0.45,1
7,0.6,0
24,0.5,1


In [15]:
df.head()

Unnamed: 0,age,affordibility,bought_insurance
0,22,1,0
1,25,0,0
2,47,1,1
3,52,0,0
4,46,1,1


# Training Models

In [16]:
keras.backend.clear_session()
np.random.seed(42)
tf.random.set_seed(42)

In [17]:
# build a simple neural network - just 2 input neurons in input layer, one in output layer 
model = keras.models.Sequential()
model.add(keras.layers.Input(shape=[2]))
model.add(keras.layers.Dense(1, activation="sigmoid"))

# compile the model 
model.compile(loss="binary_crossentropy", metrics=["accuracy"], optimizer="adam")

model.summary()

In [18]:
# create checkpoints 
FILE_NAME = os.path.join(output_location, "insurance_no_hidden.keras")

# create checkpoints - one to save the best model and another one with early stopping 
early_stopping_chkp = keras.callbacks.EarlyStopping(patience=25,
                                                    restore_best_weights=True)

best_estimator_chkp = keras.callbacks.ModelCheckpoint(FILE_NAME, save_best_only=True)

In [19]:
model.fit(X_train_full, y_train_full, callbacks=[early_stopping_chkp, best_estimator_chkp], epochs=5000, validation_data=(X_val, y_val))

Epoch 1/5000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 327ms/step - accuracy: 0.3636 - loss: 0.7012 - val_accuracy: 0.3333 - val_loss: 0.6974
Epoch 2/5000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step - accuracy: 0.3636 - loss: 0.7010 - val_accuracy: 0.3333 - val_loss: 0.6968
Epoch 3/5000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step - accuracy: 0.3636 - loss: 0.7009 - val_accuracy: 0.3333 - val_loss: 0.6963
Epoch 4/5000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step - accuracy: 0.3636 - loss: 0.7007 - val_accuracy: 0.3333 - val_loss: 0.6957
Epoch 5/5000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step - accuracy: 0.3636 - loss: 0.7006 - val_accuracy: 0.3333 - val_loss: 0.6952
Epoch 6/5000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step - accuracy: 0.3636 - loss: 0.7004 - val_accuracy: 0.3333 - val_loss: 0.6946
Epoch 7/5000
[1m1/1[0m [32m━━━

<keras.src.callbacks.history.History at 0x17c99e590>

``` 
Much better than using standard scaler... may be minmax will do the trick here... but we can check that later. 


In [20]:
# get the best model 
best_model = keras.models.load_model(FILE_NAME)

In [21]:
best_model.evaluate(X_test, y_test)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 99ms/step - accuracy: 1.0000 - loss: 0.2568


[0.2568453252315521, 1.0]

In [22]:
# get the weights and biases 
weights, biases = best_model.get_weights()

In [23]:
weights

array([[4.963037 ],
       [1.3633444]], dtype=float32)

In [24]:
biases

array([-2.9094365], dtype=float32)

In [25]:
# lets do some predictions
y_pred = best_model.predict(X_test)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step


In [26]:
y_pred

array([[0.81477803],
       [0.75656676],
       [0.82215124],
       [0.16533633],
       [0.38835695],
       [0.17229961]], dtype=float32)

In [27]:
def get_predicted_value(y_pred):
    return [0 if i < 0.5 else 1 for i in y_pred]

In [28]:
y_pred_value = get_predicted_value(y_pred)

In [29]:
y_pred_value

[1, 1, 1, 0, 0, 0]

In [30]:
y_test

9     1
25    1
8     1
21    0
0     0
12    0
Name: bought_insurance, dtype: int64

In [31]:
X_test

Unnamed: 0,age,affordibility
9,0.61,1
25,0.54,1
8,0.62,1
21,0.26,0
0,0.22,1
12,0.27,0


In [32]:
y_pred

array([[0.81477803],
       [0.75656676],
       [0.82215124],
       [0.16533633],
       [0.38835695],
       [0.17229961]], dtype=float32)

``` 
How does my prediction function looks like? 


In [33]:
def sigmoid_function(z):
    return 1/(1+np.exp(-z))

In [34]:
def prediction_function(age, affordibility):
    weighted_sum = weights[0] * age + weights[1] * affordibility + biases
    return sigmoid_function(weighted_sum)

In [35]:
y_pred_calculated = prediction_function(0.61, 1)
y_pred_calculated

array([0.814778], dtype=float32)

In [36]:
y_pred_calculated = prediction_function(.54, 1)
y_pred_calculated

array([0.75656676], dtype=float32)

# Implementing Gradient Descent

In [70]:
keras.backend.clear_session()
np.random.seed(42)
tf.random.set_seed(42)

In [71]:
# first implement log loss 
def log_loss(y_target, y_predicted):
    error = 0 
    epsilon = 1e-15

    y_pred_scaled = [max(i, epsilon) for i in y_predicted]
    y_pred_scaled = [min(i, 1-epsilon) for i in y_pred_scaled]

    for yi, yj in zip(y_target, y_pred_scaled):
        error = error + ((yi * np.log(yj)) + (1-yi) * np.log(1-yj))
    
    return -1 * (error/len(y_target))

In [72]:
# implement sigmoid function for vectors - this is same as previous sigmoid function. just defined again; even if you pass vectors to sigmoid_function it will return array,
def sigmoid_vectos(Z):
    return 1/(1+np.exp(-Z))

In [73]:
sigmoid_vectos(np.array([12, 0, 1]))

array([0.99999386, 0.5       , 0.73105858])

$w^{(next cycle)} = w - \eta * 1/n \sum_{i=0}^{n} x_i (\^y_i - y_i) $<br>
$b^{(next cycle)} = b - \eta * 1/n \sum_{i=0}^{n} (\^y_i - y_i)$

In [74]:
# lets implement gradient descent 
def gradient_descent(age, affordibility, y_values, epochs, loss_threshold):
    w1=w2=1
    bias=0
    rate = 0.5
    n = len(age)

    for i in range(epochs):

        # calculate weighted sum using weights and bias 
        weighted_sum = w1*age + w2*affordibility + bias

        # predict the value 
        y_pred = sigmoid_vectos(weighted_sum)

        # calculate loss 
        loss = log_loss(y_values, y_pred)

        # calculate derivatives 
        w1d = (1/n) * np.dot(np.transpose(age), (y_pred - y_values))
        w2d = (1/n) * np.dot(np.transpose(affordibility), (y_pred - y_values))

        bias_d = np.mean(y_pred - y_values)

        w1 = w1 - rate * w1d 
        w2 = w2 - rate * w2d 
        bias = bias - rate * bias_d

        print (f"Epoch {i} | Age Weight: {w1} | Affodibility weight: {w2} | Bias: {bias} | Loss: {loss}")

        if loss <= loss_threshold:
            break


    return w1, w2, bias



In [75]:
gradient_descent(X_train_full["age"], X_train_full["affordibility"], y_train_full, 1000, 0.4925)

Epoch 0 | Age Weight: 0.9736899318847281 | Affodibility weight: 0.931388810977659 | Bias: -0.11748951666770448 | Loss: 0.7428288579142563
Epoch 1 | Age Weight: 0.9536535852311093 | Affodibility weight: 0.8740290167758512 | Bias: -0.21881533456146035 | Loss: 0.7072146449948488
Epoch 2 | Age Weight: 0.9393731039296968 | Affodibility weight: 0.8271852202997496 | Bias: -0.3053620401943441 | Loss: 0.6814881914786812
Epoch 3 | Age Weight: 0.930193258899806 | Affodibility weight: 0.7897792032048467 | Bias: -0.37884372361582785 | Loss: 0.6633428084673968
Epoch 4 | Age Weight: 0.9254091137248937 | Affodibility weight: 0.7605726653866934 | Bias: -0.441082368200183 | Loss: 0.650742850709519
Epoch 5 | Age Weight: 0.9243325693598606 | Affodibility weight: 0.738313053647322 | Bias: -0.4938425798625155 | Loss: 0.6420508089402462
Epoch 6 | Age Weight: 0.9263332963572349 | Affodibility weight: 0.7218280753843739 | Bias: -0.5387319906498417 | Loss: 0.6360356979531206
Epoch 7 | Age Weight: 0.930858097563

(np.float64(5.006298439301197),
 np.float64(1.226574207206038),
 np.float64(-2.79443192685105))

In [76]:
weights

array([[4.963037 ],
       [1.3633444]], dtype=float32)

In [77]:
biases

array([-2.9094365], dtype=float32)