**Problem**

To predict the cost of medical insurance for individuals based on a number of different parameters such as, `age`, `sex`, `bmi`, `children`, `smoking_status` and `residential_region`.

In [206]:
# import libraties
import torch
import numpy as np
import pandas as pd
from torch import nn
from datetime import datetime
from sklearn.compose import make_column_transformer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder, StandardScaler
from torch.utils.tensorboard import SummaryWriter

# check torch version
print(f"PyTorch version: {torch.__version__}")

# check GPU or CPU
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Device: {device}")

# modified date
print(f"Modified date: {datetime.now()}")

PyTorch version: 1.12.1+cu102
Device: cuda
Modified date: 2023-06-22 11:23:53.500802


#### Dataset

In [207]:
# read in the insurance dataset
raw_insurance_data = pd.read_csv("https://raw.githubusercontent.com/stedy/Machine-Learning-with-R-datasets/master/insurance.csv")

In [208]:
# top-4 rows
raw_insurance_data.head()

Unnamed: 0,age,sex,bmi,children,smoker,region,charges
0,19,female,27.9,0,yes,southwest,16884.924
1,18,male,33.77,1,no,southeast,1725.5523
2,28,male,33.0,3,no,southeast,4449.462
3,33,male,22.705,0,no,northwest,21984.47061
4,32,male,28.88,0,no,northwest,3866.8552


In [209]:
X = raw_insurance_data.drop("charges", axis=1)
y = raw_insurance_data["charges"]

X.shape, y.shape

((1338, 6), (1338,))

#### Split data

In [210]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

len(X_train), len(X_test), len(y_train), len(y_test)

(1070, 268, 1070, 268)

In [211]:
X_train.head()

Unnamed: 0,age,sex,bmi,children,smoker,region
560,46,female,19.95,2,no,northwest
1285,47,female,24.32,0,no,northeast
1142,52,female,24.86,0,no,southeast
969,39,female,34.32,5,no,southeast
486,54,female,21.47,3,no,northwest


#### Preprocessing (Normalization and standardization)

In [212]:
numerical_feats = ['age', "bmi", "children"]
categorical_feats = ["sex", "smoker", "region"]

ct = make_column_transformer(
    ((StandardScaler(), numerical_feats)),
    (OneHotEncoder(), categorical_feats))


# fit with x_train
ct.fit(X_train)

tranformed_X_train = ct.transform(X_train)
tranformed_X_test = ct.transform(X_test)

In [213]:
tranformed_X_train.shape, tranformed_X_test.shape

((1070, 11), (268, 11))

In [214]:
# convert to Torch.Tensor
tranformed_X_train, tranformed_X_test, y_train, y_test = torch.Tensor(np.array(tranformed_X_train)), torch.Tensor(np.array(tranformed_X_test)), torch.Tensor(np.array(y_train)), torch.Tensor(np.array(y_test))

#### Modeling

> https://pytorch.org/docs/stable/generated/torch.nn.parameter.Parameter.html

In [215]:
class InsureanceRegressionNeuralNetworkV1(nn.Module):
    def __init__(self):
        super().__init__()

        self.weight = nn.Parameter(torch.randn(size=(1, 11)), requires_grad=True)

        self.bias = nn.Parameter(torch.randn(1), requires_grad=True)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return  torch.matmul(x, self.weight.T)  + self.bias # y = x * weight.T + bias

In [216]:
model_01 = InsureanceRegressionNeuralNetworkV1()
print(f"weight's shape: {model_01.state_dict()['weight'].shape}")
print(f"bias's shape: {model_01.state_dict()['bias'].shape}")

weight's shape: torch.Size([1, 11])
bias's shape: torch.Size([1])


In [217]:
class InsureanceRegressionNeuralNetworkV2(nn.Module):
    def __init__(self):
        super().__init__()

        self.linear = nn.Linear(in_features=11, out_features=1)
    
    def forward(self, X: torch.Tensor) -> torch.Tensor:
        return self.linear(X)

In [218]:
model_02 = InsureanceRegressionNeuralNetworkV2()
print(f"weight's shape: {model_02.state_dict()['linear.weight'].shape}")
print(f"bias's shape: {model_02.state_dict()['linear.bias'].shape}")

weight's shape: torch.Size([1, 11])
bias's shape: torch.Size([1])


**Model 01**

In [219]:
# instantiate a model
insurance_model_v1 = InsureanceRegressionNeuralNetworkV1()

# tensorboard
writer =SummaryWriter()

# copy to device
insurance_model_v1.to(device)

# copy data to device
tranformed_X_train = tranformed_X_train.to(device)
train_labels = y_train.to(device)
tranformed_X_test = tranformed_X_test.to(device)
test_labels = y_test.to(device)

# loss function
loss_fn = nn.L1Loss()

# optimizer 
opitmizer = torch.optim.SGD(params=insurance_model_v1.parameters(), lr=0.01)

In [220]:
# set manual seed
torch.cuda.manual_seed(42)

# num of epochs
epochs = 500


# start training
for epoch in range(epochs):

    # train mode
    insurance_model_v1.train()

    # forward computation
    y_train_preds = insurance_model_v1(tranformed_X_train)

    # calculate loss
    loss = loss_fn(y_train_preds, train_labels)

    # add loss to tensorboar
    writer.add_scalar("Loss/train", loss, epoch)

    # reset gradient descent to zero
    opitmizer.zero_grad()

    # backpropagation
    loss.backward()

    # update parameters 
    opitmizer.step()

    # evaluation mode
    insurance_model_v1.eval()
    with torch.inference_mode():

        # forward computation
        y_test_preds = insurance_model_v1(tranformed_X_test)

        # calculate loss
        loss_test = loss_fn(y_test_preds, test_labels)


        if epoch % 10 == 0:
            print(f"Epoch: {epoch} | train loss: {loss} | test loss: {loss_test}")

    
    # flush
    writer.flush()
    writer.close()

Epoch: 0 | train loss: 13346.0986328125 | test loss: 12968.00390625
Epoch: 10 | train loss: 13345.85546875 | test loss: 12967.7607421875
Epoch: 20 | train loss: 13345.61328125 | test loss: 12967.5185546875
Epoch: 30 | train loss: 13345.37109375 | test loss: 12967.2763671875
Epoch: 40 | train loss: 13345.12890625 | test loss: 12967.0341796875
Epoch: 50 | train loss: 13344.88671875 | test loss: 12966.791015625
Epoch: 60 | train loss: 13344.64453125 | test loss: 12966.548828125
Epoch: 70 | train loss: 13344.4013671875 | test loss: 12966.306640625
Epoch: 80 | train loss: 13344.1591796875 | test loss: 12966.064453125
Epoch: 90 | train loss: 13343.916015625 | test loss: 12965.8212890625
Epoch: 100 | train loss: 13343.6748046875 | test loss: 12965.5791015625
Epoch: 110 | train loss: 13343.431640625 | test loss: 12965.3349609375
Epoch: 120 | train loss: 13343.189453125 | test loss: 12965.0927734375


  return F.l1_loss(input, target, reduction=self.reduction)
  return F.l1_loss(input, target, reduction=self.reduction)


Epoch: 130 | train loss: 13342.9462890625 | test loss: 12964.8505859375
Epoch: 140 | train loss: 13342.7041015625 | test loss: 12964.6083984375
Epoch: 150 | train loss: 13342.462890625 | test loss: 12964.3662109375
Epoch: 160 | train loss: 13342.220703125 | test loss: 12964.123046875
Epoch: 170 | train loss: 13341.9775390625 | test loss: 12963.87890625
Epoch: 180 | train loss: 13341.7353515625 | test loss: 12963.63671875
Epoch: 190 | train loss: 13341.4921875 | test loss: 12963.39453125
Epoch: 200 | train loss: 13341.2509765625 | test loss: 12963.15234375
Epoch: 210 | train loss: 13341.0078125 | test loss: 12962.91015625
Epoch: 220 | train loss: 13340.765625 | test loss: 12962.66796875
Epoch: 230 | train loss: 13340.5234375 | test loss: 12962.4248046875
Epoch: 240 | train loss: 13340.2802734375 | test loss: 12962.181640625
Epoch: 250 | train loss: 13340.0380859375 | test loss: 12961.9384765625
Epoch: 260 | train loss: 13339.796875 | test loss: 12961.6962890625
Epoch: 270 | train loss: 

**Model 02**

In [221]:
# instantiate a model
insurance_model_v2 = InsureanceRegressionNeuralNetworkV2()

# tensorboard
writer =SummaryWriter()

# copy to device
insurance_model_v2.to(device)

# copy data to device
tranformed_X_train = tranformed_X_train.to(device)
train_labels = y_train.to(device)
tranformed_X_test = tranformed_X_test.to(device)
test_labels = y_test.to(device)

# loss function
loss_fn = nn.L1Loss()

# optimizer 
opitmizer = torch.optim.SGD(params=insurance_model_v2.parameters(), lr=0.01)

In [222]:
# set manual seed
torch.cuda.manual_seed(42)

# num of epochs
epochs = 500


# start training
for epoch in range(epochs):

    # train mode
    insurance_model_v2.train()

    # forward computation
    y_train_preds = insurance_model_v2(tranformed_X_train)

    # calculate loss
    loss = loss_fn(y_train_preds, train_labels)

    # add loss to tensorboar
    writer.add_scalar("Loss/train", loss, epoch)

    # reset gradient descent to zero
    opitmizer.zero_grad()

    # backpropagation
    loss.backward()

    # update parameters 
    opitmizer.step()

    # evaluation mode
    insurance_model_v2.eval()
    with torch.inference_mode():

        # forward computation
        y_test_preds = insurance_model_v2(tranformed_X_test)

        # calculate loss
        loss_test = loss_fn(y_test_preds, test_labels)


        if epoch % 10 == 0:
            print(f"Epoch: {epoch} | train loss: {loss} | test loss: {loss_test}")

    
    # flush
    writer.flush()
    writer.close()

Epoch: 0 | train loss: 13345.896484375 | test loss: 12968.0732421875
Epoch: 10 | train loss: 13345.6533203125 | test loss: 12967.83203125
Epoch: 20 | train loss: 13345.4111328125 | test loss: 12967.5888671875
Epoch: 30 | train loss: 13345.16796875 | test loss: 12967.345703125
Epoch: 40 | train loss: 13344.9267578125 | test loss: 12967.103515625
Epoch: 50 | train loss: 13344.68359375 | test loss: 12966.861328125
Epoch: 60 | train loss: 13344.44140625 | test loss: 12966.6181640625
Epoch: 70 | train loss: 13344.19921875 | test loss: 12966.375
Epoch: 80 | train loss: 13343.95703125 | test loss: 12966.1328125
Epoch: 90 | train loss: 13343.7138671875 | test loss: 12965.890625
Epoch: 100 | train loss: 13343.4716796875 | test loss: 12965.6484375
Epoch: 110 | train loss: 13343.2294921875 | test loss: 12965.40625
Epoch: 120 | train loss: 13342.9873046875 | test loss: 12965.162109375
Epoch: 130 | train loss: 13342.744140625 | test loss: 12964.919921875
Epoch: 140 | train loss: 13342.5029296875 | 

#### Evaluation

In [223]:
tranformed_X_test[0], y_test[0]

(tensor([ 0.4011, -0.8915,  0.7343,  1.0000,  0.0000,  1.0000,  0.0000,  1.0000,
          0.0000,  0.0000,  0.0000], device='cuda:0'),
 tensor(9095.0684))

In [224]:
# evaluation
# set evaluation mode
insurance_model_v1.eval()
with torch.inference_mode():
    y_test_preds = insurance_model_v1(tranformed_X_test[:1])

y_test_preds

tensor([[10.3281]], device='cuda:0')

In [225]:
# evaluation
# set evaluation mode
insurance_model_v2.eval()
with torch.inference_mode():
    y_test_preds = insurance_model_v2(tranformed_X_test[:1])

y_test_preds

tensor([[12.6507]], device='cuda:0')

In [226]:
# check trained weights and bias
insurance_model_v1.state_dict()

OrderedDict([('weight',
              tensor([[-1.0546,  2.2755,  0.8380,  2.7655,  1.8501,  1.8532,  1.9441,  1.0363,
                        0.9593,  2.7931,  0.8380]], device='cuda:0')),
             ('bias', tensor([6.5094], device='cuda:0'))])

In [227]:
# check trained weights and bias
insurance_model_v2.state_dict()

OrderedDict([('linear.weight',
              tensor([[-0.1602,  0.1303, -0.1746,  2.5801,  2.6252,  3.8443,  1.0082,  1.5110,
                        1.3937,  1.3897,  1.4103]], device='cuda:0')),
             ('linear.bias', tensor([5.0240], device='cuda:0'))])

In [228]:
# release models and GPU
del insurance_model_v1
del insurance_model_v2
torch.cuda.empty_cache()