**Problem**

To predict the cost of medical insurance for individuals based on a number of different parameters such as, `age`, `sex`, `bmi`, `children`, `smoking_status` and `residential_region`.

In [119]:
# import libraties
import torch
import numpy as np
import pandas as pd
from torch import nn
from datetime import datetime
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from torch.utils.tensorboard import SummaryWriter

# check torch version
print(f"PyTorch version: {torch.__version__}")

# check GPU or CPU
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Device: {device}")

# modified date
print(f"Modified date: {datetime.now()}")

PyTorch version: 1.12.1+cu102
Device: cuda
Modified date: 2023-06-22 11:24:13.625810


#### Dataset

In [120]:
# read in the insurance dataset
raw_insurance_data = pd.read_csv("https://raw.githubusercontent.com/stedy/Machine-Learning-with-R-datasets/master/insurance.csv")

In [121]:
# top-4 rows
raw_insurance_data.head()

Unnamed: 0,age,sex,bmi,children,smoker,region,charges
0,19,female,27.9,0,yes,southwest,16884.924
1,18,male,33.77,1,no,southeast,1725.5523
2,28,male,33.0,3,no,southeast,4449.462
3,33,male,22.705,0,no,northwest,21984.47061
4,32,male,28.88,0,no,northwest,3866.8552


#### Preprocessing

In [122]:
# using get_dummies to convert categories to numbers
insurance_data = pd.get_dummies(raw_insurance_data, dtype=float)

In [123]:
# top-4 rows
insurance_data.head()

Unnamed: 0,age,bmi,children,charges,sex_female,sex_male,smoker_no,smoker_yes,region_northeast,region_northwest,region_southeast,region_southwest
0,19,27.9,0,16884.924,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0
1,18,33.77,1,1725.5523,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0
2,28,33.0,3,4449.462,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0
3,33,22.705,0,21984.47061,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0
4,32,28.88,0,3866.8552,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0


In [124]:
# get all columns except `charges` by axis 1
X = insurance_data.drop("charges", axis=1)
X.head()

Unnamed: 0,age,bmi,children,sex_female,sex_male,smoker_no,smoker_yes,region_northeast,region_northwest,region_southeast,region_southwest
0,19,27.9,0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0
1,18,33.77,1,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0
2,28,33.0,3,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0
3,33,22.705,0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0
4,32,28.88,0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0


In [125]:
# check shape of X
X.shape

(1338, 11)

In [126]:
# get only `charges` column
y = insurance_data["charges"]
y.head()

0    16884.92400
1     1725.55230
2     4449.46200
3    21984.47061
4     3866.85520
Name: charges, dtype: float64

In [127]:
# check shape of Y
y.shape

(1338,)

#### Split data

In [128]:
# split data, 80% training set, 20% testing set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

len(X_train), len(X_test), len(y_train), len(y_test)

(1070, 268, 1070, 268)

In [129]:
# convert to Torch.Tensor
X_train, X_test, y_train, y_test = torch.Tensor(np.array(X_train)), torch.Tensor(np.array(X_test)), torch.Tensor(np.array(y_train)), torch.Tensor(np.array(y_test))

In [130]:
# check shape
X_train.shape, X_test.shape, y_train.shape, y_test.shape

(torch.Size([1070, 11]),
 torch.Size([268, 11]),
 torch.Size([1070]),
 torch.Size([268]))

#### Modeling

> https://pytorch.org/docs/stable/generated/torch.nn.parameter.Parameter.html

In [131]:
class InsureanceRegressionNeuralNetworkV1(nn.Module):
    def __init__(self):
        super().__init__()

        self.weights = nn.Parameter(torch.randn(size=(1, 11)), requires_grad=True)

        self.bias = nn.Parameter(torch.randn(1), requires_grad=True)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return  torch.matmul(x, self.weights.T)  + self.bias # y = x * weights.T + bias

In [132]:
model_01 = InsureanceRegressionNeuralNetworkV1()
print(f"weight's shape: {model_01.state_dict()['weights'].shape}")
print(f"bias's shape: {model_01.state_dict()['bias'].shape}")

weight's shape: torch.Size([1, 11])
bias's shape: torch.Size([1])


In [133]:
class InsureanceRegressionNeuralNetworkV2(nn.Module):
    def __init__(self):
        super().__init__()

        self.linear = nn.Linear(in_features=11, out_features=1)
    
    def forward(self, X: torch.Tensor) -> torch.Tensor:
        return self.linear(X)

In [134]:
model_02 = InsureanceRegressionNeuralNetworkV2()
print(f"weight's shape: {model_02.state_dict()['linear.weight'].shape}")
print(f"bias's shape: {model_02.state_dict()['linear.bias'].shape}")


weight's shape: torch.Size([1, 11])
bias's shape: torch.Size([1])


**Model 01**

In [135]:
# instantiate a model
insurance_model_v1 = InsureanceRegressionNeuralNetworkV1()

# tensorboard
writer =SummaryWriter()

# copy to device
insurance_model_v1.to(device)

# copy data to device
train_data = X_train.to(device)
train_labels = y_train.to(device)
test_data = X_test.to(device)
test_labels = y_test.to(device)

# loss function
loss_fn = nn.L1Loss()

# optimizer 
opitmizer = torch.optim.SGD(insurance_model_v1.parameters(), lr=0.01)

In [136]:
# set manual seed
torch.cuda.manual_seed(42)

# num of epochs
epochs = 500


# start training
for epoch in range(epochs):

    # train mode
    insurance_model_v1.train()

    # forward computation
    y_train_preds = insurance_model_v1(train_data)

    # calculate loss
    loss = loss_fn(y_train_preds, train_labels)

    # add loss to tensorboar
    writer.add_scalar("Loss/train", loss, epoch)

    # reset gradient descent to zero
    opitmizer.zero_grad()

    # backpropagation
    loss.backward()

    # update parameters 
    opitmizer.step()

    # evaluation mode
    insurance_model_v1.eval()
    with torch.inference_mode():

        # forward computation
        y_test_preds = insurance_model_v1(test_data)

        # calculate loss
        loss_test = loss_fn(y_test_preds, test_labels)


        if epoch % 10 == 0:
            print(f"Epoch: {epoch} | train loss: {loss} | test loss: {loss_test}")

    
    # flush
    writer.flush()
    writer.close()

Epoch: 0 | train loss: 13272.708984375 | test loss: 12870.671875
Epoch: 10 | train loss: 13024.052734375 | test loss: 12623.3984375
Epoch: 20 | train loss: 12775.3974609375 | test loss: 12376.1240234375
Epoch: 30 | train loss: 12526.791015625 | test loss: 12128.95703125
Epoch: 40 | train loss: 12281.4462890625 | test loss: 11885.23046875
Epoch: 50 | train loss: 12044.142578125 | test loss: 11650.169921875
Epoch: 60 | train loss: 11821.7255859375 | test loss: 11431.7431640625
Epoch: 70 | train loss: 11616.7607421875 | test loss: 11231.75
Epoch: 80 | train loss: 11428.65625 | test loss: 11048.296875
Epoch: 90 | train loss: 11254.93359375 | test loss: 10879.9326171875
Epoch: 100 | train loss: 11093.7724609375 | test loss: 10725.41015625
Epoch: 110 | train loss: 10943.7353515625 | test loss: 10582.998046875


  return F.l1_loss(input, target, reduction=self.reduction)
  return F.l1_loss(input, target, reduction=self.reduction)


Epoch: 120 | train loss: 10803.546875 | test loss: 10451.3896484375
Epoch: 130 | train loss: 10672.1650390625 | test loss: 10329.4404296875
Epoch: 140 | train loss: 10548.9296875 | test loss: 10215.9306640625
Epoch: 150 | train loss: 10433.3701171875 | test loss: 10110.533203125
Epoch: 160 | train loss: 10325.001953125 | test loss: 10012.923828125
Epoch: 170 | train loss: 10223.3486328125 | test loss: 9922.6279296875
Epoch: 180 | train loss: 10127.958984375 | test loss: 9839.1396484375
Epoch: 190 | train loss: 10038.2998046875 | test loss: 9761.8798828125
Epoch: 200 | train loss: 9953.9765625 | test loss: 9690.318359375
Epoch: 210 | train loss: 9874.5146484375 | test loss: 9623.9921875
Epoch: 220 | train loss: 9799.609375 | test loss: 9562.4130859375
Epoch: 230 | train loss: 9728.9892578125 | test loss: 9505.3154296875
Epoch: 240 | train loss: 9662.365234375 | test loss: 9452.2646484375
Epoch: 250 | train loss: 9599.4677734375 | test loss: 9402.888671875
Epoch: 260 | train loss: 9540.1

**Model 02**

In [137]:
# instantiate a model
insurance_model_v2 = InsureanceRegressionNeuralNetworkV2()

# tensorboard
writer =SummaryWriter()

# copy to device
insurance_model_v2.to(device)

# copy data to device
train_data = X_train.to(device)
train_labels = y_train.to(device)
test_data = X_test.to(device)
test_labels = y_test.to(device)

# loss function
loss_fn = nn.L1Loss()

# optimizer 
opitmizer = torch.optim.SGD(insurance_model_v2.parameters(), lr=0.01)

In [138]:
# set manual seed
torch.cuda.manual_seed(42)

# num of epochs
epochs = 500


# start training
for epoch in range(epochs):

    # train mode
    insurance_model_v2.train()

    # forward computation
    y_train_preds = insurance_model_v2(train_data)

    # calculate loss
    loss = loss_fn(y_train_preds, train_labels)

    # add loss to tensorboar
    writer.add_scalar("Loss/train", loss, epoch)

    # reset gradient descent to zero
    opitmizer.zero_grad()

    # backpropagation
    loss.backward()

    # update parameters 
    opitmizer.step()

    # evaluation mode
    insurance_model_v2.eval()
    with torch.inference_mode():

        # forward computation
        y_test_preds = insurance_model_v2(test_data)

        # calculate loss
        loss_test = loss_fn(y_test_preds, test_labels)


        if epoch % 10 == 0:
            print(f"Epoch: {epoch} | train loss: {loss} | test loss: {loss_test}")

    
    # flush
    writer.flush()
    writer.close()

Epoch: 0 | train loss: 13351.0458984375 | test loss: 12948.7119140625
Epoch: 10 | train loss: 13102.390625 | test loss: 12701.4375
Epoch: 20 | train loss: 12853.734375 | test loss: 12454.1640625
Epoch: 30 | train loss: 12605.078125 | test loss: 12206.8916015625
Epoch: 40 | train loss: 12358.0576171875 | test loss: 11961.5087890625
Epoch: 50 | train loss: 12117.5810546875 | test loss: 11722.900390625
Epoch: 60 | train loss: 11890.03515625 | test loss: 11498.7392578125
Epoch: 70 | train loss: 11679.478515625 | test loss: 11293.0361328125
Epoch: 80 | train loss: 11486.3359375 | test loss: 11104.6630859375
Epoch: 90 | train loss: 11308.328125 | test loss: 10931.58203125
Epoch: 100 | train loss: 11143.33984375 | test loss: 10772.91796875
Epoch: 110 | train loss: 10989.947265625 | test loss: 10626.787109375
Epoch: 120 | train loss: 10846.8017578125 | test loss: 10491.9345703125
Epoch: 130 | train loss: 10712.7099609375 | test loss: 10367.08203125
Epoch: 140 | train loss: 10586.9736328125 | t

#### Evaluation

In [139]:
X_test[0], y_test[0]

(tensor([45.0000, 25.1750,  2.0000,  1.0000,  0.0000,  1.0000,  0.0000,  1.0000,
          0.0000,  0.0000,  0.0000]),
 tensor(9095.0684))

In [140]:
# evaluation
# set evaluation mode
insurance_model_v1.eval()
with torch.inference_mode():
    y_test_preds = insurance_model_v1(test_data[:1])


torch.squeeze(y_test_preds)

tensor(7056.5371, device='cuda:0')

In [141]:
# evaluation
# set evaluation mode
insurance_model_v2.eval()
with torch.inference_mode():
    y_test_preds = insurance_model_v2(test_data[0])

y_test_preds

tensor([7037.7954], device='cuda:0')

In [142]:
# check trained weights and bias
insurance_model_v1.state_dict()

OrderedDict([('weights',
              tensor([[107.3608,  87.8426,   3.3782,   2.2694,   1.8225,   2.8805,   1.2901,
                        -1.0434,  -1.5966,   0.7511,  -0.8610]], device='cuda:0')),
             ('bias', tensor([3.0005], device='cuda:0'))])

In [143]:
# check trained weights and bias
insurance_model_v2.state_dict()

OrderedDict([('linear.weight',
              tensor([[107.1352,  87.5208,   3.1182,   1.2426,   1.6705,   2.3868,   0.6630,
                         0.6825,   0.5713,   0.8077,   0.6139]], device='cuda:0')),
             ('linear.bias', tensor([2.8286], device='cuda:0'))])

In [144]:
# release models and GPU
del insurance_model_v1
del insurance_model_v2
torch.cuda.empty_cache()