In [1]:
import os

import numpy as np
import pandas as pd

train_data = pd.read_csv("./Train_Data.csv")
test_data = pd.read_csv("./Test_Data.csv")

print(train_data, test_data)


            age     sex        bmi smoker     region  children       charges
0     21.000000    male  25.745000     no  northeast         2   3279.868550
1     36.976978  female  25.744165    yes  southeast         3  21454.494239
2     18.000000    male  30.030000     no  southeast         1   1720.353700
3     37.000000    male  30.676891     no  northeast         3   6801.437542
4     58.000000    male  32.010000     no  southeast         1  11946.625900
...         ...     ...        ...    ...        ...       ...           ...
3625  48.820767  female  41.426984     no  northwest         4  10987.324964
3626  38.661977  female  26.202557     no  southeast         2  11735.844352
3627  56.000000    male  40.300000     no  southwest         0  10602.385000
3628  48.061207  female  34.930624     no  southeast         1   8976.140452
3629  37.598865  female  25.219233     no  northeast         3   7027.698968

[3630 rows x 7 columns]            age     sex        bmi smoker     region

In [2]:
for dataset in [train_data, test_data]:
    dataset["is_male"] = [1 if i == "male" else 0 for i in dataset["sex"]]
    dataset["is_female"] = [1 if i == "female" else 0 for i in dataset["sex"]]
    dataset["smoker"] = [1 if i == "yes" else 0 for i in dataset["smoker"]]

    del dataset["region"]
    del dataset["sex"]

test_data

Unnamed: 0,age,bmi,smoker,children,is_male,is_female
0,40.000000,29.900000,0,2,1,0
1,47.000000,32.300000,0,1,1,0
2,54.000000,28.880000,0,2,0,1
3,37.000000,30.568094,0,3,1,0
4,59.130049,33.132854,1,4,1,0
...,...,...,...,...,...,...
487,51.000000,27.740000,0,1,1,0
488,33.000000,42.400000,0,5,1,0
489,47.769999,29.064615,0,4,1,0
490,41.530738,24.260852,0,5,0,1


In [3]:
train_data

Unnamed: 0,age,bmi,smoker,children,charges,is_male,is_female
0,21.000000,25.745000,0,2,3279.868550,1,0
1,36.976978,25.744165,1,3,21454.494239,0,1
2,18.000000,30.030000,0,1,1720.353700,1,0
3,37.000000,30.676891,0,3,6801.437542,1,0
4,58.000000,32.010000,0,1,11946.625900,1,0
...,...,...,...,...,...,...,...
3625,48.820767,41.426984,0,4,10987.324964,0,1
3626,38.661977,26.202557,0,2,11735.844352,0,1
3627,56.000000,40.300000,0,0,10602.385000,1,0
3628,48.061207,34.930624,0,1,8976.140452,0,1


In [4]:
import torch
from torch.utils.data import Dataset

class InsuranceDataset(Dataset):
    def __init__(self, df: pd.DataFrame, transform=None):
        """
        Args:
            df (DataFrame): Prepared dataframe.
            transform (callable, optional): Optional transform to be applied
                on a sample.
        """
        self.data = df
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()
        
        # Select features (X) and target (y)
        X = self.data.loc[idx, ['age', 'bmi', 'smoker', 'children', 'is_male', 'is_female']].values.astype(float)
        try:
            y = self.data.loc[idx, 'charges'].astype(float)
        except:
            y = 0
        # Convert to torch tensors
        X = torch.tensor(X, dtype=torch.float32)
        y = torch.tensor(y, dtype=torch.float32)
        
        sample = {'X': X, 'y': np.array([y])}

        if self.transform:
            sample = self.transform(sample)

        return sample


In [5]:
train_dataset = InsuranceDataset(train_data)
test_dataset = InsuranceDataset(test_data)

train_dataset[0]

{'X': tensor([21.0000, 25.7450,  0.0000,  2.0000,  1.0000,  0.0000]),
 'y': array([3279.8687], dtype=float32)}

In [6]:
from torch.utils.data import DataLoader
batch_size = 64

# Create data loaders.
train_dataloader = DataLoader(train_dataset, batch_size=batch_size)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size)

for i in train_dataloader:
    X, y = i.values()
    print(f"Shape of X [N, C, H, W]: {X.shape}")
    print(f"Shape of y: {y.shape} {y.dtype}")
    break

Shape of X [N, C, H, W]: torch.Size([64, 6])
Shape of y: torch.Size([64, 1]) torch.float32


In [7]:
from torch import nn

#Get cpu, gpu or mps device for training.
device = (
    "cuda"
    if torch.cuda.is_available()
    else "mps"
    if torch.backends.mps.is_available()
    else "cpu"
)
print(f"Using {device} device")

# Define model
class NeuralNetwork(nn.Module):
    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(6, 36),
            nn.ReLU(),
            nn.Linear(36, 12),
            nn.ReLU(),
            nn.Linear(12, 1)
        )

    def forward(self, x):
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits

model = NeuralNetwork().to(device)
print(model)

Using cuda device
NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=6, out_features=36, bias=True)
    (1): ReLU()
    (2): Linear(in_features=36, out_features=12, bias=True)
    (3): ReLU()
    (4): Linear(in_features=12, out_features=1, bias=True)
  )
)


In [8]:
loss_fn = nn.L1Loss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)

def train(dataloader, model, loss_fn, optimizer):
    size = len(train_data)
    model.train()
    for batch, i in enumerate(dataloader):
        X, y = i.values()
        X, y = X.to(device), y.to(device)
        # Compute prediction error
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backpropagation
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        if batch % 100 == 0:
            loss, current = loss.item(), (batch + 1) * len(X)
            print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")

def test(dataloader, model, loss_fn):
    size = len(train_data)
    for batch, i in enumerate(dataloader):
        X, _ = i.values()
        X = X.to(device)
        # Compute prediction error
        pred = model(X)
        print(X, pred)
        

epochs = 5
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train(train_dataloader, model, loss_fn, optimizer)
print("Done!")
test(test_dataloader, model, loss_fn)

Epoch 1
-------------------------------
loss: 13947.932617  [   64/ 3630]
Epoch 2
-------------------------------
loss: 7682.756348  [   64/ 3630]
Epoch 3
-------------------------------
loss: 7743.453125  [   64/ 3630]
Epoch 4
-------------------------------
loss: 7871.386719  [   64/ 3630]
Epoch 5
-------------------------------
loss: 7893.858398  [   64/ 3630]
Done!
tensor([[40.0000, 29.9000,  0.0000,  2.0000,  1.0000,  0.0000],
        [47.0000, 32.3000,  0.0000,  1.0000,  1.0000,  0.0000],
        [54.0000, 28.8800,  0.0000,  2.0000,  0.0000,  1.0000],
        [37.0000, 30.5681,  0.0000,  3.0000,  1.0000,  0.0000],
        [59.1300, 33.1329,  1.0000,  4.0000,  1.0000,  0.0000],
        [43.0000, 34.4000,  0.0000,  3.0000,  0.0000,  1.0000],
        [49.7731, 25.8525,  0.0000,  4.0000,  0.0000,  1.0000],
        [31.2070, 29.9389,  0.0000,  5.0000,  1.0000,  0.0000],
        [19.0000, 31.2998,  0.0000,  1.0000,  0.0000,  1.0000],
        [59.4579, 29.6322,  0.0000,  3.0000,  0.0000

In [9]:
x = torch.tensor([[21.0000, 25.7450,  0.0000,  2.0000,  1.0000,  0.0000]])
print(x)
x = x.to(device)
pred = model(x)
pred

tensor([[21.0000, 25.7450,  0.0000,  2.0000,  1.0000,  0.0000]])


tensor([[5452.9209]], device='cuda:0', grad_fn=<AddmmBackward0>)

In [10]:
torch.save(model, "./model.pth")

In [11]:
pred.item()

5452.9208984375