<a href="https://colab.research.google.com/github/mertsaru/Pytorch-Linear-regression-example/blob/main/insuarence_prediction_with_pythorch_linear_regression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
import torchvision
import torch.nn as nn
import pandas as pd
import matplotlib.pyplot as plt
import torch.nn.functional as F
from torchvision.datasets.utils import download_url
from torch.utils.data import DataLoader, TensorDataset, random_split

In [2]:
# Remote location of the insurance dataset and the local name it is saved under.
DATASET_URL = "https://gist.github.com/BirajCoder/5f068dfe759c1ea6bdfce9535acdb72d/raw/c84d84e3c80f93be67f6c069cbdc0195ec36acbd/insurance.csv"
DATA_FILENAME = "insurance.csv"
# Name the target file explicitly so the saved file is always DATA_FILENAME,
# instead of relying on the last path component of the URL matching it.
download_url(DATASET_URL, '.', filename=DATA_FILENAME)

Using downloaded and verified file: ./insurance.csv


In [3]:
# Read the CSV into a DataFrame, print its first five rows,
# and show (rows, columns) as the cell result.
pdf = pd.read_csv(filepath_or_buffer=DATA_FILENAME)
print(pdf.head(n=5))
pdf.shape

   age     sex     bmi  children smoker     region      charges
0   19  female  27.900         0    yes  southwest  16884.92400
1   18    male  33.770         1     no  southeast   1725.55230
2   28    male  33.000         3     no  southeast   4449.46200
3   33    male  22.705         0     no  northwest  21984.47061
4   32    male  28.880         0     no  northwest   3866.85520


(1338, 7)

In [4]:
# Separate the target column ('charges') from the remaining feature columns
Y = pdf[['charges']]
X = pdf.drop(columns='charges')

# Column counts of the features and of the target
input_size = X.shape[1]
output_size = Y.shape[1]
X.shape, Y.shape

((1338, 6), (1338, 1))

In [5]:
# Dataset-specific setting: columns treated as categorical
categorical_cols = [
  'sex',
  'smoker',
  'region',
]

# Model/training setting: batch size
batch_size = 5

In [6]:
# Replace every categorical column with its integer category codes
for column_name in categorical_cols:
  as_category = X[column_name].astype('category')
  X[column_name] = as_category.cat.codes

In [7]:
# Swap both DataFrames for their underlying ndarrays
X, Y = X.values, Y.values

In [8]:
# Pair features and targets as float32 tensors in a single dataset.
tdf = TensorDataset(
  torch.tensor(X, dtype=torch.float32),
  torch.tensor(Y, dtype=torch.float32),
)

# Train and test sizes are fixed; validation takes the remainder so the three
# sizes always add up to len(tdf) (1000 / 238 / 100 for 1338 rows).
train_size, test_size = 1000, 100
val_size = len(tdf) - train_size - test_size

# A freshly seeded generator makes the split identical every time this cell
# runs, so the same rows end up in train, validation and test on each run.
split_generator = torch.Generator().manual_seed(42)
train_set, val_set, test_set = random_split(
  tdf, [train_size, val_size, test_size], generator=split_generator
)

# Wrap train and validation splits in loaders; only the training loader
# shuffles. test_set is left as the indexable subset returned by random_split.
train_set = DataLoader(train_set, batch_size, shuffle=True)
val_set = DataLoader(val_set, batch_size)

In [9]:
# Writing Model
class InsuranceLR(nn.Module):
  """Linear regression model bundled with its training/validation helpers.

  The single linear layer is sized from the module-level `input_size` and
  `output_size`, which must be defined before the class is instantiated.
  """

  def __init__(self):
    super().__init__()
    self.linear = nn.Linear(input_size,output_size)

  def forward(self,x):
    """Return the linear layer's output for the input batch `x`."""
    return self.linear(x)

  def training_step(self,batch):
    """Return the MSE loss of one `(x, y)` batch, still attached to the graph."""
    x, y = batch
    ypred = self(x)
    # F.mse_loss is documented as (input, target): prediction goes first.
    return F.mse_loss(ypred, y)

  def validation_step(self,batch):
    """Return the detached MSE loss of one `(x, y)` batch and its sample count.

    The returned dict holds 'val_loss' (detached tensor) and 'batch_size'
    (`len(x)`), which `validation_epoch` uses as the batch's weight.
    """
    x, y = batch
    ypred = self(x)
    loss = F.mse_loss(ypred, y)
    return {'val_loss': loss.detach(), 'batch_size': len(x)}

  def validation_epoch(self,result):
    """Reduce the per-batch results of one epoch to a single loss value.

    Each batch mean is weighted by its 'batch_size', so a shorter final batch
    does not count as much as a full one. Entries without 'batch_size' get
    weight 1, which reduces to a plain mean of the batch losses.

    Returns a dict {'val_loss': float}.
    """
    losses = torch.stack([entry['val_loss'] for entry in result])
    weights = torch.tensor(
      [entry.get('batch_size', 1) for entry in result],
      dtype=losses.dtype,
      device=losses.device,
    )
    epoch_loss = (losses * weights).sum() / weights.sum()
    return {'val_loss': epoch_loss.item()}

  def epoch_log(self,num_epochs,epoch,val_result):
    """Print the validation loss on every 10th epoch and on the last epoch.

    `epoch` is zero-based; the printed epoch number is `epoch + 1`.
    """
    if ((epoch+1) % 10 == 0) or (epoch+1 == num_epochs):
      print('Epoch : [{}]  loss: {:.4f}'.format(epoch+1 ,val_result['val_loss']))

In [10]:
# Evaluation and fit function
def evaluate(model,val_set):
  """Run `model` over every batch of `val_set` and return the epoch result.

  Each batch goes through `model.validation_step`; the collected outputs are
  reduced by `model.validation_epoch`, whose return value is passed through.
  """
  # No gradients are needed for validation, so autograd is switched off to
  # avoid building a computation graph for every batch.
  with torch.no_grad():
    batch_losses = [model.validation_step(batch) for batch in val_set]
  return model.validation_epoch(batch_losses)

def fit(model,train_set, val_set, epochs, lr, optimizer_func = torch.optim.SGD):
  """Train `model` for `epochs` epochs and return the validation history.

  Args:
    model: object providing `parameters`, `training_step` and `epoch_log`.
    train_set: iterable of training batches.
    val_set: iterable of validation batches, handed to `evaluate`.
    epochs: number of passes over `train_set`.
    lr: learning rate, passed as the second argument to `optimizer_func`.
    optimizer_func: optimizer factory called as `optimizer_func(params, lr)`.

  Returns:
    A list with one `evaluate` result per epoch, in epoch order.
  """
  optimizer = optimizer_func(model.parameters(),lr)
  history = []
  for epoch_idx in range(epochs):
    # Training pass: one optimizer update per batch
    for train_batch in train_set:
      batch_loss = model.training_step(train_batch)
      batch_loss.backward()
      optimizer.step()
      optimizer.zero_grad() # reset gradients for the next batch

    # Validation pass: evaluate, log, and record the result
    val_result = evaluate(model,val_set)
    model.epoch_log(epochs, epoch_idx, val_result)
    history.append(val_result)
  return history

In [11]:
# Hyperparameters for the training run
epochs = 200
lr = 1e-4

# Fresh model instance
model = InsuranceLR()

In [12]:
# Train the model and keep the per-epoch validation results
history = fit(model, train_set, val_set, epochs=epochs, lr=lr)

Epoch : [10]  loss: 148336240.0000
Epoch : [20]  loss: 125539704.0000
Epoch : [30]  loss: 116204544.0000
Epoch : [40]  loss: 108080448.0000
Epoch : [50]  loss: 97621152.0000
Epoch : [60]  loss: 90344472.0000
Epoch : [70]  loss: 91875800.0000
Epoch : [80]  loss: 84019256.0000
Epoch : [90]  loss: 76716872.0000
Epoch : [100]  loss: 73314296.0000
Epoch : [110]  loss: 67657496.0000
Epoch : [120]  loss: 74749896.0000
Epoch : [130]  loss: 63328060.0000
Epoch : [140]  loss: 59375924.0000
Epoch : [150]  loss: 59674516.0000
Epoch : [160]  loss: 56127740.0000
Epoch : [170]  loss: 54577188.0000
Epoch : [180]  loss: 52621916.0000
Epoch : [190]  loss: 57383696.0000
Epoch : [200]  loss: 49530432.0000


In [13]:
# Prediction function
def prediction(data, model):
  """Print the model's prediction for one `(input, target)` sample.

  `data` is unpacked into the input tensor and its target. The input gets a
  leading batch dimension before it is passed to `model`, and the first row
  of the model output is printed next to the input and the target.
  Nothing is returned.
  """
  x, target = data
  x = x.unsqueeze(0)
  # Inference only: disable autograd for the forward pass instead of
  # detaching the result afterwards.
  with torch.no_grad():
    predicted = model(x)[0]

  print('Input :', x)
  print('Prediction :', predicted)
  print('Target :', target)

In [14]:
# Take the first sample of the test split and print prediction vs. target
test_data = test_set[0]
prediction(data=test_data, model=model)

Input : tensor([[25.0000,  1.0000, 23.9000,  5.0000,  0.0000,  3.0000]])
Prediction : tensor([6361.6226])
Target : tensor([5080.0962])
