In [None]:
pip install jovian --upgrade -q

In [None]:
import jovian
import torch
import torchvision
import torch.nn as nn
import pandas as pd
import matplotlib.pyplot as plt
import torch.nn.functional as F
from torchvision.datasets.utils import download_url
from torch.utils.data import DataLoader, TensorDataset, random_split

In [None]:
# Load the insurance CSV from the Kaggle input directory into a DataFrame.
dataframe = pd.read_csv('/kaggle/input/insurance/insurance.csv')
# Preview the first five rows.
dataframe.head(5)

In [None]:
num_rows = dataframe.shape[0]
num_rows

In [None]:
dataframe.info()

In [None]:
dataframe.isna().sum()

In [None]:
# Drop the 'region' column. Rebinding (instead of inplace=True) together with
# errors='ignore' keeps this cell idempotent: re-running it after the column
# is already gone no longer raises a KeyError.
dataframe = dataframe.drop(columns=['region'], errors='ignore')

In [None]:
dataframe

In [None]:
input_cols = ['age','sex','bmi','children','smoker']

In [None]:
categorical_cols = ['sex','smoker']

In [None]:
output_cols = ['charges']

In [None]:
dataframe.describe()

In [None]:
import seaborn as sns
plt.title("Distribution of values in graph")
# sns.distplot is deprecated; histplot with kde=True and stat="density" draws
# the equivalent normalized histogram with a KDE curve on top.
# The trailing ';' suppresses the Axes repr in the cell output.
sns.histplot(dataframe.charges, kde=True, stat="density");

# Prepare dataset for training
We need to convert the data from the Pandas dataframe into **PyTorch tensors** for training. To do this, the first step is to convert it to numpy arrays. If you've filled out **input_cols, categorical_cols and output_cols** correctly, the following function will perform the conversion to numpy arrays.

In [None]:
def dataframe_to_arrays(dataframe):
    """Convert a DataFrame into (inputs, targets) numpy arrays.

    Works on a deep copy, so the caller's DataFrame is left untouched.
    Every column listed in the notebook-level ``categorical_cols`` is replaced
    by its integer category codes; the columns named in ``input_cols`` and
    ``output_cols`` are then extracted as two numpy arrays.

    Returns:
        tuple: ``(inputs_array, targets_array)``.
    """
    encoded = dataframe.copy(deep=True)
    # Replace each non-numeric column by its integer category codes.
    for name in categorical_cols:
        as_category = encoded[name].astype('category')
        encoded[name] = as_category.cat.codes
    # Pull out the feature and target columns as numpy arrays.
    features = encoded[input_cols].to_numpy()
    labels = encoded[output_cols].to_numpy()
    return features, labels

In [None]:
inputs_array, targets_array = dataframe_to_arrays(dataframe)
inputs_array, targets_array

**Convert numpy arrays to PyTorch tensors (torch.float32)**

In [None]:
# Wrap the numpy arrays as tensors and cast them to torch.float32.
inputs = torch.from_numpy(inputs_array).to(torch.float32)
targets = torch.from_numpy(targets_array).to(torch.float32)

In [None]:
inputs.dtype, targets.dtype

**Next, we need to create PyTorch datasets & data loaders for training & validation. We'll start by creating a TensorDataset.**

In [None]:
dataset = TensorDataset(inputs, targets)

**Use random_split to create training & validation datasets.**

In [None]:
val_percent = 0.09
val_size = int(val_percent * num_rows)
train_size = num_rows - val_size

In [None]:
# Seed the split so the same rows end up in the training and validation sets
# on every "Restart & Run All"; otherwise validation losses are not comparable
# between runs.
train_ds, val_ds = random_split(
    dataset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(42),
)

In [None]:
batch_size = 50

In [None]:
# Training batches are reshuffled every epoch; validation order is left fixed.
train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_ds, batch_size=batch_size)

In [None]:
# Sanity check: print only the first batch produced by the training loader.
for xb, yb in train_loader:
    print("inputs: ", xb)
    print("targets: ", yb)
    # Stop after one batch; we only want to inspect a sample.
    break

# Creating a Linear Regression Model

In [None]:
input_size = len(input_cols)
output_size = len(output_cols)

In [None]:
class InsuranceModel(nn.Module):
    """Linear regression model trained and validated with L1 (mean absolute error) loss."""

    def __init__(self):
        super().__init__()
        # input_size / output_size are read from the notebook's global scope.
        self.linear = nn.Linear(input_size, output_size)

    def forward(self, xb):
        """Apply the linear layer to a batch of inputs."""
        return self.linear(xb)

    def training_step(self, batch):
        """Return the L1 loss for one (inputs, targets) batch, with gradients attached."""
        features, labels = batch
        return F.l1_loss(self(features), labels)

    def validation_step(self, batch):
        """Return ``{'val_loss': loss}`` for one batch, detached from the autograd graph."""
        features, labels = batch
        predictions = self(features)
        return {'val_loss': F.l1_loss(predictions, labels).detach()}

    def validation_epoch_end(self, outputs):
        """Average the per-batch validation losses into a single Python float."""
        stacked = torch.stack([entry['val_loss'] for entry in outputs])
        return {'val_loss': stacked.mean().item()}

    def epoch_end(self, epoch, result, num_epochs):
        """Print the validation loss on every 20th epoch and on the final epoch."""
        is_report_epoch = (epoch + 1) % 20 == 0
        is_last_epoch = epoch == num_epochs - 1
        if is_report_epoch or is_last_epoch:
            print("Epoch [{}], val_loss: {:.4f}".format(epoch+1, result['val_loss']))
            

In [None]:
model = InsuranceModel()

In [None]:
list(model.parameters())

# Train the model to fit the data

In [None]:
def evaluate(model, val_loader):
    """Run the model over every validation batch and aggregate the results.

    Args:
        model: object exposing ``validation_step(batch)`` and
            ``validation_epoch_end(outputs)``.
        val_loader: iterable of validation batches.

    Returns:
        Whatever ``model.validation_epoch_end`` returns for the collected
        per-batch outputs.
    """
    # Validation never back-propagates, so disable autograd to avoid building
    # computation graphs for every batch.
    with torch.no_grad():
        outputs = [model.validation_step(batch) for batch in val_loader]
    return model.validation_epoch_end(outputs)

def fit(epochs, lr, model, train_loader, val_loader, opt_func=torch.optim.SGD):
    """Train ``model`` for ``epochs`` epochs and return the validation history.

    Args:
        epochs: number of passes over ``train_loader``.
        lr: learning rate handed to the optimizer.
        model: object exposing ``parameters()``, ``training_step(batch)`` and
            ``epoch_end(epoch, result, num_epochs)``.
        train_loader: iterable of training batches.
        val_loader: iterable of validation batches, passed to ``evaluate``.
        opt_func: optimizer factory called as ``opt_func(params, lr)``.

    Returns:
        list: one ``evaluate`` result per epoch, in order.
    """
    optimizer = opt_func(model.parameters(), lr)
    history = []
    for epoch in range(epochs):
        # Training phase: one optimizer update per batch.
        for train_batch in train_loader:
            batch_loss = model.training_step(train_batch)
            batch_loss.backward()
            optimizer.step()
            # Clear the gradients so they do not accumulate into the next batch.
            optimizer.zero_grad()

        # Validation phase: measure, report, and record this epoch's result.
        val_metrics = evaluate(model, val_loader)
        model.epoch_end(epoch, val_metrics, epochs)
        history.append(val_metrics)
    return history

In [None]:
# Evaluate on the validation set before calling fit(), to get a reference
# loss to compare against the losses printed during training.
result = evaluate(model, val_loader)
print(result)

In [None]:
epochs = 50
lr = 5e-2
history1 = fit(epochs, lr, model, train_loader, val_loader)

In [None]:
epochs = 1000
lr = 3e-1
# Extend the running history instead of overwriting it, so the validation
# losses recorded by the earlier training round are not lost.
history1 += fit(epochs, lr, model, train_loader, val_loader)

In [None]:
epochs = 1000
lr = 2e-1
# Extend the running history instead of overwriting it, so the validation
# losses recorded by the earlier training rounds are not lost.
history1 += fit(epochs, lr, model, train_loader, val_loader)

In [None]:
epochs = 2000
lr = 1e-5
# Extend the running history instead of overwriting it, so the validation
# losses recorded by the earlier training rounds are not lost.
history1 += fit(epochs, lr, model, train_loader, val_loader)

In [None]:
epochs = 2000
lr = 0.1
# Extend the running history instead of overwriting it, so the validation
# losses recorded by the earlier training rounds are not lost.
history1 += fit(epochs, lr, model, train_loader, val_loader)

In [None]:
# Take the final validation loss from the training history rather than
# hand-copying a number, so the logged metric always matches the run that
# actually produced this model.
val_loss = history1[-1]['val_loss']

In [None]:
jovian.log_metrics(val_loss=val_loss)

# Make predictions using trained model

In [None]:
def predict_single(input, target, model):
    """Print the model's prediction for one sample next to its input and target.

    Args:
        input: a single sample; a leading batch dimension is added before it
            is passed to ``model``.
        target: the expected value for that sample (only printed).
        model: callable that maps a batch of inputs to a batch of predictions.
    """
    # The model works on batches, so wrap the sample in a batch of size one
    # and take the first (only) row of the result.
    batch_of_one = input.unsqueeze(0)
    prediction = model(batch_of_one)[0].detach()
    for label, value in (("Input:", input), ("Target:", target), ("Prediction:", prediction)):
        print(label, value)

In [None]:
# Avoid binding the name `input` at notebook level: it would shadow the
# builtin input() for the rest of the kernel session.
sample_input, sample_target = val_ds[0]
predict_single(sample_input, sample_target, model)

In [None]:
# Avoid binding the name `input` at notebook level: it would shadow the
# builtin input() for the rest of the kernel session.
sample_input, sample_target = val_ds[10]
predict_single(sample_input, sample_target, model)

In [None]:
# Avoid binding the name `input` at notebook level: it would shadow the
# builtin input() for the rest of the kernel session.
sample_input, sample_target = val_ds[23]
predict_single(sample_input, sample_target, model)