**Problem** 

Use a neural network to solve a regression problem, using the Boston housing dataset

In [18]:
# import libraties
from torchsummary import summary
import torch
import numpy as np
import pandas as pd
from torch import nn
from datetime import datetime
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, TensorDataset

# ignore FutureWarning
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

# check torch version
print(f"PyTorch version: {torch.__version__}")

# check GPU or CPU
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Device: {device}")

# created date
print(f"Created date: 2023-06-23 00:30:42.960612")

# modified date
print(f"Modified date: {datetime.now()}")

PyTorch version: 1.12.1+cu102
Device: cuda
Created date: 2023-06-23 00:30:42.960612
Modified date: 2023-06-25 01:53:13.795503


#### Dataset

> https://pytorch.org/docs/stable/data.html

In [19]:
# Read and standardize the data.
boston_housing = load_boston()
data = boston_housing.get('data')
target = boston_housing.get('target')

raw_x_train, raw_x_test, y_train, y_test = train_test_split(
    data, target, test_size=0.2, random_state=42)

# Convert to same precision as model.
raw_x_train = raw_x_train.astype(np.float32)
raw_x_test = raw_x_test.astype(np.float32)
y_train = y_train.astype(np.float32)
y_test = y_test.astype(np.float32)

In [20]:
raw_x_train.shape

(404, 13)

In [21]:
raw_x_test.shape

(102, 13)

In [22]:
y_train.shape

(404,)

In [23]:
y_train = np.reshape(y_train, (-1, 1))
y_train.shape

(404, 1)

In [24]:
y_test.shape

(102,)

In [25]:
y_test = np.reshape(y_test, (-1, 1))
y_test.shape

(102, 1)

In [26]:
raw_x_train.shape

(404, 13)

In [27]:
raw_x_test.shape

(102, 13)

In [28]:
x_mean = np.mean(raw_x_train, axis=0)
x_stddev = np.std(raw_x_train, axis=0)
x_train = (raw_x_train - x_mean) / x_stddev
x_test = (raw_x_test - x_mean) / x_stddev

In [29]:
# load training data, testing data into data loader
batch_size = 16

# create Dataset objects
train_dataset = TensorDataset(torch.from_numpy(x_train),
                         torch.from_numpy(y_train))
test_dataset = TensorDataset(torch.from_numpy(x_test),
                        torch.from_numpy(y_test))

# create DataLoader objects
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle= True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle= False)

#### Modeling

In [30]:
class BostonRegressionNeuralNetwork(nn.Module):
    def __init__(self):
        super().__init__()
        self.layers = nn.Sequential(
            nn.Linear(in_features=13, out_features=64),
            nn.ReLU(),
            nn.Linear(in_features=64, out_features=64),
            nn.ReLU(),
            nn.Linear(in_features=64, out_features=1)
        )

    def forward(self, x: torch.Tensor)-> torch.Tensor:
        return self.layers(x)

In [31]:
# set seed
torch.manual_seed(42)

# model
boston_housing_model = BostonRegressionNeuralNetwork()

# init weights
for module in boston_housing_model.modules():
    if isinstance(module, nn.Linear):
        nn.init.xavier_uniform_(module.weight)
        nn.init.constant_(module.bias, 0.0)

# copy to device
boston_housing_model.to(device)


BostonRegressionNeuralNetwork(
  (layers): Sequential(
    (0): Linear(in_features=13, out_features=64, bias=True)
    (1): ReLU()
    (2): Linear(in_features=64, out_features=64, bias=True)
    (3): ReLU()
    (4): Linear(in_features=64, out_features=1, bias=True)
  )
)

In [32]:
# num epochs
epochs = 500

# loss function
loss_fn = nn.MSELoss()

# optimizer
optimizer = torch.optim.Adam(params=boston_housing_model.parameters())

# start training
for epoch in range(epochs):

    # accummulate losses of all batches from training
    accummulated_trained_loss = 0.0
    accummulated_trained_batches = 0
    accummulated_trained_absoluate_error = 0.0

    for train_inputs, train_labels in train_loader:

        train_inputs = train_inputs.to(device)
        train_labels = train_labels.to(device)

        # train mode
        boston_housing_model.train()

        # forward pass 
        y_preds_train = boston_housing_model(train_inputs)

        # calculate loss between y_preds_train with labels
        loss = loss_fn(train_labels, y_preds_train)

        # reset learned parameters
        optimizer.zero_grad()

        # backward propagration
        loss.backward()

        # update paramenters
        optimizer.step()  

        # accummulate loss
        accummulated_trained_loss += loss.item()

        # accummulate batches
        accummulated_trained_batches += 1

        # train absolute error
        accummulated_trained_absoluate_error += (train_labels - y_preds_train.data).abs().sum().item()

    train_loss =  accummulated_trained_loss / accummulated_trained_batches
    train_mae = accummulated_trained_absoluate_error / (accummulated_trained_batches * batch_size)

    # eval mode
    boston_housing_model.eval()

    # inference only
    with torch.inference_mode():
                
        # accummulate losses of all batches from training
        accummulated_tested_loss = 0.0
        accummulated_tested_batches = 0
        accummulated_tested_absoluate_error = 0.0

        for test_inputs, test_labels in test_loader:    

            test_inputs = test_inputs.to(device)
            test_labels = test_labels.to(device)
            
            # forward pass
            y_preds_test = boston_housing_model(test_inputs)

            # calculate loss between y_preds_test with test_labels
            loss_test = loss_fn(test_labels, y_preds_test)

            # accummulate losses of batches from testing
            accummulated_tested_loss += loss_test.item()
            accummulated_tested_batches += 1
            accummulated_tested_absoluate_error += (test_labels - y_preds_test.data).abs().sum().item()
        
        test_loss = accummulated_tested_loss / accummulated_tested_batches
        test_mae = accummulated_tested_absoluate_error / (accummulated_tested_batches * batch_size)

        print(f'Epoch {epoch+1}/{epochs} loss: {train_loss:.4f} - mae: {train_mae:0.4f} - val_loss: {test_loss:.4f} - val_mae: {test_mae:0.4f}')



Epoch 1/500 loss: 571.2335 - mae: 21.3626 - val_loss: 473.9796 - val_mae: 17.8465
Epoch 2/500 loss: 466.0174 - mae: 18.8653 - val_loss: 347.9645 - val_mae: 14.7854
Epoch 3/500 loss: 294.7034 - mae: 14.2650 - val_loss: 165.1048 - val_mae: 9.4841
Epoch 4/500 loss: 117.7620 - mae: 8.4803 - val_loss: 65.3399 - val_mae: 4.9051
Epoch 5/500 loss: 57.1925 - mae: 5.7693 - val_loss: 48.4534 - val_mae: 3.8058
Epoch 6/500 loss: 35.7888 - mae: 4.4097 - val_loss: 40.3774 - val_mae: 3.2068
Epoch 7/500 loss: 28.7282 - mae: 3.7386 - val_loss: 36.8683 - val_mae: 2.9374
Epoch 8/500 loss: 23.2344 - mae: 3.4219 - val_loss: 33.9543 - val_mae: 2.7850
Epoch 9/500 loss: 21.9759 - mae: 3.2746 - val_loss: 32.3792 - val_mae: 2.6551
Epoch 10/500 loss: 20.0651 - mae: 3.1229 - val_loss: 31.0648 - val_mae: 2.6031
Epoch 11/500 loss: 18.9504 - mae: 3.0288 - val_loss: 29.7827 - val_mae: 2.5005
Epoch 12/500 loss: 17.4127 - mae: 2.9402 - val_loss: 28.4827 - val_mae: 2.4407
Epoch 13/500 loss: 17.0702 - mae: 2.8914 - val_lo

In [33]:
# evaluate 
boston_housing_model.eval() # eval mode
with torch.inference_mode(): # inference only
    y_preds_test = boston_housing_model(torch.from_numpy(x_test).to(device))


In [34]:
# check the diffence between predictions and labels
for i in range(0,4):
    print(f"predicted y: {y_preds_test[i].squeeze(): 0.2f} | label: {y_test[i].squeeze(): 0.2f}")

predicted y:  28.44 | label:  23.60
predicted y:  34.19 | label:  32.40
predicted y:  12.10 | label:  13.60
predicted y:  21.37 | label:  22.80
