In [1]:
# import libraries
import torch
from torch import nn
from torch.utils.data import TensorDataset, DataLoader
from datetime import datetime
import numpy as np
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
from pathlib import Path

# ignore warning
import warnings
warnings.simplefilter(action="ignore", category=FutureWarning)

# check PyTorch version
print(f"PyTorch version: {torch.__version__}")

# check device
device = "cuda" if torch.cuda.is_available() else "cpu"

# Created date
print(f"Created date: 2023-06-24 21:25:51.478665")

# Modified date
print(f"Modifed date: {datetime.now()}")


PyTorch version: 1.12.1+cu102
Created date: 2023-06-24 21:25:51.478665
Modifed date: 2023-06-25 02:23:16.350854


#### 1. Dataset

##### Loading dataset

In [2]:
# load boston dataset
boston_dataset = load_boston()

# features
X = boston_dataset["data"]

# labels
y = boston_dataset["target"]

In [3]:
# check shape of features
X.shape

(506, 13)

In [4]:
# check shape of labels
y.shape

(506,)

##### Splitting data

In [5]:
# split train_set, test_set
raw_X_train, raw_X_test, raw_y_train, raw_y_test = train_test_split(X, y, test_size=0.2, random_state=42)

raw_X_train = raw_X_train.astype(np.float32)
raw_X_test = raw_X_test.astype(np.float32)
raw_y_train = raw_y_train.astype(np.float32)
raw_y_test = raw_y_test.astype(np.float32)

In [6]:
# check train_set
raw_X_train.shape, raw_y_train.shape

((404, 13), (404,))

In [7]:
# check test_set
raw_X_test.shape, raw_y_test.shape

((102, 13), (102,))

In [8]:
# mean and standard deviation 
X_mean = np.mean(raw_X_train, axis=0)
X_mean, X_mean.shape

(array([3.6091244e+00, 1.1569307e+01, 1.0985050e+01, 7.1782179e-02,
        5.5648381e-01, 6.3158932e+00, 6.8556465e+01, 3.8081961e+00,
        9.3564358e+00, 4.0403217e+02, 1.8318344e+01, 3.5627826e+02,
        1.2457352e+01], dtype=float32),
 (13,))

In [9]:
# mean and standard deviation 
X_stddev = np.std(raw_X_train, axis=0)
X_stddev, X_stddev.shape

(array([8.8640671e+00, 2.3123781e+01, 6.8860784e+00, 2.5812620e-01,
        1.1755869e-01, 7.0857310e-01, 2.7960257e+01, 2.1285870e+00,
        8.5790796e+00, 1.6596674e+02, 2.2259424e+00, 9.1453209e+01,
        7.1015739e+00], dtype=float32),
 (13,))

In [10]:
# normalized train_set, test_set
normalized_X_train = (raw_X_train - X_mean) / X_stddev
normalized_X_test = (raw_X_test - X_mean) / X_stddev

# convert to float32
X_train = normalized_X_train.astype(np.float32)
X_test = normalized_X_test.astype(np.float32)

y_train = raw_y_train.reshape((-1, 1))
y_test = raw_y_test.reshape((-1, 1))

In [11]:
X_train.shape, X_test.shape

((404, 13), (102, 13))

In [12]:
y_train.shape, y_test.shape

((404, 1), (102, 1))

In [13]:
# convert to TensorDataset
train_tensor_dataset = TensorDataset(torch.from_numpy(X_train), torch.from_numpy(y_train))
test_tensor_dataset = TensorDataset(torch.from_numpy(X_test), torch.from_numpy(y_test))

In [14]:
# batch size
batch_size = 16

# train dataloader
train_dataloader = DataLoader(dataset=train_tensor_dataset, shuffle= True, batch_size= batch_size)
test_dataloader = DataLoader(dataset=test_tensor_dataset, shuffle= False, batch_size= batch_size)

#### 2. Modeling

In [15]:
class BostonHousingRegressionNeuralNetwork(nn.Module):
    def __init__(self):
        super().__init__()

        self.linears = nn.Sequential(
            nn.Linear(out_features= 64, in_features=13),
            nn.ReLU(),
            nn.Linear(out_features= 64, in_features= 64),
            nn.ReLU(),
            nn.Linear(in_features= 64, out_features= 1)
        )
    
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.linears(x)

In [16]:
# set seed
torch.manual_seed(42)

# epochs 
num_epochs = 500

# model
boston_housing_model = BostonHousingRegressionNeuralNetwork()

# loss function
loss_fn = nn.MSELoss()

# optimizer
optimizer = torch.optim.Adam(params=boston_housing_model.parameters())

# init weights
for module in boston_housing_model.modules():
    if isinstance(module, nn.Linear):
        nn.init.xavier_uniform_(module.weight)
        nn.init.constant_(module.bias, 0.0)

# copy to device
boston_housing_model.to(device)

BostonHousingRegressionNeuralNetwork(
  (linears): Sequential(
    (0): Linear(in_features=13, out_features=64, bias=True)
    (1): ReLU()
    (2): Linear(in_features=64, out_features=64, bias=True)
    (3): ReLU()
    (4): Linear(in_features=64, out_features=1, bias=True)
  )
)

In [17]:
# start training
for epoch in range(num_epochs):

    accumulated_train_loss = 0.0
    accumulated_train_batches = 0

    # training
    for input, target in train_dataloader:

        # copy data to device
        input = input.to(device)
        target = target.to(device)

        # train mode
        boston_housing_model.train()

        # forward pass
        output = boston_housing_model(input)

        # calculate loss
        loss = loss_fn(output, target)

        # accumulate losses 
        accumulated_train_loss += loss.item()

        # accumulate batches
        accumulated_train_batches += 1

        # set gradients of all optimized parameters to zero
        optimizer.zero_grad()

        # backward propagation
        loss.backward()

        # update gradients
        optimizer.step()


    train_loss = accumulated_train_loss / accumulated_train_batches

    # eval mode
    boston_housing_model.eval()

    with torch.inference_mode():

        # testing
        accumulated_test_loss = 0.0
        accumulated_test_batches = 0
        
        for input, target in test_dataloader:

            # copy data to device
            input = input.to(device)
            target = target.to(device)

            # forward pass
            output = boston_housing_model(input)

            # calculate loss
            test_loss = loss_fn(output, target)

            # accumulate losses 
            accumulated_test_loss += test_loss.item()

            # accumulate batches
            accumulated_test_batches += 1

        
        test_loss = accumulated_test_loss/ accumulated_test_batches

        print(f"Epoch: {epoch}/500 | "
            f" train_loss: {train_loss: 0.3f} | "
            f" test_loss: { test_loss: 0.3f}")

    
     

Epoch: 0/500 |  train_loss:  571.233 |  test_loss:  473.980
Epoch: 1/500 |  train_loss:  466.017 |  test_loss:  347.964
Epoch: 2/500 |  train_loss:  294.703 |  test_loss:  165.105
Epoch: 3/500 |  train_loss:  117.762 |  test_loss:  65.340
Epoch: 4/500 |  train_loss:  57.193 |  test_loss:  48.453
Epoch: 5/500 |  train_loss:  35.789 |  test_loss:  40.377
Epoch: 6/500 |  train_loss:  28.728 |  test_loss:  36.868
Epoch: 7/500 |  train_loss:  23.234 |  test_loss:  33.954
Epoch: 8/500 |  train_loss:  21.976 |  test_loss:  32.379
Epoch: 9/500 |  train_loss:  20.065 |  test_loss:  31.065
Epoch: 10/500 |  train_loss:  18.950 |  test_loss:  29.783
Epoch: 11/500 |  train_loss:  17.413 |  test_loss:  28.483
Epoch: 12/500 |  train_loss:  17.070 |  test_loss:  27.501
Epoch: 13/500 |  train_loss:  16.063 |  test_loss:  27.178
Epoch: 14/500 |  train_loss:  15.811 |  test_loss:  25.900
Epoch: 15/500 |  train_loss:  15.721 |  test_loss:  25.566
Epoch: 16/500 |  train_loss:  15.460 |  test_loss:  25.457


#### 3. Evaluation

In [18]:
# evaluation
# eval mode
boston_housing_model.eval()
with torch.inference_mode():
    predictions = boston_housing_model(torch.from_numpy(X_test).to(device))

predictions = predictions.cpu()
predictions.shape


torch.Size([102, 1])

In [19]:
# print out 
for i in range(0, 4):
    print(f" Predicted value: {predictions[i].squeeze(): 0.2f} | True value: {y_test[i].squeeze(): 0.2f}")

 Predicted value:  28.44 | True value:  23.60
 Predicted value:  34.19 | True value:  32.40
 Predicted value:  12.10 | True value:  13.60
 Predicted value:  21.37 | True value:  22.80
