## Neural networks modeling

In [1]:
import time

import pandas as pd
import numpy as np
from pathlib import Path
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from sklearn.preprocessing import MinMaxScaler
from utils.transform_scale import transform_v2_scale_df, TARGET_VARIABLE_COLUMN

import torch
import torch.nn as nn
import torch.optim as optim

DATA_PATH = Path("data")

In [2]:
# Load augmented data
train_augmented = pd.read_csv(DATA_PATH / "train-augmented.csv", parse_dates=["month"])
test_augmented = pd.read_csv(DATA_PATH / "test-augmented.csv", parse_dates=["month"])

# train_augmented.head()

In [3]:
# See linear.ipynb for details - code copied from there
# Split the train data into train and test
X = train_augmented.drop(columns=TARGET_VARIABLE_COLUMN)
y = train_augmented[TARGET_VARIABLE_COLUMN]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Transform and scale the data
# See utils/transform_scale.py for details
X_train = transform_v2_scale_df(X_train)
X_test = transform_v2_scale_df(X_test)
# X_train.head()

In [4]:
# Check if the GPU is available
# torch.cuda.is_available()
# print(torch.backends.mps.is_available())
# print(torch.backends.mps.is_built())
device = torch.device('mps' if torch.backends.mps.is_built() else 'cpu')
device

device(type='mps')

In [5]:
# Construct a basic fully connected feed forward neural network
class ANN(nn.Module):
    def __init__(self, input_size):
        super(ANN, self).__init__()
        self.input_size = input_size
        self.layers = nn.Sequential(
            nn.Linear(input_size, 512),
            nn.ReLU(),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 1),
        )

    def forward(self, x):
        return self.layers(x)


In [21]:
# Convert the data to tensors
X_train_tensor = torch.tensor(X_train.values, dtype=torch.float32).to(device)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32).to(device)
X_test_tensor = torch.tensor(X_test.values, dtype=torch.float32).to(device)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32).to(device)

dataset = torch.utils.data.TensorDataset(X_train_tensor, y_train_tensor)
train_loader = torch.utils.data.DataLoader(dataset, batch_size=256, shuffle=True)

# Define the model
model = ANN(X_train_tensor.shape[1]).to(device)

# Define the loss function and optimizer
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.02)

epochs = 50
bs = 200
lr = 0.05 # initial learning rate


In [23]:
# Train the model
for epoch in range(epochs):
    running_loss = 0.0
    if epoch%10==0 and epoch>10: 
        lr = lr / 1.5   
    optimizer = optim.Adam(model.parameters() , lr=lr)
    for X, y in train_loader:
        X = X.requires_grad_()
        y = y
        optimizer.zero_grad()
        output = model(X)
        output = output.squeeze()
        loss = criterion(output, y)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    epoch_loss = running_loss / len(train_loader)
    print(f"Epoch {epoch + 1} / {epochs}, Training loss: {epoch_loss:.4f}")
# 326849650.7733
# 569775956.7052


Epoch 1 / 50, Training loss: 3635502490.4059
Epoch 2 / 50, Training loss: 1016734081.3511
Epoch 3 / 50, Training loss: 1219757847.1585
Epoch 4 / 50, Training loss: 1075490318.0563
Epoch 5 / 50, Training loss: 658729980.9659
Epoch 6 / 50, Training loss: 762289569.7067
Epoch 7 / 50, Training loss: 569775956.7052
Epoch 8 / 50, Training loss: 535397565.0844
Epoch 9 / 50, Training loss: 582343987.3185
Epoch 10 / 50, Training loss: 503435924.6815
Epoch 11 / 50, Training loss: 477362991.4785
Epoch 12 / 50, Training loss: 461771549.4163
Epoch 13 / 50, Training loss: 463798678.1156
Epoch 14 / 50, Training loss: 439289370.0030
Epoch 15 / 50, Training loss: 448217072.7111
Epoch 16 / 50, Training loss: 429948129.8252
Epoch 17 / 50, Training loss: 436386614.0207
Epoch 18 / 50, Training loss: 440273813.1911
Epoch 19 / 50, Training loss: 416232291.2356
Epoch 20 / 50, Training loss: 422033406.8741
Epoch 21 / 50, Training loss: 375320705.0311
Epoch 22 / 50, Training loss: 367872505.6830
Epoch 23 / 50, 

In [28]:
# Evaluate the model
with torch.no_grad():
    y_pred = model(X_test_tensor)
    test_loss = criterion(y_test_tensor, y_pred.squeeze())
    y_pred = y_pred.cpu().numpy()
    print(f"Test loss: {test_loss.item()}")
    print(f"Mean squared error: {mean_squared_error(y_test, y_pred)}")
    print(f"Mean absolute error: {mean_absolute_error(y_test, y_pred)}")
    print(f"R2 score: {r2_score(y_test, y_pred)}")

Test loss: 329983072.0
Mean squared error: 329983107.599597
Mean absolute error: 13131.349132678322
R2 score: 0.980300166730642
