In [2]:
import pandas as pd
import os

from model import Transformer # this is the transformer.py file
import torch
from torch import nn
import numpy as np

In [3]:
# Data preprocessing

from data_processing import process_file

data_path = "data/"

all_data = pd.DataFrame()

for file in os.listdir(data_path):
    if file.endswith(".txt"):
        file_path = os.path.join(data_path, file)
        loc_df = process_file(file_path)

    # print(file, len(loc_df))
    if len(all_data) == 0:
        all_data = loc_df
    else:
        all_data = pd.merge(all_data, loc_df, on = ["year", "month"])

all_data.to_csv("data/processed_data.csv")

In [4]:
# Segmentation

from data_processing import create_segments

input_years = 10  # Length of input period in years
target_years = 1   # Length of target period in years

input_segments, target_segments = create_segments(all_data, input_years, target_years, overlapping=False)

print("# of segments:", len(input_segments))
print("Length of each input vector (months):", len(input_segments[0]))
print("Length of each target vector (months):", len(target_segments[0]))

# Convert to tensors
feature_columns = all_data.columns.difference(['year', 'month'])
input_tensors = torch.tensor([df[feature_columns].values for df in input_segments], dtype=torch.float32)
target_tensors = torch.tensor([df[feature_columns].values for df in target_segments], dtype=torch.float32)

# Print tensor lengths
print("\nTensor Lengths:")
print(f"There are {len(input_tensors)} input tensors and {len(target_tensors)} target tensors")
print(f"Input tensors length: {len(input_tensors[0])}")
print(f"Target tensors length: {len(target_tensors[0])}")


# of segments: 12
Length of each input vector (months): 120
Length of each target vector (months): 12

Tensor Lengths:
There are 12 input tensors and 12 target tensors
Input tensors length: 120
Target tensors length: 12


  input_tensors = torch.tensor([df[feature_columns].values for df in input_segments], dtype=torch.float32)


In [5]:
from dataset import TimeSeriesDataset
from torch.utils.data import DataLoader, random_split
batch_size = 2

ts_dataset = TimeSeriesDataset(input_tensors, target_tensors)
train_size = int(0.6 * len(ts_dataset))  # e.g., 70% of data for training
val_size = int(0.2 * len(ts_dataset))  # 20% of data for validation 
test_size = len(ts_dataset) - train_size - val_size  # Remaining for testing

# Randomly split the dataset into training and validation datasets
train_dataset, val_dataset, test_dataset = random_split(ts_dataset, [train_size, val_size, test_size])

# Create DataLoaders for both training and validation sets
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)  # Usually, no need to shuffle the validation set
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=True)

print(f"Train Loader Length: {len(train_loader)}")
print(f"Validation Loader Length: {len(val_loader)}")
print(f"Test Loader Length: {len(test_loader)}")

input_batch, target_batch = next(iter(train_loader))

# Print the shapes
print(f'Input batch shape: {input_batch.shape}')  # e.g., torch.Size([8, 120, 15])
print(f'Target batch shape: {target_batch.shape}')  # e.g., torch.Size([8, 12, 15])

Train Loader Length: 4
Validation Loader Length: 1
Test Loader Length: 2
Input batch shape: torch.Size([2, 120, 15])
Target batch shape: torch.Size([2, 12, 15])


#### Model

In [30]:
# Hyperparameters

d_model = 15
ffn_hidden = 2048
num_heads = 5
drop_prob = 0.1
num_layers = 1

transformer = Transformer(d_model, ffn_hidden, num_heads, drop_prob, num_layers)
ar_transformer = Transformer(d_model, ffn_hidden, num_heads, drop_prob, num_layers)

criterion = nn.MSELoss()
optimizer = torch.optim.AdamW(params=transformer.parameters(), lr = 1e-3)
ar_optimizer = torch.optim.AdamW(params = ar_transformer.parameters(), lr = 1e-3)

In [31]:
# Multistep Forward Pass

def MultiStepForwardPass(model, input_batch, target_batch):
    predictions = None  # Initialize predictions

    for i in range(len(target_batch[0])):
        if i == 0:
            single_step_input = input_batch
        else:
            single_step_input = torch.cat((input_batch[:, i:, :], predictions), dim=1)

        # Extract the target for the current step
        single_step_target = target_batch[:, i:i+1, :]

        # Forward pass
        single_step_logits = model(single_step_input, single_step_target)

        # Update predictions
        if i == 0:
            predictions = single_step_logits
        else:
            predictions = torch.cat((predictions, single_step_logits), dim=1)

    return predictions

In [32]:
# Custom Training Loop

def train_model(model, epochs, optimizer, criterion, save_freq = 20, auto_reg = False):
    print("Autoregressive Model Training") if auto_reg else print("Single-Shot Model Training")
    for epoch in range(1, epochs+1):
        
        model.train()
        train_loss = 0

        for input_batch, target_batch in train_loader:
            optimizer.zero_grad()

            if auto_reg:
                logits_batch = MultiStepForwardPass(model, input_batch, target_batch)
            else:
                logits_batch = model(input_batch, target_batch)
            
            loss = criterion(logits_batch, target_batch)
            loss.backward()

            optimizer.step()

            train_loss += loss.item()
            
        # Calculate average loss for the epoch
        train_loss /= len(train_loader)

        model.eval()

        val_loss = 0
        with torch.inference_mode():
            for input_batch, target_batch in val_loader:

                # Forward pass
                if auto_reg:
                    logits_batch = MultiStepForwardPass(model, input_batch, target_batch)
                else:
                    logits_batch = model(input_batch, target_batch)

                # Calculate loss
                loss = criterion(logits_batch, target_batch)

                val_loss += loss.item()
            
        val_loss /= len(val_loader)
        
        # Print epoch stats
        if(epoch % save_freq == 0):
            print(f'Epoch {epoch}/{epochs} | Train Loss: {train_loss:.4f} | Validation Loss: {val_loss:.4f}')
    
    if auto_reg:
        torch.save(model.state_dict(), f'saved_models/ar_transformer.pt')
    else:
        torch.save(model.state_dict(), f'saved_models/transformer.pt')

In [33]:
# Custom Testing Loop

def test_model(model, test_loader, criterion, auto_reg = False):
    model.eval()
    test_loss = 0
    all_inputs = []
    all_predictions = []
    all_targets = []

    with torch.no_grad():
        for input_batch, target_batch in test_loader:

            if auto_reg:
                logits_batch = MultiStepForwardPass(model, input_batch, target_batch)
            else:
                logits_batch = model(input_batch, target_batch)
            loss = criterion(logits_batch, target_batch)
            test_loss += loss.item()

            all_inputs.extend(input_batch.cpu().numpy())
            all_predictions.extend(logits_batch.cpu().numpy())
            all_targets.extend(target_batch.cpu().numpy())

    test_loss /= len(test_loader)

    return test_loss, all_inputs, all_predictions, all_targets


In [34]:
# Train the models
train_model(transformer, 100, optimizer, criterion, auto_reg=False)
train_model(ar_transformer, 100, ar_optimizer, criterion, auto_reg=True)

Single-Shot Model Training
Epoch 20/100 | Train Loss: 0.0816 | Validation Loss: 0.0826
Epoch 40/100 | Train Loss: 0.0416 | Validation Loss: 0.0315
Epoch 60/100 | Train Loss: 0.0310 | Validation Loss: 0.0237
Epoch 80/100 | Train Loss: 0.0236 | Validation Loss: 0.0204
Epoch 100/100 | Train Loss: 0.0182 | Validation Loss: 0.0210
Autoregressive Model Training
Epoch 20/100 | Train Loss: 0.0779 | Validation Loss: 0.0551
Epoch 40/100 | Train Loss: 0.0440 | Validation Loss: 0.0293
Epoch 60/100 | Train Loss: 0.0336 | Validation Loss: 0.0181
Epoch 80/100 | Train Loss: 0.0298 | Validation Loss: 0.0149
Epoch 100/100 | Train Loss: 0.0263 | Validation Loss: 0.0172


In [35]:
# Data Transfer

import pickle

single_shot_test_loss, single_shot_inputs, single_shot_predictions, single_shot_targets = test_model(transformer, test_loader, criterion)

single_shot_test_results = {
    'loss': single_shot_test_loss,
    'inputs': single_shot_inputs,
    'predictions': single_shot_predictions, 
    'targets': single_shot_targets
}

with open('results/single_shot_test_results.pkl', 'wb') as f:
    pickle.dump(single_shot_test_results, f)


auto_reg_test_loss, auto_reg_inputs, auto_reg_predictions, auto_reg_targets = test_model(ar_transformer, test_loader, criterion, auto_reg = True)

ar_test_results = {
    'loss': auto_reg_test_loss,
    'inputs': auto_reg_inputs,
    'predictions': auto_reg_predictions,
    'targets': auto_reg_targets
}

with open('results/auto_reg_test_results.pkl', 'wb') as f:
    pickle.dump(ar_test_results, f)

auto_reg_single_shot_test_loss, auto_reg_single_shot_inputs, auto_reg_single_shot_predictions, auto_reg_single_shot_targets = test_model(transformer, test_loader, criterion, auto_reg = True)

auto_reg_single_shot_test_results = {
    'loss': auto_reg_single_shot_test_loss,
    'inputs': auto_reg_single_shot_inputs,
    'predictions': auto_reg_single_shot_predictions,
    'targets': auto_reg_single_shot_targets
}

with open('results/auto_reg_single_shot_test_results.pkl', 'wb') as f:
    pickle.dump(auto_reg_single_shot_test_results, f)
