In [12]:
import pandas as pd
import os

from model import Transformer # this is the transformer.py file
import torch
from torch import nn
import numpy as np

In [13]:
# Data preprocessing
#
# Every raw ".txt" file in `data_path` is parsed by `process_file` and the
# resulting frames are merged on ("year", "month") into a single table, which
# is then written to data/processed_data.csv.

from data_processing import process_file

data_path = "data/"

all_data = pd.DataFrame()

# os.listdir returns entries in arbitrary order; sorting makes the merge order
# (and therefore the resulting column order) deterministic across runs.
for file in sorted(os.listdir(data_path)):
    # Skip anything that is not a raw input file -- notably the
    # processed_data.csv written at the end of this cell. Without this guard
    # the merge below would run with a stale `loc_df` from the previous
    # iteration (merging the same frame twice) or raise NameError when the
    # first directory entry is not a .txt file.
    if not file.endswith(".txt"):
        continue

    file_path = os.path.join(data_path, file)
    loc_df = process_file(file_path)

    if len(all_data) == 0:
        # First frame seen so far: nothing to merge against yet.
        all_data = loc_df
    else:
        # pd.merge defaults to an inner join, so only (year, month) pairs
        # present in every file survive.
        all_data = pd.merge(all_data, loc_df, on = ["year", "month"])

all_data.to_csv("data/processed_data.csv")

In [14]:
from data_processing import create_segments

input_years = 10  # Length of input period in years
target_years = 1   # Length of target period in years

# create_segments returns two parallel sequences of DataFrames
# (input window, target window) -- see data_processing.py for how the
# windows are cut.
input_segments, target_segments = create_segments(all_data, input_years, target_years)

print("# of segments:", len(input_segments))
print("Length of each input vector (months):", len(input_segments[0]))
print("Length of each target vector (months):", len(target_segments[0]))

# Convert to tensors
# Everything except the calendar keys is treated as a model feature.
# Index.difference returns the remaining column labels in sorted order.
feature_columns = all_data.columns.difference(['year', 'month'])

# Stack the per-segment arrays into one contiguous ndarray before handing it
# to torch: building a tensor directly from a Python list of ndarrays takes a
# slow element-wise path (PyTorch emits a UserWarning about it). The resulting
# values and dtype are unchanged.
input_array = np.stack([df[feature_columns].values for df in input_segments])
target_array = np.stack([df[feature_columns].values for df in target_segments])
input_tensors = torch.tensor(input_array, dtype=torch.float32)
target_tensors = torch.tensor(target_array, dtype=torch.float32)

# Print tensor lengths
print("\nTensor Lengths:")
print(f"There are {len(input_tensors)} input tensors and {len(target_tensors)} target tensors")
print(f"Input tensors length: {len(input_tensors[0])}")
print(f"Target tensors length: {len(target_tensors[0])}")


# of segments: 132
Length of each input vector (months): 120
Length of each target vector (months): 12

Tensor Lengths:
There are 132 input tensors and 132 target tensors
Input tensors length: 120
Target tensors length: 12


In [22]:
from dataset import TimeSeriesDataset
from torch.utils.data import DataLoader, random_split
batch_size = 8

ts_dataset = TimeSeriesDataset(input_tensors, target_tensors)
train_size = int(0.6 * len(ts_dataset))  # 60% of data for training
val_size = int(0.2 * len(ts_dataset))  # 20% of data for validation
test_size = len(ts_dataset) - train_size - val_size  # Remainder (~20%) for testing

# Randomly split the dataset into training, validation and test subsets.
# A dedicated, seeded generator makes the split identical on every
# Restart & Run All, so reported losses are comparable between runs.
# NOTE(review): if create_segments produces overlapping windows, a random
# split can place near-duplicate months in both train and test -- confirm
# whether a chronological split is more appropriate.
split_generator = torch.Generator().manual_seed(42)
train_dataset, val_dataset, test_dataset = random_split(
    ts_dataset,
    [train_size, val_size, test_size],
    generator=split_generator,
)

# Create DataLoaders for the three subsets. Only the training loader is
# shuffled; evaluation does not benefit from shuffling and a fixed order keeps
# per-sample predictions aligned between runs.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

print(f"Train Loader Length: {len(train_loader)}")
print(f"Validation Loader Length: {len(val_loader)}")

# Pull one batch to sanity-check the tensor layout fed to the model.
input_batch, target_batch = next(iter(train_loader))

# Print the shapes
print(f'Input batch shape: {input_batch.shape}')  # e.g., torch.Size([8, 120, 15])
print(f'Target batch shape: {target_batch.shape}')  # e.g., torch.Size([8, 12, 15])

Train Loader Length: 13
Validation Loader Length: 3
Input batch shape: torch.Size([8, 120, 15])
Target batch shape: torch.Size([8, 12, 15])


#### Model

In [20]:
# Hyperparameters
# These are passed positionally to Transformer below; their exact meaning is
# defined by model.py, which is not shown here.

# Equals the last dimension (15) of the batches printed above.
d_model = 15
# Presumably the hidden width of the feed-forward sub-layer -- confirm in model.py.
ffn_hidden = 2048
# NOTE(review): 15 / 5 = 3 dims per head if the model splits d_model evenly
# across heads -- confirm in model.py.
num_heads = 5
# Presumably the dropout probability -- confirm in model.py.
drop_prob = 0.1
num_layers = 1

transformer = Transformer(d_model, ffn_hidden, num_heads, drop_prob, num_layers)

# Mean-squared-error loss and AdamW optimiser; both are read as globals by
# train_model.
criterion = nn.MSELoss()
optimizer = torch.optim.AdamW(params=transformer.parameters(), lr = 1e-3)

In [21]:

def train_model(model, epochs, save_freq = 20):
    """Train `model` and periodically report losses and save checkpoints.

    The function relies on notebook-level globals defined in earlier cells:
    `train_loader`, `val_loader`, `criterion` and `optimizer`.

    Args:
        model: Module called as ``model(input_batch, target_batch)``.
        epochs: Number of passes over `train_loader`.
        save_freq: Every `save_freq` epochs the train/validation losses are
            printed and the model's ``state_dict`` is written to
            ``saved_models/transformer_epoch_<epoch>.pt``.

    Returns:
        None. Side effects: updates model parameters, prints progress and
        writes checkpoint files.
    """
    checkpoint_dir = 'saved_models'

    for epoch in range(1, epochs+1):

        # ---- training pass ----
        model.train()
        train_loss = 0

        for input_batch, target_batch in train_loader:
            optimizer.zero_grad()

            # NOTE(review): the target is passed into the forward call as
            # well (looks like teacher forcing) -- confirm against model.py.
            logits_batch = model(input_batch, target_batch)

            loss = criterion(logits_batch, target_batch)
            loss.backward()

            optimizer.step()

            train_loss += loss.item()

        # Average of the per-batch losses; NaN instead of ZeroDivisionError
        # when the loader yields no batches.
        num_train_batches = len(train_loader)
        train_loss = train_loss / num_train_batches if num_train_batches else float('nan')

        # ---- validation pass (no gradients, dropout disabled) ----
        model.eval()

        val_loss = 0
        with torch.inference_mode():
            for input_batch, target_batch in val_loader:

                # Forward pass
                logits_batch = model(input_batch, target_batch)

                # Calculate loss
                loss = criterion(logits_batch, target_batch)

                val_loss += loss.item()

        # A small dataset can leave the validation split empty
        # (int(0.2 * n) == 0); report NaN rather than crashing.
        num_val_batches = len(val_loader)
        val_loss = val_loss / num_val_batches if num_val_batches else float('nan')

        # Print epoch stats and checkpoint
        if(epoch % save_freq == 0):
            print(f'Epoch {epoch}/{epochs} | Train Loss: {train_loss:.4f} | Validation Loss: {val_loss:.4f}')
            # torch.save does not create missing parent directories.
            os.makedirs(checkpoint_dir, exist_ok=True)
            # Label the file with the epoch that was actually completed
            # (previously off by one: epoch 20 was saved as ..._21.pt).
            torch.save(model.state_dict(), f'{checkpoint_dir}/transformer_epoch_{epoch}.pt')

# Train for 100 epochs; losses are printed and a checkpoint is saved every 20 epochs.
train_model(transformer, 100, 20)

Epoch 20/100 | Train Loss: 0.0330 | Validation Loss: 0.0170
Epoch 40/100 | Train Loss: 0.0208 | Validation Loss: 0.0148
Epoch 60/100 | Train Loss: 0.0146 | Validation Loss: 0.0134
Epoch 80/100 | Train Loss: 0.0125 | Validation Loss: 0.0111
Epoch 100/100 | Train Loss: 0.0098 | Validation Loss: 0.0087


In [None]:
def test_model(model, test_loader, criterion, device):
    model.eval()
    test_loss = 0
    all_predictions = []
    all_targets = []

    with torch.no_grad():
        for input_batch, target_batch in test_loader:
            input_batch, target_batch = input_batch.to(device), target_batch.to(device)

            logits_batch = model(input_batch, target_batch)
            loss = criterion(logits_batch, target_batch)
            test_loss += loss.item()

            all_predictions.extend(logits_batch.cpu())
            all_targets.extend(target_batch.cpu())

    test_loss /= len(test_loader)

    return test_loss, all_predictions, all_targets
