Transformer

In [None]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt

# Load the dataset and discard the frame_index column in one step, so that
# the frame index is not used as a training feature.
df = pd.read_csv("../../data/train/train_interpolated_xy.csv").drop(
    columns=["frame_index"]
)

In [None]:
# Split chronologically first: the leading 75% of rows are used for training
# and the remaining rows for testing.
raw_values = df.values
split_idx = int(len(raw_values) * 0.75)

# Normalize the data.
# The scaler is fitted on the training rows ONLY. Fitting it on the full
# dataset would let the mean/variance of the held-out rows influence the
# training inputs (data leakage) and make the test error look better than it is.
scaler = StandardScaler()
scaler.fit(raw_values[:split_idx])

# `data` is the whole series expressed in training-set statistics; the same
# `scaler` is later used to map predictions back to the original units.
data = scaler.transform(raw_values)
train_data = data[:split_idx]
test_data = data[split_idx:]

# Prepare sequences
SEQ_LEN = 30   # number of past time steps fed to the model
PRED_LEN = 1   # number of future time steps predicted per sample

class TimeSeriesDataset(Dataset):
    """Sliding-window dataset over a time series.

    ``data`` is sliced along its first axis. Sample ``i`` is the pair
    ``(x, y)`` where ``x`` contains rows ``[i, i + seq_len)`` and ``y``
    contains the ``pred_len`` rows that immediately follow. Both are returned
    as ``float32`` tensors.

    Args:
        data: Array-like supporting ``len()`` and slicing along axis 0.
        seq_len: Number of consecutive rows used as model input.
        pred_len: Number of consecutive rows used as the target.
    """

    def __init__(self, data, seq_len, pred_len):
        self.data = data
        self.seq_len = seq_len
        self.pred_len = pred_len

    def __len__(self):
        """Return the number of complete windows (never negative)."""
        # When the series is shorter than one full window the raw expression
        # is negative, which makes ``len()`` raise; clamp it to zero instead.
        return max(0, len(self.data) - self.seq_len - self.pred_len + 1)

    def __getitem__(self, idx):
        """Return the ``(input, target)`` tensors for window ``idx``.

        Negative indices count from the end, as for a list.

        Raises:
            IndexError: If ``idx`` does not address a complete window.
        """
        n_windows = len(self)
        if idx < 0:
            idx += n_windows
        # Slicing never fails on its own, so without this check out-of-range
        # indices would yield truncated/empty windows and plain iteration
        # over the dataset would never stop.
        if not 0 <= idx < n_windows:
            raise IndexError(
                f"index out of range for dataset with {n_windows} windows"
            )

        target_start = idx + self.seq_len
        x = self.data[idx:target_start]
        y = self.data[target_start:target_start + self.pred_len]
        return torch.tensor(x, dtype=torch.float32), torch.tensor(y, dtype=torch.float32)

# Wrap each split in its own sliding-window dataset.
train_dataset, test_dataset = (
    TimeSeriesDataset(split, SEQ_LEN, PRED_LEN)
    for split in (train_data, test_data)
)

# Training batches are drawn in shuffled order; test batches keep the
# dataset order.
train_loader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=32, shuffle=False)

In [None]:
# Define the Transformer model
class TransformerModel(nn.Module):
    """Encoder-only Transformer that predicts the next ``pred_len`` time steps.

    Each input step is projected to ``d_model`` dimensions, a learned
    positional embedding is added, the sequence is passed through a stack of
    Transformer encoder layers, and the encoding of the final time step is
    mapped to ``pred_len`` future feature vectors.

    Args:
        input_size: Number of features per time step.
        seq_len: Length of the input sequence (fixes the positional embedding).
        pred_len: Number of future time steps to predict.
        d_model: Width of the Transformer representation.
        nhead: Number of attention heads per encoder layer.
        num_layers: Number of stacked encoder layers.
        dropout: Dropout probability inside the encoder layers.
    """

    def __init__(self, input_size, seq_len, pred_len, d_model=64, nhead=8,
                 num_layers=3, dropout=0.1):
        super().__init__()
        self.input_size = input_size
        self.seq_len = seq_len
        self.pred_len = pred_len

        self.input_proj = nn.Linear(input_size, d_model)
        # NOTE: nn.TransformerEncoder builds its stack from copies of this
        # layer, so this attribute is only a template. It is kept as a
        # registered submodule so that state_dict keys stay identical to
        # checkpoints saved by earlier versions of this class.
        self.encoder_layer = nn.TransformerEncoderLayer(
            d_model=d_model, nhead=nhead, dropout=dropout, batch_first=True
        )
        self.transformer_encoder = nn.TransformerEncoder(
            self.encoder_layer, num_layers=num_layers
        )
        # One output vector per predicted step. For pred_len == 1 this has
        # exactly the same shape as a plain Linear(d_model, input_size).
        self.fc_out = nn.Linear(d_model, input_size * pred_len)
        self.pos_embedding = nn.Parameter(torch.randn(1, seq_len, d_model))

    def forward(self, src):
        """Predict the steps that follow ``src``.

        Args:
            src: Tensor whose last two dimensions are ``(seq_len, input_size)``.

        Returns:
            Tensor of shape ``(batch, pred_len, input_size)``.

        Raises:
            ValueError: If the sequence length of ``src`` differs from the
                ``seq_len`` the model was built with.
        """
        # Without this check a length-1 sequence would silently broadcast
        # against the positional embedding, and other lengths would fail with
        # an opaque shape error.
        if src.size(-2) != self.seq_len:
            raise ValueError(
                f"expected sequence length {self.seq_len}, got {src.size(-2)}"
            )

        hidden = self.input_proj(src) + self.pos_embedding
        encoded = self.transformer_encoder(hidden)
        # Only the representation of the last time step is used for the forecast.
        out = self.fc_out(encoded[:, -1])
        return out.reshape(-1, self.pred_len, self.input_size)

In [None]:
# Model setup
# The number of features is the column count of the training matrix.
input_size = train_data.shape[1]
model = TransformerModel(input_size, SEQ_LEN, PRED_LEN, d_model=128)

# Adam with a step schedule: the learning rate is multiplied by 0.5 every
# 10 scheduler steps.
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)
scheduler = torch.optim.lr_scheduler.StepLR(
    optimizer=optimizer, step_size=10, gamma=0.5
)
criterion = nn.SmoothL1Loss()

In [None]:
# Training loop
EPOCHS = 100
for epoch in range(EPOCHS):
    model.train()
    batch_losses = []
    for x_batch, y_batch in train_loader:
        # Standard optimisation step: clear gradients, forward pass,
        # backward pass, parameter update.
        optimizer.zero_grad()
        loss = criterion(model(x_batch), y_batch)
        loss.backward()
        optimizer.step()
        batch_losses.append(loss.item())
    total_loss = sum(batch_losses)

    # The scheduler is stepped once per epoch, after all batches.
    scheduler.step()
    # Report the mean per-batch loss for this epoch.
    print(f"Epoch {epoch+1}/{EPOCHS}, Loss: {total_loss/len(train_loader):.4f}")

In [None]:
# Evaluation
model.eval()
preds, trues = [], []
# No gradients are needed for inference.
with torch.no_grad():
    for x_batch, y_batch in test_loader:
        preds.append(model(x_batch).numpy())
        trues.append(y_batch.numpy())

# Stack the per-batch arrays and flatten to one row per predicted step,
# with one column per feature.
preds = np.concatenate(preds).reshape(-1, input_size)
trues = np.concatenate(trues).reshape(-1, input_size)

In [None]:
# Inverse transform: map predictions and targets from standardized values
# back to the scale of the original data.
preds_inv, trues_inv = (
    scaler.inverse_transform(scaled) for scaled in (preds, trues)
)

In [None]:
# Compute RMSE over the last two feature columns only
# (presumably the ball x/y coordinates, per the printed label -- confirm
# against the CSV column order).
ball_mse = mean_squared_error(trues_inv[:, -2:], preds_inv[:, -2:])
rmse = np.sqrt(ball_mse)
print(f"Ball Position RMSE: {rmse:.2f}")

Save Model

In [None]:
import os

# Directory where model will be saved (created on first use).
save_dir = "../../data/models/tf"
os.makedirs(save_dir, exist_ok=True)

# Pick the first unused version number so existing checkpoints
# (v1.pth, v2.pth, ...) are never overwritten.
version = 1
save_path = os.path.join(save_dir, f"v{version}.pth")
while os.path.exists(save_path):
    version += 1
    save_path = os.path.join(save_dir, f"v{version}.pth")

# Save only the model weights (state dict), not the full module object.
torch.save(model.state_dict(), save_path)

Save Predictions

In [None]:
import os

# Get feature names
feature_names = df.columns.tolist()

# Build one frame with the predictions and one with the ground truth, then
# place them side by side (both share the same default row index).
pred_frame = pd.DataFrame(preds_inv, columns=[f"pred_{col}" for col in feature_names])
true_frame = pd.DataFrame(trues_inv, columns=[f"true_{col}" for col in feature_names])
output_all = pd.concat([pred_frame, true_frame], axis=1)

# NOTE: this index simply counts the predicted rows from 0; it is not the
# frame_index column of the source CSV.
output_all.insert(0, "frame_index", np.arange(len(output_all)))  # Optional frame index

# Save to CSV, creating the output directory first so the write does not
# fail on a fresh checkout.
output_path = "../../data/predicted/tf_pred.csv"
os.makedirs(os.path.dirname(output_path), exist_ok=True)
output_all.to_csv(output_path, index=False)