In [11]:
import numpy as np
import pandas as pd

import os
import sys
# Make the project root importable for the project-local modules imported
# later in this notebook. os.getenv() returns None when PROJECT_PATH is unset,
# and non-string sys.path entries are ignored by the import system, so an
# unconditional append would hide the misconfiguration until an import fails.
project_path = os.getenv("PROJECT_PATH")
if project_path is None:
    import warnings

    warnings.warn("PROJECT_PATH is not set; project-local imports may fail.")
elif project_path not in sys.path:
    # Skip the append on re-runs so sys.path does not accumulate duplicates.
    sys.path.append(project_path)

# Raw input table; the path is resolved relative to the working directory.
df = pd.read_csv('../../../data/train/ss1.csv')

DATA PROCESSING

In [12]:
# Remove the time-index column to focus on the positions.
# errors="ignore" keeps this cell re-runnable: on a second execution the
# column is already gone and a plain drop() would raise KeyError.
df = df.drop(columns=["frame_index"], errors="ignore")

# List of entities (players + ball): every remaining column is one entity.
entity_names = df.columns.tolist()
n_entities = len(entity_names)
n_frames = len(df)

# Convert an "x,y" string into a vector [x, y]
def parse_position(pos_str):
    """Parse a comma-separated "x,y" string into a float array.

    Args:
        pos_str: Text such as "12.5,30.0". Values without a ``split`` method
            (for example the float NaN pandas uses for empty cells) are
            treated as missing.

    Returns:
        np.ndarray of shape (2,) holding [x, y], or [nan, nan] when the value
        is missing or does not parse as exactly two floats.
    """
    try:
        x, y = map(float, pos_str.split(','))
    except (AttributeError, TypeError, ValueError):
        # AttributeError: the value has no .split (NaN / None cell).
        # ValueError: a non-numeric token, or a field count other than two.
        # TypeError: .split rejected the separator (e.g. a bytes value).
        # Anything else (KeyboardInterrupt, SystemExit, ...) must propagate.
        return np.array([np.nan, np.nan])
    return np.array([x, y])

# Allocate the (n_frames, n_entities, 2) position array, then fill it one
# entity column at a time from the parsed "x,y" strings.
positions_array = np.zeros((n_frames, n_entities, 2))
for i, entity in enumerate(entity_names):
    parsed_column = df[entity].apply(parse_position)
    positions_array[:, i, :] = np.stack(parsed_column.to_numpy())

# Interpolate missing values
def interpolate_array(array):
    """Fill NaN gaps along the first axis by linear interpolation, in place.

    Each (entity, coordinate) series is handled independently. Dimensions are
    taken from ``array.shape`` rather than module-level globals, so the
    function works on any 3-D array.

    Args:
        array: Float array indexed as [frame, entity, coordinate]. It is
            modified in place.

    Returns:
        The same array object. Series with at least two valid samples are
        interpolated; np.interp holds the first/last valid value across
        leading/trailing gaps. Series with fewer than two valid samples are
        set entirely to 0.0.
    """
    n_frames, n_entities, n_coords = array.shape
    frame_idx = np.arange(n_frames)
    for entity_idx in range(n_entities):
        for coord in range(n_coords):
            series = array[:, entity_idx, coord]
            mask = np.isnan(series)
            not_nan = np.where(~mask)[0]
            if len(not_nan) > 1:
                array[:, entity_idx, coord] = np.interp(
                    x=frame_idx,
                    xp=not_nan,
                    fp=series[not_nan]
                )
            else:
                # NOTE(review): a lone valid sample is discarded here and the
                # series becomes 0.0 — confirm this is intended.
                array[:, entity_idx, coord] = 0.0
    return array

positions_array = interpolate_array(positions_array)

# Min-max normalise each coordinate, pooling over all frames and entities.
min_pos = np.nanmin(positions_array, axis=(0, 1))
max_pos = np.nanmax(positions_array, axis=(0, 1))
range_pos = max_pos - min_pos
# A zero span would divide by zero below, so substitute 1.0 for it.
range_pos = np.where(range_pos == 0, 1.0, range_pos)
positions_array_norm = (positions_array - min_pos) / range_pos

# Build the presence mask
def generate_mask(df, entity_names):
    """Return a float32 mask marking which cells of ``df`` hold a value.

    Computed in one vectorized pass from the arguments alone (no module-level
    globals), instead of a per-cell ``df.iloc`` lookup.

    Args:
        df: DataFrame with one row per frame.
        entity_names: Column labels to include, in output column order.

    Returns:
        np.ndarray of shape (len(df), len(entity_names)), dtype float32:
        1.0 where the cell is not NA, 0.0 where it is.
    """
    return df[list(entity_names)].notna().to_numpy(dtype=np.float32)

mask_array = generate_mask(df, entity_names)

# Sequence parameters: each sample is a window of N frames (X) immediately
# followed by a window of M frames (Y).
N, M = 10, 5
num_samples = n_frames - N - M + 1
if num_samples <= 0:
    # Fail here with a clear message. Otherwise a negative count makes
    # np.zeros raise a cryptic error, and a zero count only surfaces later
    # as a division by zero when the mean epoch loss is computed.
    raise ValueError(
        f"Need at least N + M = {N + M} frames to build one sample, got {n_frames}."
    )
X_norm = np.zeros((num_samples, N, n_entities, 2))
Y_norm = np.zeros((num_samples, M, n_entities, 2))
X_mask = np.zeros((num_samples, N, n_entities))
Y_mask = np.zeros((num_samples, M, n_entities))

# Slide the (N + M)-frame window forward one frame per sample.
for i in range(num_samples):
    X_norm[i] = positions_array_norm[i:i+N]
    Y_norm[i] = positions_array_norm[i+N:i+N+M]
    X_mask[i] = mask_array[i:i+N]
    Y_mask[i] = mask_array[i+N:i+N+M]


MODEL DEFINITION

In [13]:
import torch
import torch.nn as nn

from notebooks.predictions.TF.trajectorytransformer import TrajectoryTransformer

# Usage example: one forward pass, printing the output shape as a sanity check.
model = TrajectoryTransformer()
example_input = torch.tensor(X_norm, dtype=torch.float32)
# Nothing is back-propagated here, so run under no_grad() to avoid building
# an autograd graph over every sample in the dataset.
# NOTE(review): this passes all N input frames, while the training cell feeds
# only `batch_X[:, -5:]` — confirm the model accepts both lengths.
with torch.no_grad():
    output = model(example_input)
print(output.shape)


TRAINING

In [14]:
# === 3. Dataset e DataLoader ===
from torch.utils.data import Dataset, DataLoader

from notebooks.predictions.TF.trajectorydataset import TrajectoryDataset

# Wrap the windowed arrays in a dataset, then batch and shuffle it.
train_dataset = TrajectoryDataset(X_norm, Y_norm, X_mask, Y_mask)
train_loader = DataLoader(dataset=train_dataset, shuffle=True, batch_size=2)


In [15]:
# === 4. Model training ===
import torch.optim as optim

# Use the GPU when one is available, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

n_epochs = 100
model.train()

for epoch in range(n_epochs):
    total_loss = 0
    for batch in train_loader:
        # Accept either a 4-item batch (X, Y, X_mask, Y_mask) or a 2-item (X, Y).
        # NOTE(review): the masks are unpacked but never enter the loss —
        # confirm whether masked-out entries should be excluded from the MSE.
        if len(batch) != 4:
            batch_X, batch_Y = batch
        else:
            batch_X, batch_Y, batch_X_mask, batch_Y_mask = batch

        batch_X, batch_Y = batch_X.to(device), batch_Y.to(device)

        # Clear gradients from the previous step before the new backward pass.
        optimizer.zero_grad()

        # Only the last 5 input frames are fed to the model.
        pred_Y = model(batch_X[:, -5:])
        loss = criterion(pred_Y, batch_Y)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    # Mean per-batch loss for this epoch.
    avg_loss = total_loss / len(train_loader)
    print(f"Epoch {epoch+1}/{n_epochs} - Loss: {avg_loss:.4f}")


In [16]:
# Model evaluation: a single forward pass on the first window of X_norm.
model.eval()
with torch.no_grad():
    sample_X = torch.tensor(X_norm[:1], dtype=torch.float32, device=device)
    # Same input slicing as in training: last 5 frames only.
    pred_Y = model(sample_X[:, -5:])  # presumably (1, 5, 22, 2) — TODO confirm
    print("Predicted trajectory shape:", pred_Y.shape)

In [None]:
# Save the model weights (state_dict only).
model_path = '../../../data/models/trajectory_transformer.pth'
# torch.save does not create missing parent directories, so make sure the
# target folder exists; otherwise the trained weights are lost to an I/O error.
os.makedirs(os.path.dirname(model_path), exist_ok=True)
torch.save(model.state_dict(), model_path)