In [1]:
import os
import re
import glob
import math
import torch
import numpy as np
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

In [2]:
# Collate function (same as original)
def collate_fn(batch):
    if len(batch[0]) == 3:  # Training data with future
        pasts, masks, futures = zip(*batch)
        past = torch.stack(pasts)
        mask = torch.stack(masks)
        future = torch.stack(futures)
        return past, mask, future
    else:  # Test data without future
        pasts, masks = zip(*batch)
        past = torch.stack(pasts)
        mask = torch.stack(masks)
        return past, mask

class TrajectoryDataset(Dataset):
    def __init__(self, input_path=None, data=None, T_past=50, T_future=60, is_test=False):
        if data is not None:
            self.data = data
        else:
            npz = np.load(input_path)
            self.data = npz['data']
        self.T_past = T_past
        self.T_future = T_future
        self.is_test = is_test
        
        # Calculate normalization statistics from the past data
        self.calculate_normalization_stats()
        
    def calculate_normalization_stats(self):
        """Calculate mean and std for efficient normalization"""
        # Only consider non-zero values for position and velocity
        positions = self.data[..., :2]  # x, y positions
        mask = np.abs(positions).sum(axis=-1) > 0
        
        if mask.sum() > 0:
            valid_positions = positions[mask]
            self.pos_mean = valid_positions.mean(axis=0)
            self.pos_std = valid_positions.std(axis=0)
            
            # Ensure std is not zero to avoid division by zero
            self.pos_std = np.maximum(self.pos_std, 1e-6)
        else:
            self.pos_mean = np.zeros(2)
            self.pos_std = np.ones(2)
            
        # Same for velocities
        velocities = self.data[..., 2:4]  # vx, vy velocities
        mask = np.abs(velocities).sum(axis=-1) > 0
        
        if mask.sum() > 0:
            valid_velocities = velocities[mask]
            self.vel_mean = valid_velocities.mean(axis=0)
            self.vel_std = valid_velocities.std(axis=0)
            self.vel_std = np.maximum(self.vel_std, 1e-6)
        else:
            self.vel_mean = np.zeros(2)
            self.vel_std = np.ones(2)
            
        # Heading is already in radians, so we don't normalize it
        
    def normalize_features(self, features):
        """Normalize features efficiently"""
        normalized = features.copy()
        # Normalize positions (x, y)
        normalized[..., 0:2] = (features[..., 0:2] - self.pos_mean) / self.pos_std
        # Normalize velocities (vx, vy)
        normalized[..., 2:4] = (features[..., 2:4] - self.vel_mean) / self.vel_std
        # Normalize acceleration (ax, ay)
        normalized[..., 4:6] = (features[..., 4:6] - self.vel_mean) / self.vel_std  # You might want to use vel stats for acceleration normalization
        return normalized
        
    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        scene = self.data[idx]  # (num_agents, T, 6) where 6 should be x, y, vx, vy, ax, ay

        # Check the shape of `scene` to ensure it has the correct number of features
        print(f"Original scene shape: {scene.shape}")  # Should be (num_agents, T, 6)

        # Extract past trajectory of all agents (first T time steps)
        past = scene[:, :self.T_past, :]  # (num_agents, T_past, 6)
        
        # Check the shape of `past`
        print(f"Shape of past: {past.shape}")  # Should be (num_agents, T_past, 6)

        # Compute velocity (dx, dy) by differentiating position
        velocity = np.diff(past[:, :, :2], axis=1, append=np.zeros((past.shape[0], 1, 2)))  # (num_agents, T_past, 2)
        
        # Compute acceleration (dvx, dvy) by differentiating velocity
        acceleration = np.diff(velocity, axis=1, append=np.zeros((past.shape[0], 1, 2)))  # (num_agents, T_past, 2)
        
        # Concatenate velocity and acceleration as additional features
        past_with_derived_features = np.concatenate([past, velocity, acceleration], axis=-1)  # (num_agents, T_past, 10)

        # Check the shape of `past_with_derived_features` before passing it to normalization
        print(f"Shape of past_with_derived_features: {past_with_derived_features.shape}")  # Should be (num_agents, T_past, 10)

        # Ensure we only have 6 features (x, y, vx, vy, ax, ay)
        if past_with_derived_features.shape[-1] != 6:
            # If there are more than 6 features, remove the extras
            past_with_derived_features = past_with_derived_features[..., :6]
            print(f"Adjusted past_with_derived_features shape: {past_with_derived_features.shape}")

        # Normalize the features
        past_normalized = self.normalize_features(past_with_derived_features)

        # Create mask for valid agents (based on position, vx, vy)
        mask = np.sum(np.abs(past[:, :, :2]), axis=(1, 2)) > 0

        # For training data, also extract and normalize future trajectory of ego vehicle
        if not self.is_test and scene.shape[1] >= self.T_past + self.T_future:
            future = scene[0, self.T_past:self.T_past+self.T_future, :2]  # Ego vehicle future (x, y)
            # Normalize future coordinates
            future_normalized = (future - self.pos_mean) / self.pos_std

            return torch.tensor(past_normalized, dtype=torch.float32), torch.tensor(mask, dtype=torch.bool), torch.tensor(future_normalized, dtype=torch.float32)

        # For test data, only return normalized past
        return torch.tensor(past_normalized, dtype=torch.float32), torch.tensor(mask, dtype=torch.bool)
    
    def denormalize_prediction(self, prediction):
        """Convert normalized predictions back to original scale"""
        return prediction * self.pos_std + self.pos_mean

In [None]:
class PositionalEncoding(nn.Module):
    def __init__(self, d_model, max_len=5000):
        super().__init__()
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0).transpose(0, 1)
        self.register_buffer('pe', pe)

    def forward(self, x):
        return x + self.pe[:x.size(0), :]

class AgentTypeEmbedding(nn.Module):
    def __init__(self, num_types=10, d_model=128):
        super().__init__()
        self.embedding = nn.Embedding(num_types, d_model)
        
    def forward(self, x):
        obj_type = x[..., -1].long()
        return self.embedding(obj_type)

class ImprovedTrajectoryTransformer(nn.Module):
    def __init__(self, feature_dim=6, d_model=256, nhead=8,
                 num_layers=4, dim_feedforward=512, 
                 T_past=50, T_future=60, dropout=0.1):
        super().__init__()
        self.d_model = d_model
        self.T_past = T_past
        self.T_future = T_future
        
        # Separate embedding for features (positions, velocities, heading)
        self.feature_embed = nn.Linear(feature_dim, d_model)  # -1 for object type
        
        # Object type embedding
        self.type_embedding = AgentTypeEmbedding(num_types=10, d_model=d_model)
        
        # Positional encoding for timesteps
        self.pos_encoding = PositionalEncoding(d_model)
        
        # Layer normalization
        self.norm = nn.LayerNorm(d_model)
        
        # Transformer encoder for temporal relations
        temporal_encoder_layer = nn.TransformerEncoderLayer(
            d_model=d_model, 
            nhead=nhead, 
            dim_feedforward=dim_feedforward, 
            dropout=dropout,
            batch_first=False
        )
        self.temporal_encoder = nn.TransformerEncoder(
            temporal_encoder_layer, 
            num_layers=num_layers//2
        )
        
        # Transformer encoder for social relations
        social_encoder_layer = nn.TransformerEncoderLayer(
            d_model=d_model, 
            nhead=nhead, 
            dim_feedforward=dim_feedforward, 
            dropout=dropout,
            batch_first=False
        )
        self.social_encoder = nn.TransformerEncoder(
            social_encoder_layer, 
            num_layers=num_layers//2
        )
        
        # Output MLP
        self.prediction_head = nn.Sequential(
            nn.Linear(d_model, dim_feedforward),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(dim_feedforward, dim_feedforward // 2),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(dim_feedforward // 2, 2 * T_future)
        )
        
    def forward(self, past, agent_mask):
        B, N, T, F = past.shape  # Batch, Num_agents, Time, Features
        
        # Split features and object type
        features = past[..., :-1]  # All but last dimension
        
        # Reshape to process all agent-timesteps together
        features_flat = features.reshape(B * N * T, F-1)
        
        # Embed features
        feature_embedding = self.feature_embed(features_flat)
        feature_embedding = feature_embedding.reshape(B, N, T, self.d_model)
        
        # Get object type embedding
        type_embedding = self.type_embedding(past)  # B, N, T, d_model
        
        # Combine embeddings
        combined_embedding = feature_embedding + type_embedding
        
        # Reshape for temporal transformer: (T, B*N, d_model)
        temporal_input = combined_embedding.permute(2, 0, 1, 3).reshape(T, B*N, self.d_model)
        
        # Add positional encoding
        temporal_input = self.pos_encoding(temporal_input)
        
        # Apply temporal transformer
        temporal_output = self.temporal_encoder(temporal_input)
        
        # Get the last temporal state for each agent
        agent_features = temporal_output[-1].reshape(B, N, self.d_model)  # B, N, d_model
        
        # Make sure there's at least one valid agent per batch
        if (~agent_mask).all(dim=1).any():
            fallback_mask = agent_mask.clone()
            fallback_mask[:, 0] = True  # At least use ego vehicle
            agent_mask = torch.where(agent_mask.sum(dim=1, keepdim=True) == 0, fallback_mask, agent_mask)
        
        # Prepare for social transformer: (N, B, d_model)
        social_input = agent_features.permute(1, 0, 2)
        
        # Apply social transformer with masking
        social_output = self.social_encoder(social_input, src_key_padding_mask=~agent_mask)
        
        # Extract ego vehicle embedding
        ego_embedding = social_output[0]  # B, d_model
        
        # Apply prediction head
        trajectory_flat = self.prediction_head(ego_embedding)  # B, 2*T_future
        
        # Reshape to (Batch, Time, XY)
        predictions = trajectory_flat.reshape(B, self.T_future, 2)
        
        return predictions



In [4]:
def train_epoch(model, dataloader, optimizer, device, clip_grad=.3):
    model.train()
    total_loss = 0.0
    criterion = nn.SmoothL1Loss()
    
    for batch in dataloader:
        past, mask, future = [x.to(device) for x in batch]
        
        optimizer.zero_grad()
        pred = model(past, mask)
        
        loss = criterion(pred, future)
        loss.backward()
        
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=clip_grad)
        
        optimizer.step()
        total_loss += loss.item() * past.size(0)
    
    return total_loss / len(dataloader.dataset)

def evaluate(model, val_loader, device):
    model.eval()
    total_loss = 0.0
    criterion = nn.MSELoss()
    
    with torch.no_grad():
        for batch in val_loader:
            past, mask, future = [x.to(device) for x in batch]
            pred = model(past, mask)
            loss = criterion(pred, future)
            total_loss += loss.item() * past.size(0)
    
    return total_loss / len(val_loader.dataset)

def predict(model, test_loader, test_dataset, device):
    model.eval()
    all_preds = []
    
    with torch.no_grad():
        for batch in test_loader:
            past, mask = [x.to(device) for x in batch]
            pred = model(past, mask)
            denorm_pred = test_dataset.denormalize_prediction(pred.cpu().numpy())
            all_preds.append(denorm_pred)
    
    return np.concatenate(all_preds, axis=0)

def get_latest_checkpoint(folder):
    files = glob.glob(os.path.join(folder, "ckpt_epoch_*.pt"))
    if not files:
        return None
    return max(files, key=lambda f: int(re.findall(r"ckpt_epoch_(\d+)", f)[0]))




In [5]:
train_input = 'data/train.npz'
test_input = 'data/test_input.npz'
output_csv = 'predictions.csv'
checkpoint_path = 'best_model.pt'
checkpoints_dir = 'checkpoints'

# Hyperparameters
batch_size = 64
lr = 1e-4
weight_decay = 1e-5
epochs = 300
patience = 15  # Early stopping patience
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

print(f"Using device: {device}")

Using device: cuda


In [6]:
# Load data
full_data = np.load(train_input)['data']

# Split into train and validation (7:3)
num_samples = len(full_data)
num_train = int(0.8 * num_samples)
perm = np.random.permutation(num_samples)
train_idx = perm[:num_train]
val_idx = perm[num_train:]

train_data = full_data[train_idx]
val_data = full_data[val_idx]

# Create datasets with normalization
train_ds = TrajectoryDataset(data=train_data)
val_ds = TrajectoryDataset(data=val_data)

# Create test dataset using the same normalization stats as training
test_ds = TrajectoryDataset(input_path=test_input, is_test=True)
# Copy normalization stats from train_ds
test_ds.pos_mean = train_ds.pos_mean
test_ds.pos_std = train_ds.pos_std
test_ds.vel_mean = train_ds.vel_mean
test_ds.vel_std = train_ds.vel_std

# Create data loaders
train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True, collate_fn=collate_fn)
val_loader = DataLoader(val_ds, batch_size=batch_size, shuffle=False, collate_fn=collate_fn)
test_loader = DataLoader(test_ds, batch_size=batch_size, shuffle=False, collate_fn=collate_fn)

In [7]:
# Create model, optimizer, and scheduler
model = ImprovedTrajectoryTransformer(dropout=.3).to(device)
optimizer = torch.optim.AdamW(model.parameters(), lr=lr, weight_decay=weight_decay, betas=(0.9, 0.999))
warm_up_epochs = 5
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=epochs - warm_up_epochs, eta_min=1e-6)
warm_up_scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda epoch: (epoch + 1) / warm_up_epochs if epoch < warm_up_epochs else 1)
os.makedirs(checkpoints_dir, exist_ok=True)

# Training setup
start_epoch = 1
best_val_loss = float('inf')
no_improve_epochs = 0

# Try to load checkpoint
latest_ckpt = get_latest_checkpoint(checkpoints_dir)
if latest_ckpt:
    print(f"Loading checkpoint: {latest_ckpt}")
    ckpt = torch.load(latest_ckpt, map_location=device)
    model.load_state_dict(ckpt['model_state_dict'])
    optimizer.load_state_dict(ckpt['optimizer_state_dict'])
    start_epoch = ckpt['epoch'] + 1
    best_val_loss = ckpt.get('val_loss', float('inf'))
    print(f"✅ Resumed from epoch {start_epoch - 1} with val_loss={best_val_loss:.6f}")

# Training loop
print(f"Starting training from epoch {start_epoch}")
for epoch in range(start_epoch, epochs + 1):
    # Train for one epoch
    train_loss = train_epoch(model, train_loader, optimizer, device)
    
    # Evaluate on validation set
    val_loss = evaluate(model, val_loader, device)
    
    # Update learning rate
    if epoch <= warm_up_epochs:
        warm_up_scheduler.step()
    else:
        scheduler.step()
    
    # Print progress
    print(f"Epoch {epoch}/{epochs} | Train Loss: {train_loss:.6f} | Val Loss: {val_loss:.6f}")
    
    # Save best model
    if val_loss <= best_val_loss and epoch > warm_up_epochs:
        best_val_loss = val_loss
        no_improve_epochs = 0
        torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'val_loss': best_val_loss
        }, checkpoint_path)
        print(f"✅ Best model saved at epoch {epoch} (val loss: {best_val_loss:.6f})")
    else:
        no_improve_epochs += 1
    
    # Save periodic checkpoint
    if epoch % 10 == 0:
        torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'val_loss': val_loss
        }, f'{checkpoints_dir}/ckpt_epoch_{epoch:04d}.pt')
        print(f"🧪 Checkpoint saved at {checkpoints_dir}/ckpt_epoch_{epoch:04d}.pt")
    
    # Early stopping
    # if no_improve_epochs >= patience:
    #     print(f"Early stopping triggered after {epoch} epochs")
    #     break





Starting training from epoch 1
Original scene shape: (50, 110, 6)
Shape of past: (50, 50, 6)
Shape of past_with_derived_features: (50, 50, 10)
Adjusted past_with_derived_features shape: (50, 50, 6)
Original scene shape: (50, 110, 6)
Shape of past: (50, 50, 6)
Shape of past_with_derived_features: (50, 50, 10)
Adjusted past_with_derived_features shape: (50, 50, 6)
Original scene shape: (50, 110, 6)
Shape of past: (50, 50, 6)
Shape of past_with_derived_features: (50, 50, 10)
Adjusted past_with_derived_features shape: (50, 50, 6)
Original scene shape: (50, 110, 6)
Shape of past: (50, 50, 6)
Shape of past_with_derived_features: (50, 50, 10)
Adjusted past_with_derived_features shape: (50, 50, 6)
Original scene shape: (50, 110, 6)
Shape of past: (50, 50, 6)
Shape of past_with_derived_features: (50, 50, 10)
Adjusted past_with_derived_features shape: (50, 50, 6)
Original scene shape: (50, 110, 6)
Shape of past: (50, 50, 6)
Shape of past_with_derived_features: (50, 50, 10)
Adjusted past_with_der

RuntimeError: shape '[-1, 6]' is invalid for input of size 800000

In [None]:
# Load best model for prediction
print("Loading best model for prediction...")
model.load_state_dict(torch.load(checkpoint_path, map_location=device)['model_state_dict'])

# Generate predictions
print("Generating predictions...")
preds = predict(model, test_loader, test_ds, device)

# Flatten predictions to match submission format (2100*60, 2)
preds_flat = preds.reshape(-1, 2)

# Create ID column (required for submission)
ids = np.arange(len(preds_flat))

# Save predictions to CSV
output = np.column_stack((ids, preds_flat))
header = "ID,x,y"
np.savetxt(output_csv, output, delimiter=',', header=header, comments='', fmt=['%d', '%.6f', '%.6f'])
print(f"Predictions saved to {output_csv}")