In [None]:
import pandas as pd
import torch
import torch.nn as nn
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm

In [None]:
# Install the Kaggle API token at ~/.kaggle/kaggle.json.
# Expects kaggle.json to be present in the current working directory.
!mkdir -p ~/.kaggle
# NOTE: `mv` removes kaggle.json from the working directory, so re-running
# this cell after a successful run reports a missing source file.
!mv kaggle.json ~/.kaggle/
# Restrict the token to owner read/write only.
!chmod 600 ~/.kaggle/kaggle.json

In [None]:
# Download the cse-251-b-2025 competition files with the Kaggle CLI.
# The following cell expects the result to be cse-251-b-2025.zip in the
# current working directory.
!kaggle competitions download -c cse-251-b-2025

In [None]:
!unzip cse-251-b-2025.zip -d argoverse_data/

Archive:  cse-251-b-2025.zip
  inflating: argoverse_data/test_input.npz  
  inflating: argoverse_data/train.npz  


In [None]:
# Load the trajectory arrays from the extracted archives.
# np.load on a .npz returns an NpzFile that keeps the underlying zip open,
# so read inside a context manager to release the handle once the array
# has been materialised in memory.
with np.load('argoverse_data/train.npz') as train_file:
    train_data = train_file['data']
print("train_data's shape", train_data.shape)

with np.load('argoverse_data/test_input.npz') as test_file:
    test_data = test_file['data']
print("test_data's shape", test_data.shape)

train_data's shape (10000, 50, 110, 6)
test_data's shape (2100, 50, 50, 6)


In [None]:
# Problem dimensions shared by the model and the data helpers.
# The arrays loaded in this notebook print as (10000, 50, 110, 6) for train
# and (2100, 50, 50, 6) for test, and are indexed below as
# (sample, agent, time step, feature).

# Number of leading time steps used as model input.
INPUT_TIME_STEPS = 50
# Number of time steps following the input window used as prediction targets
# (50 + 60 matches the 110 steps of the training array).
OUTPUT_TIME_STEPS = 60
# Agents per sample; the context input holds NUM_AGENTS - 1 of them.
NUM_AGENTS = 50
# Size of the last (feature) axis; the first two features are used as targets.
NUM_FEATURES = 6
EGO_AGENT_IDX = 0  # ego vehicle is always the first agent (index 0)

In [None]:
def build_transformer_model(
    input_shape_ego=(INPUT_TIME_STEPS, NUM_FEATURES),
    input_shape_context=(NUM_AGENTS-1, INPUT_TIME_STEPS, NUM_FEATURES),
    output_time_steps=OUTPUT_TIME_STEPS,
    d_model=128,
    num_heads=8,
    ff_dim=512,
    num_transformer_blocks=4,
    dropout_rate=0.1
):
    """
    Build a two-input Keras model that predicts the ego agent's future
    (x, y) trajectory from its own history and the other agents' histories.

    The sequence length is taken from ``input_shape_ego`` (not from the
    module-level constant), so the model can be built for any history length
    as long as both input shapes agree on it.

    Args:
        input_shape_ego: Shape of ego vehicle input (time_steps, features)
        input_shape_context: Shape of context agents input
            (num_context_agents, time_steps, features)
        output_time_steps: Number of time steps to predict
        d_model: Dimension of transformer model
        num_heads: Number of attention heads
        ff_dim: Feed-forward network dimension
        num_transformer_blocks: Number of transformer blocks on the ego branch
        dropout_rate: Dropout rate

    Returns:
        Keras model taking ``[ego_input, context_input]`` and producing a
        tensor of shape (batch, output_time_steps, 2).

    Raises:
        ValueError: If the ego and context inputs disagree on the number of
            time steps.
    """
    time_steps = input_shape_ego[0]
    num_agents = input_shape_context[0]
    if input_shape_context[1] != time_steps:
        raise ValueError(
            "input_shape_ego and input_shape_context must share the same number "
            f"of time steps, got {time_steps} and {input_shape_context[1]}"
        )

    # Sinusoidal encoding of shape (time_steps, d_model), shared by both branches.
    pos_encoding = get_positional_encoding(time_steps, d_model)

    def add_positional_encoding(tensor):
        # Broadcasting adds the (time_steps, d_model) table to every batch
        # element; no explicit tiling over the batch axis is required.
        return layers.Lambda(
            lambda t: t + tf.cast(pos_encoding, t.dtype)
        )(tensor)

    # Ego vehicle input
    ego_input = layers.Input(shape=input_shape_ego, name="ego_input")

    # Context (other agents) input
    context_input = layers.Input(shape=input_shape_context, name="context_input")

    # --- Process ego vehicle data ---
    # Project ego features to the embedding dimension, then add positions.
    ego_embedding = layers.Dense(d_model)(ego_input)
    ego_embedding = add_positional_encoding(ego_embedding)

    # Process ego data through transformer blocks
    x_ego = ego_embedding
    for _ in range(num_transformer_blocks):
        x_ego = transformer_encoder_block(x_ego, d_model, num_heads, ff_dim, dropout_rate)

    # --- Process context (other agents) data ---
    # Per-agent sequence encoder that provides the values to be aggregated.
    context_lstm = layers.TimeDistributed(
        layers.LSTM(d_model, return_sequences=True)
    )(context_input)

    # Second, independently-weighted per-agent encoder used only to score agents.
    # NOTE(review): this doubles the cost of the context branch; scoring
    # `context_lstm` directly would avoid the second LSTM but changes the
    # architecture, so it is left as in the original design.
    agent_features = layers.TimeDistributed(
        layers.LSTM(d_model, return_sequences=True)
    )(context_input)

    # One scalar score per (agent, time step), normalised across agents.
    agent_attn = layers.TimeDistributed(
        layers.Dense(1, activation='tanh')
    )(agent_features)
    agent_attn = layers.Reshape((num_agents, time_steps))(agent_attn)
    agent_attn = layers.Softmax(axis=1)(agent_attn)
    agent_attn = layers.Reshape((num_agents, time_steps, 1))(agent_attn)

    # Attention-weighted sum over the agent axis -> (batch, time_steps, d_model)
    context_weighted = layers.Multiply()([context_lstm, agent_attn])
    context_aggregated = layers.Lambda(
        lambda x: tf.reduce_sum(x, axis=1)
    )(context_weighted)

    # Add positional encoding to context
    context_aggregated = add_positional_encoding(context_aggregated)

    # --- Combine ego and context information ---
    # Both branches are (batch, time_steps, d_model); concatenate on features
    # and project back to d_model.
    combined = layers.Concatenate(axis=2)([x_ego, context_aggregated])
    combined = layers.Dense(d_model)(combined)

    # Process combined data through additional transformer blocks
    for _ in range(2):
        combined = transformer_encoder_block(combined, d_model, num_heads, ff_dim, dropout_rate)

    # Use GRU to generate the trajectory sequence
    decoder_outputs = layers.GRU(d_model, return_sequences=True)(combined)

    # Project to output positions (x, y)
    outputs = layers.TimeDistributed(layers.Dense(2))(decoder_outputs)

    # The decoder emits one step per input step; when a different horizon is
    # requested, map the flattened sequence to the target length with a Dense layer.
    if output_time_steps != time_steps:
        reshaped_outputs = layers.Reshape((time_steps * 2,))(outputs)
        outputs = layers.Dense(output_time_steps * 2)(reshaped_outputs)
        outputs = layers.Reshape((output_time_steps, 2))(outputs)

    # Create the model
    model = keras.Model(inputs=[ego_input, context_input], outputs=outputs)

    return model

In [None]:
def get_positional_encoding(seq_len, d_model):
    """
    Create the sinusoidal positional encoding from "Attention Is All You Need".

    Args:
        seq_len: Number of positions to encode.
        d_model: Encoding width. Odd values are supported: the encoding is
            built for the next even width and truncated to ``d_model``.

    Returns:
        float32 tensor of shape (seq_len, d_model) with sines on even feature
        indices and cosines on odd feature indices.
    """
    # Column vector of positions: (seq_len, 1)
    positions = tf.range(start=0, limit=seq_len, delta=1.0, dtype=tf.float32)
    positions = tf.expand_dims(positions, axis=1)

    # One frequency per (sin, cos) pair: (1, ceil(d_model / 2))
    div_term = tf.pow(10000.0, tf.range(0, d_model, 2.0) / d_model)
    div_term = tf.expand_dims(div_term, axis=0)

    angles = positions / div_term
    sines = tf.sin(angles)
    cosines = tf.cos(angles)

    # Stacking on a new trailing axis and flattening interleaves the two:
    # [sin_0, cos_0, sin_1, cos_1, ...]
    pos_encoding = tf.stack([sines, cosines], axis=2)
    pos_encoding = tf.reshape(pos_encoding, [seq_len, -1])

    # For odd d_model the interleaved width is d_model + 1; drop the extra
    # column (a no-op when d_model is even).
    return pos_encoding[:, :d_model]

In [None]:
def transformer_encoder_block(inputs, d_model, num_heads, ff_dim, dropout_rate=0.1):
    """
    Apply one post-norm transformer encoder block to ``inputs``.

    The block is self-attention followed by a two-layer feed-forward network;
    each sub-layer is wrapped with dropout, a residual connection and layer
    normalisation. The output has ``d_model`` features on its last axis.
    """
    # --- Self-attention sub-layer ---
    self_attention = layers.MultiHeadAttention(
        num_heads=num_heads, key_dim=d_model // num_heads
    )
    attended = self_attention(inputs, inputs)
    attended = layers.Dropout(dropout_rate)(attended)
    # Residual + norm
    normed_attention = layers.LayerNormalization(epsilon=1e-6)(inputs + attended)

    # --- Position-wise feed-forward sub-layer ---
    hidden = layers.Dense(ff_dim, activation="relu")(normed_attention)
    projected = layers.Dense(d_model)(hidden)
    projected = layers.Dropout(dropout_rate)(projected)

    # Residual + norm
    return layers.LayerNormalization(epsilon=1e-6)(normed_attention + projected)

In [None]:
def weighted_mse_loss(y_true, y_pred):
    """
    Mean squared error with per-time-step weights that decay into the future.

    Step ``t`` (0-based, along axis 1) is weighted by ``1 / (t / 10 + 1)``, so
    near-future errors count more. The result is the plain mean of the
    weighted squared errors (it is not renormalised by the weight sum).

    Args:
        y_true: Target tensor/array, indexed as (batch, time, features).
        y_pred: Predicted tensor with the same shape as ``y_true``.

    Returns:
        Scalar loss tensor in the dtype of ``y_pred``.
    """
    # Align dtypes so the function also works when called directly with
    # float64 targets or under mixed precision.
    y_pred = tf.convert_to_tensor(y_pred)
    y_true = tf.cast(y_true, y_pred.dtype)

    # Create weights that decay over time
    time_steps = tf.shape(y_true)[1]
    weights = 1.0 / (tf.range(time_steps, dtype=tf.float32) / 10.0 + 1.0)
    weights = tf.cast(weights, y_pred.dtype)
    weights = weights[tf.newaxis, :, tf.newaxis]  # Add batch and feature dimensions

    # Calculate weighted MSE
    squared_error = tf.square(y_true - y_pred)
    return tf.reduce_mean(squared_error * weights)

In [None]:
def prepare_data_for_training(train_data, batch_size=32, shuffle=True, val_split=0.1, seed=None):
    """
    Split ``train_data`` into training/validation tf.data pipelines.

    Each dataset element is ``({"ego_input": ..., "context_input": ...}, targets)``
    where the ego input is agent ``EGO_AGENT_IDX`` over the first
    ``INPUT_TIME_STEPS`` steps, the context input is agents ``1:`` over the
    same window, and the targets are the first two features of the ego agent
    over the following ``OUTPUT_TIME_STEPS`` steps.

    Args:
        train_data: Array indexed as (sample, agent, time step, feature).
        batch_size: Batch size for both datasets.
        shuffle: If True, samples are randomly assigned to train/validation
            and the training dataset is shuffled. If False, the last
            ``val_split`` fraction is used for validation and the training
            order is preserved.
        val_split: Fraction of samples held out for validation, in [0, 1).
        seed: Optional seed making the split and the training shuffle
            reproducible. With ``None`` the global NumPy random state is used.

    Returns:
        Tuple ``(train_dataset, val_dataset)`` of batched, prefetched datasets.

    Raises:
        ValueError: If ``val_split`` is outside [0, 1).
    """
    if not 0.0 <= val_split < 1.0:
        raise ValueError(f"val_split must be in [0, 1), got {val_split}")

    n_samples = train_data.shape[0]
    n_val = int(n_samples * val_split)
    n_train = n_samples - n_val

    # Split data
    if shuffle:
        if seed is None:
            indices = np.random.permutation(n_samples)
        else:
            indices = np.random.RandomState(seed).permutation(n_samples)
        train_indices = indices[:n_train]
        val_indices = indices[n_train:]
    else:
        train_indices = np.arange(n_train)
        val_indices = np.arange(n_train, n_samples)

    def build_dataset(sample_indices):
        # Slice ego history, context histories and ego future for the given samples.
        ego = train_data[sample_indices, EGO_AGENT_IDX, :INPUT_TIME_STEPS, :]
        context = train_data[sample_indices, 1:, :INPUT_TIME_STEPS, :]
        targets = train_data[
            sample_indices,
            EGO_AGENT_IDX,
            INPUT_TIME_STEPS:INPUT_TIME_STEPS+OUTPUT_TIME_STEPS,
            :2,
        ]
        return tf.data.Dataset.from_tensor_slices(
            ({"ego_input": ego, "context_input": context}, targets)
        )

    train_dataset = build_dataset(train_indices)
    if shuffle:
        # Buffer covers the whole training set for a uniform shuffle; the
        # max() guards against an empty training split.
        train_dataset = train_dataset.shuffle(buffer_size=max(n_train, 1), seed=seed)
    train_dataset = train_dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE)

    val_dataset = build_dataset(val_indices).batch(batch_size).prefetch(tf.data.AUTOTUNE)

    return train_dataset, val_dataset

In [None]:
def train_model(model, train_dataset, val_dataset, epochs=50):
    """
    Compile ``model`` with Adam and the weighted MSE loss, then fit it.

    Training uses early stopping (restoring the best weights), learning-rate
    halving on plateau, and a best-only weights checkpoint; all three
    callbacks use their default monitored quantity.

    Returns:
        The history object returned by ``model.fit``.
    """
    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=1e-4),
        loss=weighted_mse_loss,
    )

    # Stop after 5 epochs without improvement and roll back to the best weights.
    early_stopping = keras.callbacks.EarlyStopping(patience=5, restore_best_weights=True)
    # Halve the learning rate after 3 epochs without improvement.
    lr_on_plateau = keras.callbacks.ReduceLROnPlateau(factor=0.5, patience=3)
    # Persist only the best weights seen so far.
    best_checkpoint = keras.callbacks.ModelCheckpoint(
        "best_trajectory_model.weights.h5",
        save_best_only=True,
        save_weights_only=True,
    )

    return model.fit(
        train_dataset,
        validation_data=val_dataset,
        epochs=epochs,
        callbacks=[early_stopping, lr_on_plateau, best_checkpoint],
    )

In [None]:
def generate_predictions(model, test_data):
    """
    Predict ego trajectories for every sample in ``test_data``.

    Args:
        model: Model accepting the ``ego_input`` / ``context_input`` dict.
        test_data: Array indexed as (sample, agent, time step, feature). Only
            the first ``INPUT_TIME_STEPS`` steps are fed to the model, matching
            the window used when building the training inputs.

    Returns:
        Array of predictions, one row per sample, as returned by
        ``model.predict``.

    Raises:
        ValueError: If ``test_data`` contains no samples.
    """
    batch_size = 32

    if test_data.shape[0] == 0:
        raise ValueError("test_data contains no samples")

    # Basic slicing yields views, so no copy of the test array is made here.
    ego_inputs = test_data[:, EGO_AGENT_IDX, :INPUT_TIME_STEPS, :]
    context_inputs = test_data[:, 1:, :INPUT_TIME_STEPS, :]

    # A single predict call lets Keras do the batching and concatenation,
    # avoiding per-call overhead and one progress bar per manual batch.
    predictions = model.predict(
        {"ego_input": ego_inputs, "context_input": context_inputs},
        batch_size=batch_size,
    )

    return predictions

In [None]:
def visualize_trajectories(input_traj, true_traj, pred_traj, num_samples=5):
    """
    Plot sample trajectories and save them to 'trajectory_samples.png'.

    Args:
        input_traj: Observed trajectories, indexed as (sample, step, xy).
        true_traj: Ground-truth future trajectories with the same indexing,
            or None to omit them.
        pred_traj: Predicted future trajectories with the same indexing.
        num_samples: Maximum number of samples to draw. It is clamped to the
            number of samples actually available, and the subplot grid grows
            beyond 2x3 when more than six are requested.

    Returns:
        None. The figure is written to disk and closed.
    """
    # Never index past the shortest of the supplied arrays.
    available = min(len(input_traj), len(pred_traj))
    if true_traj is not None:
        available = min(available, len(true_traj))
    n_plots = max(0, min(num_samples, available))

    # Keep the original 2x3 layout for up to six plots; add rows beyond that.
    n_cols = 3
    n_rows = max(2, (n_plots + n_cols - 1) // n_cols)

    fig = plt.figure(figsize=(15, 5 * n_rows))

    for i in range(n_plots):
        ax = fig.add_subplot(n_rows, n_cols, i + 1)

        # Plot input trajectory
        ax.plot(input_traj[i, :, 0], input_traj[i, :, 1], 'b-', alpha=0.7, label='Input')

        # Plot true trajectory if available
        if true_traj is not None:
            ax.plot(true_traj[i, :, 0], true_traj[i, :, 1], 'g-', alpha=0.7, label='True')

        # Plot predicted trajectory
        ax.plot(pred_traj[i, :, 0], pred_traj[i, :, 1], 'r-', alpha=0.7, label='Predicted')

        # Mark the last observed point, i.e. where the prediction starts
        ax.scatter(input_traj[i, -1, 0], input_traj[i, -1, 1], color='blue', s=50, marker='o')

        ax.axis('equal')
        ax.grid(True)
        ax.set_title(f'Sample {i+1}')
        ax.legend()

    fig.tight_layout()
    fig.savefig('trajectory_samples.png')
    plt.close(fig)

In [None]:
# Prepare data: 90/10 train/validation split of the training scenes
print("Preparing data...")
train_dataset, val_dataset = prepare_data_for_training(train_data, batch_size=32)

# Build the ego + context transformer model with default hyperparameters
print("Creating model...")
model = build_transformer_model()

# Print model summary
model.summary()

# Train model
print("Training model...")
history = train_model(model, train_dataset, val_dataset, epochs=50)

# Save final weights. Keras 3 only accepts weight files whose name ends in
# `.weights.h5` (the same convention the training checkpoint uses); a plain
# `.h5` suffix raises a ValueError after training has already finished.
model.save_weights('trajectory_model.weights.h5')

# Generate predictions for test data
print("Generating predictions...")
predictions = generate_predictions(model, test_data)

Preparing data...
Creating model...


Training model...
Epoch 1/50
[1m127/282[0m [32m━━━━━━━━━[0m[37m━━━━━━━━━━━[0m [1m19:48[0m 8s/step - loss: 3882922.0000

KeyboardInterrupt: 