In [1]:
# load data
import numpy as np
train_file = np.load('data/train.npz')
train_data = train_file['data']
print("train_data's shape", train_data.shape)
test_file = np.load('data/test_input.npz')
test_data = test_file['data']
print("test_data's shape", test_data.shape)


train_data's shape (10000, 50, 110, 6)
test_data's shape (2100, 50, 50, 6)


In [2]:
# preprocess
def preprocess_data(data):
    """
    Removes padded agents (agents with all zero values across time steps).
    
    Args:
        data (numpy.ndarray): Shape (scenarios, agents, time_steps, dimensions)
    
    Returns:
        numpy.ndarray: Filtered dataset without padded agents.
    """
    scenarios, agents, time_steps, dimensions = data.shape
    processed_data = []

    for i in range(scenarios):
        scenario_data = data[i]  # Shape (agents, time_steps, dimensions)
        
        # Identify non-padded agents (at least one nonzero value across all time steps)
        valid_agents = np.any(scenario_data != 0, axis=(1, 2))  # Shape (agents,)
        
        # Filter out only the valid agents
        filtered_agents = scenario_data[valid_agents]  # Shape (valid_agents, time_steps, dimensions)
        
        processed_data.append(filtered_agents)

    return processed_data  # List of variable-length arrays per scenario

train_data_processed = preprocess_data(train_data)
test_data_processed = preprocess_data(test_data)

# Print results
print(f"Original Train Data Shape: {train_data.shape}")
print(f"Processed Train Data Length: {len(train_data_processed)} (variable agents per scenario)")

print(f"Original Test Data Shape: {test_data.shape}")
print(f"Processed Test Data Length: {len(test_data_processed)} (variable agents per scenario)")

Original Train Data Shape: (10000, 50, 110, 6)
Processed Train Data Length: 10000 (variable agents per scenario)
Original Test Data Shape: (2100, 50, 50, 6)
Processed Test Data Length: 2100 (variable agents per scenario)


In [3]:
# some kind of missing trajectory handling? 

In [ ]:
import tensorflow as tf
from tensorflow.keras.layers import LSTM, Dense, Input, RepeatVector, TimeDistributed, Dropout

In [ ]:
def create_lstm_encoder_decoder(
    input_dim=6,
    output_dim=2,
    timesteps_in=50,
    timesteps_out=60,
    lstm_units=128,
    num_layers=2,          # New parameter for stacking
    loss_fn='huber',
    lr=1e-3
):
    encoder_inputs = Input(shape=(timesteps_in, input_dim))
    x = encoder_inputs

    # Encoder: dynamically stacked LSTMs
    for i in range(num_layers - 1):
        x = LSTM(lstm_units, return_sequences=True)(x)
        x = Dropout(0.2)(x)
    x = LSTM(lstm_units)(x)  # Final layer with return_sequences=False

    decoder_input = RepeatVector(timesteps_out)(x)

    # Decoder: dynamically stacked LSTMs
    x = decoder_input
    for _ in range(num_layers):
        x = LSTM(lstm_units, return_sequences=True)(x)
        x = Dropout(0.2)(x)

    decoder_outputs = TimeDistributed(Dense(output_dim))(x)

    model = Model(encoder_inputs, decoder_outputs)
    model.compile(optimizer=Adam(learning_rate=lr), loss=loss_fn)

    return model

In [20]:
from keras.src.callbacks import LearningRateScheduler, EarlyStopping, Callback
from keras.src.optimizers import Adam
from keras import Model
import numpy as np


def exponential_decay_schedule(epoch, lr):
    decay_rate = 0.9
    decay_steps = 5
    if epoch % decay_steps == 0 and epoch:
        print('Learning rate update:', lr * decay_rate)
        return lr * decay_rate
    return lr


# Custom callback to monitor LR and stop training
class LRThresholdCallback(Callback):
    def __init__(self, threshold=9e-5):
        super().__init__()
        self.threshold = threshold
        self.should_stop = False

    def on_epoch_end(self, epoch, logs=None):
        lr = float(self.model.optimizer.learning_rate.numpy())
        if lr < self.threshold:
            print(f"\nLearning rate {lr:.6f} < threshold {self.threshold}, moving to Phase 2.")
            self.model.stop_training = True

def train_model(train_data, batch_size=32, validation_split=0.2):
    n_scenarios = train_data.shape[0]
    Tobs = 50

    X_train_raw = []
    y_train_deltas = []

    for i in range(n_scenarios):
        ego_data = train_data[i, 0, :, :]
        if np.all(ego_data == 0):
            continue

        observed_data = ego_data[:Tobs]            # shape (50, 6)
        future_positions = ego_data[Tobs:, :2]     # shape (60, 2)
        last_observed_pos = observed_data[-1, :2]  # shape (2,)

        if np.any(np.all(observed_data == 0, axis=1)) or np.any(np.all(future_positions == 0, axis=1)):
            continue

        # Compute deltas
        delta = future_positions - last_observed_pos  # shape (60, 2)

        X_train_raw.append(observed_data)
        y_train_deltas.append(delta)

    X_train = np.array(X_train_raw)
    y_train = np.array(y_train_deltas)

    print(f"Training with {X_train.shape[0]} valid scenarios")
    print(f"Input shape: {X_train.shape}, Output shape: {y_train.shape}")

    # === Phase 1 === now
    model = create_lstm_encoder_decoder(
        input_dim=X_train.shape[-1],
        output_dim=2,
        timesteps_in=Tobs,
        timesteps_out=y_train.shape[1],
        loss_fn='mse',
        lr=0.001
    )

    print("\n--- Phase 1: Training with MSE loss ---\n")
    phase1_callbacks = [
        LearningRateScheduler(exponential_decay_schedule),
        EarlyStopping(patience=4, restore_best_weights=True, monitor='val_loss'),
        LRThresholdCallback(threshold=9e-5)
    ]

    model.fit(
        X_train, y_train,
        epochs=10,
        batch_size=batch_size,
        validation_split=validation_split,
        verbose=1,
        callbacks=phase1_callbacks
    )

    # === Phase 2 ===
    print("\n--- Phase 2: Fine-tuning with MSE loss ---\n")
    model.compile(optimizer=Adam(learning_rate=1e-4), loss='mse')

    phase2_callbacks = [
        LearningRateScheduler(exponential_decay_schedule),
        EarlyStopping(patience=3, restore_best_weights=True, monitor='val_loss')
    ]

    model.fit(
        X_train, y_train,
        epochs=10,
        batch_size=batch_size,
        validation_split=validation_split,
        verbose=1,
        callbacks=phase2_callbacks
    )

    return model


In [21]:
import pickle

def save_model(model, filepath='lstm_1.pkl'):
    """Save model and scaler together in a pickle file"""
    model_json = model.to_json()
    model_weights = model.get_weights()
    data = {
        'model_json': model_json,
        'model_weights': model_weights,
    }
    with open(filepath, 'wb') as f:
        pickle.dump(data, f)
    print(f"Model saved to {filepath}")

def load_model(filepath='lstm_1.pkl'):
    """Load model and scaler from pickle file"""
    with open(filepath, 'rb') as f:
        data = pickle.load(f)
    
    # Reconstruct model
    model = tf.keras.models.model_from_json(data['model_json'])
    model.set_weights(data['model_weights'])
    model.compile(optimizer='adam', loss='mse')
    
    return model

In [22]:
# Train the model
model = train_model(train_data)

# Save the model 
save_model(model)

Training with 10000 valid scenarios
Input shape: (10000, 50, 6), Output shape: (10000, 60, 2)

--- Phase 1: Training with Huber loss ---

Epoch 1/10
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 91ms/step - loss: 9.7306 - val_loss: 9.2787 - learning_rate: 0.0100
Epoch 2/10
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 102ms/step - loss: 9.5527 - val_loss: 9.2788 - learning_rate: 0.0100
Epoch 3/10
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 90ms/step - loss: 9.6450 - val_loss: 9.2787 - learning_rate: 0.0100
Epoch 4/10
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 100ms/step - loss: 9.5809 - val_loss: 9.3047 - learning_rate: 0.0100
Epoch 5/10
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 94ms/step - loss: 9.6332 - val_loss: 9.2785 - learning_rate: 0.0100
Learning rate update: 0.008999999798834325
Epoch 6/10
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 94ms/step

In [23]:
def reconstruct_absolute_positions(pred_deltas, last_observed_positions):
    """
    Reconstruct absolute predicted positions by adding deltas to the last observed position.

    Args:
        pred_deltas: np.ndarray of shape (N, Tpred, 2)
        last_observed_positions: np.ndarray of shape (N, 2)

    Returns:
        np.ndarray of shape (N, Tpred, 2)
    """
    return last_observed_positions[:, None, :] + pred_deltas


def forecast_positions(scenario_data, Tobs, Tpred, model):
    """
    Use LSTM model to forecast future positions from deltas and reconstruct absolute positions.

    Args:
        scenario_data (numpy.ndarray): Shape (agents, time_steps, dimensions)
        Tobs (int): Number of observed time steps
        Tpred (int): Number of future time steps to predict
        model (Model): Trained LSTM model that predicts deltas

    Returns:
        numpy.ndarray: Predicted absolute positions of shape (agents, Tpred, 2)
    """
    agents, _, _ = scenario_data.shape
    predicted_positions = np.zeros((agents, Tpred, 2))

    for agent_idx in range(agents):
        agent_data = scenario_data[agent_idx, :Tobs, :]

        # Skip padding
        if np.all(agent_data == 0):
            continue

        # Predict deltas
        X_pred = np.expand_dims(agent_data, axis=0)
        delta_pred = model.predict(X_pred, verbose=0)[0]  # (Tpred, 2)

        # Last observed absolute position
        last_pos = agent_data[Tobs - 1, :2]  # shape (2,)

        # Reconstruct absolute positions
        absolute_pred = reconstruct_absolute_positions(
            pred_deltas=np.expand_dims(delta_pred, axis=0),
            last_observed_positions=np.expand_dims(last_pos, axis=0)
        )[0]

        predicted_positions[agent_idx] = absolute_pred

    return predicted_positions



In [27]:
import matplotlib.pyplot as plt
import matplotlib.animation as animation

def make_gif(data_matrix1, data_matrix2, name='comparison'):
    cmap1 = plt.cm.get_cmap('viridis', 50)
    cmap2 = plt.cm.get_cmap('plasma', 50)

    assert data_matrix1.shape[1] == data_matrix2.shape[1], "Both matrices must have same number of timesteps"
    timesteps = data_matrix1.shape[1]

    fig, axes = plt.subplots(1, 2, figsize=(18, 9))
    ax1, ax2 = axes

    def update(frame):
        for ax in axes:
            ax.clear()

        for i in range(data_matrix1.shape[0]):
            for (data_matrix, ax, cmap) in [(data_matrix1, ax1, cmap1), (data_matrix2, ax2, cmap2)]:
                x = data_matrix[i, frame, 0]
                y = data_matrix[i, frame, 1]
                if x != 0 and y != 0:
                    xs = data_matrix[i, :frame+1, 0]
                    ys = data_matrix[i, :frame+1, 1]
                    mask = (xs != 0) & (ys != 0)
                    xs = xs[mask]
                    ys = ys[mask]
                    if len(xs) > 0 and len(ys) > 0:
                        color = cmap(i)
                        ax.plot(xs, ys, alpha=0.9, color=color)
                        ax.scatter(x, y, s=80, color=color)

        # Plot ego vehicle (index 0) on both
        ax1.plot(data_matrix1[0, :frame, 0], data_matrix1[0, :frame, 1], color='tab:orange', label='Ego Vehicle')
        ax1.scatter(data_matrix1[0, frame, 0], data_matrix1[0, frame, 1], s=80, color='tab:orange')
        ax1.set_title('Prediction')

        ax2.plot(data_matrix2[0, :frame, 0], data_matrix2[0, :frame, 1], color='tab:orange', label='Ego Vehicle')
        ax2.scatter(data_matrix2[0, frame, 0], data_matrix2[0, frame, 1], s=80, color='tab:orange')
        ax2.set_title('Actual')

        for ax, data_matrix in zip(axes, [data_matrix1, data_matrix2]):
            ax.set_xlim(data_matrix[:, :, 0][data_matrix[:, :, 0] != 0].min() - 10,
                        data_matrix[:, :, 0][data_matrix[:, :, 0] != 0].max() + 10)
            ax.set_ylim(data_matrix[:, :, 1][data_matrix[:, :, 1] != 0].min() - 10,
                        data_matrix[:, :, 1][data_matrix[:, :, 1] != 0].max() + 10)
            ax.legend()
            ax.set_xlabel('X')
            ax.set_ylabel('Y')

        fig.suptitle(f"Timestep {frame}", fontsize=16)
        return ax1.collections + ax1.lines + ax2.collections + ax2.lines

    anim = animation.FuncAnimation(fig, update, frames=list(range(0, timesteps, 3)), interval=100, blit=True)
    anim.save(f'trajectory_visualization_{name}.gif', writer='pillow')
    plt.close()


In [31]:
# visualize prediction

# model = load_model()

# Parameters
Tobs = 50
Tpred = 60

data = train_data[5]

# Select a test scenario (can use any valid index)
test_scenario = data.copy()  # shape (agents, time_steps, features)

# Forecast future positions
predicted_positions = forecast_positions(test_scenario, Tobs, Tpred, model)

# Create combined matrix of past observed + predicted for ego agent (agent 0)
ego_past = test_scenario[0, :Tobs, :2]               # shape (Tobs, 2)
ego_future = predicted_positions[0]                  # shape (Tpred, 2)
ego_full = np.concatenate([ego_past, ego_future], axis=0)  # shape (Tobs + Tpred, 2)

# Create updated scenario with predicted ego and original others
updated_scenario = test_scenario.copy()
updated_scenario[0, :Tobs+Tpred, :2] = ego_full  # Replace ego trajectory

# Visualize
make_gif(updated_scenario, data, name='lstm1')


  cmap1 = plt.cm.get_cmap('viridis', 50)
  cmap2 = plt.cm.get_cmap('plasma', 50)


In [None]:
from sklearn.metrics import mean_squared_error


def evaluate_mse(train_data, model, scaler, Tobs=50, Tpred=60):
    """
    Computes LSTM prediction for ego agent and evaluates MSE with progress reporting.
    """
    N = train_data.shape[0]
    mse_list = []
    valid_scenarios = 0
    
    print(f"Evaluating {N} scenarios...")
    
    # Progress reporting variables
    report_interval = max(1, N // 10)  # Report at 10% intervals
    
    for i in range(N):
        # Progress reporting
        if i % report_interval == 0 or i == N-1:
            print(f"Processing scenario {i+1}/{N} ({(i+1)/N*100:.1f}%)")
        
        scenario_data = train_data[i]
        ego_agent_data = scenario_data[0]
        ground_truth = ego_agent_data[Tobs:Tobs+Tpred, :2]
        
        # Skip if ground truth contains all zeros (padded)
        if np.all(ground_truth == 0):
            continue
            
        valid_scenarios += 1
        
        # Forecast future positions
        predicted_positions = forecast_positions(
            ego_agent_data[np.newaxis, :, :],
            Tobs, Tpred, model, scaler
        )
        
        # Compute MSE
        mse = mean_squared_error(ground_truth, predicted_positions[0])
        mse_list.append(mse)
        
        # Occasional MSE reporting
        if i % report_interval == 0:
            print(f"  Current scenario MSE: {mse:.4f}")
    
    # Final results
    if mse_list:
        overall_mse = np.mean(mse_list)
        print(f"Evaluation complete: {valid_scenarios} valid scenarios")
        print(f"Mean Squared Error (MSE): {overall_mse:.4f}")
        print(f"Min MSE: {np.min(mse_list):.4f}, Max MSE: {np.max(mse_list):.4f}")
        return overall_mse
    else:
        print("No valid scenarios for evaluation.")
        return None

In [None]:
# Evaluate on training data
evaluate_mse(train_data, model, scaler)