# Read data

In [1]:
import os
import typing
from tqdm import tqdm

In [2]:
ROOT_DATA_FOLDER = "./YandexCup2024v2"

TRAIN_DATASET_PATH = os.path.join(ROOT_DATA_FOLDER, "YaCupTrain")
TEST_DATASET_PATH = os.path.join(ROOT_DATA_FOLDER, "YaCupTest")

In [3]:
# Load all ids of a dataset

def read_testcase_ids(dataset_path: str):
    ids = [int(case_id) for case_id in os.listdir(dataset_path) if os.path.isdir(os.path.join(dataset_path, case_id))]
    return ids

In [4]:
train_ids = read_testcase_ids(TRAIN_DATASET_PATH)
len(train_ids)

42000

In [5]:
test_ids = read_testcase_ids(TEST_DATASET_PATH)
len(test_ids)

8000

In [6]:
class DataFilePaths:
    def __init__(self, testcase_path: str):
        self.testcase_path = testcase_path
        
    def localization(self):
        return os.path.join(self.testcase_path, 'localization.csv')
    
    def control(self):
        return os.path.join(self.testcase_path, 'control.csv')
    
    def metadata(self):
        return os.path.join(self.testcase_path, 'metadata.json')
    
    # exists only for test_dataset
    def requested_stamps(self):
        return os.path.join(self.testcase_path, 'requested_stamps.csv')    

In [7]:
import pandas as pd
import json

def read_localization(localization_path: str):
    return pd.read_csv(localization_path)

def read_control(control_path):
    return pd.read_csv(control_path)

def read_metadata(metadata_path: str):
    with open(metadata_path, 'r') as f:
        data = json.load(f)
    return data

def read_requested_stamps(requested_stamps_path: str):
    return pd.read_csv(requested_stamps_path)
    
def read_testcase(dataset_path: str, testcase_id: str, is_test: bool = False):
    testcase_path = os.path.join(dataset_path, str(testcase_id))
    data_file_paths = DataFilePaths(testcase_path)
    
    testcase_data = {}
    testcase_data['localization'] = read_localization(data_file_paths.localization())
    testcase_data['control'] = read_control(data_file_paths.control())
    testcase_data['metadata'] = read_metadata(data_file_paths.metadata())
    if is_test:
        testcase_data['requested_stamps'] = read_requested_stamps(data_file_paths.requested_stamps())
        
    return testcase_data

In [8]:
def read_testcases(dataset_path: str, is_test: bool = False, testcase_ids: typing.Iterable[int] = None):
    result = {}
    if testcase_ids is None:
        testcase_ids = read_testcase_ids(dataset_path)
    
    for testcase_id in tqdm(testcase_ids):
        testcase = read_testcase(dataset_path, testcase_id, is_test=is_test)
        result[testcase_id] = testcase
    return result

In [10]:
# may take some time
train_dataset = read_testcases(TRAIN_DATASET_PATH,  testcase_ids = train_ids[:])

len(train_dataset)

100%|██████████| 42000/42000 [03:51<00:00, 181.63it/s]


42000

In [11]:
test_dataset = read_testcases(TEST_DATASET_PATH, is_test=True)
len(test_dataset)

100%|██████████| 8000/8000 [00:35<00:00, 228.04it/s]


8000

In [12]:
import os
import typing
import json
import numpy as np
import pandas as pd
from tqdm import tqdm
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

# DL dataset

## Label categories

In [13]:
from collections import Counter

In [14]:
vehicle_models = []
vehicle_modifications = []
tires_types = []
location_ids = []
vehicle_ids = []


for testcase in train_dataset.values():
    metadata = testcase['metadata']
    vehicle_ids.append(metadata['vehicle_id'])
    vehicle_models.append(metadata['vehicle_model'])
    vehicle_modifications.append(metadata['vehicle_model_modification'])
    location_ids.append(metadata['location_reference_point_id'])
    tires_types.append(metadata['tires']['front'])
    tires_types.append(metadata['tires']['rear'])

In [15]:
unknown_vehicle_model_idx = Counter(vehicle_models).most_common()[0][0]
unknown_vehicle_modification_idx = Counter(vehicle_modifications).most_common()[0][0]
unknown_tires_idx = Counter(tires_types).most_common()[0][0]

In [16]:
vehicle_ids = set(vehicle_ids)
vehicle_models = set(vehicle_models)
vehicle_modifications = set(vehicle_modifications)
tires_types = set(tires_types)
location_ids = set(location_ids)


vehicle_model_mapping = {label: idx for idx, label in enumerate(vehicle_models)}
vehicle_modification_mapping = {label: idx for idx, label in enumerate(vehicle_modifications)}
tires_mapping = {label: idx for idx, label in enumerate(tires_types)}

# unknown_vehicle_model_idx = len(vehicle_models)
# unknown_vehicle_modification_idx = len(vehicle_modifications)
# unknown_tires_idx = len(tires_types)

## train/val split

In [17]:
val_ratio = 0.2

In [18]:
import random

def split_identifiers(identifiers, val_ratio=0.2):
    identifiers = list(identifiers)
    random.shuffle(identifiers)
    val_size = int(len(identifiers) * val_ratio)
    val_identifiers = set(identifiers[:val_size])
    train_identifiers = set(identifiers[val_size:])
    return train_identifiers, val_identifiers

# Split location_reference_point_id
train_location_ids, val_location_ids = split_identifiers(location_ids, val_ratio=val_ratio)

# Optionally, split vehicle_ids and vehicle_models similarly
train_vehicle_ids, val_vehicle_ids = split_identifiers(vehicle_ids)
train_vehicle_models, val_vehicle_models = split_identifiers(vehicle_modifications)
train_data = {}
val_data = {}

for testcase_id, testcase in train_dataset.items():
    metadata = testcase['metadata']
    location_id = metadata['location_reference_point_id']
    vehicle_id = metadata['vehicle_id']
    vehicle_model = metadata['vehicle_model']
    
    # Determine whether to include the sample in training or validation set
    if (location_id in train_location_ids and
        vehicle_id in train_vehicle_ids and
        vehicle_model in train_vehicle_models):
        train_data[testcase_id] = testcase
    else:
        val_data[testcase_id] = testcase

# Check for overlaps
assert not set(train_data.keys()) & set(val_data.keys()), "Overlap between training and validation sets"

## torch Dataset

In [19]:
import torch
from torch.utils.data import Dataset
from copy import deepcopy

class TrajectoryDataset(Dataset):
    def __init__(self, dataset, training=True):
        self.data = []
        sampling_interval_ns = 4e7  # 0.04 seconds in nanoseconds
        initial_state_length = int(5 / 0.04)  # Steps for initial 5 seconds (125 steps)
        target_length = int(15 / 0.04)  # Steps from 5s to 20s (375 steps)
        sequence_length = initial_state_length + target_length  # Total steps (500 steps)

        for testcase_id, testcase in tqdm(dataset.items()):
            metadata = testcase['metadata']
            vehicle_features = self.encode_vehicle_features(metadata)

            # Get control commands
            control = testcase['control']
            control['acceleration_level'] = control['acceleration_level']
            control_seq = control[['stamp_ns', 'acceleration_level', 'steering']].values

            # Get localization data
            localization = testcase['localization']
            localization_seq = localization[['stamp_ns', 'x', 'y', 'z', 'roll', 'pitch', 'yaw']].values
    
            time_steps = np.arange(0, 60 * 1e9, sampling_interval_ns)
            control_resampled = self.resample_sequence(control_seq, time_steps)
            control_resampled = control_resampled[:, 1:] # drop ns
            localization_resampled = self.resample_sequence(localization_seq, time_steps)
            localization_resampled = localization_resampled[:, 1:] # drop ns
            
            max_start_idx = len(time_steps) - sequence_length
            for i in range(0, max_start_idx, initial_state_length):  # Slide window
                # Initial localization sequence (first 5 seconds)
                
                input_localization = deepcopy(localization_resampled[i:i+initial_state_length])  # Shape: [125, 3]
                
                start_position = deepcopy(input_localization[0][:3])
                
                input_localization[:, :3] -= start_position # Shift initial localization to zero
                
                # Target trajectory from t + 5s to t + 20s
                output_localization = deepcopy(localization_resampled[i+initial_state_length:i+sequence_length]) # Shape: [375, 3]
                output_localization[:, :3] -= start_position
            
                # Initial and inference control sequence
                input_control_sequence = deepcopy(control_resampled[i:i+initial_state_length])
                output_control_sequence = deepcopy(control_resampled[i+initial_state_length:i+sequence_length])
                
            
                self.data.append({
                    'vehicle_features': vehicle_features,
                    'input_localization': input_localization,
                    'output_localization': output_localization,
                    'input_control_sequence': input_control_sequence,
                    'output_control_sequence': output_control_sequence,
                })

    def encode_vehicle_features(self, metadata):
        vehicle_model = vehicle_model_mapping.get(metadata['vehicle_model'], unknown_vehicle_model_idx)
        vehicle_modification = vehicle_modification_mapping.get(metadata['vehicle_model_modification'], unknown_vehicle_modification_idx)
        tires_front = tires_mapping.get(metadata['tires']['front'], unknown_tires_idx)
        tires_rear = tires_mapping.get(metadata['tires']['rear'], unknown_tires_idx)
        vehicle_features = [vehicle_model, vehicle_modification, tires_front, tires_rear]
        return vehicle_features

    def resample_sequence(self, seq, time_steps):
        df_seq = pd.DataFrame(seq, columns=['stamp_ns'] + [f'feat_{i}' for i in range(seq.shape[1]-1)])
        df_seq = df_seq.set_index('stamp_ns').reindex(time_steps, method='nearest').reset_index()
        return df_seq.values

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        sample = self.data[idx]
        tensor_dict = {}
        for k, v in sample.items():
            if k.startswith('vehicle'):
                tensor_dict[k] = torch.tensor(v, dtype=torch.long)
            else:
                tensor_dict[k] = torch.tensor(v, dtype=torch.float32)
        return tensor_dict

In [20]:
import itertools

In [None]:
from torch.utils.data import DataLoader

# train_dataset = TrajectoryDataset(dict(itertools.islice(train_data.items(), 10)), training=True)
# val_dataset = TrajectoryDataset(dict(itertools.islice(val_data.items(), 10)), training=False)

train_dataset = TrajectoryDataset(train_data, training=True)
val_dataset = TrajectoryDataset(val_data, training=False)

  2%|▏         | 745/33831 [00:03<02:33, 214.86it/s]

## Dataloader

In [None]:
batch_size = 256
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=0)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=0)

In [None]:
for batch in train_loader:
    for k, v in batch.items():
        print(f"{k}: {v.shape}")
    break

In [None]:
batch_id = 11

In [None]:
for ind in [-3, -2, -1]:
    print(batch['input_localization'][batch_id][ind][:3])
for ind in [0, 1, 2, 3]:
    print(batch['output_localization'][batch_id][ind][:3])

# Modeling

## EncoderDecoder

In [None]:
class TrajectoryEncoderDecoder(nn.Module):
    def __init__(self, vehicle_feature_sizes, embedding_dim, localization_input_size, control_input_size, hidden_size, num_layers):
        super(TrajectoryEncoderDecoder, self).__init__()

        self.hidden_size = hidden_size
        self.num_layers = num_layers

        # Vehicle feature embeddings
        self.vehicle_model_embedding = nn.Embedding(num_embeddings=vehicle_feature_sizes['vehicle_model'], embedding_dim=embedding_dim)
        self.vehicle_modification_embedding = nn.Embedding(num_embeddings=vehicle_feature_sizes['vehicle_modification'], embedding_dim=embedding_dim)
        self.tires_embedding = nn.Embedding(num_embeddings=vehicle_feature_sizes['tires'], embedding_dim=embedding_dim)

        # Fully connected layer to combine vehicle features
        self.vehicle_fc = nn.Linear(embedding_dim * 4, hidden_size)

        # Encoder LSTM for localization
        self.localization_encoder = nn.LSTM(input_size=localization_input_size, hidden_size=hidden_size, num_layers=num_layers, batch_first=True)

        # Encoder LSTM for control sequence
        self.control_encoder = nn.LSTM(input_size=control_input_size, hidden_size=hidden_size, num_layers=num_layers, batch_first=True)

        # Decoder LSTM
        self.decoder = nn.LSTM(input_size=control_input_size, hidden_size=hidden_size, num_layers=num_layers, batch_first=True)

        # Output layer
        self.fc_out = nn.Linear(hidden_size, localization_input_size)

    def forward(self, vehicle_features, input_localization, input_control_sequence, output_control_sequence):
        batch_size = input_localization.size(0)

        # Embed vehicle features
        vehicle_model = self.vehicle_model_embedding(vehicle_features[:, 0])
        vehicle_modification = self.vehicle_modification_embedding(vehicle_features[:, 1])
        tires_front = self.tires_embedding(vehicle_features[:, 2])
        tires_rear = self.tires_embedding(vehicle_features[:, 3])

        # Concatenate vehicle features
        vehicle_embedded = torch.cat([vehicle_model, vehicle_modification, tires_front, tires_rear], dim=1)
        vehicle_features_encoded = self.vehicle_fc(vehicle_embedded)  # Shape: [batch_size, hidden_size]

        # Encoder for localization
        _, (hidden_loc, cell_loc) = self.localization_encoder(input_localization)  # hidden_loc: [num_layers, batch_size, hidden_size]

        # Encoder for control sequence
        _, (hidden_ctrl, cell_ctrl) = self.control_encoder(input_control_sequence)  # hidden_ctrl: [num_layers, batch_size, hidden_size]

        # Combine encoder hidden states and vehicle features
        # Option to concatenate, sum, or average hidden states
        hidden_enc = (hidden_loc + hidden_ctrl) / 2  # Shape: [num_layers, batch_size, hidden_size]
        cell_enc = (cell_loc + cell_ctrl) / 2

        # Incorporate vehicle features into the hidden state
        # We'll add vehicle_features_encoded to the first layer's hidden state
        hidden_enc[0] = hidden_enc[0] + vehicle_features_encoded.unsqueeze(0)

        # Decoder
        decoder_output, _ = self.decoder(output_control_sequence, (hidden_enc, cell_enc))  # decoder_output: [batch_size, seq_len, hidden_size]

        # Output layer
        output_localization = self.fc_out(decoder_output)  # Shape: [batch_size, seq_len, localization_input_size]

        return output_localization

## Define embedding sizes

In [None]:
# Define sizes based on your data
vehicle_feature_sizes = {
    'vehicle_model': len(vehicle_model_mapping),
    'vehicle_modification': len(vehicle_modification_mapping),
    'tires': len(tires_mapping),
}

embedding_dim = 16
localization_input_size = 6  # For example, x, y, z, roll, pitch, yaw
control_input_size = 2  # acceleration_level, steering
hidden_size = 128
num_layers = 2


## Metric

In [None]:
import numpy as np

SEGMENT_LENGTH = 1.0

def calculate_metric_on_batch(output_np, target_np, segment_length=1.0):
    """
    output_np: numpy array of shape [batch_size, seq_len, 4], predicted x, y, yaw
    target_np: numpy array of same shape, ground truth x, y, yaw

    Returns:
        metric: float, the average metric over the batch
    """
    x_pred, y_pred, yaw_pred = output_np[..., 0], output_np[..., 1], output_np[..., 2]
    x_gt, y_gt, yaw_gt = target_np[..., 0], target_np[..., 1], target_np[..., 2]

    # Compute c1 and c2 for predicted
    c1_pred = np.stack([x_pred, y_pred], axis=-1)
    c2_pred = c1_pred + segment_length * np.stack([np.cos(yaw_pred), np.sin(yaw_pred)], axis=-1)

    # Compute c1 and c2 for ground truth
    c1_gt = np.stack([x_gt, y_gt], axis=-1)
    c2_gt = c1_gt + segment_length * np.stack([np.cos(yaw_gt), np.sin(yaw_gt)], axis=-1)

    # Compute distances between corresponding points
    dist_c1 = np.linalg.norm(c1_pred - c1_gt, axis=-1)
    dist_c2 = np.linalg.norm(c2_pred - c2_gt, axis=-1)

    # Compute pose metric
    pose_metric = np.sqrt((dist_c1 ** 2 + dist_c2 ** 2) / 2.0)
    metric = np.mean(pose_metric)

    return metric

## Init model

In [None]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = TrajectoryEncoderDecoder(
    vehicle_feature_sizes=vehicle_feature_sizes,
    embedding_dim=embedding_dim,
    localization_input_size=localization_input_size,
    control_input_size=control_input_size,
    hidden_size=hidden_size,
    num_layers=num_layers
).to(device)


In [None]:
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

In [None]:
from tqdm import tqdm

num_epochs = 40
best_val_loss = 100000
for epoch in range(num_epochs):
    model.train()
    epoch_loss = 0
    epoch_metric = 0
    for sample in tqdm(train_loader):
        vehicle_features = sample['vehicle_features'].to(device)
        input_localization = sample['input_localization'].to(device)
        output_localization = sample['output_localization'].to(device)
        input_control_sequence = sample['input_control_sequence'].to(device)
        output_control_sequence = sample['output_control_sequence'].to(device)

        optimizer.zero_grad()
        predicted_output_localization = model(
            vehicle_features,
            input_localization,
            input_control_sequence,
            output_control_sequence
        )
        loss = criterion(predicted_output_localization, output_localization)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()

        predicted_x_y_yaw = predicted_output_localization[..., [0,1,-1]].detach().cpu().numpy()
        gt_x_y_yaw = output_localization[..., [0,1,-1]].detach().cpu().numpy()
        batch_metric = calculate_metric_on_batch(predicted_x_y_yaw, gt_x_y_yaw)
        epoch_metric += batch_metric

    avg_loss = epoch_loss / len(train_loader)
    avg_metric = epoch_metric / len(train_loader)
    print(f"Epoch {epoch+1}/{num_epochs}, Training Loss: {avg_loss:.4f}, Metric: {avg_metric:.4f}")
    
    model.eval()
    val_loss = 0
    val_metric = 0
    with torch.no_grad():
        for sample in tqdm(val_loader):
            vehicle_features = sample['vehicle_features'].to(device)
            input_localization = sample['input_localization'].to(device)
            output_localization = sample['output_localization'].to(device)
            input_control_sequence = sample['input_control_sequence'].to(device)
            output_control_sequence = sample['output_control_sequence'].to(device)
    
            optimizer.zero_grad()
            predicted_output_localization = model(
                vehicle_features,
                input_localization,
                input_control_sequence,
                output_control_sequence
            )
            loss = criterion(predicted_output_localization, output_localization)
            val_loss += loss.item()

            predicted_x_y_yaw = predicted_output_localization[..., [0,1,-1]].detach().cpu().numpy()
            gt_x_y_yaw = output_localization[..., [0,1,-1]].detach().cpu().numpy()
            batch_metric = calculate_metric_on_batch(predicted_x_y_yaw, gt_x_y_yaw)
            val_metric += batch_metric
            
    avg_val_loss = val_loss / len(val_loader)
    if avg_val_loss < best_val_loss:
        best_val_loss = avg_val_loss
        torch.jit.save(torch.jit.script(model), 'best.pt')
        
    avg_val_metric = val_metric / len(val_loader)
    print(f"Validation Loss: {avg_val_loss:.4f}, Metric: {avg_val_metric:.4f}")

In [32]:
model.eval()
val_loss = 0
val_metric = 0
with torch.no_grad():
    for sample in tqdm(val_loader):
        vehicle_features = sample['vehicle_features'].to(device)
        input_localization = sample['input_localization'].to(device)
        output_localization = sample['output_localization'].to(device)
        input_control_sequence = sample['input_control_sequence'].to(device)
        output_control_sequence = sample['output_control_sequence'].to(device)

        optimizer.zero_grad()
        predicted_output_localization = model(
            vehicle_features,
            input_localization,
            input_control_sequence,
            output_control_sequence
        )
        loss = criterion(predicted_output_localization, output_localization)
        val_loss += loss.item()

        predicted_x_y_yaw = predicted_output_localization[..., [0,1,-1]].detach().cpu().numpy()
        gt_x_y_yaw = output_localization[..., [0,1,-1]].detach().cpu().numpy()
        batch_metric = calculate_metric_on_batch(predicted_x_y_yaw, gt_x_y_yaw)
        val_metric += batch_metric
        
avg_val_loss = val_loss / len(val_loader)
if avg_val_loss < best_val_loss:
    best_val_loss = avg_val_loss
    torch.jit.save(torch.jit.script(model), 'best.pt')
    
avg_val_metric = val_metric / len(val_loader)
print(f"Validation Loss: {avg_val_loss:.4f}, Metric: {avg_val_metric:.4f}")

100%|██████████| 256/256 [00:10<00:00, 23.34it/s]

Validation Loss: 14.1419, Metric: 5.7822





In [33]:
class TestDataset(Dataset):
    def __init__(self, dataset_path):
        self.data = []
        sampling_interval_ns = 4e7  # 0.04 seconds in nanoseconds
        initial_state_length = int(5 / 0.04)  # 125 steps
        target_length = int(15 / 0.04)        # 375 steps
        sequence_length = initial_state_length + target_length  # 500 steps

        # Get list of test case IDs (folder names)
        testcase_ids = sorted([name for name in os.listdir(dataset_path) if os.path.isdir(os.path.join(dataset_path, name))])

        for testcase_id in tqdm(testcase_ids):
            testcase_path = os.path.join(dataset_path, testcase_id)

            # Load metadata and encode vehicle features
            with open(os.path.join(testcase_path, 'metadata.json'), 'r') as f:
                metadata = json.load(f)
            vehicle_features = self.encode_vehicle_features(metadata)

            # Load localization data (first 5 seconds)
            localization = pd.read_csv(os.path.join(testcase_path, 'localization.csv'))
            localization_seq = localization[['stamp_ns', 'x', 'y', 'z', 'roll', 'pitch', 'yaw']].values

            # Load control data (first 20 seconds)
            control = pd.read_csv(os.path.join(testcase_path, 'control.csv'))
            control['acceleration_level'] = control['acceleration_level'].fillna(0)
            control_seq = control[['stamp_ns', 'acceleration_level', 'steering']].values

            # Load requested stamps
            requested_stamps = pd.read_csv(os.path.join(testcase_path, 'requested_stamps.csv'))['stamp_ns'].values

            # Resample sequences to fixed time steps
            time_steps_localization = np.arange(0, 5 * 1e9, sampling_interval_ns)
            time_steps_control = np.arange(0, 20 * 1e9, sampling_interval_ns)

            localization_resampled = self.resample_sequence(localization_seq, time_steps_localization)
            control_resampled = self.resample_sequence(control_seq, time_steps_control)

            # Process localization data
            localization_resampled = localization_resampled[:, 1:]  # Drop stamp_ns
            input_localization = localization_resampled.copy()      # Shape: [125, 7]

            # Subtract start position
            start_position = input_localization[0, :3].copy()
            input_localization[:, :3] -= start_position

            # Prepare input_control_sequence (first 5 seconds)
            control_resampled = control_resampled[:, 1:]  # Drop stamp_ns
            input_control_sequence = control_resampled[:initial_state_length].copy()  # [125, 2]

            # Prepare output_control_sequence (from 5s to 20s)
            output_control_sequence = control_resampled[initial_state_length:].copy()  # [375, 2]

            self.data.append({
                'testcase_id': int(testcase_id),
                'vehicle_features': vehicle_features,
                'input_localization': input_localization,
                'input_control_sequence': input_control_sequence,
                'output_control_sequence': output_control_sequence,
                'start_position': start_position,
                'requested_stamps': requested_stamps
            })

    def encode_vehicle_features(self, metadata):
        vehicle_model = vehicle_model_mapping.get(metadata['vehicle_model'], unknown_vehicle_model_idx)
        vehicle_modification = vehicle_modification_mapping.get(metadata['vehicle_model_modification'], unknown_vehicle_modification_idx)
        tires_front = tires_mapping.get(metadata['tires']['front'], unknown_tires_idx)
        tires_rear = tires_mapping.get(metadata['tires']['rear'], unknown_tires_idx)
        vehicle_features = [vehicle_model, vehicle_modification, tires_front, tires_rear]
        return vehicle_features

    def resample_sequence(self, seq, time_steps):
        df_seq = pd.DataFrame(seq, columns=['stamp_ns'] + [f'feat_{i}' for i in range(seq.shape[1]-1)])
        df_seq = df_seq.set_index('stamp_ns').reindex(time_steps, method='nearest').reset_index()
        return df_seq.values

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        sample = self.data[idx]
        tensor_dict = {}
        for k, v in sample.items():
            if k == 'vehicle_features':
                tensor_dict[k] = torch.tensor(v, dtype=torch.long)
            elif k in ['input_localization', 'input_control_sequence', 'output_control_sequence']:
                tensor_dict[k] = torch.tensor(v, dtype=torch.float32)
            else:
                tensor_dict[k] = v  # Keep as is (e.g., start_position, requested_stamps, testcase_id)
        return tensor_dict


In [34]:
test_dataset = TestDataset(TEST_DATASET_PATH)
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)

100%|██████████| 8000/8000 [01:01<00:00, 130.69it/s]


In [36]:
initial_state_length = 125
target_length = 375

In [76]:
model.eval()
predictions = []

with torch.no_grad():
    for sample in tqdm(test_loader):
        testcase_id = sample['testcase_id'][0].item()
        vehicle_features = sample['vehicle_features'].to(device)
        input_localization = sample['input_localization'].to(device)
        input_control_sequence = sample['input_control_sequence'].to(device)
        output_control_sequence = sample['output_control_sequence'].to(device)
        start_position = sample['start_position'][0].numpy()
        requested_stamps = sample['requested_stamps'][0].numpy()

        # Check sequence lengths
        if input_localization.size(1) != initial_state_length:
            print(f"Skipping {testcase_id}: input_localization length mismatch")
            continue
        if input_control_sequence.size(1) != initial_state_length:
            print(f"Skipping {testcase_id}: input_control_sequence length mismatch")
            continue
        if output_control_sequence.size(1) != target_length:
            print(f"Skipping {testcase_id}: output_control_sequence length mismatch")
            continue

        # Forward pass
        try:
            predicted_output_localization = model(
                vehicle_features,
                input_localization,
                input_control_sequence,
                output_control_sequence
            )
        except Exception as e:
            print(f"Error processing {testcase_id}: {e}")
            continue


        predicted_output_localization = predicted_output_localization.detach().cpu().numpy()[0]
        predicted_output_localization[:, :3] += start_position
        
        # Get x, y positions, yaw
        yaw_pred = predicted_output_localization[:, -1]
        x_pred = predicted_output_localization[:, 0]
        y_pred = predicted_output_localization[:, 1]

        # Time steps corresponding to output predictions (from 5s to 20s every 0.04s)
        time_steps = np.arange(5 * 1e9, 20 * 1e9, 4e7)  # [375]

        # Map requested_stamps to indices in time_steps
        indices = np.searchsorted(time_steps, requested_stamps)
        
        # Handle any indices out of bounds
        indices = np.clip(indices, 0, len(time_steps) - 1)

        # Extract predictions at requested timestamps
        x_pred = x_pred[indices]
        y_pred = y_pred[indices]
        yaw_pred = yaw_pred[indices]

        # Collect predictions
        for stamp_ns, x, y, yaw in zip(requested_stamps, x_pred, y_pred, yaw_pred):
            predictions.append({
                'testcase_id': testcase_id,
                'stamp_ns': int(stamp_ns),
                'x': x,
                'y': y,
                'yaw': yaw
            })

100%|██████████| 8000/8000 [00:37<00:00, 215.37it/s]


In [77]:
predictions = pd.DataFrame(predictions)

In [78]:
predictions.isna().any()

testcase_id    False
stamp_ns       False
x              False
y              False
yaw            False
dtype: bool

In [79]:
predictions.head()

Unnamed: 0,testcase_id,stamp_ns,x,y,yaw
0,0,5000888836,-1490.079346,-1318.720703,2.238707
1,0,5040043013,-1491.794434,-1318.204834,2.17671
2,0,5079989560,-1491.794434,-1318.204834,2.17671
3,0,5120797471,-1494.852295,-1316.25647,2.027777
4,0,5165218288,-1494.279053,-1314.014282,1.822948


In [80]:
predictions.to_csv('finally_rnn.csv', index=False)

In [153]:
submission_df.shape

(8000, 5)

In [156]:
submission_df.explode(column=['stamp_ns', 'x', 'y', 'yaw']).to_csv('predictions.csv', index=False)

### read test dataset 

In [142]:
import numpy as np

NSECS_IN_SEC = 1000000000

def secs_to_nsecs(secs: float):
    return int(secs * NSECS_IN_SEC)

def nsecs_to_secs(nsecs: int):
    return float(nsecs) / NSECS_IN_SEC

def yaw_direction(yaw_value):
    return np.array([np.cos(yaw_value), np.sin(yaw_value)])

### simple pose prediction logic without taking into account control states 

In [143]:
def localization_df_to_poses(loc_df):
    poses = []
    for stamp_ns, x, y, yaw in zip(loc_df['stamp_ns'], loc_df['x'], loc_df['y'], loc_df['yaw']):
        poses.append({'stamp_ns': stamp_ns, 'pos': np.array([x, y]), 'yaw': yaw})
    return poses

# naive estimation of speed at last known localization pose
def dummy_estimate_last_speed(localization_poses):
    last_pose = localization_poses[-1]
    
    start_pose_idx = -1
    for i, pose in enumerate(localization_poses, start=1-len(localization_poses)):
        start_pose_idx = i
        if nsecs_to_secs(last_pose['stamp_ns']) - nsecs_to_secs(pose['stamp_ns']) > 1.: # sec
            break
            
    start_pose = localization_poses[start_pose_idx]
    dt_sec = nsecs_to_secs(last_pose['stamp_ns']) - nsecs_to_secs(start_pose['stamp_ns'])
    
    if dt_sec > 1e-5:
        return np.linalg.norm(last_pose['pos'][:2] - start_pose['pos'][:2]) / dt_sec
    return 5. # some default value

def dummpy_predict_pose(last_loc_pose: dict, last_speed: float, prediction_stamp: int):
    dt_sec = nsecs_to_secs(prediction_stamp) - nsecs_to_secs(last_loc_pose['stamp_ns'])
    distance = dt_sec * last_speed
    direction = yaw_direction(last_loc_pose['yaw'])
    pos_translate = direction * distance
    return {"pos": last_loc_pose['pos'] + pos_translate, 'yaw': last_loc_pose['yaw']}

In [144]:
def predict_testcase(testcase: dict):
    loc_df = testcase['localization']
    localization_poses = localization_df_to_poses(loc_df)
    
    last_loc_pose = localization_poses[-1]
    last_speed = dummy_estimate_last_speed(localization_poses)
    
    predicted_poses = []
    for stamp in testcase['requested_stamps']['stamp_ns']:
        pose = dummpy_predict_pose(last_loc_pose, last_speed, stamp)
        predicted_poses.append(pose)
        
    predictions = {}
    predictions['stamp_ns'] = testcase['requested_stamps']['stamp_ns']
    predictions['x'] = [pose['pos'][0] for pose in predicted_poses]
    predictions['y'] = [pose['pos'][1] for pose in predicted_poses]
    predictions['yaw'] = [pose['yaw'] for pose in predicted_poses]
    return pd.DataFrame(predictions)

def predict_test_dataset(test_dataset: dict):
    predictions = {}
    for testcase_id, testcase in tqdm(test_dataset.items()): 
        predictions[testcase_id] = predict_testcase(testcase)
    return predictions

### make prediction for requested stamps 

In [145]:
test_predictions = predict_test_dataset(test_dataset)
len(test_predictions)

100%|██████████| 8000/8000 [00:23<00:00, 343.02it/s]


8000

### write predictions 

In [146]:
def write_predictions(dataset_predictions: dict, prediction_file_path: str):
    prediction_list = []
    for testcase_id, prediction in tqdm(dataset_predictions.items()):
        prediction['testcase_id'] = [testcase_id] * len(prediction)
        prediction_list.append(prediction)
    predictions_df = pd.concat(prediction_list)
    predictions_df = predictions_df.reindex(columns=["testcase_id", "stamp_ns", "x", "y", "yaw"])
    print(len(predictions_df))
    predictions_df.to_csv(prediction_file_path, index=False, header=True)

In [147]:
write_predictions(test_predictions, os.path.join(ROOT_DATA_FOLDER, "dummy_prediction.csv"))

100%|██████████| 8000/8000 [00:01<00:00, 4018.03it/s]


2998763


In [149]:
ROOT_DATA_FOLDER

'./YandexCup2024v2'

In [150]:
dummy_prediction = pd.read_csv('./YandexCup2024v2/dummy_prediction.csv')

In [151]:
dummy_prediction.head()

Unnamed: 0,testcase_id,stamp_ns,x,y,yaw
0,0,5000888836,-1490.905035,-1310.813635,2.047693
1,0,5040043013,-1490.955001,-1310.716927,2.047693
2,0,5079989560,-1491.005979,-1310.618261,2.047693
3,0,5120797471,-1491.058057,-1310.517468,2.047693
4,0,5165218288,-1491.114744,-1310.407751,2.047693


In [152]:
dummy_prediction.shape

(2998763, 5)

In [None]:
dummy_prediction.head

# Calculate metric

Let's describe final metric. As a first step, all predicted triples $(x,y,yaw)$ are being converted into 2 points $[(x_1, y_1), (x_2, y_2)]$ in the following way:
$$
(x_1, y_1) = (x, y), \\
(x_2, y_2) = (x_1, y_1) + S \times (yaw_x, yaw_y)
$$  

where $S = 1$. In other words, we build a directed segment of length $1$. These points then used in the metric calculation.


Metric for a single pose (rmse):

$$
pose\_metric = \sqrt{ \frac{\displaystyle\sum_{j=1}^{k} {(x_j-\hat{x_j})^2 + (y_j-\hat{y_j})^2}}{k} }
$$

where $k$ - number of points that describe single pose (in our case $k=2$).

Metric for a testcase:

$$
testcase\_metric = \frac{1}{n}  \displaystyle\sum_{i=1}^{n}pose\_metric_i
$$

where $n$ - number of localization points to predict.

And, final metric for a whole dataset:

$$
dataset\_metric = \frac{1}{n}  \displaystyle\sum_{i=1}^{n}testcase\_metric_i
$$

where $n$ - number of test cases.


### implementation of the metric calculation 

In [None]:
import numpy as np
import pandas as pd

SEGMENT_LENGTH = 1.

def yaw_direction(yaw_value):
    return np.array([np.cos(yaw_value), np.sin(yaw_value)])

def build_car_points(x_y_yaw):
    directions = np.vstack(yaw_direction(x_y_yaw[:, -1]))
    
    front_points = x_y_yaw[:, :-1] + SEGMENT_LENGTH * directions.T
    points = np.vstack([x_y_yaw[:, :-1], front_points])
    return points

def build_car_points_from_merged_df(df: pd.DataFrame):
    points_gt = df[['x_gt', 'y_gt', 'yaw_gt']].to_numpy()
    points_pred = df[['x_pred', 'y_pred', 'yaw_pred']].to_numpy()
    
    points_gt = build_car_points(points_gt)
    points_pred = build_car_points(points_pred)
    return points_gt, points_pred

def calculate_metric_testcase(df: pd.DataFrame):        
    points_gt, points_pred = build_car_points_from_merged_df(df)
    
    metric = np.mean(np.sqrt(2. * np.mean((points_gt - points_pred) ** 2, axis=1)))
    return metric

def calculate_metric_dataset(ground_truth_df: pd.DataFrame, prediction_df: pd.DataFrame):
    assert (len(ground_truth_df) == len(prediction_df))
    
    df = ground_truth_df.merge(prediction_df, on=['testcase_id', 'stamp_ns'], suffixes=['_gt', '_pred'])
    
    metric = df.groupby('testcase_id').apply(calculate_metric_testcase)
    return np.mean(metric)