In [None]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from tqdm import tqdm

In [None]:
# Load the raw trajectory CSV (the relative path is resolved against the current
# working directory) and preview the first rows.
ngsim_df = pd.read_csv('trajectories-0750am-0805am.csv')
ngsim_df.head()

In [None]:
# Distinct vehicle identifiers present in the raw data, in order of first appearance.
ngsim_vehicle_ids = ngsim_df['Vehicle_ID'].unique()
print(f'Number of vehicles in the dataset: {len(ngsim_vehicle_ids)}')

In [None]:
# Sampling configuration.
n_trajectories_to_sample = 200  # how many distinct vehicles to draw
length_to_sample = 150          # timesteps kept per sampled vehicle

# Seed NumPy's global RNG so the draw below is repeatable across runs.
np.random.seed(0)

# Draw vehicle IDs without replacement, so no vehicle is picked twice.
vehicle_ids_to_sample = np.random.choice(
    ngsim_vehicle_ids,
    size=n_trajectories_to_sample,
    replace=False,
)
print(f'Vehicle IDs to sample: {vehicle_ids_to_sample}')

In [None]:
# Extract the trajectories of the sampled vehicles, keeping at most
# `length_to_sample` timesteps per vehicle. Vehicles whose recorded time span is
# shorter than that are skipped entirely.
#
# The arithmetic below treats Global_Time as milliseconds on a 100 ms grid
# (assumption inherited from the original constants -- confirm against the data
# dictionary if the source file changes).
timestep_ms = 100

# Collect the per-vehicle slices in a list and concatenate once at the end:
# concatenating inside the loop copies the accumulated frame on every iteration,
# and seeding it with an empty all-object frame can degrade the column dtypes.
sampled_trajectories = []
for vehicle_id in tqdm(vehicle_ids_to_sample):
    vehicle_df = ngsim_df[ngsim_df['Vehicle_ID'] == vehicle_id]
    timestep_min = vehicle_df['Global_Time'].min()
    timestep_max = vehicle_df['Global_Time'].max()
    # If the vehicle trajectory is shorter than the desired length, we skip the vehicle
    if timestep_max - timestep_min + timestep_ms < length_to_sample * timestep_ms:
        print(f'Skipping vehicle {vehicle_id} because it has less than {length_to_sample} timesteps')
        continue
    # Last timestamp to keep: the first `length_to_sample` grid points, inclusive.
    last_kept_time = timestep_min + (length_to_sample - 1) * timestep_ms
    sampled_trajectories.append(vehicle_df[vehicle_df['Global_Time'] <= last_kept_time])

if sampled_trajectories:
    sample_df = pd.concat(sampled_trajectories)
else:
    # Nothing qualified: keep an empty frame with the expected columns so that
    # downstream cells still find the columns they select.
    sample_df = pd.DataFrame(columns=ngsim_df.columns)

# Report the number of trajectories (vehicles) kept, not the number of rows.
print(f'Number of sampled trajectories: {len(sampled_trajectories)}')

In [None]:
# Display the sampled trajectories frame as the cell output.
sample_df

In [None]:
def plot_trajectories(df: pd.DataFrame) -> None:
    """Plot every vehicle's longitudinal displacement against elapsed time.

    Each vehicle is drawn as one line. Both axes are shifted per vehicle so the
    line starts at its own minimum ``Global_Time`` and minimum ``Local_Y``.

    Args:
        df: Frame with at least the columns ``Vehicle_ID``, ``Global_Time`` and
            ``Local_Y``.
    """
    fig, ax = plt.subplots(figsize=(3, 3))
    # One pass over the frame instead of re-filtering it for every vehicle.
    # sort=False keeps vehicles in order of first appearance, so lines are drawn
    # (and colours assigned) in the same order as iterating over `.unique()`.
    for vehicle_id, vehicle_df in df.groupby('Vehicle_ID', sort=False):
        elapsed_time = vehicle_df['Global_Time'] - vehicle_df['Global_Time'].min()
        displacement = vehicle_df['Local_Y'] - vehicle_df['Local_Y'].min()
        ax.plot(elapsed_time, displacement, label=vehicle_id)
    ax.set_xlabel('Time (ms)')
    # NOTE(review): the label states metres; confirm the unit of Local_Y in the
    # source data before relying on it.
    ax.set_ylabel('Longitudinal position (m)')

# Visual check of the sampled trajectories.
plot_trajectories(sample_df)

In [None]:
# Write the sample_df to a CSV file, keeping only the columns we need
sample_df_clean = sample_df[['Vehicle_ID', 'Global_Time', 'Local_X', 'Local_Y']]
# Add the velocity column
sample_df_clean['Velocity'] = sample_df_clean.groupby('Vehicle_ID')['Local_Y'].diff() / 0.1
sample_df_clean = sample_df_clean.dropna() 

In [None]:
# Persist the cleaned sample; index=False leaves the DataFrame index out of the file.
sample_df_clean.to_csv('ngsim_sample.csv', index=False)

In [None]:
# Preview the first rows of the cleaned sample.
sample_df_clean.head()

In [None]:
# Build a single-trajectory test set from the first vehicle that appears in the
# raw data, then write it to a CSV file keeping only the columns we need.
first_vehicle_id = ngsim_df['Vehicle_ID'].iloc[0]
# Explicit copy: the edits below must modify an independent frame, not a slice
# of ngsim_df (avoids SettingWithCopyWarning).
traj0 = ngsim_df.loc[ngsim_df['Vehicle_ID'] == first_vehicle_id].copy()
# Invert the sign of the Local_Y
traj0['Local_Y'] = -traj0['Local_Y']
# Change the Vehicle_ID to 0
traj0['Vehicle_ID'] = 0
# The test set consists of this one trajectory; a separate copy keeps `test_df`
# independent of `traj0`, as the previous concat-based construction did.
test_df = traj0.copy()
test_df_clean = test_df[['Vehicle_ID', 'Global_Time', 'Local_X', 'Local_Y']]
test_df_clean.to_csv('ngsim_test.csv', index=False)

        