In [None]:
import sqlite3
import sys

In [None]:
import pandas as pd
from tqdm import tqdm
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from scipy import stats
from shapely import affinity
import contextily as cx

In [None]:
# Enable IPython's autoreload extension in mode 2, which re-imports all
# modules before each cell runs so edits to imported project code are picked
# up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
# Make the parent directory importable so that `src` can be imported below.
# The membership check keeps the cell idempotent: re-running it no longer
# appends a duplicate ".." entry to sys.path each time.
if ".." not in sys.path:
    sys.path.append("..")

In [None]:
from src.trajectory import Trajectory, point_to_bbox

# Read data

In [None]:
# Relative path of the raw trajectory SQLite database (resolved against the
# kernel's working directory).
db_path = "../data/raw/rdb1/trajectories_rdb1_v3.sqlite"

In [None]:
# Open the raw database read-only through an SQLite URI. A plain
# `sqlite3.connect(db_path)` silently creates an empty database file when the
# path is wrong and opens the raw data writable; `mode=ro` fails immediately
# on a missing file and protects the raw data from accidental modification.
con = sqlite3.connect(f"file:{db_path}?mode=ro", uri=True)

In [None]:
# Cursor on the open connection, used below to query the schema catalogue.
cursor = con.cursor()

Tables in the sqlite DB:

In [None]:
# Query SQLite's schema catalogue for every table name; each result row is a
# one-element tuple, so keep only its first field.
table_rows = cursor.execute("SELECT name FROM sqlite_master WHERE type='table';").fetchall()
tables = [row[0] for row in table_rows]
print(tables)

In [None]:
# Preview a single table; this reads the complete `rdb1_1` table into a
# DataFrame and displays it.
pd.read_sql('SELECT * FROM rdb1_1', con)

In [None]:
# Read every table in full (one DataFrame per table) and stack the frames
# row-wise into a single DataFrame; each frame keeps its own row index.
df_list = [pd.read_sql(f'SELECT * FROM {t:s}', con) for t in tables]
df = pd.concat(df_list)

In [None]:
# Remove the TRAILER_ID column. `errors="ignore"` makes the cell idempotent:
# the original `del` raised a KeyError when the cell was run a second time
# (or when the column was already absent).
df = df.drop(columns=["TRAILER_ID"], errors="ignore")

In [None]:
# Replace the index left over from concatenating the per-table frames with a
# fresh 0..n-1 RangeIndex.
# NOTE(review): without `drop=True` the previous index is kept as a new
# `index` column, and re-running the cell adds further columns — confirm this
# is intended.
df = df.reset_index()

In [None]:
# Column names, dtypes and non-null counts of the combined frame.
df.info()

In [None]:
# Path of the Parquet copy of the combined trajectory table.
output_path = "../data/raw/rdb1.parquet"

In [None]:
# Treat the Parquet file as a cache of the combined tables: load it when it
# exists, otherwise create it from the frame assembled from SQLite above.
# This replaces the manually (un)commented `to_parquet` line, so the notebook
# also runs top to bottom on a checkout where the file does not exist yet.
try:
    df = pd.read_parquet(output_path)
except FileNotFoundError:
    df.to_parquet(output_path)
    # Read the file back so downstream cells see the same dtypes whether or
    # not the cache already existed.
    df = pd.read_parquet(output_path)

In [None]:
# Inspect the frame as loaded from the Parquet file.
df.info()

In [None]:
# Peek at the first rows.
df.head()

# Preprocessing

In [None]:
# Derived velocity column scaled by 3.6 (m/s -> km/h, assuming `V` is stored
# in m/s — TODO confirm); the leading underscore marks it as a helper column.
df["_V_kmh"] = df["V"].mul(3.6)

# Explore data

In [None]:
# Each distinct OBJID counts as one traffic participant.
n_participants = df["OBJID"].nunique()
print("Number of traffic participants:", n_participants)

In [None]:
# Count objects (not samples) per class: take the first CLASS value of every
# object, then tally the classes.
class_per_object = df.groupby("OBJID")["CLASS"].first()
print(class_per_object.value_counts())

## Sample rate

In [None]:
# All samples of the object with OBJID 1, ordered by timestamp.
is_object_1 = df["OBJID"] == 1
obj1_df = df[is_object_1].sort_values("TIMESTAMP")

In [None]:
# First samples of object 1 in time order.
obj1_df.head()

In [None]:
# Velocity and acceleration of object 1 over time, one panel each on a shared
# time axis; every sample is drawn as a marker so the sampling is visible.
fig, axs = plt.subplots(nrows=2, figsize=(20, 10), sharex=True)
axv, axa = axs

panels = (
    (axv, "_V_kmh", "velocity [km/h]"),
    (axa, "ACC", "acceleration [m/s²]"),
)
for panel_ax, column, ylabel in panels:
    panel_ax.plot(obj1_df["TIMESTAMP"], obj1_df[column], "ko-")
    panel_ax.set_ylabel(ylabel)
    panel_ax.grid()

axs[-1].set_xlabel("time [s]")

In [None]:
# Gaps between consecutive samples of object 1, computed on the timestamps
# scaled by 1000 (milliseconds if TIMESTAMP is in seconds).
timestamps_scaled = obj1_df["TIMESTAMP"].values * 1000
delta_t_ms = np.diff(timestamps_scaled)

In [None]:
# Smallest and largest gap between consecutive samples of object 1.
delta_t_ms.min(), delta_t_ms.max()

In [None]:
# Descriptive statistics of the sampling gaps (count, min/max, mean, variance,
# skewness, kurtosis), displayed as the cell output.
dr = stats.describe(delta_t_ms)
dr

Compute stats on sampling frequency derived from temporal differences between samples:

In [None]:
def mean_sample_frequency(values):
    """Return the mean sampling frequency implied by a set of timestamps.

    The frequency is the reciprocal of the mean gap between consecutive
    timestamps, so its unit is the inverse of the timestamp unit.

    Parameters
    ----------
    values : array-like
        Timestamps of one object, in any order.

    Returns
    -------
    float
        ``1 / mean(diff(sorted timestamps))``; ``nan`` when fewer than two
        timestamps are given, because no gap can be measured.
    """
    # Sort first: the mean of consecutive differences only reflects the
    # sampling interval when the timestamps are in chronological order, and
    # rows grouped straight from the combined frame are not guaranteed to be.
    timestamps = np.sort(np.asarray(values))
    if timestamps.size < 2:
        # Avoid the empty-slice warnings of taking the mean of zero gaps.
        return np.nan
    return 1 / np.diff(timestamps).mean()

In [None]:
# Mean sampling frequency per object, summarised over all objects.
(
    df.groupby("OBJID")
    .agg({"TIMESTAMP": mean_sample_frequency})
    .rename(columns={"TIMESTAMP": "frequency"})
    .describe()
)

Looks like the objects are sampled at **30 Hz**.

## Velocity and acceleration distributions per class

In [None]:
# Same assignment as in the "Preprocessing" section; repeating it is harmless
# (it recomputes identical values) and lets this section run on its own.
df["_V_kmh"] = df["V"]*3.6

In [None]:
# One velocity histogram per CLASS value, three facets per row, each facet
# with its own y-axis scale.
fg = sns.displot(df, x="_V_kmh", col="CLASS", col_wrap=3, facet_kws=dict(sharey=False))

In [None]:
# Summary statistics of the `ACC_TAN` column over all samples.
df["ACC_TAN"].describe()

In [None]:
# Empirical CDF of the acceleration samples, one curve per class; the x-axis
# is clipped to +/-15 so the bulk of the distribution stays readable.
fig, ax = plt.subplots(figsize=(15, 5))
sns.ecdfplot(df, x="ACC_TAN", hue="CLASS", ax=ax)
ax.set(xlim=(-15, 15), xlabel="acceleration [m/s²]")
ax.grid()
fig.suptitle("Cumulative distribution of acceleration samples")

## Braking maneuvers

Get the object IDs of trajectories with high deceleration:

In [None]:
# Objects with at least one sample whose ACC_TAN lies in [-15, -7]
# (both bounds inclusive).
hard_braking = df["ACC_TAN"].between(-15, -7)
ids = df.loc[hard_braking, "OBJID"].unique()
print(f"Found {len(ids)} objects!")

In [None]:
# Velocity and acceleration profiles of all hard-braking objects, aligned so
# that every trajectory starts at t = 0.
fig, axs = plt.subplots(nrows=2, figsize=(20, 10), sharex=True)
axv, axa = axs

for i, objid in enumerate(ids):
    obj_df = (
        df.query(f"OBJID=={objid}")
        .sort_values("TIMESTAMP")
        # Time relative to the object's first sample.
        .assign(_t=lambda frame: frame["TIMESTAMP"] - frame["TIMESTAMP"].min())
    )
    # The legend label pairs the position in `ids` with the object ID.
    axv.plot(obj_df["_t"], obj_df["_V_kmh"], label=f"{i:02d}: {objid:03d}")
    axa.plot(obj_df["_t"], obj_df["ACC_TAN"])

for panel_ax, ylabel in ((axv, "velocity [km/h]"), (axa, "acceleration [m/s²]")):
    panel_ax.set_ylabel(ylabel)
    panel_ax.grid()
axv.legend()

axs[-1].set_xlabel("time [s]")

In [None]:
# Pick the object at position 5 of `ids` for a closer look (requires at least
# six hard-braking objects) and order its samples by time.
braking_objid = ids[5]
obj_df = df[df["OBJID"] == braking_objid].sort_values("TIMESTAMP")

In [None]:
# Display the samples of the selected object.
obj_df

In [None]:
# Build a Trajectory for the selected object from its time-ordered samples;
# the width and length are taken from the object's first row.
obj_width = obj_df["WIDTH"].iloc[0]
obj_length = obj_df["LENGTH"].iloc[0]
trajectory = Trajectory(
    obj_df["TIMESTAMP"],
    obj_df["UTM_X"],
    obj_df["UTM_Y"],
    obj_df["UTM_ANGLE"],
    width=obj_width,
    length=obj_length,
)

In [None]:
# Show the trajectory's `projection` attribute (presumably the coordinate
# reference system it assumes — confirm in src.trajectory).
trajectory.projection

In [None]:
# Convert the trajectory to a GeoDataFrame; `as_bbox=False` presumably yields
# point geometries instead of bounding boxes — confirm in src.trajectory.
traj_gdf = trajectory.to_geopandas(as_bbox=False)

In [None]:
# First rows of the trajectory GeoDataFrame.
traj_gdf.head()

In [None]:
# Coordinate reference system of the GeoDataFrame as a string.
traj_gdf.crs.to_string()

In [None]:
# Optional reprojection to WGS84 (EPSG:4326); currently disabled, so the
# GeoDataFrame keeps its original CRS.
# traj_gdf = traj_gdf.to_crs("EPSG:4326")

In [None]:
# Re-check the CRS; it is unchanged as long as the reprojection above stays
# commented out.
traj_gdf.crs.to_string()

In [None]:
# Draw the trajectory in red and put a CartoDB Voyager basemap underneath,
# passing the GeoDataFrame's own CRS to contextily.
ax = traj_gdf.plot(color="red", figsize=(9, 9))
traj_crs = traj_gdf.crs.to_string()
cx.add_basemap(
    ax,
    crs=traj_crs,
    source=cx.providers.CartoDB.Voyager,
    zoom="auto",
)

In [None]:
# First rows of the trajectory GeoDataFrame (same view as shown earlier).
traj_gdf.head()

In [None]:
# Position, velocity and acceleration of the selected object over time, one
# panel per signal on a shared time axis.
fig, axs = plt.subplots(nrows=4, figsize=(20, 10), sharex=True)
axx, axy, axv, axa = axs

panel_specs = (
    (axx, "UTM_X", "x-position [m]"),
    (axy, "UTM_Y", "y-position [m]"),
    (axv, "_V_kmh", "velocity [km/h]"),
    (axa, "ACC_TAN", "acceleration [m/s²]"),
)
for panel_ax, column, ylabel in panel_specs:
    panel_ax.plot(obj_df["TIMESTAMP"], obj_df[column], "k-")
    panel_ax.set_ylabel(ylabel)
    panel_ax.grid()

axs[-1].set_xlabel("time [s]")