In [1]:
%load_ext autoreload
%autoreload 2

import os
import pandas as pd
import sys
sys.path.append("..")

from magni_dash.data_preprocessing.spatio_temporal_features import SpatioTemporalFeatures
from magni_dash.config.constants import TRAJECTORY_SAMPLES_PATH, TRAJECTORY_DATA_TYPE

In [2]:
os.listdir(os.path.join(TRAJECTORY_SAMPLES_PATH, "Scenario1"))

['Qualisys_170522_SC1B_R01_pp.tsv']

In [3]:
df_path = os.path.join(TRAJECTORY_SAMPLES_PATH, "Scenario1",'Qualisys_170522_SC1B_R01_pp.tsv')
sep="\t"
header=11
index_col="Frame"
height_suffix = "Z"

In [6]:
raw_df = pd.read_csv(
    df_path,
    sep=sep,
    header=header,
    index_col=index_col,
)
if TRAJECTORY_DATA_TYPE == "2D":
    raw_df = raw_df[raw_df.columns[~raw_df.columns.str.endswith(height_suffix)]]
raw_df = raw_df.dropna(axis=1, how="all")
raw_df = raw_df.interpolate()
raw_df[
    raw_df.columns[
        (raw_df.columns.str.endswith("X"))
        | ((raw_df.columns.str.endswith("Y")))
        | ((raw_df.columns.str.endswith("Z")))
    ]
] /= 1000
raw_df = raw_df.loc[
    :,
    (~raw_df.columns.str.contains("^Unnamed")) & (~raw_df.columns.str.contains("Type")),
]

In [7]:
raw_df.columns

Index(['Time', 'Helmet_10 - 1 X', 'Helmet_10 - 1 Y', 'Helmet_10 - 2 X',
       'Helmet_10 - 2 Y', 'Helmet_10 - 3 X', 'Helmet_10 - 3 Y',
       'Helmet_10 - 4 X', 'Helmet_10 - 4 Y', 'Helmet_5 - 1 X',
       'Helmet_5 - 1 Y', 'Helmet_5 - 2 X', 'Helmet_5 - 2 Y', 'Helmet_5 - 3 X',
       'Helmet_5 - 3 Y', 'Helmet_5 - 4 X', 'Helmet_5 - 4 Y', 'Helmet_5 - 5 X',
       'Helmet_5 - 5 Y', 'Helmet_1 - 1 X', 'Helmet_1 - 1 Y', 'Helmet_1 - 2 X',
       'Helmet_1 - 2 Y', 'Helmet_1 - 3 X', 'Helmet_1 - 3 Y', 'Helmet_2 - 1 X',
       'Helmet_2 - 1 Y', 'Helmet_2 - 2 X', 'Helmet_2 - 2 Y', 'Helmet_2 - 3 X',
       'Helmet_2 - 3 Y', 'Helmet_2 - 4 X', 'Helmet_2 - 4 Y', 'Helmet_4 - 1 X',
       'Helmet_4 - 1 Y', 'Helmet_4 - 2 X', 'Helmet_4 - 2 Y', 'Helmet_4 - 3 X',
       'Helmet_4 - 3 Y', 'Helmet_4 - 4 X', 'Helmet_4 - 4 Y', 'Helmet_4 - 5 X',
       'Helmet_4 - 5 Y', 'Helmet_8 - 1 X', 'Helmet_8 - 1 Y', 'Helmet_8 - 2 X',
       'Helmet_8 - 2 Y', 'Helmet_8 - 3 X', 'Helmet_8 - 3 Y', 'Helmet_8 - 4 X',
       'Hel

In [8]:
helmets = raw_df.columns[raw_df.columns.str.startswith("Helmet")].tolist()
helmets = set(map(lambda x: x.split(" - ")[0], helmets))
list(helmets)

['Helmet_6',
 'Helmet_4',
 'Helmet_1',
 'Helmet_8',
 'Helmet_2',
 'Helmet_5',
 'Helmet_10']

In [9]:
features_df = SpatioTemporalFeatures.get_speed(
        raw_df,
        time_col_name="Time",
        element_name=list(helmets),
    )

In [10]:
features_df.columns[features_df.columns.str.contains("speed")]

Index(['Helmet_6 - 1 speed (m/s)', 'Helmet_6 - 2 speed (m/s)',
       'Helmet_6 - 3 speed (m/s)', 'Helmet_6 - 4 speed (m/s)',
       'Helmet_6 - 5 speed (m/s)', 'Helmet_4 - 1 speed (m/s)',
       'Helmet_4 - 2 speed (m/s)', 'Helmet_4 - 3 speed (m/s)',
       'Helmet_4 - 4 speed (m/s)', 'Helmet_4 - 5 speed (m/s)',
       'Helmet_1 - 1 speed (m/s)', 'Helmet_1 - 2 speed (m/s)',
       'Helmet_1 - 3 speed (m/s)', 'Helmet_8 - 1 speed (m/s)',
       'Helmet_8 - 2 speed (m/s)', 'Helmet_8 - 3 speed (m/s)',
       'Helmet_8 - 4 speed (m/s)', 'Helmet_8 - 5 speed (m/s)',
       'Helmet_2 - 1 speed (m/s)', 'Helmet_2 - 2 speed (m/s)',
       'Helmet_2 - 3 speed (m/s)', 'Helmet_2 - 4 speed (m/s)',
       'Helmet_5 - 1 speed (m/s)', 'Helmet_5 - 2 speed (m/s)',
       'Helmet_5 - 3 speed (m/s)', 'Helmet_5 - 4 speed (m/s)',
       'Helmet_5 - 5 speed (m/s)', 'Helmet_10 - 1 speed (m/s)',
       'Helmet_10 - 2 speed (m/s)', 'Helmet_10 - 3 speed (m/s)',
       'Helmet_10 - 4 speed (m/s)'],
      dtype='ob

In [11]:
features_df

Unnamed: 0_level_0,Helmet_6 - 1 X_delta,Helmet_6 - 1 Y_delta,1_displacement,Helmet_6 - 2 X_delta,Helmet_6 - 2 Y_delta,2_displacement,Helmet_6 - 3 X_delta,Helmet_6 - 3 Y_delta,3_displacement,Helmet_6 - 4 X_delta,...,Helmet_10 - 3 Y_delta,3_displacement,Helmet_10 - 4 X_delta,Helmet_10 - 4 Y_delta,4_displacement,Time_delta,Helmet_10 - 1 speed (m/s),Helmet_10 - 2 speed (m/s),Helmet_10 - 3 speed (m/s),Helmet_10 - 4 speed (m/s)
Frame,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
14,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.00,,,,
15,0.002558,-0.001233,0.002840,0.002316,-0.001148,0.002585,0.001896,-0.001329,0.002315,0.010322,...,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0
16,0.002450,-0.001336,0.002791,0.002186,-0.001112,0.002453,0.002046,-0.001479,0.002525,0.002463,...,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0
17,0.002443,-0.001544,0.002890,0.002264,-0.001320,0.002621,0.006063,-0.000041,0.006063,0.002636,...,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0
18,0.002212,-0.001591,0.002725,0.002272,-0.001447,0.002694,0.001908,-0.001694,0.002551,-0.004913,...,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23894,-0.000266,0.001719,0.001739,0.000348,0.001769,0.001803,0.000040,0.001481,0.001482,0.000471,...,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0
23895,0.000082,0.001505,0.001507,0.000448,0.001484,0.001550,-0.000101,0.001784,0.001787,0.000264,...,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0
23896,-0.000369,0.001812,0.001849,0.000287,0.001869,0.001891,-0.000184,0.001714,0.001724,0.000526,...,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0
23897,-0.000152,0.001536,0.001544,0.000525,0.001691,0.001771,-0.000164,0.001795,0.001802,0.000295,...,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0


In [12]:
features_cat = raw_df.join(features_df)
features_filtered = features_cat[
    features_cat.columns[
        (features_cat.columns.str.endswith("X"))
        | (features_cat.columns.str.endswith("Y"))
        | (features_cat.columns.str.endswith("speed (m/s)"))
    ]
]

In [13]:
element = "Helmet"
if element not in ["Helmet", "DARKO"]:
    raise ValueError(f"{element} must be in ['Helmet', 'DARKO']")
element_pat = (
                r"DARKO - (\d) "
                if element == "DARKO"
                else r"Helmet_(\d+ - \d).*"
            )
elements_grouped = features_filtered.groupby(
    features_filtered.columns.str.extract(element_pat, expand=False),
    axis=1,
)

In [14]:
def get_mapping_cols(element, group_name):
    sep = " - " if element == "DARKO" else "_"
    return {
        element + sep + group_name + " " + "X": "X (m)",
        element + sep + group_name + " " + "Y": "Y (m)",
        element + sep + group_name + " " + "Z": "Z (m)",
        element + sep + group_name + " " + "speed (m/s)" : "speed (m/s)"  
    }

In [15]:
groups = []
for group_name, group in elements_grouped:
    _mapping_cols = get_mapping_cols(element, group_name)
    group = group.rename(_mapping_cols, axis=1)
    eid = element + "_" + group_name.split(" - ")[0] if element == "Helmet" else element 
    mid = group_name.split(" - ")[1] if element == "Helmet" else group_name
    group["eid"] = eid
    group["mid"] = mid
    groups.append(group)

In [16]:
elements_concatenated= pd.concat(groups, axis=0)

In [17]:
elements_concatenated

Unnamed: 0_level_0,X (m),Y (m),speed (m/s),eid,mid
Frame,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
14,-8.742446,0.404269,,Helmet_1,1
15,-8.743438,0.404353,0.099555,Helmet_1,1
16,-8.741762,0.404529,0.168522,Helmet_1,1
17,-8.743643,0.404476,0.188175,Helmet_1,1
18,-8.743100,0.404466,0.054309,Helmet_1,1
...,...,...,...,...,...
23894,-0.947265,-3.683778,0.398491,Helmet_8,5
23895,-0.951228,-3.683769,0.396301,Helmet_8,5
23896,-0.955384,-3.683598,0.415952,Helmet_8,5
23897,-0.949395,-3.693991,1.199511,Helmet_8,5


In [18]:
elements_concatenated[elements_concatenated.eid == "Helmet_4"]

Unnamed: 0_level_0,X (m),Y (m),speed (m/s),eid,mid
Frame,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
14,9.207740,-0.919807,,Helmet_4,1
15,9.209320,-0.921894,0.261763,Helmet_4,1
16,9.206412,-0.924998,0.425338,Helmet_4,1
17,9.203235,-0.927463,0.402114,Helmet_4,1
18,9.202336,-0.930205,0.288561,Helmet_4,1
...,...,...,...,...,...
23894,2.739587,0.278987,1.481143,Helmet_4,5
23895,2.729274,0.288032,1.371751,Helmet_4,5
23896,2.719193,0.296386,1.309259,Helmet_4,5
23897,2.709284,0.304398,1.274286,Helmet_4,5


# min NaNs

In [19]:
helmets = elements_concatenated.eid.unique()
helmets

array(['Helmet_1', 'Helmet_10', 'Helmet_2', 'Helmet_4', 'Helmet_5',
       'Helmet_6', 'Helmet_8'], dtype=object)

In [20]:
nan_counter_by_marker = {}
for helmet_id in helmets:
    nan_counter_by_marker[helmet_id] = {}
    markers = elements_concatenated[elements_concatenated.eid == helmet_id].mid.unique()
    for marker_id in markers:
        n_nans = (
            elements_concatenated[
                (elements_concatenated.eid == helmet_id)
                & (elements_concatenated.mid == marker_id)
            ]["X (m)"]
            .isna()
            .sum()
        )
        nan_counter_by_marker[helmet_id][marker_id] = n_nans

In [21]:
nan_counter_by_marker

{'Helmet_1': {'1': 0, '2': 0, '3': 0},
 'Helmet_10': {'1': 147, '2': 109, '3': 124, '4': 147},
 'Helmet_2': {'1': 0, '2': 0, '3': 0, '4': 0},
 'Helmet_4': {'1': 0, '2': 0, '3': 0, '4': 0, '5': 0},
 'Helmet_5': {'1': 578, '2': 546, '3': 1123, '4': 1153, '5': 1080},
 'Helmet_6': {'1': 0, '2': 0, '3': 0, '4': 0, '5': 0},
 'Helmet_8': {'1': 294, '2': 222, '3': 0, '4': 294, '5': 343}}

In [22]:
best_markers, elements_filtered_by_best_marker = {}, []
for instance_id, nans_counter in nan_counter_by_marker.items():
    best_markers[instance_id] = min(
        nans_counter, key=nans_counter.get
    )
    elements_filtered_by_best_marker.append(
            elements_concatenated[
                (elements_concatenated.eid == instance_id)
                & (elements_concatenated.mid == best_markers[instance_id])
            ]
        )

In [23]:
best_markers

{'Helmet_1': '1',
 'Helmet_10': '2',
 'Helmet_2': '1',
 'Helmet_4': '1',
 'Helmet_5': '2',
 'Helmet_6': '1',
 'Helmet_8': '3'}

In [24]:
best_makers_df = pd.concat(elements_filtered_by_best_marker, axis=0)

In [25]:
best_makers_df = best_makers_df.sort_index()

In [26]:
best_makers_df

Unnamed: 0_level_0,X (m),Y (m),speed (m/s),eid,mid
Frame,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
14,-8.742446,0.404269,,Helmet_1,1
14,-1.538172,-3.326199,,Helmet_6,1
14,-0.184898,3.232820,,Helmet_2,1
14,9.207740,-0.919807,,Helmet_4,1
14,,,,Helmet_5,2
...,...,...,...,...,...
23898,2.687843,0.248632,1.287887,Helmet_4,1
23898,-6.096314,-0.154012,0.000000,Helmet_10,2
23898,6.127391,2.336592,1.181323,Helmet_1,1
23898,-1.541328,-3.827607,0.167244,Helmet_6,1
