In [1]:
%load_ext autoreload
%autoreload 2

import os
import pandas as pd
import sys
sys.path.append("..")

from magni_dash.data_preprocessing.spatio_temporal_features import SpatioTemporalFeatures
from magni_dash.config.constants import TRAJECTORY_SAMPLES_PATH, TRAJECTORY_DATA_TYPE

In [2]:
# List the sample recordings stored under the Scenario1 folder.
scenario_dir = os.path.join(TRAJECTORY_SAMPLES_PATH, "Scenario1")
os.listdir(scenario_dir)

['Qualisys_170522_SC1B_R01_pp.tsv', 'Qualisys_170522_SC1A_R02_pp.tsv']

In [3]:
# Location of the recording analysed in this notebook.
scenario_dir = os.path.join(TRAJECTORY_SAMPLES_PATH, "Scenario1")
df_path = os.path.join(scenario_dir, "Qualisys_170522_SC1B_R01_pp.tsv")

# Arguments forwarded to `pd.read_csv` in the next cell.
sep = "\t"  # tab-separated file
header = 11  # row index passed as `header` (the row holding the column names)
index_col = "Frame"  # column used as the frame index

# Suffix of the height columns that are dropped when working in 2D.
height_suffix = "Z"

In [4]:
# Read the recording using the parameters defined in the previous cell.
raw_df = pd.read_csv(df_path, sep=sep, header=header, index_col=index_col)

# In 2D mode the height columns are not needed.
if TRAJECTORY_DATA_TYPE == "2D":
    is_height_col = raw_df.columns.str.endswith(height_suffix)
    raw_df = raw_df.loc[:, ~is_height_col]

# Drop columns that carry no data at all.
raw_df = raw_df.dropna(axis=1, how="all")

# Drop pandas' auto-named "Unnamed" columns and every column mentioning "Type".
is_unwanted_col = raw_df.columns.str.contains("^Unnamed") | raw_df.columns.str.contains(
    "Type"
)
raw_df = raw_df.loc[:, ~is_unwanted_col]

In [10]:
# Positional rows 10800-10859 of the raw data for marker 3 of Helmet_8.
raw_df.iloc[10800:10860][["Time", "Helmet_8 - 3 X", "Helmet_8 - 3 Y"]]

Unnamed: 0_level_0,Time,Helmet_8 - 3 X,Helmet_8 - 3 Y
Frame,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
10814,108.13,,
10815,108.14,,
10816,108.15,,
10817,108.16,,
10818,108.17,,
10819,108.18,,
10820,108.19,,
10821,108.2,,
10822,108.21,,
10823,108.22,,


In [8]:
# `interpolate` returns a new frame, so `raw_df` itself is left untouched.
preprocessed_df = raw_df.interpolate()

# Coordinate columns are the ones whose name ends in X, Y or Z.
coordinate_cols = preprocessed_df.columns[
    preprocessed_df.columns.str[-1:].isin(["X", "Y", "Z"])
]
# Scale coordinates by 1/1000 — presumably millimetres to metres, since later
# cells label these columns "(m)"; TODO confirm against the recording's units.
preprocessed_df[coordinate_cols] = preprocessed_df[coordinate_cols] / 1000

In [11]:
# Same positional window as the raw inspection, now on the preprocessed frame.
window_cols = ["Time", "Helmet_8 - 3 X", "Helmet_8 - 3 Y"]
preprocessed_df.iloc[10800:10860][window_cols]

Unnamed: 0_level_0,Time,Helmet_8 - 3 X,Helmet_8 - 3 Y
Frame,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
10814,108.13,-6.715685,-2.266779
10815,108.14,-6.768422,-2.282932
10816,108.15,-6.82116,-2.299086
10817,108.16,-6.873897,-2.315239
10818,108.17,-6.926634,-2.331393
10819,108.18,-6.979371,-2.347546
10820,108.19,-7.032108,-2.363699
10821,108.2,-7.084845,-2.379853
10822,108.21,-7.137582,-2.396006
10823,108.22,-7.190319,-2.41216


In [6]:
# Helmet names are the part of each "Helmet..." column name before " - ".
helmets = {
    column.split(" - ")[0]
    for column in raw_df.columns
    if column.startswith("Helmet")
}
list(helmets)

['Helmet_6',
 'Helmet_10',
 'Helmet_1',
 'Helmet_4',
 'Helmet_2',
 'Helmet_5',
 'Helmet_8']

In [7]:
# Compute per-marker speeds from the preprocessed frame (interpolated, coordinates
# scaled by 1/1000) rather than from `raw_df`: the saved outputs of this notebook
# (metre-scale deltas, values present where `raw_df` is NaN) can only be reproduced
# from the preprocessed data, and the result columns are labelled "speed (m/s)".
# NOTE(review): `get_speed` internals are not visible here — confirm it does not
# rescale or interpolate on its own.
features_df = SpatioTemporalFeatures.get_speed(
    preprocessed_df,
    time_col_name="Time",
    element_name=list(helmets),
)

In [8]:
# Names of the feature columns that mention "speed".
is_speed_col = features_df.columns.str.contains("speed")
features_df.columns[is_speed_col]

Index(['Helmet_6 - 1 speed (m/s)', 'Helmet_6 - 2 speed (m/s)',
       'Helmet_6 - 3 speed (m/s)', 'Helmet_6 - 4 speed (m/s)',
       'Helmet_6 - 5 speed (m/s)', 'Helmet_10 - 1 speed (m/s)',
       'Helmet_10 - 2 speed (m/s)', 'Helmet_10 - 3 speed (m/s)',
       'Helmet_10 - 4 speed (m/s)', 'Helmet_1 - 1 speed (m/s)',
       'Helmet_1 - 2 speed (m/s)', 'Helmet_1 - 3 speed (m/s)',
       'Helmet_4 - 1 speed (m/s)', 'Helmet_4 - 2 speed (m/s)',
       'Helmet_4 - 3 speed (m/s)', 'Helmet_4 - 4 speed (m/s)',
       'Helmet_4 - 5 speed (m/s)', 'Helmet_2 - 1 speed (m/s)',
       'Helmet_2 - 2 speed (m/s)', 'Helmet_2 - 3 speed (m/s)',
       'Helmet_2 - 4 speed (m/s)', 'Helmet_5 - 1 speed (m/s)',
       'Helmet_5 - 2 speed (m/s)', 'Helmet_5 - 3 speed (m/s)',
       'Helmet_5 - 4 speed (m/s)', 'Helmet_5 - 5 speed (m/s)',
       'Helmet_8 - 1 speed (m/s)', 'Helmet_8 - 2 speed (m/s)',
       'Helmet_8 - 3 speed (m/s)', 'Helmet_8 - 4 speed (m/s)',
       'Helmet_8 - 5 speed (m/s)'],
      dtype='ob

In [9]:
# Display the feature frame returned by `get_speed` (per-marker deltas,
# displacements, the time delta and the speed columns).
features_df

Unnamed: 0_level_0,Helmet_6 - 1 X_delta,Helmet_6 - 1 Y_delta,1_displacement,Helmet_6 - 2 X_delta,Helmet_6 - 2 Y_delta,2_displacement,Helmet_6 - 3 X_delta,Helmet_6 - 3 Y_delta,3_displacement,Helmet_6 - 4 X_delta,...,4_displacement,Helmet_8 - 5 X_delta,Helmet_8 - 5 Y_delta,5_displacement,Time_delta,Helmet_8 - 1 speed (m/s),Helmet_8 - 2 speed (m/s),Helmet_8 - 3 speed (m/s),Helmet_8 - 4 speed (m/s),Helmet_8 - 5 speed (m/s)
Frame,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
14,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.00,,,,,
15,0.002558,-0.001233,0.002840,0.002316,-0.001148,0.002585,0.001896,-0.001329,0.002315,0.010322,...,0.000000,0.000000,0.000000,0.000000,0.01,0.000000,0.000000,0.099207,0.000000,0.000000
16,0.002450,-0.001336,0.002791,0.002186,-0.001112,0.002453,0.002046,-0.001479,0.002525,0.002463,...,0.000000,0.000000,0.000000,0.000000,0.01,0.000000,0.000000,0.104633,0.000000,0.000000
17,0.002443,-0.001544,0.002890,0.002264,-0.001320,0.002621,0.006063,-0.000041,0.006063,0.002636,...,0.000000,0.000000,0.000000,0.000000,0.01,0.000000,0.000000,0.107213,0.000000,0.000000
18,0.002212,-0.001591,0.002725,0.002272,-0.001447,0.002694,0.001908,-0.001694,0.002551,-0.004913,...,0.000000,0.000000,0.000000,0.000000,0.01,0.000000,0.000000,0.105979,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23894,-0.000266,0.001719,0.001739,0.000348,0.001769,0.001803,0.000040,0.001481,0.001482,0.000471,...,0.001310,-0.003889,-0.000869,0.003985,0.01,0.310250,0.233102,0.255385,0.130979,0.398491
23895,0.000082,0.001505,0.001507,0.000448,0.001484,0.001550,-0.000101,0.001784,0.001787,0.000264,...,0.001412,-0.003963,0.000009,0.003963,0.01,0.291185,0.236286,0.240225,0.141227,0.396301
23896,-0.000369,0.001812,0.001849,0.000287,0.001869,0.001891,-0.000184,0.001714,0.001724,0.000526,...,0.001307,-0.004156,0.000171,0.004160,0.01,0.289937,0.244190,0.229729,0.130734,0.415952
23897,-0.000152,0.001536,0.001544,0.000525,0.001691,0.001771,-0.000164,0.001795,0.001802,0.000295,...,0.014830,0.005989,-0.010393,0.011995,0.01,0.251250,1.496475,0.220745,1.482958,1.199511


In [10]:
# Attach the speed features to the preprocessed coordinates (not `raw_df`): the
# downstream columns are renamed to "X (m)" / "Y (m)", and the saved outputs show
# interpolated, metre-scale positions, which only `preprocessed_df` holds.
features_cat = preprocessed_df.join(features_df)

# Keep only the position columns (names ending in X or Y) and the speed columns.
is_position_or_speed = (
    features_cat.columns.str.endswith("X")
    | features_cat.columns.str.endswith("Y")
    | features_cat.columns.str.endswith("speed (m/s)")
)
features_filtered = features_cat.loc[:, is_position_or_speed]

In [15]:
# Maximum of every speed column, to spot implausible peaks.
is_speed_col = features_filtered.columns.str.endswith("(m/s)")
features_filtered.loc[:, is_speed_col].max()

Helmet_6 - 1 speed (m/s)      3.377359
Helmet_6 - 2 speed (m/s)      4.471432
Helmet_6 - 3 speed (m/s)      5.477929
Helmet_6 - 4 speed (m/s)     14.747031
Helmet_6 - 5 speed (m/s)      3.602503
Helmet_10 - 1 speed (m/s)     5.551932
Helmet_10 - 2 speed (m/s)     3.612774
Helmet_10 - 3 speed (m/s)     4.672214
Helmet_10 - 4 speed (m/s)     4.418039
Helmet_1 - 1 speed (m/s)      4.023035
Helmet_1 - 2 speed (m/s)      5.138597
Helmet_1 - 3 speed (m/s)     23.900774
Helmet_4 - 1 speed (m/s)      3.570360
Helmet_4 - 2 speed (m/s)      4.355837
Helmet_4 - 3 speed (m/s)      7.494676
Helmet_4 - 4 speed (m/s)      6.215025
Helmet_4 - 5 speed (m/s)      5.499942
Helmet_2 - 1 speed (m/s)      4.223908
Helmet_2 - 2 speed (m/s)      4.677674
Helmet_2 - 3 speed (m/s)      6.377628
Helmet_2 - 4 speed (m/s)      5.679050
Helmet_5 - 1 speed (m/s)     12.539164
Helmet_5 - 2 speed (m/s)      3.709197
Helmet_5 - 3 speed (m/s)      7.046632
Helmet_5 - 4 speed (m/s)     11.458573
Helmet_5 - 5 speed (m/s) 

In [33]:
# Check whether any column ending in "Time" survived the filtering step.
is_time_col = features_filtered.columns.str.endswith("Time")
features_filtered.columns[is_time_col]

Index([], dtype='object')

In [37]:
# Position and speed of Helmet_8 marker 3 over positional rows 10800-10859.
window_cols = ["Helmet_8 - 3 X", "Helmet_8 - 3 Y", "Helmet_8 - 3 speed (m/s)"]
features_filtered.iloc[10800:10860][window_cols]

Unnamed: 0_level_0,Helmet_8 - 3 X,Helmet_8 - 3 Y,Helmet_8 - 3 speed (m/s)
Frame,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
10814,-6.715685,-2.266779,5.515548
10815,-6.768422,-2.282932,5.515548
10816,-6.82116,-2.299086,5.515548
10817,-6.873897,-2.315239,5.515548
10818,-6.926634,-2.331393,5.515548
10819,-6.979371,-2.347546,5.515548
10820,-7.032108,-2.363699,5.515548
10821,-7.084845,-2.379853,5.515548
10822,-7.137582,-2.396006,5.515548
10823,-7.190319,-2.41216,5.515548


In [26]:
# X position next to speed for Helmet_8 marker 3, across the whole recording.
features_filtered.loc[:, ["Helmet_8 - 3 X", "Helmet_8 - 3 speed (m/s)"]]

Unnamed: 0_level_0,Helmet_8 - 3 X,Helmet_8 - 3 speed (m/s)
Frame,Unnamed: 1_level_1,Unnamed: 2_level_1
14,-0.897056,
15,-0.897412,0.099207
16,-0.897607,0.104633
17,-0.897894,0.107213
18,-0.897935,0.105979
...,...,...
23894,-0.949442,0.255385
23895,-0.949407,0.240225
23896,-0.949484,0.229729
23897,-0.949677,0.220745


In [11]:
# Element family whose columns are grouped below.
element = "Helmet"
if element not in ["Helmet", "DARKO"]:
    raise ValueError(f"{element} must be in ['Helmet', 'DARKO']")

# The capture group is the grouping key: "<helmet id> - <marker id>" for helmets,
# "<marker id>" for DARKO. `\d+` is used for the marker id so that ids >= 10 are
# not truncated to their first digit and merged into another marker's group.
element_pat = r"DARKO - (\d+) " if element == "DARKO" else r"Helmet_(\d+ - \d+) "

# One key per column; columns that do not belong to `element` get NaN and are skipped.
group_keys = features_filtered.columns.str.extract(element_pat, expand=False)

# (key, sub-frame) pairs in sorted key order — the same pairs a column-wise groupby
# would yield, built explicitly because `DataFrame.groupby(axis=1)` is deprecated.
elements_grouped = [
    (group_name, features_filtered.loc[:, group_keys == group_name])
    for group_name in sorted(group_keys.dropna().unique())
]

In [12]:
def get_mapping_cols(element, group_name):
    """Build the rename map from element-specific to generic column names.

    Args:
        element: Element family name; "DARKO" is joined to the group name with
            " - ", anything else with "_".
        group_name: Group key appended after the joiner.

    Returns:
        dict mapping "<element><joiner><group_name> <suffix>" to the generic
        names "X (m)", "Y (m)", "Z (m)" and "speed (m/s)".
    """
    joiner = "_" if element != "DARKO" else " - "
    prefix = element + joiner + group_name + " "
    generic_by_suffix = {
        "X": "X (m)",
        "Y": "Y (m)",
        "Z": "Z (m)",
        "speed (m/s)": "speed (m/s)",
    }
    return {prefix + suffix: generic for suffix, generic in generic_by_suffix.items()}

In [13]:
# Rename every group's columns to the generic names and tag each row with the
# element id (`eid`) and marker id (`mid`) it came from.
groups = []
for group_name, group in elements_grouped:
    if element == "Helmet":
        # Helmet group names look like "<helmet id> - <marker id>".
        name_parts = group_name.split(" - ")
        eid, mid = element + "_" + name_parts[0], name_parts[1]
    else:
        eid, mid = element, group_name
    renamed_group = group.rename(get_mapping_cols(element, group_name), axis=1)
    groups.append(renamed_group.assign(eid=eid, mid=mid))

In [14]:
# Stack the per-marker frames row-wise into one long table.
elements_concatenated = pd.concat(groups)

In [15]:
# Display the long table: one row per frame and marker, tagged with `eid` and `mid`.
elements_concatenated

Unnamed: 0_level_0,X (m),Y (m),speed (m/s),eid,mid
Frame,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
14,-8.742446,0.404269,,Helmet_1,1
15,-8.743438,0.404353,0.099555,Helmet_1,1
16,-8.741762,0.404529,0.168522,Helmet_1,1
17,-8.743643,0.404476,0.188175,Helmet_1,1
18,-8.743100,0.404466,0.054309,Helmet_1,1
...,...,...,...,...,...
23894,-0.947265,-3.683778,0.398491,Helmet_8,5
23895,-0.951228,-3.683769,0.396301,Helmet_8,5
23896,-0.955384,-3.683598,0.415952,Helmet_8,5
23897,-0.949395,-3.693991,1.199511,Helmet_8,5


In [16]:
# All rows (every marker) belonging to Helmet_4.
is_helmet_4 = elements_concatenated["eid"] == "Helmet_4"
elements_concatenated.loc[is_helmet_4]

Unnamed: 0_level_0,X (m),Y (m),speed (m/s),eid,mid
Frame,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
14,9.207740,-0.919807,,Helmet_4,1
15,9.209320,-0.921894,0.261763,Helmet_4,1
16,9.206412,-0.924998,0.425338,Helmet_4,1
17,9.203235,-0.927463,0.402114,Helmet_4,1
18,9.202336,-0.930205,0.288561,Helmet_4,1
...,...,...,...,...,...
23894,2.739587,0.278987,1.481143,Helmet_4,5
23895,2.729274,0.288032,1.371751,Helmet_4,5
23896,2.719193,0.296386,1.309259,Helmet_4,5
23897,2.709284,0.304398,1.274286,Helmet_4,5


# Select, per helmet, the marker with the fewest NaNs

In [17]:
# Distinct element ids in the long table, in order of first appearance.
helmets = elements_concatenated["eid"].unique()
helmets

array(['Helmet_1', 'Helmet_10', 'Helmet_2', 'Helmet_4', 'Helmet_5',
       'Helmet_6', 'Helmet_8'], dtype=object)

In [18]:
# For every helmet, count how many rows of each marker have a missing "X (m)".
x_is_missing = elements_concatenated["X (m)"].isna()
nan_counter_by_marker = {}
for helmet_id in helmets:
    helmet_rows = elements_concatenated["eid"] == helmet_id
    marker_ids = elements_concatenated.loc[helmet_rows, "mid"].unique()
    nan_counter_by_marker[helmet_id] = {
        marker_id: x_is_missing[
            helmet_rows & (elements_concatenated["mid"] == marker_id)
        ].sum()
        for marker_id in marker_ids
    }

In [19]:
# Display the nested mapping helmet id -> marker id -> number of missing "X (m)" values.
nan_counter_by_marker

{'Helmet_1': {'1': 0, '2': 0, '3': 0},
 'Helmet_10': {'1': 147, '2': 109, '3': 124, '4': 147},
 'Helmet_2': {'1': 0, '2': 0, '3': 0, '4': 0},
 'Helmet_4': {'1': 0, '2': 0, '3': 0, '4': 0, '5': 0},
 'Helmet_5': {'1': 578, '2': 546, '3': 1123, '4': 1153, '5': 1080},
 'Helmet_6': {'1': 0, '2': 0, '3': 0, '4': 0, '5': 0},
 'Helmet_8': {'1': 294, '2': 222, '3': 0, '4': 294, '5': 343}}

In [20]:
# Per helmet, keep the marker with the fewest missing values (`min` returns the
# first marker on ties).
best_markers = {
    instance_id: min(nans_counter, key=nans_counter.get)
    for instance_id, nans_counter in nan_counter_by_marker.items()
}

# Rows of the long table restricted to each helmet's best marker.
elements_filtered_by_best_marker = [
    elements_concatenated[
        (elements_concatenated.eid == instance_id)
        & (elements_concatenated.mid == best_marker)
    ]
    for instance_id, best_marker in best_markers.items()
]

In [21]:
# Display the marker chosen for every helmet.
best_markers

{'Helmet_1': '1',
 'Helmet_10': '2',
 'Helmet_2': '1',
 'Helmet_4': '1',
 'Helmet_5': '2',
 'Helmet_6': '1',
 'Helmet_8': '3'}

In [22]:
# Stack the best-marker rows of all helmets into a single frame.
best_markers_df = pd.concat(elements_filtered_by_best_marker)

In [23]:
# Order rows by the Frame index so that all helmets of the same frame are adjacent.
best_markers_df = best_markers_df.sort_index()

In [24]:
# Display the frame-sorted table holding one (best) marker per helmet.
best_markers_df

Unnamed: 0_level_0,X (m),Y (m),speed (m/s),eid,mid
Frame,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
14,-8.742446,0.404269,,Helmet_1,1
14,-1.538172,-3.326199,,Helmet_6,1
14,-0.184898,3.232820,,Helmet_2,1
14,9.207740,-0.919807,,Helmet_4,1
14,,,,Helmet_5,2
...,...,...,...,...,...
23898,2.687843,0.248632,1.287887,Helmet_4,1
23898,-6.096314,-0.154012,0.000000,Helmet_10,2
23898,6.127391,2.336592,1.181323,Helmet_1,1
23898,-1.541328,-3.827607,0.167244,Helmet_6,1


In [27]:
# Smallest Frame label present in the table.
best_markers_df.index.min()

14

In [28]:
# Largest Frame label present in the table.
best_markers_df.index.max()

23898

In [32]:
# Helmet_8 rows whose Frame label lies strictly between 10800 and 10850.
is_helmet_8 = best_markers_df["eid"] == "Helmet_8"
in_frame_window = (best_markers_df.index > 10800) & (best_markers_df.index < 10850)
best_markers_df[is_helmet_8 & in_frame_window]

Unnamed: 0_level_0,X (m),Y (m),speed (m/s),eid,mid
Frame,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
10801,-6.030104,-2.056784,5.515548,Helmet_8,3
10802,-6.082841,-2.072938,5.515548,Helmet_8,3
10803,-6.135578,-2.089091,5.515548,Helmet_8,3
10804,-6.188315,-2.105245,5.515548,Helmet_8,3
10805,-6.241052,-2.121398,5.515548,Helmet_8,3
10806,-6.293789,-2.137552,5.515548,Helmet_8,3
10807,-6.346526,-2.153705,5.515548,Helmet_8,3
10808,-6.399263,-2.169858,5.515548,Helmet_8,3
10809,-6.452,-2.186012,5.515548,Helmet_8,3
10810,-6.504737,-2.202165,5.515548,Helmet_8,3


In [43]:
# Names of the displacement columns (note that they repeat across helmets).
is_displacement_col = features_df.columns.str.endswith("_displacement")
features_df.columns[is_displacement_col]

Index(['1_displacement', '2_displacement', '3_displacement', '4_displacement',
       '5_displacement', '1_displacement', '2_displacement', '3_displacement',
       '4_displacement', '5_displacement', '1_displacement', '2_displacement',
       '3_displacement', '4_displacement', '1_displacement', '2_displacement',
       '3_displacement', '4_displacement', '5_displacement', '1_displacement',
       '2_displacement', '3_displacement', '4_displacement', '5_displacement',
       '1_displacement', '2_displacement', '3_displacement', '1_displacement',
       '2_displacement', '3_displacement', '4_displacement'],
      dtype='object')

In [36]:
# `features_df` has no "8_displacement" column: displacement columns are named by
# marker id only ("1_displacement" ... "5_displacement") and repeat once per helmet,
# so they cannot be selected unambiguously by label. Rebuild the displacement of the
# marker of interest from its own "*_delta" columns instead (the Euclidean norm of
# the deltas reproduces the displayed "*_displacement" values).
helmet_marker = "Helmet_8 - 3"
is_marker_delta = features_df.columns.str.startswith(
    helmet_marker + " "
) & features_df.columns.str.endswith("_delta")
marker_deltas = features_df.loc[:, is_marker_delta]

marker_motion = pd.DataFrame(
    {
        # skipna=False keeps NaN where any delta is missing instead of reporting 0.
        f"{helmet_marker} displacement": (marker_deltas**2).sum(axis=1, skipna=False)
        ** 0.5,
        f"{helmet_marker} speed (m/s)": features_df[f"{helmet_marker} speed (m/s)"],
    }
)
marker_motion.iloc[10800:10860]