In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

from typing import Tuple, Dict

import os
import ast
import csv
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from thor_magni_tools.data_tests.test_csv import validate_header, validate_header_with_dataframe

In [2]:
DIR_PATH = "../datasets/thor_magni_zenodo/"
SCENARIO_ID = "Scenario_1"

# Helper functions

In [3]:
def load_csv_metadata(path: str, header_size: int = 16) -> Tuple[pd.DataFrame, dict]:
    """Path to the csv file

    Parameters
    ----------
    path
        Path to the csv file
    header_size
        Number of rows for the header

    Returns
    -------
        Panda DataFrame and Dictionary with the metadata
    """
    raw_df = pd.read_csv(
        path,
        sep=",",
        header=header_size,
        index_col=1,
    )
    header_dict = {}
    with open(path, "r", newline="\n") as csvfile:
        csvreader = csv.reader(csvfile)

        # Read the first 16 rows and store them in the list
        for i, row in enumerate(csvreader):
            if i > header_size - 1:
                break
            key = row[0]
            values = row[1:]
            values = filter(lambda x: x != "", values)
            values = [int(v) if v.isnumeric() else v for v in values]
            header_dict[key] = values
    return raw_df, header_dict
    

In [4]:
def preprocessing_header(header_dict: dict) -> dict:
    """return header in a more readable manner"""
    new_header_dict = {
        "FILE_ID": header_dict["FILE_ID"][0],
        "MODALITIES": dict(
            zip(
                header_dict["MODALITIES_WITH_UNITS"],
                header_dict["MODALITIES_UNITS_SPECIFIED"],
            )
        ),
        "SENSOR_DATA": {
            "TRAJECTORIES": {
                "N_FRAMES": header_dict["N_FRAMES_QTM"][0],
                "N_BODIES": header_dict["N_BODIES"][0],
                "N_MARKERS": header_dict["N_MARKERS"][0],
                "CONTIGUOUS_ROTATION_MATRIX": ast.literal_eval(
                    header_dict["CONTIGUOUS_ROTATION_MATRIX"][0]
                ),
                "METADATA": {},
            },
            "EYETRACKING": {},
        },
    }

    trajectories_metadata = {
        body_name: {
            "ROLE": body_role,
            "NUMBER_OF_MARKERS": n_markers,
            "MARKERS_NAMES": [],
        }
        for body_name, body_role, n_markers in zip(
            header_dict["BODY_NAMES"],
            header_dict["BODY_ROLES"],
            header_dict["BODY_NR_MARKERS"],
        )
    }

    for marker_name in header_dict["MARKER_NAMES"]:
        body_name, marker_id = marker_name.split(" - ")
        trajectories_metadata[body_name]["MARKERS_NAMES"].append(marker_id)

    new_header_dict["SENSOR_DATA"]["TRAJECTORIES"]["METADATA"] = trajectories_metadata

    eyetracking_metadata = {
        eyetracking_device: {
            "FREQUENCY_IR": freq_ir,
            "FREQUENCY_SCENE_CAMERA": freq_cam,
            "METADATA": {},
        }
        for eyetracking_device, freq_ir, freq_cam in zip(
            header_dict["EYETRACKING_DEVICES"],
            header_dict["EYETRACKING_FREQUENCY_IR"],
            header_dict["EYETRACKING_FREQUENCY_SCENE_CAMERA"],
        )
    }
    for i, eye_tracking_data in enumerate(header_dict["EYETRACKING_DATA_INCLUDED"]):
        eyetracking_device, data_type = eye_tracking_data.split("_")
        eyetracking_metadata[eyetracking_device]["METADATA"].update(
            {f"{data_type}_N_FRAMES": header_dict["EYETRACKING_DATA_N_FRAMES"][i]}
        )
    new_header_dict["SENSOR_DATA"]["EYETRACKING"] = eyetracking_metadata

    return new_header_dict

In [5]:
files_list = os.listdir(os.path.join(DIR_PATH, SCENARIO_ID))
files_list

['THOR-Magni_180522_SC1B_R2.csv',
 'THOR-Magni_130522_SC1A_R1.csv',
 'THOR-Magni_120522_SC1B_R2.csv',
 'THOR-Magni_130522_SC1B_R1.csv',
 'THOR-Magni_180522_SC1B_R1.csv',
 'THOR-Magni_170522_SC1B_R2.csv',
 'THOR-Magni_130522_SC1A_R2.csv',
 'THOR-Magni_130522_SC1B_R2.csv',
 'THOR-Magni_180522_SC1A_R1.csv',
 'THOR-Magni_170522_SC1B_R1.csv',
 'THOR-Magni_120522_SC1A_R1.csv',
 'THOR-Magni_120522_SC1B_R1.csv',
 'THOR-Magni_180522_SC1A_R2.csv',
 'THOR-Magni_170522_SC1A_R1.csv',
 'THOR-Magni_170522_SC1A_R2.csv',
 'THOR-Magni_120522_SC1A_R2.csv']

In [7]:
raw_df, header_dict = load_csv_metadata(
    os.path.join(DIR_PATH, SCENARIO_ID, "THOR-Magni_120522_SC1B_R1.csv")
)

  raw_df = pd.read_csv(


In [8]:
raw_df

Unnamed: 0_level_0,Frame,DARKO_Robot - 1 X,DARKO_Robot - 1 Y,DARKO_Robot - 1 Z,DARKO_Robot - 2 X,DARKO_Robot - 2 Y,DARKO_Robot - 2 Z,DARKO_Robot - 3 X,DARKO_Robot - 3 Y,DARKO_Robot - 3 Z,...,LO1 Centroid_Z,LO1 R0,LO1 R1,LO1 R2,LO1 R3,LO1 R4,LO1 R5,LO1 R6,LO1 R7,LO1 R8
Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0.01,2,-4234.196,-779.604,692.075,-4006.843,-1284.119,691.486,-4989.450,-1034.982,299.463,...,,,,,,,,,,
0.02,3,-4234.260,-779.616,692.084,-4006.844,-1284.065,691.444,-4989.459,-1034.986,299.509,...,,,,,,,,,,
0.03,4,-4234.202,-779.574,692.078,-4002.962,-1284.731,693.225,-4989.447,-1034.985,299.474,...,,,,,,,,,,
0.04,5,-4234.223,-779.658,692.072,-4002.778,-1284.900,693.384,-4989.461,-1034.971,299.510,...,,,,,,,,,,
0.05,6,-4234.301,-779.719,692.071,-4002.967,-1285.126,693.366,-4989.850,-1035.042,299.012,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
249.96,24997,-4232.016,-779.700,693.047,-4003.213,-1285.192,693.203,-4990.252,-1035.007,298.976,...,,,,,,,,,,
249.97,24998,-4232.011,-779.687,693.018,-4003.237,-1285.204,693.222,-4990.235,-1034.996,298.965,...,,,,,,,,,,
249.98,24999,-4231.976,-779.707,693.061,-4003.236,-1285.206,693.184,-4990.267,-1034.998,299.023,...,,,,,,,,,,
249.99,25000,-4231.953,-779.682,692.985,-4003.247,-1285.237,693.210,-4990.230,-1034.994,298.988,...,,,,,,,,,,


In [17]:
new_header_dict = preprocessing_header(header_dict)

In [19]:
"120522_SC1B_R1" in  "THOR-Magni_120522_SC1B_R1.csv"

True

In [18]:
new_header_dict

{'FILE_ID': '120522_SC1B_R1',
 'MODALITIES': {'Time': 's',
  'QTM_MARKER': 'mm',
  'QTM_CENTROID': 'mm',
  'EYETRACKER_2D_POINTS': 'px',
  'EYETRACKER_3D_POINTS': 'mm',
  'GYROSCOPE': 'deg/s',
  'ACCELEROMETER': 'm/s²'},
 'SENSOR_DATA': {'TRAJECTORIES': {'N_FRAMES': 25002,
   'N_BODIES': 15,
   'N_MARKERS': 47,
   'CONTIGUOUS_ROTATION_MATRIX': [['R0', 'R3', 'R6'],
    ['R1', 'R4', 'R7'],
    ['R2', 'R5', 'R8']],
   'METADATA': {'Helmet_6': {'ROLE': 'Alone',
     'NUMBER_OF_MARKERS': 5,
     'MARKERS_NAMES': ['1', '2', '3', '4', '5']},
    'Helmet_4': {'ROLE': 'Group of 2',
     'NUMBER_OF_MARKERS': 5,
     'MARKERS_NAMES': ['1', '2', '3', '4', '5']},
    'Helmet_5': {'ROLE': 'Group of 3',
     'NUMBER_OF_MARKERS': 5,
     'MARKERS_NAMES': ['1', '2', '3', '4', '5']},
    'Helmet_1': {'ROLE': 'Alone',
     'NUMBER_OF_MARKERS': 7,
     'MARKERS_NAMES': ['1', '2', '3']},
    'Helmet_7': {'ROLE': 'Group of 2',
     'NUMBER_OF_MARKERS': 5,
     'MARKERS_NAMES': ['1', '2', '5', '6', '7']},
  

# Header validator

In [13]:
validate_header(files_list[0], new_header_dict)

[31;20m2023-10-23 16:39:15,640 - thor_magni_tools.data_tests.test_csv - ERROR - File name does not match (common.py:6)[0m
[31;20m2023-10-23 16:39:15,641 - thor_magni_tools.data_tests.test_csv - ERROR - [HEADER FAIL] N_BODIES = 15 but got description for             12 :( 
 Verbose: ['Helmet_6', 'Helmet_4', 'Helmet_5', 'Helmet_1', 'Helmet_7', 'Helmet_2', 'Helmet_8', 'Helmet_9', 'Helmet_10', 'Helmet_3', 'LO1', 'DARKO_Robot'] (common.py:6)[0m
[31;20m2023-10-23 16:39:15,642 - thor_magni_tools.data_tests.test_csv - ERROR - [HEADER FAIL]Helmet_1 NUMBER OF MARKERS (7)                 does not match size of MARKERS_NAMES (3) (common.py:6)[0m
[31;20m2023-10-23 16:39:15,642 - thor_magni_tools.data_tests.test_csv - ERROR - [HEADER FAIL] N_MAKERS=38 but got description for             42 :( 
 Verbose: [5, 5, 5, 7, 0, 0, 5, 0, 4, 4, 0, 7] (common.py:6)[0m


In [14]:
validate_header_with_dataframe(new_header_dict, raw_df)

[31;20m2023-10-23 16:39:17,589 - thor_magni_tools.data_tests.test_csv - ERROR - [HEADER/DF MISMATCH] N_BODIES = 15 in header but got             12 from the dataframe :( 
 Verbose: {'Helmet_1', 'Helmet_5', 'Helmet_4', 'DARKO_Robot', 'LO1', 'Helmet_10', 'Helmet_2', 'Helmet_6', 'Helmet_7', 'Helmet_8', 'Helmet_9', 'Helmet_3'} (common.py:6)[0m
[31;20m2023-10-23 16:39:17,591 - thor_magni_tools.data_tests.test_csv - ERROR - [HEADER/DF MISMATCH] for Helmet_1:                 Given by header: 7                 Given by dataframe: 3 (common.py:6)[0m


Get number of bodies from dataframe

In [15]:
raw_df[raw_df.columns[raw_df.columns.str.endswith(" X")]]

Unnamed: 0_level_0,DARKO_Robot - 1 X,DARKO_Robot - 2 X,DARKO_Robot - 3 X,DARKO_Robot - 4 X,DARKO_Robot - 5 X,DARKO_Robot - 6 X,DARKO_Robot - 7 X,Helmet_1 - 1 X,Helmet_1 - 2 X,Helmet_1 - 3 X,...,Helmet_6 - 1 X,Helmet_6 - 2 X,Helmet_6 - 3 X,Helmet_6 - 4 X,Helmet_6 - 5 X,Helmet_8 - 1 X,Helmet_8 - 2 X,Helmet_8 - 3 X,Helmet_8 - 4 X,Helmet_8 - 5 X
Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0.01,-4086.915,-3900.788,-4861.657,-4597.370,-3830.737,-4553.225,-4713.681,,,,...,,9218.697,,9184.100,,,,-6708.268,-6677.524,
0.02,-4088.637,-3900.805,-4861.716,-4599.107,-3830.851,-4553.292,-4713.773,,,,...,,9222.041,,9183.936,,,,-6707.203,-6677.306,
0.03,-4086.962,-3897.834,-4861.792,-4592.506,-3830.810,-4553.458,-4713.757,,,,...,,9222.319,,9182.088,,,,-6706.623,-6675.187,
0.04,-4087.025,-3897.797,-4861.844,-4595.394,-3830.815,-4553.373,-4713.732,,,,...,,9222.107,,9181.126,,,,-6709.021,-6676.572,
0.05,-4086.950,-3897.574,-4861.568,-4595.400,-3830.612,-4553.299,-4713.737,,,,...,9156.276,9223.792,9200.044,9180.580,9148.383,,,-6708.234,-6677.930,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
239.77,-4086.836,-3897.252,-4862.343,-4592.940,-3830.993,-4553.251,-4714.329,,,-7403.542,...,4058.267,3972.105,3929.194,3858.349,3972.816,,8595.855,,8604.263,
239.78,-4086.815,-3897.303,-4862.400,-4593.009,-3830.955,-4553.244,-4714.381,,,-7407.144,...,4044.585,3958.090,3916.072,3845.346,3959.692,,8603.503,,8614.400,
239.79,-4086.824,-3897.263,-4862.341,-4593.022,-3830.993,-4553.259,-4714.283,,,-7412.396,...,4030.703,3944.157,3902.017,3832.182,3946.273,,8615.314,,8620.003,
239.80,-4086.794,-3897.256,-4862.349,-4594.679,-3830.924,-4553.296,-4714.305,,,-7414.475,...,4017.047,3930.392,3869.809,3819.116,,,8623.542,,8627.836,


In [13]:
unique_prefixes = set(col.split(' ')[0] for col in raw_df.columns if col != "Frame")

In [14]:
unique_prefixes

{'DARKO_Robot',
 'Helmet_1',
 'Helmet_10',
 'Helmet_2',
 'Helmet_3',
 'Helmet_4',
 'Helmet_5',
 'Helmet_6',
 'Helmet_7',
 'Helmet_8',
 'Helmet_9',
 'LO1'}

In [15]:
filtered_columns = [col for col in raw_df.columns if any(col.startswith(prefix) for prefix in unique_prefixes)]

In [16]:
columns_markers = [col[:-2] for col in filtered_columns if any(char.isdigit() for char in col) and " - " in col and col.endswith("X")]

In [17]:
columns_markers

['DARKO_Robot - 1',
 'DARKO_Robot - 2',
 'DARKO_Robot - 3',
 'DARKO_Robot - 4',
 'DARKO_Robot - 5',
 'DARKO_Robot - 6',
 'DARKO_Robot - 7',
 'Helmet_1 - 1',
 'Helmet_1 - 2',
 'Helmet_1 - 3',
 'Helmet_10 - 1',
 'Helmet_10 - 2',
 'Helmet_10 - 3',
 'Helmet_10 - 4',
 'Helmet_2 - 1',
 'Helmet_2 - 2',
 'Helmet_2 - 3',
 'Helmet_2 - 4',
 'Helmet_4 - 1',
 'Helmet_4 - 2',
 'Helmet_4 - 3',
 'Helmet_4 - 4',
 'Helmet_4 - 5',
 'Helmet_5 - 1',
 'Helmet_5 - 2',
 'Helmet_5 - 3',
 'Helmet_5 - 4',
 'Helmet_5 - 5',
 'Helmet_6 - 1',
 'Helmet_6 - 2',
 'Helmet_6 - 3',
 'Helmet_6 - 4',
 'Helmet_6 - 5',
 'Helmet_7 - 1',
 'Helmet_7 - 2',
 'Helmet_7 - 3',
 'Helmet_7 - 4',
 'Helmet_7 - 5']

In [18]:
bodies_markers = {}
for item in columns_markers:
    key, value = item.split(' - ')
    key = key.strip()  # Remove extra spaces around the key
    value = int(value)  # Convert the last digits to integers
    if key not in bodies_markers.keys():
        bodies_markers.setdefault(key, [])
    bodies_markers[key].append(value)

In [19]:
bodies_markers

{'DARKO_Robot': [1, 2, 3, 4, 5, 6, 7],
 'Helmet_1': [1, 2, 3],
 'Helmet_10': [1, 2, 3, 4],
 'Helmet_2': [1, 2, 3, 4],
 'Helmet_4': [1, 2, 3, 4, 5],
 'Helmet_5': [1, 2, 3, 4, 5],
 'Helmet_6': [1, 2, 3, 4, 5],
 'Helmet_7': [1, 2, 3, 4, 5]}

In [20]:
new_header_dict["SENSOR_DATA"]["TRAJECTORIES"]["METADATA"]

{'Helmet_10': {'ROLE': 'Alone',
  'NUMBER_OF_MARKERS': 4,
  'MARKERS_NAMES': ['1', '2', '3', '4']},
 'Helmet_5': {'ROLE': 'Group of 2',
  'NUMBER_OF_MARKERS': 5,
  'MARKERS_NAMES': ['1', '2', '3', '4', '5']},
 'Helmet_1': {'ROLE': 'Group of 2',
  'NUMBER_OF_MARKERS': 7,
  'MARKERS_NAMES': ['1', '2', '3']},
 'Helmet_3': {'ROLE': 'N/A', 'NUMBER_OF_MARKERS': 0, 'MARKERS_NAMES': []},
 'Helmet_2': {'ROLE': 'Alone',
  'NUMBER_OF_MARKERS': 4,
  'MARKERS_NAMES': ['1', '2', '3', '4']},
 'Helmet_9': {'ROLE': 'N/A', 'NUMBER_OF_MARKERS': 0, 'MARKERS_NAMES': []},
 'Helmet_4': {'ROLE': 'Group of 2',
  'NUMBER_OF_MARKERS': 5,
  'MARKERS_NAMES': ['1', '2', '3', '4', '5']},
 'Helmet_8': {'ROLE': 'N/A', 'NUMBER_OF_MARKERS': 0, 'MARKERS_NAMES': []},
 'Helmet_6': {'ROLE': 'Group of 2',
  'NUMBER_OF_MARKERS': 5,
  'MARKERS_NAMES': ['1', '2', '3', '4', '5']},
 'Helmet_7': {'ROLE': 'Alone',
  'NUMBER_OF_MARKERS': 5,
  'MARKERS_NAMES': ['1', '2', '3', '4', '5']},
 'DARKO_Robot': {'ROLE': 'N/A',
  'NUMBER_OF_M

In [21]:
validate_header_with_dataframe(new_header_dict, raw_df)

[31;20m2023-10-23 15:02:24,703 - thor_magni_tools.data_tests.test_csv - ERROR - [HEADER/DF MISMATCH] for Helmet_1:                 Given by header: 7                 Given by dataframe: 3 (common.py:6)[0m


# Preprocessing