In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from collections import Counter
from pathlib import Path
from tqdm import tqdm
from scipy.spatial.transform import Rotation
import itertools

# Load transformed mocap and IMU files

In [2]:
root_path = Path("E:/Datasets/0-Processed_Datasets/2023-11-15_imu-vicon-data")
with open(Path(f'{root_path.joinpath("valid_files.txt")}')) as f:
    valid_files = f.readlines()
    valid_files = [x.strip() for x in valid_files]

In [3]:
valid_files[:5]

['S01/S01_E1_R_1',
 'S01/S01_E1_R_2',
 'S01/S01_E2_R_1',
 'S01/S01_E2_R_2',
 'S01/S01_E3_R_1']

In [4]:
mocap_files = [Path(root_path.joinpath(f'{file}_mocap_tfm.csv')) for file in valid_files]
imu_files = [Path(root_path.joinpath(f'{file}_imu.csv')) for file in valid_files]

In [5]:
mocap_files[0], imu_files[0]

(WindowsPath('E:/Datasets/0-Processed_Datasets/2023-11-15_imu-vicon-data/S01/S01_E1_R_1_mocap_tfm.csv'),
 WindowsPath('E:/Datasets/0-Processed_Datasets/2023-11-15_imu-vicon-data/S01/S01_E1_R_1_imu.csv'))

# Get start and end times from IMU files

In [38]:
df_imu = pd.read_csv(imu_files[0])

# Rename sensors
df_imu.loc[df_imu[" Sensor Type"] == 1, " Sensor Type"] = "Accelerometer"
df_imu.loc[df_imu[" Sensor Type"] == 2, " Sensor Type"] = "Magnetometer"
df_imu.loc[df_imu[" Sensor Type"] == 4, " Sensor Type"] = "Gyroscope"
df_imu

Unnamed: 0,Sample Time [s],Sensor Type,val1,val2,val3
0,0.000000,Accelerometer,8.773438,-5.707031,1.469727
1,0.010224,Accelerometer,8.757812,-5.949219,1.730469
2,0.020185,Accelerometer,8.789062,-6.320312,1.781250
3,0.030409,Accelerometer,8.898438,-6.441406,1.639648
4,0.040370,Accelerometer,9.046875,-6.257812,1.440430
...,...,...,...,...,...
8278,27.956871,Accelerometer,13.406250,-3.316406,8.156250
8279,28.028961,Magnetometer,62.093750,-19.296875,7.597656
8280,27.985445,Gyroscope,-0.335205,0.870117,-2.585938
8281,27.966833,Accelerometer,13.390625,-3.128906,8.046875


In [7]:
df_acc = df_imu.loc[df_imu[' Sensor Type'] == 'Accelerometer']
df_mag = df_imu.loc[df_imu[' Sensor Type'] == 'Magnetometer']
df_gyr = df_imu.loc[df_imu[' Sensor Type'] == 'Gyroscope']
df_acc

Unnamed: 0,Sample Time [s],Sensor Type,val1,val2,val3
0,0.000000,Accelerometer,8.773438,-5.707031,1.469727
1,0.010224,Accelerometer,8.757812,-5.949219,1.730469
2,0.020185,Accelerometer,8.789062,-6.320312,1.781250
3,0.030409,Accelerometer,8.898438,-6.441406,1.639648
4,0.040370,Accelerometer,9.046875,-6.257812,1.440430
...,...,...,...,...,...
8269,27.926462,Accelerometer,13.851562,-2.164062,8.164062
8272,27.936686,Accelerometer,13.531250,-2.667969,8.320312
8275,27.946648,Accelerometer,13.359375,-3.166016,8.265625
8278,27.956871,Accelerometer,13.406250,-3.316406,8.156250


In [8]:
acc_ts = df_acc['Sample Time [s]'].values
mag_ts = df_mag['Sample Time [s]'].values
gyr_ts = df_gyr['Sample Time [s]'].values

len(acc_ts), len(mag_ts), len(gyr_ts)

(2770, 2764, 2749)

In [9]:
gyr_ts[-1], mag_ts[-16]

(27.985445, 27.887665)

In [10]:
idx_ts = list(zip(acc_ts, mag_ts, gyr_ts))
idx_ts[0], idx_ts[-1]

((0.0, 0.23225959, 0.3287286), (27.754759, 27.887665, 27.985445))

In [11]:
start = max(idx_ts[0])
end = min(idx_ts[-1])
longest = max(acc_ts[-1], mag_ts[-1], gyr_ts[-1])

## Run getting start and end times

In [18]:
df_ts  = pd.DataFrame(columns=['file', 'start', 'end', 'longest'])
missing = []
for path, valid_file in zip(imu_files, valid_files):
    if valid_file.split('/')[1] not in path.stem:
        raise Exception('path mismatch in valid files and imu paths')

    df_imu = pd.read_csv(path)
    if df_imu.empty == True:
        raise Exception('Empty imu df')
    try:
        # Rename sensors
        df_imu.loc[df_imu[" Sensor Type"] == 1, " Sensor Type"] = "Accelerometer"
        df_imu.loc[df_imu[" Sensor Type"] == 2, " Sensor Type"] = "Magnetometer"
        df_imu.loc[df_imu[" Sensor Type"] == 4, " Sensor Type"] = "Gyroscope"

        df_acc = df_imu.loc[df_imu[' Sensor Type'] == 'Accelerometer']
        df_mag = df_imu.loc[df_imu[' Sensor Type'] == 'Magnetometer']
        df_gyr = df_imu.loc[df_imu[' Sensor Type'] == 'Gyroscope']

        if df_acc.empty == True or df_mag.empty == True or df_gyr.empty == True:
            raise Exception(f'Empty sensor in df\t {valid_file}')

        acc_ts = df_acc['Sample Time [s]'].values
        mag_ts = df_mag['Sample Time [s]'].values
        gyr_ts = df_gyr['Sample Time [s]'].values

        idx_ts = list(zip(acc_ts, mag_ts, gyr_ts))
        start = max(idx_ts[0])
        end = min(idx_ts[-1])
        longest = max(acc_ts[-1], mag_ts[-1], gyr_ts[-1])

        df = pd.DataFrame({
            'file': [valid_file],
            'start': [start],
            'end': [end],
            'longest': [longest]
        })

        df_ts = pd.concat([df_ts, df], ignore_index=True)
    except Exception as e:
        print(e)
        missing.append(valid_file)
        continue
df_ts

Unnamed: 0,file,start,end,longest
0,S01/S01_E1_R_1,0.328729,27.754759,28.039185
1,S01/S01_E1_R_2,0.428081,22.795780,23.139450
2,S01/S01_E2_R_1,0.270008,39.955727,40.225212
3,S01/S01_E2_R_2,0.241172,41.835823,42.030598
4,S01/S01_E3_R_1,0.207880,49.963596,50.155487
...,...,...,...,...
1247,S84/S84_E7_R_2,0.182452,45.496140,45.796560
1248,S84/S84_E8_R_1,0.122159,37.645977,37.837864
1249,S84/S84_E8_R_2,0.132121,33.315880,33.518257
1250,S84/S84_E9_R_1,0.044040,59.626750,59.808678


In [13]:
# Remove files with missing imu sensor data from valid files 
for i in missing:
    if i in valid_files:
        valid_files.remove(i)
    else:
        print(f'{i} already removed')

# Re-write .txt file so we dont process 
with open(Path(f'{root_path.joinpath("valid_files.txt")}'), 'w') as f:
    for line in sorted(valid_files):
        f.writelines([line + '\n'])

In [14]:
# Write df_ts to csv
df_ts.to_csv(root_path.joinpath('start_end.csv'), index=False)

# Resample IMU

In [61]:
def load_imu_dfs(fp):
    df = pd.read_csv(fp)
    df = df.rename(columns={" Sensor Type": "Sensor Type"})
    subdfs = []
    for sensor_type, sensor_name, decimals in zip([1, 2, 4], ["a_", "m_", "g_"], [2, 1, 3]):
        # Select data by sensor type
        subdf = df.loc[df["Sensor Type"] == sensor_type, :]
        # Conver index to timedeltas, same as the MOCAP
        subdf.index = subdf["Sample Time [s]"].values
        subdf.index = pd.to_timedelta(subdf.index, unit="seconds")
        # Deal with the columns
        subdf = subdf.drop(columns=["Sample Time [s]", "Sensor Type"])
        new_column_map = {f"val{i}": f"{sensor_name}{i}" for i in range(1, 4)}
        subdf = subdf.rename(columns=new_column_map)
        # Round for easier stats
        subdf = subdf.round(decimals)
        subdfs.append(subdf)
    return subdfs

In [62]:
start = pd.Timedelta(df_ts.loc[df_ts["file"] == valid_files[0], "start"][0], unit="s")
end = pd.Timedelta(df_ts.loc[df_ts["file"] == valid_files[0], "end"][0], unit="s")
start, end

(Timedelta('0 days 00:00:00.328728600'), Timedelta('0 days 00:00:27.754759'))

In [63]:
def align_df(df: pd.DataFrame, start: pd.Timedelta, end: pd.Timedelta):
    """
    df: pd.DataFrame with pd.Timedelta as indices
    start: pd.Timedelta start time
    end: pd.Timedelta end time
    """
    mask = (df.index > start) & (df.index < end)
    df = df[mask]
    df = df.resample("0.02S").mean()  # Resample to 50 Hz
    df = df.reset_index(drop=True)
    return df

In [64]:
imu_dfs = load_imu_dfs(imu_files[0])
imu_dfs[0]

Unnamed: 0,a_1,a_2,a_3
0 days 00:00:00,8.77,-5.71,1.47
0 days 00:00:00.010223616,8.76,-5.95,1.73
0 days 00:00:00.020185089,8.79,-6.32,1.78
0 days 00:00:00.030408705,8.90,-6.44,1.64
0 days 00:00:00.040370177,9.05,-6.26,1.44
...,...,...,...
0 days 00:00:27.926462,13.85,-2.16,8.16
0 days 00:00:27.936686,13.53,-2.67,8.32
0 days 00:00:27.946648,13.36,-3.17,8.27
0 days 00:00:27.956871,13.41,-3.32,8.16


In [72]:
imu_df = []
for df in imu_dfs:
    df = align_df(df, start, end)
    imu_df.append(df)
imu_df = pd.concat(imu_df, axis=1)
imu_df

Unnamed: 0,a_1,a_2,a_3,m_1,m_2,m_3,g_1,g_2,g_3
0,12.500,-1.665,5.600,6.333333,-76.30,-184.533333,0.534333,-1.069667,1.385333
1,12.660,-1.490,5.155,6.550000,-74.70,-181.450000,0.540000,-0.761000,1.519000
2,12.965,-1.330,4.935,7.350000,-74.25,-180.350000,0.538000,-0.506500,1.609000
3,13.020,-0.870,4.370,8.300000,-70.80,-178.100000,0.531667,-0.091333,1.717667
4,13.055,-0.765,3.675,8.100000,-70.20,-177.900000,0.535000,0.230000,1.776000
...,...,...,...,...,...,...,...,...,...
1366,12.170,2.925,1.725,65.000000,3.20,-20.900000,-1.475500,-1.824000,0.545000
1367,12.740,1.675,1.150,64.700000,3.80,-17.900000,-1.779500,-1.811000,0.257500
1368,13.340,0.210,3.005,66.100000,4.70,-14.300000,-2.245000,-1.798500,0.001000
1369,13.280,-0.655,3.375,65.200000,5.60,-12.800000,-2.291000,-1.849000,-0.244500


## Run IMU resampling 

In [89]:
for imu_file, valid_file in zip(imu_files, valid_files):
    if valid_file.split('/')[1] not in imu_file.stem:
        raise Exception('path mismatch in valid files and imu paths')
    
    imu_dfs = load_imu_dfs(imu_file)
    imu_df = []
    for df in imu_dfs:
        df = align_df(df, start, end)
        imu_df.append(df)
    imu_df = pd.concat(imu_df, axis=1)
    
    imu_df.to_csv(imu_file.parent.joinpath(f'{valid_file.split("/")[1]}_imu_p.csv'), index=False)

# Remove NaNs from mocap files
Note the valid files were filtered in case of missing IMU sensor data

NaN Rules:
- If there are more than 2 breakpoints and more than 100 points, don't bother filling, don't use the data
- If there are only 2 breakpoints at the ends, just drop the ends
- If there are less than 100 points but they're in the middle, fill with linear interpolation

In [16]:
mocap_file = mocap_files[0]
mocap_file

WindowsPath('E:/Datasets/0-Processed_Datasets/2023-11-15_imu-vicon-data/S01/S01_E1_R_1_mocap_tfm.csv')

In [17]:
df = pd.read_csv(mocap_file)

In [None]:
'''
for mocap_file, valid_file in zip(mocap_files, valid_files):
    check files match
    round start to nearest 50Hz value (max)
    round end to nearest 50Hz value (min)

    get index of mocap df (time*50?)
    cut mocap df to start and end
    
'''