In [1]:
import pandas as pd
from scipy.stats import linregress, median_abs_deviation
import numpy as np
import matplotlib.pyplot as plt
import os
from functools import partial

from src.globals import *

In [2]:
# Loading & Preprocessing
# Read the IMU and the motor recordings from their parquet files (paths come from src.globals).
df_imu, df_motor = (pd.read_parquet(source) for source in (path_imu, path_motor))

# Prefixes of the score columns handled below: 'z' (z-score) and 'mz' (modified z-score).
scores = ['z', 'mz']

In [5]:
def calc_mz_z_scores(df, columns):
    """Add per-bin z-scores and modified z-scores for the given columns.

    Rows are grouped by ``timeindex_bin``; the statistics of each group are
    merged back onto the rows, and for every column ``x`` in ``columns`` two
    new columns are created:

    * ``z_x``  = (x - mean) / std
    * ``mz_x`` = 0.6745 * (x - median) / MAD

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``timeindex_bin`` and every name listed in ``columns``.
    columns : iterable of str
        Names of the columns to score.

    Returns
    -------
    pandas.DataFrame
        The merged frame with the ``z_*`` / ``mz_*`` columns added and the
        intermediate per-bin statistics removed. The input frame is not
        modified.

    Notes
    -----
    No guard is applied for groups whose std or MAD is zero; the division
    then yields inf/NaN for those rows.
    """
    # The modified z-score must be centred on the median: MAD is the median
    # absolute deviation *from the median*, so centring on the mean would mix
    # a non-robust location with a robust scale.
    agg_funcs = ['mean', 'std', 'median', median_abs_deviation]

    df_tmp = df.groupby('timeindex_bin', as_index=False).agg({col: agg_funcs for col in columns})
    # Flatten the (column, statistic) MultiIndex to "<column>_<statistic>",
    # leaving the grouping key under its original name.
    df_tmp.columns = [col[0] if col[0] == 'timeindex_bin' else '_'.join(col).strip() for col in df_tmp.columns]

    df = df.merge(df_tmp, on=['timeindex_bin'])
    for x in columns:
        df[f'z_{x}'] = (df[x] - df[f'{x}_mean']) / df[f'{x}_std']
        df[f'mz_{x}'] = (0.6745 * (df[x] - df[f'{x}_median'])) / df[f'{x}_median_abs_deviation']

    # Everything in df_tmp except the key is a helper statistic.
    helper_cols = [col for col in df_tmp.columns if col != 'timeindex_bin']
    return df.drop(columns=helper_cols)

def combine_mz_z_scores(df, sensor, axes, squared=False):
    """Collapse the per-axis scores of one sensor into a single score pair.

    Reads the columns ``z_<sensor>_<axis>`` and ``mz_<sensor>_<axis>`` for
    every axis in ``axes`` and replaces them with ``z_<sensor>`` and
    ``mz_<sensor>``: the sum over the axes of the transformed scores divided
    by the number of axes.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the per-axis score columns.
    sensor : str
        Sensor name used in the column names.
    axes : sequence of str
        Axis suffixes of that sensor.
    squared : bool, default False
        If False the absolute value of each score is averaged; if True the
        squared score is averaged.

    Returns
    -------
    pandas.DataFrame
        A new frame without the per-axis score columns and with the two
        combined columns. The input frame is left unmodified.
    """
    # np.square, not np.sqrt: the square root of a negative score is NaN, so
    # the "squared" mode could never produce a usable result.
    func = np.square if squared else np.abs

    z_cols = [f'z_{sensor}_{axis}' for axis in axes]
    mz_cols = [f'mz_{sensor}_{axis}' for axis in axes]

    # sum / len (rather than mean) keeps the divisor fixed at the number of
    # axes even when individual scores are NaN and skipped by sum().
    combined = {
        f'z_{sensor}': func(df[z_cols]).sum(axis=1) / len(z_cols),
        f'mz_{sensor}': func(df[mz_cols]).sum(axis=1) / len(mz_cols),
    }

    # Build the result on the dropped copy so the caller's frame is not mutated.
    return df.drop(columns=z_cols + mz_cols).assign(**combined)

In [6]:
# IMU: score every raw channel per time bin, then fold the axes of each sensor
# into one z / mz column pair.
df_imu = calc_mz_z_scores(df_imu, imu_sensors_u)
for sensor, axes in imu_sensor_axes:
    df_imu = combine_mz_z_scores(df_imu, sensor, axes)

# Motor: same two steps; only the first motor sensor is combined.
df_motor = calc_mz_z_scores(df_motor, motor_sensors_u)
df_motor = combine_mz_z_scores(df_motor, motor_sensors[0], motor_axes)

In [8]:
# Names of the combined score columns, following the `<score>_<sensor>` convention.
cols_imu_results = [f'{score}_{sensor}' for sensor in imu_sensors for score in scores]
cols_motor_results = [f'{score}_{sensor}' for sensor in motor_sensors for score in scores]

# Saving it to the tracks db
# The stored result files are read, extended with the score columns and written
# back to the same path. Score columns left over from an earlier run are dropped
# first: otherwise the merge would emit `_x` / `_y` suffixed duplicates (which
# also look like axis names) instead of refreshing the values.
df_imu_results = pd.read_parquet(path_imu_results)
df_imu_results = df_imu_results.drop(columns=cols_imu_results, errors='ignore')
df_imu_results = df_imu_results.merge(df_imu[['seqid', 'timeindex_bin'] + cols_imu_results], on=['seqid', 'timeindex_bin'])
df_imu_results.to_parquet(path_imu_results)

df_motor_results = pd.read_parquet(path_motor_results)
df_motor_results = df_motor_results.drop(columns=cols_motor_results, errors='ignore')
df_motor_results = df_motor_results.merge(df_motor[['seqid', 'timeindex_bin'] + cols_motor_results], on=['seqid', 'timeindex_bin'])
df_motor_results.to_parquet(path_motor_results)

# Series: aggregate the per-bin scores to one row per sequence (seqid)

In [8]:
def func(x):
    """Count the entries of ``x`` whose absolute value is at least ``threshold_z``."""
    exceeds_threshold = np.abs(x) >= threshold_z
    return np.sum(exceeds_threshold)

# Per sequence, count how many rows reach the threshold for every combined score.
# The score columns are named `<score>_<sensor>` (e.g. `z_<sensor>`), as created by
# combine_mz_z_scores; the keys below must follow that order, otherwise agg()
# looks up columns that do not exist (or hits a raw axis channel by accident).
df_imu_series = df_imu.groupby(['seqid']).agg({f'{score}_{sensor}' : func for sensor in imu_sensors for score in scores})
df_motor_series = df_motor.groupby(['seqid']).agg({f'{score}_{sensor}' : func for sensor in motor_sensors for score in scores})
# Keep only the sequences present in both the IMU and the motor data.
df_series = df_imu_series.merge(df_motor_series, on='seqid')

In [11]:
# Extend the stored per-sequence results with the freshly computed counts.
# Columns already present in the stored file win: the incoming duplicates get
# the '_drop' suffix and are removed again right after the merge.
df_series_results = pd.read_parquet(path_series_results).merge(
    df_series,
    on=['seqid'],
    suffixes=('', '_drop'),
)
df_series_results = df_series_results.drop(
    columns=[name for name in df_series_results.columns if name.endswith('_drop')]
)

# Write the result back to the file it was loaded from.
df_series_results.to_parquet(path_series_results)