In [7]:
import pandas as pd
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import MinMaxScaler

In [8]:
from src.globals import *

In [9]:
# Settings for the Isolation Forest: n_estimators and contamination are handed
# to the estimator in a later cell.
n_estimators = 500
contamination = 0.02
# Seed value kept alongside the other settings for reproducible runs.
random_state = 42

# Load the IMU and motor tables from their parquet files.
# NOTE(review): path_imu / path_motor are not defined in this notebook;
# presumably they come from the star import of src.globals - confirm.
df_imu = pd.read_parquet(path_imu)
df_motor = pd.read_parquet(path_motor)

In [10]:
# Min-max normalisation of the IMU channels to the range [-1, 1].
# A single scaler is fitted per sensor on the pooled values of all its axes
# (flattened into one feature), so every axis of that sensor is mapped with
# the same minimum and maximum.
for sensor, axes in imu_sensor_axes:
    sensor_cols = [f'{sensor}_{axis}' for axis in axes]
    pooled = df_imu[sensor_cols].values.reshape(-1, 1)

    sensor_scaler = MinMaxScaler(feature_range=(-1, 1))
    sensor_scaler.fit(pooled)

    # Apply the shared per-sensor scaling to each axis column in turn.
    for col in sensor_cols:
        axis_values = df_imu[col].values.reshape(-1, 1)
        df_imu[col] = sensor_scaler.transform(axis_values)

# Motor channels: the scaler sees the columns as separate features, so each
# column is scaled to [-1, 1] with its own minimum and maximum.
motor_scaler = MinMaxScaler(feature_range=(-1, 1))
df_motor[motor_sensors_u] = motor_scaler.fit_transform(df_motor[motor_sensors_u])

In [None]:
# Isolation Forest anomaly scores, one score column per IMU sensor plus one
# for the motor table.
# random_state is passed explicitly: the forest is built from random splits,
# so without a fixed seed the scores change on every run.
iso_forest = IsolationForest(
    n_estimators=n_estimators,
    contamination=contamination,
    random_state=random_state,
    n_jobs=-1,
)

# Names of the IMU score columns created below. The name `lofcols` is kept
# because the cell that merges the scores into the result tables reads it.
lofcols = []

for sensor, axes in imu_sensor_axes:
    # Features for this sensor: the time bin plus all of the sensor's axes.
    X = df_imu[['timeindex_bin'] + [f'{sensor}_{axis}' for axis in axes]]
    score_col = f'if_{sensor}'
    # score_samples returns lower values for more abnormal samples; negating
    # it makes a higher score mean "more anomalous".
    # fit() re-trains the same estimator object from scratch for each sensor.
    df_imu[score_col] = -iso_forest.fit(X).score_samples(X)
    lofcols.append(score_col)

# Same procedure for the motor table, using all motor channels at once.
X = df_motor[['timeindex_bin'] + motor_sensors_u]
df_motor['if_motor'] = -iso_forest.fit(X).score_samples(X)

In [16]:
# Attach the Isolation Forest scores to the persisted result tables and write
# the tables back to the same parquet files.
merge_keys = ['seqid', 'timeindex_bin']

df_imu_results = pd.read_parquet(path_imu_results)
df_motor_results = pd.read_parquet(path_motor_results)

# If the result files already contain score columns from an earlier run, drop
# them first so the freshly computed scores replace them. (Merging with a
# '_drop' suffix on the right-hand side would instead keep the stale values
# and discard the new ones, and the suffix filter would also remove any
# unrelated result column whose name happens to end in '_drop'.)
# After the drop, the only shared columns are the merge keys, so no suffix
# handling is needed.
# The merge is an inner join (pandas default): result rows without a matching
# (seqid, timeindex_bin) pair in the scored table are not kept.
df_imu_results = (
    df_imu_results
    .drop(columns=lofcols, errors='ignore')
    .merge(df_imu[merge_keys + lofcols], on=merge_keys)
)

df_motor_results = (
    df_motor_results
    .drop(columns=['if_motor'], errors='ignore')
    .merge(df_motor[merge_keys + ['if_motor']], on=merge_keys)
)

df_imu_results.to_parquet(path_imu_results)
df_motor_results.to_parquet(path_motor_results)