# Feature extraction

## Imports and data loading

In [1]:
import pandas as pd
from pathlib import Path
import matplotlib.pyplot as plt
import numpy as np
from cycler import cycler
from sklearn.preprocessing import StandardScaler

import sys
sys.path.append("..")
from dissect.imu_feature_extraction import compute_feature_all


# Sampling rate of the IMU recording, in Hz.
sr = 300.0

# Three-colour palette used to build the plotting colour cycle.
RGB = [
    "#D55E00",
    "#009E73",
    "#0072B2",
]
# Property cycler that iterates the "color" property over the RGB palette.
RGB_CYCLER = cycler("color", RGB)

In [2]:
# Load the preprocessed IMU recording (produced by 00_preprocessing.ipynb)
# and preview its first rows.
imu = pd.read_pickle("../data_example/example_imu.pickle")
imu.head()

Unnamed: 0_level_0,ax,ay,az,gx,gy,gz,qx,qy,qz,qw,ax_G,ay_G,az_G,ax_R,ay_R,az_R
counter,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
703455,-0.437201,0.124428,0.529188,-51.78833,74.554443,15.106201,0.108667,0.330012,-0.038276,0.93692,-0.626708,0.178362,0.758567,0.189507,-0.053934,-0.229379
703456,-0.48725,0.114418,0.469129,-61.309814,59.326172,13.824463,0.110625,0.389789,-0.04164,0.913287,-0.721191,0.169602,0.671654,0.233941,-0.055184,-0.202525
703457,-0.593939,-0.029137,0.406385,-63.079834,46.936035,13.214111,0.329275,0.191478,-0.768666,0.513874,-0.702997,0.044046,0.709828,0.109057,-0.073183,-0.303443
703458,-0.818793,-0.14193,0.328016,-51.116943,38.574219,7.049561,0.386625,0.197573,-0.860031,0.268016,-0.770924,-0.132595,0.622973,-0.047869,-0.009335,-0.294957
703459,-1.048041,-0.175133,0.322156,-39.978027,27.46582,6.164551,0.41326,0.183231,-0.870297,0.195514,-0.790967,-0.157334,0.591285,-0.257075,-0.017799,-0.269129


In [5]:
# Load the precomputed change points (produced by 01_changepoint.ipynb),
# report how many segments they define, and preview the first rows.
chpt_df = pd.read_pickle("../data_example/example_chpt.pickle")
print(chpt_df.shape[0])
chpt_df.head()

3294


Unnamed: 0,iloc_start,iloc_end
0,0,198
1,198,244
2,244,300
3,300,344
4,344,457


## Compute all features relying on the signals' statistics and AHRS filtering of the data

In [6]:
%%time
features = compute_feature_all(imu, chpt_df.iloc_end.values, sr=sr)  # the column names can also be specified here as kwargs
features.head(5)

CPU times: user 3min 25s, sys: 4.54 s, total: 3min 30s
Wall time: 19.4 s


Unnamed: 0,duration,duration_log10,gx_mean,gy_mean,gz_mean,ax_G_mean,ay_G_mean,az_G_mean,ax_R_mean,ay_R_mean,...,azimuth3d_speed,azimuth2d_cumulative_change,azimuth2d_cumulative_speed,azimuth2d_change_mean,azimuth2d_change_std,azimuth2d_change_min,azimuth2d_change_max,azimuth2d_change_p25,azimuth2d_change_median,azimuth2d_change_p75
0,0.66,-0.180456,-6.283847,-7.42579,8.337618,-0.858151,-0.002136,0.509406,0.0066,-0.001709,...,124.801065,6.212065,9.41222,6.059199,2.539103,0.19071,8.898763,5.334605,7.174027,7.696399
1,0.153333,-0.814363,9.501582,-472.663548,34.122633,-0.191561,-0.002943,0.882835,0.048385,0.007185,...,523.298773,5.193212,33.868772,-0.87695,2.325896,-3.509091,5.193212,-2.803769,-1.439262,0.489726
2,0.186667,-0.728933,17.908914,5.161285,15.580314,0.498023,-0.038409,0.865547,-0.025447,-0.00106,...,24.175137,3.856314,20.658823,4.549894,0.967599,0.620489,5.235514,4.568945,4.827215,5.086856
3,0.146667,-0.833669,-38.132407,202.400208,-329.451127,0.189041,0.027026,0.968391,-0.007795,-0.140813,...,309.239416,-47.190286,-321.751953,-24.246075,14.697536,-47.190286,-0.513494,-37.66227,-24.195243,-11.537651
4,0.376667,-0.424043,25.971269,30.303685,-77.753388,-0.294186,0.020826,0.949658,-0.143683,0.064155,...,48.30165,-30.059363,-79.803619,-19.816479,11.63922,-32.10276,-0.639118,-31.235113,-23.04925,-7.302276


In [7]:
features.shape[0]  # number of rows -> one row per segment

3294

## Compute features relying on time-frequency analysis of the data
**Warning:** In the analyses presented in the article, we relied on an R implementation of the continuous wavelet transform. For convenience, a pure-Python function for extracting these features is provided here, but the resulting values might differ slightly from those in the feature dataframe used in the subsequent notebooks.

In [8]:
import ssqueezepy 