In [1]:
%matplotlib widget

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import sys
import h5py

from PfyMU.gait.train_classifier.core import load_datasets_v2

In [3]:
steps = {
    'rope-jumping': 0.1,
    'descending-stairs': 0.1,
    'ascending-stairs': 0.1,
    'jump': 0.1,
    'lying': 0.1,
    'elevator-down': 0.1,
    'elevator-up': 0.1,
    'run': 0.05,
    'sweeping': 0.1,
    'walking-downstairs': 0.1,
    'walking-upstairs': 0.1,
    'stand': 0.15,
    'treadmill-running': 0.1,
    'cycling-50W': 0.1,
    'cycling-100W': 0.1,
    'walking-left': 0.15,
    'walking-right': 0.15,
    'walking-forward': 0.15,
    'impaired-walking': 0.15,
    'walking': 0.25,
    'sitting': 350,
    'default': 0.5
}
steps = 0.5

In [4]:
# gait_sets_path = Path('/Users/adamol/Documents/Datasets/gait/processed')
gait_sets_path = Path('/Users/ladmin/Desktop/gait_dataset_cleaning/processed/')

datasets = [
    gait_sets_path / 'basa',
    gait_sets_path / 'daliac',
    gait_sets_path / 'fallalld',
]

X, Y, subjects, activities = load_datasets_v2(
    datasets,
    device_location = 'Wrist',
    goal_fs=20.0, 
    acc_mag=False, 
    window_length=3.0, 
    window_step=steps
)

processing dataset /Users/ladmin/Desktop/gait_dataset_cleaning/processed/basa
processing dataset /Users/ladmin/Desktop/gait_dataset_cleaning/processed/daliac
processing dataset /Users/ladmin/Desktop/gait_dataset_cleaning/processed/fallalld


## Dataset class summary

In [5]:
print('Total samples (3.0s windows): ', Y.size)
print('Total walking samples: ', Y.sum())
print('Total non-walking samples: ', Y.size - Y.sum(), '\n')
print(f'% walking samples: {Y.sum() / Y.size * 100:.2f}')

Total samples (3.0s windows):  141913
Total walking samples:  33130
Total non-walking samples:  108783 

% walking samples: 23.35
[[[ 3.003300e-01 -8.899500e-01  2.460300e-01]
  [ 3.003300e-01 -8.820200e-01  2.379900e-01]
  [ 3.003300e-01 -9.018400e-01  2.420100e-01]
  ...
  [ 2.462700e-01 -5.768200e-01  4.550600e-01]
  [ 3.003300e-01 -7.234800e-01  5.073200e-01]
  [ 2.424100e-01 -8.939100e-01  4.992800e-01]]

 [[ 3.736900e-01 -8.859800e-01  4.269200e-01]
  [ 3.041900e-01 -8.304900e-01  3.706500e-01]
  [ 1.458900e-01 -7.829300e-01  2.862300e-01]
  ...
  [ 1.729100e-01 -7.670800e-01  1.374900e-01]
  [ 2.733000e-01 -5.292600e-01  1.656300e-01]
  [ 2.501300e-01 -4.856600e-01  1.334700e-01]]

 [[ 2.810200e-01 -1.052500e+00  4.751600e-01]
  [ 2.926000e-01 -7.234800e-01  3.425100e-01]
  [ 7.597900e-01 -1.440900e+00  6.801800e-01]
  ...
  [ 4.818000e-01 -1.401300e+00  4.028100e-01]
  [ 2.655800e-01 -1.064300e+00  3.666300e-01]
  [ 2.848800e-01 -6.759100e-01  4.148700e-01]]

 ...

 [[ 9.801480

In [6]:
unq_act, act_ct = np.unique(activities, return_counts=True)
N = np.sum(act_ct)
si = np.argsort(act_ct)
for a, c in zip(unq_act[si], act_ct[si]):
    print(f'{a:25s}: {c:5d} / {N:5d}{c/N:8.2f}')

standing-faint-backward-norota:    84 / 141913    0.00
sitting-faint-backward-norotat:    84 / 141913    0.00
sitting-faint-forward-norotati:    96 / 141913    0.00
standing-faint-forward-norotat:    96 / 141913    0.00
standing-faint-vertical-norota:   132 / 141913    0.00
standing-faint-lateral-norotat:   168 / 141913    0.00
walking-faint-backward-norotat:   180 / 141913    0.00
sitting-faint-lateral-norotati:   192 / 141913    0.00
sit-lie-lose-balance-backward-:   192 / 141913    0.00
walking-faint-lateral-norotati:   192 / 141913    0.00
walking-faint-forward-norotati:   192 / 141913    0.00
sit-lie-lose-balance-forward-n:   204 / 141913    0.00
fast-descending-stairs   :   276 / 141913    0.00
single-raise-and-lower-hand:   300 / 141913    0.00
stop-descending-stairs   :   300 / 141913    0.00
stop-descending-elevator :   300 / 141913    0.00
stop-ascending-stairs    :   300 / 141913    0.00
sit-lie-lose-balance-lateral-n:   300 / 141913    0.00
start-descending-elevator:   300 

## Feature Generation

In [7]:
from PfyMU.features import *

In [8]:
FB = Bank(window_length=None, window_step=None)

# add features
FB + Mean()
FB + MeanCrossRate()
FB + StdDev()
FB + Skewness()
FB + Kurtosis()
FB + Range()
FB + IQR()
FB + RMS()
FB + Autocorrelation(lag=1, normalize=True)
FB + LinearSlope()
FB + SignalEntropy()
FB + SampleEntropy(m=4, r=1.0)
FB + PermutationEntropy(order=3, delay=1, normalize=True)
FB + ComplexityInvariantDistance(normalize=True)
FB + RangeCountPercentage(range_min=0, range_max=1.0)
FB + RatioBeyondRSigma(r=2.0)
FB + JerkMetric(normalize=True)
FB + DimensionlessJerk(log=True, signal_type='acceleration')
FB + SPARC()
FB + DominantFrequency(low_cutoff=0.25, high_cutoff=5.0)
FB + DominantFrequencyValue(low_cutoff=0.25, high_cutoff=5.0)
FB + PowerSpectralSum(low_cutoff=0.25, high_cutoff=5.0)
FB + SpectralFlatness(low_cutoff=0.25, high_cutoff=5.0)
FB + SpectralEntropy(low_cutoff=0.25, high_cutoff=5.0)
FB + DetailPower(wavelet='coif4', freq_band=[1.0, 3.0])
FB + DetailPowerRatio(wavelet='coif4', freq_band=[1.0, 3.0])

In [9]:
X_feat = FB.compute(X, fs=20.0, windowed=True)

  "boundary effects.").format(level))


## Feature Exploration

In [13]:
feats = pd.DataFrame(
    index=range(X_feat.shape[0]), 
#     columns=['Subject', 'Activity', 'Label'] + [i.parent._name for i in FB._feat_list],
    columns=['Label'] + [i.parent._name for i in FB._feat_list],
    dtype='float'
)
# feats['Subject'] = feats.Subject.astype('str')
# feats['Activity'] = feats.Activity.astype('str')
feats.head(10)

# feats.iloc[:, 1:] = X_feat
# feats['Label'] = Y
# feats['Label'] = feats.Label.astype('int')
# # feats['Subject'] = subjects
# # feats['Activity'] = activities

# feat_names = [i.parent._name for i in FB._feat_list]

Unnamed: 0,Label,Mean,MeanCrossRate,StdDev,Skewness,Kurtosis,Range,IQR,RMS,Autocorrelation,...,JerkMetric,DimensionlessJerk,SPARC,DominantFrequency,DominantFrequencyValue,PowerSpectralSum,SpectralFlatness,SpectralEntropy,DetailPower,DetailPowerRatio
0,,,,,,,,,,,...,,,,,,,,,,
1,,,,,,,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
5,,,,,,,,,,,...,,,,,,,,,,
6,,,,,,,,,,,...,,,,,,,,,,
7,,,,,,,,,,,...,,,,,,,,,,
8,,,,,,,,,,,...,,,,,,,,,,
9,,,,,,,,,,,...,,,,,,,,,,


In [11]:
# feats.to_hdf('features.h5', key='no_preprocessing')

### Feature distributions

In [12]:
plt.close('all')

f, ax = plt.subplots(nrows=7, ncols=4, figsize=(10, 10), sharex=True)

j, k = 0, 0
for i, ft in enumerate([i.parent._name for i in FB._feat_list]):
    if k > 3:
        j += 1
        k  = 0
    sns.violinplot(x='Label', y=ft, data=feats, ax=ax[j, k])
    ax[j, k].set_ylabel(None)
    ax[j, k].set_title(ft)
    ax[j, k].set_xlabel(None)
    
    k += 1

for k in range(4):
    ax[-1, k].set_xticklabels(['NG', 'G'])

f.tight_layout()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

### Predictive Power Score

In [13]:
import ppscore

In [14]:
df_predictors = ppscore.predictors(feats, 'Label', output='df')
plt.figure()
ax = sns.barplot(data=df_predictors, x="x", y="ppscore")
ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha="right")
plt.tight_layout()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [15]:
pps_matrix = ppscore.matrix(feats)

In [16]:
plt.figure(figsize=(15, 10))
sns.heatmap(pps_matrix, vmin=0, vmax=1, cmap="Blues", linewidths=0.5, annot=True)
plt.tight_layout()
# plt.savefig('PPScore_matrix.pdf')

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [20]:
from sklearn.decomposition import PCA
from sklearn.preprocessing import RobustScaler

In [21]:
pca = PCA(n_components=5)
x_pca = pca.fit_transform(RobustScaler().fit_transform(feats))
df_pca = pd.DataFrame(x_pca, columns=[f'PC {i+1}' for i in range(x_pca.shape[1])])
df_pca['Label'] = Y
df_pca.head(2)

Unnamed: 0,PC 1,PC 2,PC 3,PC 4,PC 5,Label
0,-1.711513,5.869291,-1.432711,2.230261,-2.287106,1
1,-1.741723,5.418514,-0.500769,1.859952,-2.408586,1


In [22]:
sns.pairplot(df_pca, hue='Label')

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<seaborn.axisgrid.PairGrid at 0x7fae95e75250>

In [23]:
from separation_stats import db_2class, corr_select, cohen_d, ttest_select, mwu_select, auc_score

In [24]:
sep_df = pd.DataFrame()
sep_df['Feature'] = feat_names

sep_df['DBI'], db_rank = db_2class(X_feat, Y)
sep_df['Correlation r'] = corr_select(X_feat, Y)
sep_df['T-test p'], sep_df['Cohens d'] = ttest_select(X_feat, Y)
sep_df['MWU-test p'], _ = mwu_select(X_feat, Y)
sep_df['AUC'] = auc_score(X_feat, Y)

sep_df['Cohens d'] = sep_df['Cohens d'].abs()
sep_df['AUC'] = sep_df['AUC'].apply(lambda i: 1 - i if i < 0.5 else i)

In [25]:
sep_df.sort_values('DBI')

Unnamed: 0,Feature,DBI,Correlation r,T-test p,Cohens d,MWU-test p,AUC
21,PowerSpectralSum,0.965947,-0.439613,0.0,1.07338,0.0,0.783729
20,DominantFrequencyValue,1.00371,-0.294097,0.0,0.674816,0.0,0.710043
23,SpectralEntropy,1.237202,0.412201,0.0,0.992193,0.0,0.79372
19,DominantFrequency,1.298202,0.087434,1.997463e-244,0.192486,0.0,0.574323
12,PermutationEntropy,1.497455,-0.434787,0.0,1.058831,0.0,0.794782
22,SpectralFlatness,1.678056,0.255478,0.0,0.57951,0.0,0.689853
17,DimensionlessJerk,2.057174,-0.334707,0.0,0.778961,0.0,0.675344
16,JerkMetric,2.547032,-0.005624,0.03205565,0.012335,0.0,0.675344
6,IQR,2.64583,-0.022779,3.844914e-18,0.04997,0.0,0.664672
1,MeanCrossRate,2.646177,-0.33574,0.0,0.78167,0.0,0.708788


In [26]:
sep_df.sort_values('Cohens d', ascending=False)

Unnamed: 0,Feature,DBI,Correlation r,T-test p,Cohens d,MWU-test p,AUC
21,PowerSpectralSum,0.965947,-0.439613,0.0,1.07338,0.0,0.783729
12,PermutationEntropy,1.497455,-0.434787,0.0,1.058831,0.0,0.794782
23,SpectralEntropy,1.237202,0.412201,0.0,0.992193,0.0,0.79372
1,MeanCrossRate,2.646177,-0.33574,0.0,0.78167,0.0,0.708788
17,DimensionlessJerk,2.057174,-0.334707,0.0,0.778961,0.0,0.675344
13,ComplexityInvariantDistance,2.694615,-0.316375,0.0,0.731397,0.0,0.709979
8,Autocorrelation,3.384121,0.316068,0.0,0.73061,0.0,0.710173
20,DominantFrequencyValue,1.00371,-0.294097,0.0,0.674816,0.0,0.710043
22,SpectralFlatness,1.678056,0.255478,0.0,0.57951,0.0,0.689853
14,RangeCountPercentage,6.313728,0.215739,0.0,0.484539,0.0,0.629582


In [27]:
sep_df.sort_values('AUC', ascending=False)

Unnamed: 0,Feature,DBI,Correlation r,T-test p,Cohens d,MWU-test p,AUC
12,PermutationEntropy,1.497455,-0.434787,0.0,1.058831,0.0,0.794782
23,SpectralEntropy,1.237202,0.412201,0.0,0.992193,0.0,0.79372
21,PowerSpectralSum,0.965947,-0.439613,0.0,1.07338,0.0,0.783729
8,Autocorrelation,3.384121,0.316068,0.0,0.73061,0.0,0.710173
20,DominantFrequencyValue,1.00371,-0.294097,0.0,0.674816,0.0,0.710043
13,ComplexityInvariantDistance,2.694615,-0.316375,0.0,0.731397,0.0,0.709979
1,MeanCrossRate,2.646177,-0.33574,0.0,0.78167,0.0,0.708788
22,SpectralFlatness,1.678056,0.255478,0.0,0.57951,0.0,0.689853
16,JerkMetric,2.547032,-0.005624,0.03205565,0.012335,0.0,0.675344
17,DimensionlessJerk,2.057174,-0.334707,0.0,0.778961,0.0,0.675344


### Pair plots (Top Features)

In [28]:
top_feats = ['PowerSpectralSum', 'PermutationEntropy', 'SpectralEntropy', 'DominantFrequency', 'DimensionlessJerk', 'MeanCrossRate']

sns.pairplot(feats, hue='Label', vars=top_feats)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<seaborn.axisgrid.PairGrid at 0x7fae8ceee190>