In [1]:
%matplotlib widget

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import sys

from PfyMU.gait.train_classifier.core import load_datasets

In [3]:
gait_sets_path = Path('/Users/adamol/Documents/Datasets/gait/processed')

datasets = [
    gait_sets_path / 'bluesky2',
    gait_sets_path / 'daliac',
    gait_sets_path / 'ltmm',
    gait_sets_path / 'usc-had'
]

X, Y, subjects, activities = load_datasets(datasets, goal_fs=50.0, acc_mag=True, window_length=3.0, window_step=[0.75, 0.25])

## Dataset class summary

In [4]:
print('Total samples (3.0s windows): ', Y.size)
print('Total walking samples: ', Y.sum())
print('Total non-walking samples: ', Y.size - Y.sum(), '\n')
print(f'% walking samples: {Y.sum() / Y.size * 100:.2f}')

Total samples (3.0s windows):  74926
Total walking samples:  29963
Total non-walking samples:  44963 

% walking samples: 39.99


## Feature Generation

In [5]:
from PfyMU.features import *

In [6]:
FB = Bank(window_length=None, window_step=None)

# add features
FB + Mean()
FB + MeanCrossRate()
FB + StdDev()
FB + Skewness()
FB + Kurtosis()
FB + Range()
FB + IQR()
FB + RMS()
FB + Autocorrelation(lag=1, normalize=True)
FB + LinearSlope()
FB + SignalEntropy()
FB + SampleEntropy(m=4, r=1.0)
FB + PermutationEntropy(order=3, delay=1, normalize=True)
FB + ComplexityInvariantDistance(normalize=True)
FB + RangeCountPercentage(range_min=0, range_max=1.0)
FB + RatioBeyondRSigma(r=2.0)
FB + JerkMetric(normalize=True)
FB + DimensionlessJerk(log=True, signal_type='acceleration')
FB + SPARC()
FB + DominantFrequency(low_cutoff=0.25, high_cutoff=5.0)
FB + DominantFrequencyValue(low_cutoff=0.25, high_cutoff=5.0)
FB + PowerSpectralSum(low_cutoff=0.25, high_cutoff=5.0)
FB + SpectralFlatness(low_cutoff=0.25, high_cutoff=5.0)
FB + SpectralEntropy(low_cutoff=0.25, high_cutoff=5.0)
FB + DetailPower(wavelet='coif4', freq_band=[1.0, 3.0])
FB + DetailPowerRatio(wavelet='coif4', freq_band=[1.0, 3.0])

In [7]:
X_feat = FB.compute(X, fs=50.0, windowed=True)



## Feature Exploration

In [8]:
feats = pd.DataFrame(
    index=range(X_feat.shape[0]), 
#     columns=['Subject', 'Activity', 'Label'] + [i.parent._name for i in FB._feat_list],
    columns=['Label'] + [i.parent._name for i in FB._feat_list],
    dtype='float'
)
# feats['Subject'] = feats.Subject.astype('str')
# feats['Activity'] = feats.Activity.astype('str')

feats.iloc[:, 1:] = X_feat
feats['Label'] = Y
feats['Label'] = feats.Label.astype('int')
# feats['Subject'] = subjects
# feats['Activity'] = activities

feat_names = [i.parent._name for i in FB._feat_list]

### Feature distributions

In [9]:
plt.close('all')

f, ax = plt.subplots(nrows=7, ncols=4, figsize=(10, 10), sharex=True)

j, k = 0, 0
for i, ft in enumerate([i.parent._name for i in FB._feat_list]):
    if k > 3:
        j += 1
        k  = 0
    sns.violinplot(x='Label', y=ft, data=feats, ax=ax[j, k])
    ax[j, k].set_ylabel(None)
    ax[j, k].set_title(ft)
    ax[j, k].set_xlabel(None)
    
    k += 1

for k in range(4):
    ax[-1, k].set_xticklabel(['NG', 'G'])

f.tight_layout()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

### Predictive Power Score

In [10]:
import ppscore

In [11]:
df_predictors = ppscore.predictors(feats, 'Label', output='df')
plt.figure()
ax = sns.barplot(data=df_predictors, x="x", y="ppscore")
ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha="right")
plt.tight_layout()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [12]:
pps_matrix = ppscore.matrix(feats)

In [13]:
plt.figure(figsize=(15, 10))
sns.heatmap(pps_matrix, vmin=0, vmax=1, cmap="Blues", linewidths=0.5, annot=True)
plt.tight_layout()
# plt.savefig('PPScore_matrix.pdf')

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [14]:
from sklearn.decomposition import PCA

In [15]:
pca = PCA(n_components=5)
x_pca = pca.fit_transform(feats)
df_pca = pd.DataFrame(x_pca, columns=[f'PC {i+1}' for i in range(x_pca.shape[1])])
df_pca['Label'] = Y
df_pca.head(2)

Unnamed: 0,PC 1,PC 2,PC 3,PC 4,PC 5,Labels
0,-38232.164182,12.39945,-4.241343,-2.147787,-2.734444,1
1,-32739.939638,10.931861,-4.097475,-2.143795,-2.413203,1


In [16]:
sns.pairplot(df_pca, hue='Label')

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<seaborn.axisgrid.PairGrid at 0x7f94cf775340>

In [17]:
from separation_stats import db_2class, corr_select, cohen_d, ttest_select, mwu_select, auc_score

In [18]:
sep_df = pd.DataFrame()
sep_df['Feature'] = feat_names

sep_df['DBI'], db_rank = db_2class(X_feat, Y)
sep_df['Correlation r'] = corr_select(X_feat, Y)
sep_df['T-test p'], sep_df['Cohens d'] = ttest_select(X_feat, Y)
sep_df['MWU-test p'], _ = mwu_select(X_feat, Y)
sep_df['AUC'] = auc_score(X_feat, Y)

sep_df['Cohens d'] = sep_df['Cohens d'].abs()
sep_df['AUC'] = sep_df['AUC'].apply(lambda i: 1 - i if i < 0.5 else i)

In [19]:
sep_df.sort_values('DBI')

Unnamed: 0,Feature,DBI,Correlation r,T-test p,Cohens d,MWU-test p,AUC
17,DimensionlessJerk,0.673338,-0.702958,0.0,2.017554,0.0,0.899899
21,PowerSpectralSum,0.732475,-0.712638,0.0,2.073614,0.0,0.922449
20,DominantFrequencyValue,0.75189,-0.639074,0.0,1.696093,0.0,0.896066
1,MeanCrossRate,0.855725,-0.66095,0.0,1.797896,0.0,0.899231
12,PermutationEntropy,0.910431,-0.687836,0.0,1.934348,0.0,0.916319
19,DominantFrequency,0.982553,0.45359,0.0,1.038939,0.0,0.746459
23,SpectralEntropy,1.018023,0.618297,0.0,1.605876,0.0,0.880043
13,ComplexityInvariantDistance,1.080857,-0.612623,0.0,1.582218,0.0,0.873708
14,RangeCountPercentage,1.094363,0.489803,0.0,1.146818,0.0,0.785757
8,Autocorrelation,1.215009,0.581898,0.0,1.460573,0.0,0.873879


In [20]:
sep_df.sort_values('Cohens d', ascending=False)

Unnamed: 0,Feature,DBI,Correlation r,T-test p,Cohens d,MWU-test p,AUC
21,PowerSpectralSum,0.732475,-0.712638,0.0,2.073614,0.0,0.922449
17,DimensionlessJerk,0.673338,-0.702958,0.0,2.017554,0.0,0.899899
12,PermutationEntropy,0.910431,-0.687836,0.0,1.934348,0.0,0.916319
1,MeanCrossRate,0.855725,-0.66095,0.0,1.797896,0.0,0.899231
20,DominantFrequencyValue,0.75189,-0.639074,0.0,1.696093,0.0,0.896066
23,SpectralEntropy,1.018023,0.618297,0.0,1.605876,0.0,0.880043
13,ComplexityInvariantDistance,1.080857,-0.612623,0.0,1.582218,0.0,0.873708
8,Autocorrelation,1.215009,0.581898,0.0,1.460573,0.0,0.873879
14,RangeCountPercentage,1.094363,0.489803,0.0,1.146818,0.0,0.785757
22,SpectralFlatness,1.279721,0.480806,0.0,1.11934,0.0,0.802049


In [21]:
sep_df.sort_values('AUC', ascending=False)

Unnamed: 0,Feature,DBI,Correlation r,T-test p,Cohens d,MWU-test p,AUC
21,PowerSpectralSum,0.732475,-0.712638,0.0,2.073614,0.0,0.922449
12,PermutationEntropy,0.910431,-0.687836,0.0,1.934348,0.0,0.916319
6,IQR,1.345826,0.418591,0.0,0.940863,0.0,0.902881
25,DetailPowerRatio,2.657517,0.276403,0.0,0.587094,0.0,0.901279
16,JerkMetric,1.460917,0.40533,0.0,0.905081,0.0,0.899899
17,DimensionlessJerk,0.673338,-0.702958,0.0,2.017554,0.0,0.899899
1,MeanCrossRate,0.855725,-0.66095,0.0,1.797896,0.0,0.899231
2,StdDev,1.342349,0.365138,0.0,0.800636,0.0,0.898253
7,RMS,1.342349,0.365138,0.0,0.800636,0.0,0.898253
24,DetailPower,5.184766,0.0692,3.358175e-80,0.141598,0.0,0.897941


### Pair plots (Top Features)

In [22]:
top_feats = ['PowerSpectralSum', 'PermutationEntropy', 'IQR', 'DominantFrequency', 'DimensionlessJerk', 'DetailPowerRatio']

sns.pairplot(feats, hue='Label', vars=top_feats)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …



<seaborn.axisgrid.PairGrid at 0x7f94d3ee4340>