In [1]:
%matplotlib widget

In [2]:
import os
import pickle
import random
from pathlib import Path

import lightgbm as lgb
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from scipy.signal import butter, sosfiltfilt
from sklearn.metrics import f1_score

from PfyMU.gait.train_classifier.core import load_datasets
from PfyMU.features import *

# Apply matplotlib's 'ggplot' style sheet to any figure created after this point.
plt.style.use('ggplot')

# Setup

In [3]:
def band_filter(x, fs, low_cutoff=0.5, high_cutoff=3.0, order=1):
    """
    Zero-phase Butterworth band-pass filter applied along the first axis.

    Parameters
    ----------
    x : array_like
        Signal to filter. Filtering runs along axis 0.
    fs : float
        Sampling frequency of `x`, in the same units as the cutoffs.
    low_cutoff : float, optional
        Lower edge of the pass band. Default is 0.5.
    high_cutoff : float, optional
        Upper edge of the pass band. Default is 3.0.
    order : int, optional
        Filter order handed to `scipy.signal.butter`. Default is 1.

    Returns
    -------
    numpy.ndarray
        Filtered signal with the same shape as `x`.
    """
    # butter takes digital cutoffs normalized by the Nyquist frequency (fs / 2)
    sos = butter(
        order,
        [2 * low_cutoff / fs, 2 * high_cutoff / fs],
        btype='band',
        output='sos'
    )

    # forward-backward filtering, so the output has no phase shift
    return sosfiltfilt(sos, x, axis=0)

# Per-activity window step, handed to load_datasets(window_step=...) below.
# NOTE(review): the values mix fractions (< 1) with large integers (300, 900) --
# presumably a fraction of the window length vs. an absolute step; confirm the
# units against load_datasets.
# NOTE(review): 'default' is presumably the fallback for activities not listed
# here -- confirm in load_datasets.
steps = {
    'walking': 0.4,
    'walking-impaired': 0.2,
    'sitting': 900,
    'standing': 300,
    'stairs-ascending': 0.3,
    'stairs-descending': 0.3,
    'cycling-50W': 0.3,
    'cycling-100W': 0.3,
    'default': 1.0
}

# Load the Data

In [4]:
# Root directory of the processed datasets. Set the PFYMU_DATASETS_DIR
# environment variable to run the notebook on another machine; the fallback is
# the path this notebook was originally run with.
base_path = Path(
    os.environ.get(
        'PFYMU_DATASETS_DIR',
        '/home/lukasadamowicz/Documents/Datasets/processed'
    )
)

datasets = [
    base_path / name
    for name in ('bluesky2', 'daliac', 'ltmm', 'usc-had')
]

# Load every dataset with the band-pass filter applied to the signal.
# X_ is indexed further down as X_[window_mask, :, axis]; Y, subjects and
# activities are indexed with the same per-window masks.
# NOTE(review): goal_fs / window_length are presumably in Hz / seconds -- confirm
# against load_datasets.
X_, Y, subjects, activities = load_datasets(
    paths=datasets,
    goal_fs=50.0,
    window_length=3.0,
    window_step=steps,
    acc_mag=False,
    signal_function=band_filter
)

In [5]:
# Second pass over the same datasets with the same windowing settings but no
# signal function (no filtering). Only the first returned array is needed here.
x_nf = load_datasets(
    paths=datasets,
    goal_fs=50.0,
    window_length=3.0,
    window_step=steps,
    acc_mag=False,
    signal_function=None
)[0]

# Mean feature of the unfiltered windows; used later to choose the axis per subject.
# NOTE(review): presumably one mean per window and axis -- confirm against Mean.
x_mn = Mean().compute(x_nf)

In [6]:
# Reduce the filtered signal to a single axis per subject: X drops the last
# dimension of X_.
X = np.zeros(X_.shape[:-1])

# windows labelled as (impaired) walking, independent of subject
walking = (activities == 'walking-impaired') | (activities == 'walking')

for subj in np.unique(subjects):
    subj_mask = subjects == subj
    mask = subj_mask & walking
    if mask.sum() == 0:
        # NOTE(review): execution continues after this message; with no walking
        # windows the sums below are presumably all zero, so axis 0 would be
        # selected and negated -- confirm this is the intended fallback.
        print('no mask')

    # axis whose summed unfiltered mean over the walking windows is largest in magnitude
    axis_sums = np.sum(x_mn[mask], axis=0)
    idx = np.argmax(np.abs(axis_sums))

    # keep the axis as-is when that sum is negative, otherwise flip its sign
    selected = X_[subj_mask, :, idx]
    if axis_sums[idx] < 0:
        X[subj_mask] = selected
    else:
        X[subj_mask] = -1 * selected

# Randomize validation/test splits

In [7]:
# Subjects held out per fold, and how many of those go to validation; the rest
# of each held-out group is the test set of that fold.
FOLD_SIZE = 4
N_VALIDATION = 2

# fixed seed so the subject shuffle, and therefore the folds, are reproducible
random.seed(5)
# only subjects with more than 3 distinct activities are eligible to be held out
rnd_subjects = [i for i in np.unique(subjects) if np.unique(activities[subjects==i]).size > 3]
random.shuffle(rnd_subjects)

training_masks, validation_masks, testing_masks = [], [], []

# Only complete groups form a fold. Indexing past the end of rnd_subjects used
# to raise IndexError when the eligible subject count was not a multiple of
# FOLD_SIZE. Leftover subjects are never held out, so they stay in training.
n_leftover = len(rnd_subjects) % FOLD_SIZE
if n_leftover:
    print(f'{n_leftover} eligible subject(s) do not fill a fold and are kept in training only')

for start in range(0, len(rnd_subjects) - n_leftover, FOLD_SIZE):
    held_out = rnd_subjects[start:start + FOLD_SIZE]

    # per-window boolean masks over the full data set
    vm = np.isin(subjects, held_out[:N_VALIDATION])
    tem = np.isin(subjects, held_out[N_VALIDATION:])
    trm = ~(vm | tem)

    training_masks.append(trm)
    validation_masks.append(vm)
    testing_masks.append(tem)

# Feature Computation

In [8]:
# Feature bank; features are registered on it with the `+` operator.
FB = Bank(window_length=None, window_step=None)

# the spectral features all share the same 0-12 band
# NOTE(review): cutoffs are presumably in Hz -- confirm against PfyMU.features
spectral_band = dict(low_cutoff=0.0, high_cutoff=12.0)

bank_features = [
    PowerSpectralSum(**spectral_band),
    DominantFrequency(**spectral_band),
    MeanCrossRate(),
    Range(),
    RMS(),
    SignalEntropy(),
    SpectralEntropy(**spectral_band),
    SpectralFlatness(**spectral_band),
]

# register in the same order as listed above
for feature in bank_features:
    FB + feature
In [9]:
# Run the feature bank on the single-axis windows in X, with fs matching the
# goal_fs (50.0) used when loading. Two values are returned: X_feat, which is
# indexed per window below, and fnames.
# NOTE(review): windowed=True presumably marks X as already split into windows,
# and fnames presumably holds the feature names -- confirm both, and the meaning
# of columns=[''], against Bank.compute.
X_feat, fnames = FB.compute(X, fs=50.0, windowed=True, columns=[''])

# Model loading and testing

In [10]:
# Load the pickled V1 classifier; it is used below through model.predict_proba.
# `pickle` is imported in the notebook's import cell.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file --
# only load model files that come from a trusted source.
with open('gaitpy_v1/model.pkl', 'rb') as f:
    model = pickle.load(f)



# Cross validation

In [16]:
# Decision threshold on the second predict_proba column: windows whose value
# exceeds it are counted as positive predictions in the metric cells below.
thresh = 0.5

In [26]:
# Score the loaded model on each fold's validation windows.
#   f1: F1 score
#   tp: positive predictions on actual positives / number of actual positives
#   fp: positive predictions on actual negatives / number of actual negatives
f1, tp, fp = [], [], []

print('Validation set performance')
# the model is not re-fit per fold, so only the validation masks are needed
for vm in validation_masks:
    y_true = Y[vm]
    y_pred = model.predict_proba(X_feat[vm])[:, 1]
    y_hat = y_pred > thresh  # binarize once, reuse for every metric
    n_pos = y_true.sum()

    # compute metrics
    f1.append(f1_score(y_true, y_hat))
    tp.append((y_true & y_hat).sum() / n_pos)
    fp.append((~y_true.astype(bool) & y_hat).sum() / (y_true.size - n_pos))

    print(f'F1: {f1[-1]*100:6.1f}', end='')
    print(f'  TP: {tp[-1]*100:6.1f}', end='')
    print(f'  FP: {fp[-1]*100:6.1f}')

# summary across folds (np.std default, i.e. population SD)
print('\n', '-' * 50)
print(f'Mean (SD) F1: {np.mean(f1)*100:.1f}({np.std(f1)*100:.1f})')
print(f'Mean (SD) TP: {np.mean(tp)*100:.1f}({np.std(tp)*100:.1f})')
print(f'Mean (SD) FP: {np.mean(fp)*100:.1f}({np.std(fp)*100:.1f})')

# long-format results: one row per (metric, fold), all tagged with the model name
df = pd.DataFrame({
    'Model': 'V1',
    'Metric': ['F1'] * len(f1) + ['TP'] * len(tp) + ['FP'] * len(fp),
    'Score': f1 + tp + fp,
})

df.to_csv('v1_validation_results.csv', index=False)

Validation set performance
F1:   84.7  TP:  100.0  FP:   26.0
F1:   55.5  TP:  100.0  FP:   59.0
F1:   62.6  TP:   98.9  FP:   50.5
F1:   50.0  TP:  100.0  FP:   70.1
F1:   92.8  TP:   89.9  FP:    9.0
F1:   99.2  TP:  100.0  FP:    3.2
F1:   69.6  TP:   99.9  FP:   59.5
F1:   80.8  TP:   99.6  FP:   48.9
F1:   63.5  TP:  100.0  FP:   65.6
F1:   62.8  TP:  100.0  FP:   53.7
F1:   63.2  TP:  100.0  FP:   50.4

 --------------------------------------------------
Mean (SD) F1: 71.3(15.1)
Mean (SD) TP: 98.9(2.9)
Mean (SD) FP: 45.1(21.4)


In [28]:
# Score the loaded model on each fold's test windows.
#   f1: F1 score
#   tp: positive predictions on actual positives / number of actual positives
#   fp: positive predictions on actual negatives / number of actual negatives
f1, tp, fp = [], [], []

print('Test set performance')
# the model is not re-fit per fold, so only the test masks are needed
for tem in testing_masks:
    y_true = Y[tem]
    y_pred = model.predict_proba(X_feat[tem])[:, 1]
    y_hat = y_pred > thresh  # binarize once, reuse for every metric
    n_pos = y_true.sum()

    # compute metrics
    f1.append(f1_score(y_true, y_hat))
    tp.append((y_true & y_hat).sum() / n_pos)
    fp.append((~y_true.astype(bool) & y_hat).sum() / (y_true.size - n_pos))

    print(f'F1: {f1[-1]*100:6.1f}', end='')
    print(f'  TP: {tp[-1]*100:6.1f}', end='')
    print(f'  FP: {fp[-1]*100:6.1f}')

# summary across folds (np.std default, i.e. population SD)
print('\n', '-' * 50)
print(f'Mean (SD) F1: {np.mean(f1)*100:.1f}({np.std(f1)*100:.1f})')
print(f'Mean (SD) TP: {np.mean(tp)*100:.1f}({np.std(tp)*100:.1f})')
print(f'Mean (SD) FP: {np.mean(fp)*100:.1f}({np.std(fp)*100:.1f})')

# long-format results: one row per (metric, fold), all tagged with the model name
df = pd.DataFrame({
    'Model': 'V1',
    'Metric': ['F1'] * len(f1) + ['TP'] * len(tp) + ['FP'] * len(fp),
    'Score': f1 + tp + fp,
})

df.to_csv('v1_test_results.csv', index=False)

Test set performance
F1:   89.5  TP:   95.1  FP:   17.7
F1:   63.5  TP:  100.0  FP:   46.6
F1:   85.1  TP:  100.0  FP:   29.2
F1:   70.0  TP:   99.9  FP:   53.5
F1:   53.9  TP:  100.0  FP:   75.2
F1:   48.9  TP:   63.4  FP:   65.7
F1:   66.4  TP:  100.0  FP:   66.8
F1:   51.0  TP:  100.0  FP:   60.6
F1:   76.6  TP:   67.0  FP:   10.0
F1:   89.8  TP:   98.6  FP:   24.9
F1:   61.1  TP:  100.0  FP:   61.9

 --------------------------------------------------
Mean (SD) F1: 68.7(14.2)
Mean (SD) TP: 93.1(13.2)
Mean (SD) FP: 46.6(21.4)


In [29]:
# Emit one markdown table row per fold from the f1 / tp / fp lists of the most
# recently evaluated split, as percentages with one decimal.
for fold, (f1_k, tp_k, fp_k) in enumerate(zip(f1, tp, fp), start=1):
    print(f'| Fold {fold} | {f1_k*100:.1f} | {tp_k*100:.1f} | {fp_k*100:.1f} |')


| Fold 1 | 89.5 | 95.1 | 17.7 |
| Fold 2 | 63.5 | 100.0 | 46.6 |
| Fold 3 | 85.1 | 100.0 | 29.2 |
| Fold 4 | 70.0 | 99.9 | 53.5 |
| Fold 5 | 53.9 | 100.0 | 75.2 |
| Fold 6 | 48.9 | 63.4 | 65.7 |
| Fold 7 | 66.4 | 100.0 | 66.8 |
| Fold 8 | 51.0 | 100.0 | 60.6 |
| Fold 9 | 76.6 | 67.0 | 10.0 |
| Fold 10 | 89.8 | 98.6 | 24.9 |
| Fold 11 | 61.1 | 100.0 | 61.9 |
