In [2]:
# Select the interactive widget (ipympl) matplotlib backend for this notebook.
%matplotlib widget

In [3]:
import os
import random
from pathlib import Path

import lightgbm as lgb
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from scipy.signal import butter, sosfiltfilt
from sklearn.metrics import f1_score

from PfyMU.gait.train_classifier.core import load_datasets
from PfyMU.features import *

# Apply matplotlib's 'ggplot' style sheet to figures created from here on.
plt.style.use('ggplot')

# Setup

In [4]:
def mag_band_filter(x, fs, low_hz=0.25, high_hz=5.0, order=1):
    """
    Band-pass filter the vector magnitude of a multi-axis signal.

    The Euclidean norm of `x` is taken along axis 1, and the result is passed
    through a zero-phase (forward-backward) Butterworth band-pass filter.

    Parameters
    ----------
    x : numpy.ndarray
        Signal array with at least 2 dimensions; the norm is computed over
        axis 1 (for a 2D input that is one magnitude value per row).
    fs : float
        Sampling frequency in Hz, used to normalise the cut-offs to Nyquist.
    low_hz : float, optional
        Lower cut-off frequency in Hz. Default is 0.25.
    high_hz : float, optional
        Upper cut-off frequency in Hz. Default is 5.0.
    order : int, optional
        Butterworth order handed to `scipy.signal.butter`. Default is 1.

    Returns
    -------
    numpy.ndarray
        Filtered magnitude signal, same shape as `np.linalg.norm(x, axis=1)`.

    Raises
    ------
    ValueError
        If the pass band is not strictly inside (0, fs / 2) or if
        `low_hz >= high_hz`.
    """
    if not 0 < low_hz < high_hz < fs / 2:
        raise ValueError(
            f'Pass band must satisfy 0 < low_hz < high_hz < fs / 2, got '
            f'low_hz={low_hz}, high_hz={high_hz}, fs={fs}'
        )

    # digital cut-offs are expressed as a fraction of the Nyquist frequency
    sos = butter(
        order,
        [2 * low_hz / fs, 2 * high_hz / fs],
        btype='band',
        output='sos'
    )

    magnitude = np.linalg.norm(x, axis=1)

    # forward-backward filtering: no phase shift in the output
    return sosfiltfilt(sos, magnitude)

# Per-activity window step, handed to `load_datasets` as `window_step`.
# The 'default' entry is presumably the fallback for labels not listed here.
# NOTE(review): values mix fractions (<= 1.0) with large integers (300, 900);
# confirm in `load_datasets` how each kind is interpreted.
steps = dict([
    ('walking', 0.4),
    ('walking-impaired', 0.2),
    ('sitting', 900),
    ('standing', 300),
    ('stairs-ascending', 0.3),
    ('stairs-descending', 0.3),
    ('cycling-50W', 0.3),
    ('cycling-100W', 0.3),
    ('default', 1.0),
])

# Load the Data

In [5]:
# Root folder of the processed datasets. The default is the original author's
# location; set the PFYMU_DATA_DIR environment variable to run the notebook
# on another machine without editing this cell.
base_path = Path(
    os.environ.get(
        'PFYMU_DATA_DIR',
        '/home/lukasadamowicz/Documents/Datasets/processed'
    )
)

datasets = [
    base_path / 'bluesky2',
    base_path / 'daliac',
    base_path / 'ltmm',
    base_path / 'usc-had'
]

# Load every dataset at a target rate of 50 Hz in 3 s windows, with the
# per-activity window step from `steps`; `mag_band_filter` is supplied as the
# signal function.
X, Y, subjects, activities = load_datasets(
    paths=datasets,
    goal_fs=50.0,
    window_length=3.0,
    window_step=steps,
    acc_mag=False,
    signal_function=mag_band_filter
)

# Second label set: identical to Y except that every window whose activity
# name contains 'stair' is labelled 1.
Y2 = Y.copy()
stair_mask = np.array(['stair' in i for i in activities], dtype=bool)
Y2[stair_mask] = 1

# Randomize validation/test splits

In [6]:
# Fixed seed so the subject shuffle, and therefore every fold, is reproducible.
random.seed(5)

# Only subjects with more than 3 distinct activity labels are eligible to be
# held out for validation/testing.
rnd_subjects = [
    i for i in np.unique(subjects)
    if np.unique(activities[subjects == i]).size > 3
]
random.shuffle(rnd_subjects)

training_masks, validation_masks, testing_masks = [], [], []

# Each fold holds out one group of 4 shuffled subjects: the first 2 form the
# validation set, the last 2 the test set, and all remaining windows are used
# for training.
# Only complete groups are used. Indexing `rnd_subjects[i + j]` past the end
# would raise IndexError when the eligible subject count is not a multiple of
# 4; subjects in a trailing incomplete group simply stay in training for
# every fold.
n_grouped = len(rnd_subjects) - len(rnd_subjects) % 4

for i in range(0, n_grouped, 4):
    held_out = rnd_subjects[i:i + 4]

    vm = np.isin(subjects, held_out[:2])
    tem = np.isin(subjects, held_out[2:])
    trm = ~(vm | tem)

    training_masks.append(trm)
    validation_masks.append(vm)
    testing_masks.append(tem)

# Feature Computation

In [7]:
# Feature bank object; `Bank` is brought in by the `PfyMU.features` star
# import. Both windowing arguments are explicitly disabled (None) here.
FB = Bank(window_length=None, window_step=None)

# Presumably restores the saved feature definitions from this JSON file --
# TODO confirm against the `Bank.load` documentation.
FB.load('final_features.json')

In [8]:
# Run the feature bank on the loaded windows `X` at fs=50.0 (the same rate
# requested from `load_datasets` via `goal_fs`). The second return value is
# presumably the list of feature names -- TODO confirm.
X_feat, fnames = FB.compute(X, fs=50.0, windowed=True, columns=[''])

# LightGBM model setup

In [24]:
def _parse_param_value(text):
    """
    Convert the textual value of a saved parameter to a Python object.

    Tries `int` first, then `float`, and falls back to the unchanged string.
    This also covers negative integers and scientific notation, and keeps
    non-numeric strings that merely contain a '.' as strings.
    """
    for cast in (int, float):
        try:
            return cast(text)
        except ValueError:
            continue
    return text


# Read the tuned LightGBM parameters: one `name:value` pair per line.
best_params = {}
with open('final_lgb_params.txt', 'r') as f:
    for line in f:
        line = line.strip()
        if not line:
            # tolerate blank lines (e.g. a trailing newline at end of file)
            continue

        # split on the first ':' only, so values may contain ':' themselves
        key, sep, value = line.partition(':')
        if not sep:
            raise ValueError(f'Malformed parameter line (missing ":"): {line!r}')

        best_params[key.strip()] = _parse_param_value(value.strip())

# Cross validation

In [25]:
# Decision threshold: a window counts as a positive prediction when its
# predicted probability is strictly greater than this value (`y_pred > thresh`
# in the evaluation cells).
thresh = 0.71

In [35]:
# Per-fold validation metrics. The un-suffixed lists belong to the model
# fitted on `Y`; the `*2` lists belong to the model fitted on `Y2` (windows
# whose activity contains 'stair' labelled as 1).
f1, f1_2 = [], []
tp, tp2 = [], []
fp, fp2 = [], []

print('Validation set performance')
for trm, vm, tem in zip(training_masks, validation_masks, testing_masks):
    # NOTE(review): no n_estimators is passed here, while the test-set cell
    # fits with n_estimators=125 -- confirm the difference is intended.
    lgb_cls = lgb.LGBMClassifier(**best_params)
    lgb_cls2 = lgb.LGBMClassifier(**best_params)

    lgb_cls.fit(X_feat[trm], Y[trm])
    lgb_cls2.fit(X_feat[trm], Y2[trm])

    # column 1 of predict_proba: probability of the positive class
    y_pred = lgb_cls.predict_proba(X_feat[vm])[:, 1]
    y2_pred = lgb_cls2.predict_proba(X_feat[vm])[:, 1]

    # Work on boolean masks throughout (as the test-set cell does): the
    # bitwise `&` / `~` below are only meaningful on booleans, and the
    # thresholding is done once per model instead of once per metric.
    y_true = Y[vm].astype(bool)
    y2_true = Y2[vm].astype(bool)
    y_hat = y_pred > thresh
    y2_hat = y2_pred > thresh

    # compute metrics
    f1.append(f1_score(y_true, y_hat))
    f1_2.append(f1_score(y2_true, y2_hat))
    # true-positive rate: detected positives / all positives
    tp.append((y_true & y_hat).sum() / y_true.sum())
    tp2.append((y2_true & y2_hat).sum() / y2_true.sum())
    # false-positive rate: negatives flagged positive / all negatives
    fp.append((~y_true & y_hat).sum() / (~y_true).sum())
    fp2.append((~y2_true & y2_hat).sum() / (~y2_true).sum())

    print(f'F1: {f1[-1]*100:6.1f}{f1_2[-1]*100:10.1f}', end='')
    print(f'  TP: {tp[-1]*100:6.1f}{tp2[-1]*100:10.1f}', end='')
    print(f'  FP: {fp[-1]*100:6.1f}{fp2[-1]*100:10.1f}')

print('\n', '-' * 50)
print(f'Mean (SD) F1: {np.mean(f1)*100:.1f}({np.std(f1)*100:.1f})   {np.mean(f1_2)*100:.1f}({np.std(f1_2)*100:.1f})')
print(f'Mean (SD) TP: {np.mean(tp)*100:.1f}({np.std(tp)*100:.1f})   {np.mean(tp2)*100:.1f}({np.std(tp2)*100:.1f})')
print(f'Mean (SD) FP: {np.mean(fp)*100:.1f}({np.std(fp)*100:.1f})   {np.mean(fp2)*100:.1f}({np.std(fp2)*100:.1f})')

# Long-format results table. Only the metrics of the `Y` model (f1, tp, fp)
# are written out; the `*2` lists are reported above but not saved.
df = pd.DataFrame({
    'Model': 'V2',
    'Metric': ['F1'] * len(f1) + ['TP'] * len(tp) + ['FP'] * len(fp),
    'Score': f1 + tp + fp,
})

df.to_csv('v2_validation_results.csv', index=False)

Validation set performance
F1:   95.4      99.5  TP:   93.9      99.7  FP:    2.1       0.6
F1:   92.5      97.8  TP:   95.9     100.0  FP:    4.2       2.8
F1:   93.0      96.4  TP:   93.1      96.4  FP:    3.1       2.2
F1:   91.9      97.5  TP:   97.6     100.0  FP:    5.2       3.2
F1:   82.3      80.7  TP:   70.6      68.2  FP:    2.4       2.1
F1:   94.3      97.3  TP:   89.5      95.3  FP:    0.6       0.9
F1:   87.6      97.6  TP:   79.0      95.4  FP:    1.0       0.4
F1:   94.3      99.0  TP:   95.4      98.2  FP:    7.3       0.4
F1:   94.1      97.1  TP:   96.7      98.0  FP:    5.1       3.6
F1:   93.3      98.7  TP:   94.2      98.3  FP:    3.5       2.1
F1:   80.4      95.6  TP:   75.2     100.0  FP:    5.1       6.8

 --------------------------------------------------
Mean (SD) F1: 90.8(4.9)   96.1(5.0)
Mean (SD) TP: 89.2(9.1)   95.4(8.8)
Mean (SD) FP: 3.6(1.9)   2.3(1.8)


In [34]:
# Per-fold test-set metrics: plain names track the classifier trained on `Y`,
# the `2`-suffixed names track the classifier trained on `Y2`.
f1, tp, fp = [], [], []
f1_2, tp2, fp2 = [], [], []

print('Test set performance')
for trm, vm, tem in zip(training_masks, validation_masks, testing_masks):
    X_train, X_test = X_feat[trm], X_feat[tem]

    # one 125-tree classifier per label set, both fitted on the training fold
    lgb_cls = lgb.LGBMClassifier(n_estimators=125, **best_params)
    lgb_cls2 = lgb.LGBMClassifier(n_estimators=125, **best_params)
    lgb_cls.fit(X_train, Y[trm])
    lgb_cls2.fit(X_train, Y2[trm])

    # positive-class probabilities on the held-out test subjects
    y_pred = lgb_cls.predict_proba(X_test)[:, 1]
    y2_pred = lgb_cls2.predict_proba(X_test)[:, 1]

    # boolean ground truth and thresholded predictions
    y_true, y2_true = Y[tem].astype(bool), Y2[tem].astype(bool)
    y_hat, y2_hat = y_pred > thresh, y2_pred > thresh

    # F1 score
    f1.append(f1_score(y_true, y_hat))
    f1_2.append(f1_score(y2_true, y2_hat))

    # true-positive rate: hits over all actual positives
    tp.append(np.sum(y_true & y_hat) / np.sum(y_true))
    tp2.append(np.sum(y2_true & y2_hat) / np.sum(y2_true))

    # false-positive rate: false alarms over all actual negatives
    fp.append(np.sum(~y_true & y_hat) / (y_true.size - np.sum(y_true)))
    fp2.append(np.sum(~y2_true & y2_hat) / (y2_true.size - np.sum(y2_true)))

    # one row per fold
    print(
        f'F1: {f1[-1]*100:6.1f}{f1_2[-1]*100:10.1f}'
        f'  TP: {tp[-1]*100:6.1f}{tp2[-1]*100:10.1f}'
        f'  FP: {fp[-1]*100:6.1f}{fp2[-1]*100:10.1f}'
    )

# summary across folds
print('\n', '-' * 50)
print(f'Mean (SD) F1: {np.mean(f1)*100:.1f}({np.std(f1)*100:.1f})   {np.mean(f1_2)*100:.1f}({np.std(f1_2)*100:.1f})')
print(f'Mean (SD) TP: {np.mean(tp)*100:.1f}({np.std(tp)*100:.1f})   {np.mean(tp2)*100:.1f}({np.std(tp2)*100:.1f})')
print(f'Mean (SD) FP: {np.mean(fp)*100:.1f}({np.std(fp)*100:.1f})   {np.mean(fp2)*100:.1f}({np.std(fp2)*100:.1f})')

# long-format table of the `Y`-model scores, saved for later comparison
metric_labels = ['F1'] * len(f1) + ['TP'] * len(tp) + ['FP'] * len(fp)
df = pd.DataFrame({'Model': 'V2', 'Metric': metric_labels, 'Score': f1 + tp + fp})

df.to_csv('v2_test_results.csv', index=False)

Test set performance
F1:   91.0      95.3  TP:   86.8      91.5  FP:    4.0       0.6
F1:   86.1      96.9  TP:   89.4      98.5  FP:    7.4       3.2
F1:   91.1      95.5  TP:   85.1      97.1  FP:    1.4      10.0
F1:   87.1      98.7  TP:   87.0      97.7  FP:    7.9       0.5
F1:   84.9      95.9  TP:   96.2      99.6  FP:   13.4      12.7
F1:   73.1      89.3  TP:   60.6      80.8  FP:    3.6       0.7
F1:   90.4      99.2  TP:   84.6      99.6  FP:    1.7       1.5
F1:   88.6      96.6  TP:   88.8      96.6  FP:    3.7       1.9
F1:   59.8      60.0  TP:   44.4      44.7  FP:    5.2       5.4
F1:   97.3      98.6  TP:   96.0      98.0  FP:    1.6       1.3
F1:   92.6      99.0  TP:   91.9     100.0  FP:    3.2       1.6

 --------------------------------------------------
Mean (SD) F1: 85.6(10.0)   93.2(10.8)
Mean (SD) TP: 82.8(15.2)   91.3(15.7)
Mean (SD) FP: 4.8(3.4)   3.6(3.9)


In [72]:
f1, tp, fp = [], [], []

# Parameters for the native `lgb.train` API: the tuned values plus explicit
# settings for the remaining booster options.
params = best_params.copy()
params['boosting_type'] = 'gbdt'
params['subsample_for_bin'] = 200000
params['subsample'] = 1.0
params['subsample_freq'] = 0
params['colsample_bytree'] = 1.0
params['objective'] = 'binary'
params['metrics'] = ['binary']

# Number of boosting rounds, stated exactly once. `n_estimators` inside
# `params` is an alias that overrides `num_boost_round` (LightGBM warns about
# it), so having n_estimators=125 in params next to num_boost_round=100
# trains 125 rounds while reading as 100.
params.pop('n_estimators', None)
n_rounds = 125

print('Validation set performance')
for trm, vm, tem in zip(training_masks, validation_masks, testing_masks):
    x_dst = lgb.Dataset(X_feat[trm], label=Y[trm])
    bst = lgb.train(params, x_dst, num_boost_round=n_rounds)

    # with objective='binary' the booster returns positive-class probabilities
    y_pred = bst.predict(X_feat[vm])

    # boolean masks, so the bitwise `&` / `~` below are well defined
    y_true = Y[vm].astype(bool)
    y_hat = y_pred > thresh

    # compute metrics
    f1.append(f1_score(y_true, y_hat))
    tp.append((y_true & y_hat).sum() / y_true.sum())
    fp.append((~y_true & y_hat).sum() / (~y_true).sum())

    print(f'F1: {f1[-1]*100:6.1f}', end='')
    print(f'  TP: {tp[-1]*100:6.1f}', end='')
    print(f'  FP: {fp[-1]*100:6.1f}')

print('\n', '-' * 50)
print(f'Mean (SD) F1: {np.mean(f1)*100:.1f}({np.std(f1)*100:.1f})')
print(f'Mean (SD) TP: {np.mean(tp)*100:.1f}({np.std(tp)*100:.1f})')
print(f'Mean (SD) FP: {np.mean(fp)*100:.1f}({np.std(fp)*100:.1f})')

Validation set performance
F1:   95.3  TP:   93.7  FP:    2.1
F1:   92.6  TP:   95.9  FP:    4.1
F1:   93.2  TP:   93.4  FP:    3.1
F1:   92.0  TP:   97.8  FP:    5.2
F1:   82.0  TP:   70.1  FP:    2.4
F1:   94.9  TP:   90.4  FP:    0.3
F1:   87.8  TP:   79.3  FP:    0.9
F1:   94.3  TP:   95.3  FP:    7.0
F1:   94.4  TP:   97.0  FP:    4.8
F1:   93.5  TP:   94.4  FP:    3.4
F1:   79.5  TP:   74.3  FP:    5.5

 --------------------------------------------------
Mean (SD) F1: 90.9(5.2)
Mean (SD) TP: 89.2(9.4)
Mean (SD) FP: 3.5(1.9)


In [38]:
# Parameters for the final model trained through the native `lgb.train` API:
# the tuned values plus explicit settings for the remaining booster options.
params = best_params.copy()
params['boosting_type'] = 'gbdt'
params['subsample_for_bin'] = 200000
params['subsample'] = 1.0
params['subsample_freq'] = 0
params['colsample_bytree'] = 1.0
params['objective'] = 'binary'
params['metrics'] = ['binary']

# Number of boosting rounds, stated exactly once. `n_estimators` inside
# `params` is an alias that overrides `num_boost_round` (LightGBM warns about
# it), so having n_estimators=125 in params next to num_boost_round=100
# trains 125 rounds while reading as 100.
params.pop('n_estimators', None)
n_rounds = 125

# Fit on every available window (no hold-out) using the `Y` labels.
x_dst = lgb.Dataset(X_feat, label=Y)
bst = lgb.train(params, x_dst, num_boost_round=n_rounds)

# Persist the trained booster to disk.
bst.save_model('lgbm_gait_classifier_no-stairs.lgbm')

<lightgbm.basic.Booster at 0x7f367ff93b20>