In [1]:
%matplotlib widget

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import random
from pathlib import Path

import lightgbm as lgb

from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.compose import ColumnTransformer

from sklearn.metrics import accuracy_score, balanced_accuracy_score, roc_auc_score, f1_score, roc_curve, make_scorer
from sklearn.decomposition import PCA
from sklearn.model_selection import cross_validate

from PfyMU.gait.train_classifier.core import load_datasets
from PfyMU.features import *

plt.style.use('ggplot')

In [68]:
from scipy.signal import butter, sosfiltfilt

def pca_func(x, fs):
    pca = PCA(n_components=3)
    
    return pca.fit_transform(x)


def vert_acc_func(x, fs):
    sos = butter(4, 2 * 0.8 / fs, btype='low', output='sos')
    g_est = sosfiltfilt(sos, x, axis=0)
    
    return np.sum(x * g_est, axis=1)


def mag_filter(x, fs):
    sos = butter(1, 2 * 5 / fs, btype='low', output='sos')
    x_ = np.linalg.norm(x, axis=1)
    return sosfiltfilt(sos, x_) - 1
#     return x_ - 1

def mag_band_filter(x, fs):
    sos = butter(4, [2 * 0.25 / fs, 2 * 5 / fs], btype='band', output='sos')
#     sos = butter(4, 2*5/fs, output='sos')
    x_ = np.linalg.norm(x, axis=1)

    return sosfiltfilt(sos, x_)

def band_filter(x, fs):
    sos = butter(4, [2 * 0.25 / fs, 2 * 5 / fs], btype='band', output='sos')
    return sosfiltfilt(sos, x, axis=0)
    
steps = {
    'walking': 0.4,
    'walking-impaired': 0.2,
    'sitting': 900,
    'standing': 300,
    'stairs-ascending': 0.3,
    'stairs-descending': 0.3,
    'cycling-50W': 0.3,
    'cycling-100W': 0.3,
    'default': 1.0
}

In [38]:
# gait_sets_path = Path('/Users/adamol/Documents/Datasets/gait/processed')
gait_sets_path = Path('/home/lukasadamowicz/Documents/Datasets/processed')

datasets = [
    gait_sets_path / 'bluesky2',
    gait_sets_path / 'daliac',
    gait_sets_path / 'ltmm',
    gait_sets_path / 'usc-had'
]

kwargs = {'paths': datasets, 'goal_fs': 50.0, 'window_step': steps, 'window_length': 3.0}

In [5]:
X, Y, subjects, activities = load_datasets(acc_mag=False, signal_function=pca_func, **kwargs)

In [6]:
X_vacc, _, _, _ = load_datasets(acc_mag=False, signal_function=vert_acc_func, **kwargs)

In [43]:
X_none, _, _, _ = load_datasets(acc_mag=False, signal_function=None, **kwargs)

In [69]:
X_mf, *_ = load_datasets(acc_mag=False, signal_function=mag_filter, **kwargs)

In [9]:
X_mbf, *_ = load_datasets(acc_mag=False, signal_function=mag_band_filter, **kwargs)

In [44]:
pca = PCA(n_components=3)
ratio_1_2 = np.zeros(X_none.shape[0])

for i in range(X_none.shape[0]):
    X_none[i, :, :] = pca.fit_transform(X_none[i, :, :])
    ratio_1_2[i] = pca.explained_variance_ratio_[1] / pca.explained_variance_ratio_[0]

In [11]:
print('Total samples (3.0s windows): ', Y.size)
print('Total walking samples: ', Y.sum())
print('Total non-walking samples: ', Y.size - Y.sum(), '\n')
print(f'% walking samples: {Y.sum() / Y.size * 100:.2f}')

Total samples (3.0s windows):  43163
Total walking samples:  20518
Total non-walking samples:  22645 

% walking samples: 47.54


In [12]:
unq_act, act_ct = np.unique(activities, return_counts=True)
N = np.sum(act_ct)
si = np.argsort(act_ct)
for a, c in zip(unq_act[si], act_ct[si]):
    print(f'{a:25s}: {c:5d} / {N:5d}{c/N:8.2f}')

sit-to-stand             :    16 / 43163    0.00
standing-assisted        :   207 / 43163    0.00
jumping-rope             :   212 / 43163    0.00
jumping                  :   311 / 43163    0.01
vacuuming                :   376 / 43163    0.01
lying                    :   378 / 43163    0.01
elevator-descending      :   475 / 43163    0.01
elevator-ascending       :   491 / 43163    0.01
running                  :   541 / 43163    0.01
sweeping                 :   612 / 43163    0.01
running-treadmill        :   755 / 43163    0.02
washing-dishes           :   776 / 43163    0.02
walking-left             :   787 / 43163    0.02
walking-right            :   842 / 43163    0.02
sleeping                 :  1126 / 43163    0.03
stairs-descending        :  2477 / 43163    0.06
cycling-50W              :  2509 / 43163    0.06
cycling-100W             :  2515 / 43163    0.06
stairs-ascending         :  2763 / 43163    0.06
standing                 :  2953 / 43163    0.07
sitting             

In [13]:
random.seed(398)
rnd_subjects = [i for i in np.unique(subjects) if np.unique(activities[subjects==i]).size > 3]
random.shuffle(rnd_subjects)

training_masks, validation_masks, testing_masks = [], [], []

for i in range(0, len(rnd_subjects), 4):
    trm = np.ones(len(subjects), dtype='bool')
    vm = np.zeros_like(trm, dtype='bool')
    tem = np.zeros_like(trm, dtype='bool')
    
    for j in range(4):
        trm &= subjects != rnd_subjects[i + j]
        if j < 2:
            vm |= subjects == rnd_subjects[i + j]
        else:
            tem |= subjects == rnd_subjects[i + j]
    
    training_masks.append(trm)
    validation_masks.append(vm)
    testing_masks.append(tem)

In [14]:
FB = Bank(window_length=None, window_step=None)

# add features
FB + Mean()
FB + MeanCrossRate()
# FB + StdDev()  # highly correlated with RMS
FB + Skewness()
FB + Kurtosis()
FB + Range()
FB + IQR()
FB + RMS()
FB + LinearSlope()
FB + SignalEntropy()
FB + SPARC()
FB + ComplexityInvariantDistance(normalize=True)
FB + JerkMetric(normalize=True)
FB + DimensionlessJerk(log=True, signal_type='acceleration')

FB + Autocorrelation(lag=15, normalize=True)
FB + SampleEntropy(m=2, r=0.5)
FB + PermutationEntropy(order=3, delay=1, normalize=True)
FB + RangeCountPercentage(range_min=0.4, range_max=1.5)
FB + RangeCountPercentage(range_min=-0.5, range_max=0.5)
FB + DominantFrequency(low_cutoff=1.0, high_cutoff=3.5)
FB + DominantFrequencyValue(low_cutoff=0.25, high_cutoff=5.0)
FB + PowerSpectralSum(low_cutoff=1.0, high_cutoff=3.5)
FB + SpectralFlatness(low_cutoff=0.0, high_cutoff=6.0)
FB + SpectralEntropy(low_cutoff=0.0, high_cutoff=5.0)
FB + DetailPowerRatio(wavelet='coif4', freq_band=[1.0, 3.0])

In [15]:
X_feat, feature_names = FB.compute(X, fs=50.0, windowed=True, columns=['PC1', 'PC2', 'PC3'])



In [16]:
vacc_feat, vacc_fnames = FB.compute(X_vacc, fs=50.0, windowed=True, columns=[''])

In [45]:
X_pca, featuture_names = FB.compute(X_none, fs=50.0, windowed=True, columns=['PC1', 'PC2', 'PC3'])



In [70]:
X_mf, mf_fnames = FB.compute(X_mf, fs=50.0, windowed=True, columns=[''])

In [19]:
X_mbf2, mft_fnames = FB.compute(X_mbf, fs=50.0, windowed=True, columns=[''])

In [20]:
print(vacc_feat.shape, X_vacc.shape)

(43163, 24) (43163, 150)


In [21]:
print(X_feat.shape, X.shape)

(43163, 72) (43163, 150, 3)


# LightGBM Model

In [22]:
lgb_model = lgb.LGBMClassifier(learning_rate=0.2, random_state=12049)

# PCA before windowing

Probably not the best approach since the activities are split up, compared to use case

In [23]:
scores = cross_validate(
    lgb_model,
    X_feat,
    Y,
    scoring={'F1': make_scorer(f1_score), 'bal_acc': make_scorer(balanced_accuracy_score)},
    cv=zip(training_masks, validation_masks),
    n_jobs=-1
)
print(f'Mean Bal. Acc.: {np.mean(scores["test_bal_acc"])*100:.2f}  Mean F1: {np.mean(scores["test_F1"])*100:.2f}')

Mean Bal. Acc.: 93.79  Mean F1: 91.70


In [None]:
f, (ax, ax1) = plt.subplots(ncols=2, figsize=(10, 5))

i = 1
for fp, tp, tr in zip(fpr, tpr, trsh):
    ax.plot(fp, tp, label=f'Fold {i}')
    ax1.plot(tr, np.sqrt(fp**2 + (1 - tp)**2))
    i += 1

ax1.set_xlim(-0.05, 1.05)
ax.legend()
ax.set_xlabel('False Positive Rate')
ax.set_ylabel('True Positive Rate')
f.tight_layout()

In [None]:
act_scores

# Vertical Acceleration

In [24]:
scores = cross_validate(
    lgb_model,
    vacc_feat,
    Y,
    scoring={'F1': make_scorer(f1_score), 'bal_acc': make_scorer(balanced_accuracy_score)},
    cv=zip(training_masks, validation_masks),
    n_jobs=-1
)
print(f'Mean Bal. Acc.: {np.mean(scores["test_bal_acc"])*100:.2f}  Mean F1: {np.mean(scores["test_F1"])*100:.2f}')

Mean Bal. Acc.: 90.53  Mean F1: 87.28


# PCA but after windowing

In [46]:
scores = cross_validate(
    lgb_model,
    X_pca,
    Y,
    scoring={'F1': make_scorer(f1_score), 'bal_acc': make_scorer(balanced_accuracy_score)},
    cv=zip(training_masks, validation_masks),
    n_jobs=-1
)
print(f'Mean Bal. Acc.: {np.mean(scores["test_bal_acc"])*100:.2f}  Mean F1: {np.mean(scores["test_F1"])*100:.2f}')

Mean Bal. Acc.: 93.15  Mean F1: 90.81


# Acceleration Magnitude minus 1

In [71]:
print(X_mf.shape)

scores = cross_validate(
    lgb_model,
    X_mf,
    Y,
    scoring={'F1': make_scorer(f1_score), 'bal_acc': make_scorer(balanced_accuracy_score)},
    cv=zip(training_masks, validation_masks),
    n_jobs=-1
)
print(f'Mean Bal. Acc.: {np.mean(scores["test_bal_acc"])*100:.2f}  Mean F1: {np.mean(scores["test_F1"])*100:.2f}')

(43163, 24)
Mean Bal. Acc.: 92.13  Mean F1: 89.24


# Filtered Acceleration Magnitude

In [27]:
scores = cross_validate(
    lgb_model,
    X_mbf,
    Y,
    scoring={'F1': make_scorer(f1_score), 'bal_acc': make_scorer(balanced_accuracy_score)},
    cv=zip(training_masks, validation_masks),
    n_jobs=-1
)
print(f'Mean Bal. Acc.: {np.mean(scores["test_bal_acc"])*100:.2f}  Mean F1: {np.mean(scores["test_F1"])*100:.2f}')

Mean Bal. Acc.: 90.90  Mean F1: 87.14


In [77]:
len(mft_fnames)

24