In [1]:
%matplotlib widget

In [2]:
import pandas as pd
import numpy as np
import random
from sklearn.base import clone
from sklearn.ensemble import IsolationForest, RandomForestClassifier
from sklearn.svm import OneClassSVM, SVC
from sklearn.preprocessing import RobustScaler, StandardScaler
from sklearn.decomposition import PCA
from sklearn.metrics import f1_score, precision_score, make_scorer
from sklearn.model_selection import cross_val_score
import xgboost as xgb
import lightgbm as lgb

In [5]:
# Baseline feature table (stored under the 'no_preprocessing' key).
data = pd.read_hdf(
    '../feature_exploration/features.h5',
    key='no_preprocessing',
)
# Everything from the 4th column onward is used as classifier input.
# NOTE(review): presumably the first three columns are the metadata
# (Subject / Activity / Label) -- confirm against the HDF file.
feats = data.iloc[:, 3:]
labels = data['Label']

In [6]:
# distinct activity names present in the feature table
data['Activity'].unique()

array(['sitting', 'standing', 'walking', 'walking-impaired',
       'standing-assisted', 'sit-to-stand', 'cycling-100W', 'cycling-50W',
       'jumping-rope', 'lying', 'running-treadmill', 'stairs-ascending',
       'stairs-descending', 'sweeping', 'vacuuming', 'washing-dishes',
       'elevator-ascending', 'elevator-descending', 'jumping', 'running',
       'sleeping', 'walking-left', 'walking-right'], dtype=object)

In [7]:
# Same feature file, but the 'incl_stairs' key: stairs count as the positive class.
data_strs = pd.read_hdf(
    '../feature_exploration/features.h5',
    key='incl_stairs',
)
# Columns from the 4th onward are the classifier inputs.
feats_strs = data_strs.iloc[:, 3:]
labels_strs = data_strs['Label']

In [8]:
# Build the k-fold splits on *subjects* rather than on rows: every fold holds
# out a group of up to `n_lo` subjects, so no subject contributes windows to
# both the training and the validation/test side of the same fold.
gbc = data.groupby(['Subject', 'Activity'], as_index=False).count()
# only subjects with more than 3 distinct activities are eligible for hold-out
fold_subjects = [i for i in gbc.Subject.unique() if gbc.loc[gbc.Subject == i].shape[0] > 3]

random.seed(5)  # fix the generation for repeatability
random.shuffle(fold_subjects)

# Rows belonging to any activity whose name contains 'walking'. This does not
# depend on the fold, so it is computed once instead of once per fold.
walking_activities = [act for act in data.Activity.unique() if 'walking' in act]
mask = data.Activity.isin(walking_activities).values

trn_m, utrn_m, val_m, tst_m = [], [], [], []
n_lo = 4  # subjects left out per fold
n_val = n_lo - (n_lo // 2)  # the first `n_val` held-out subjects go to validation
for i in range(0, len(fold_subjects), n_lo):
    # Slicing (instead of indexing i+j) keeps the final fold valid when the
    # number of eligible subjects is not a multiple of `n_lo`; that fold simply
    # holds out fewer subjects, and its test mask may then be empty.
    held_out = fold_subjects[i:i + n_lo]

    val_m.append(data.Subject.isin(held_out[:n_val]).values)
    tst_m.append(data.Subject.isin(held_out[n_val:]).values)

    # binary-classifier training rows: every row not from a held-out subject
    trn_m.append(~data.Subject.isin(held_out).values)
    # unary (one-class) training rows: the same rows, restricted to the
    # walking activities only
    utrn_m.append(trn_m[-1] & mask)

# (train, validation) boolean-mask pairs handed to `cross_val_score(cv=...)`;
# the test masks in `tst_m` are kept aside and not part of these pairs.
cv = tuple(zip(trn_m, val_m))
ucv = tuple(zip(utrn_m, val_m))

In [9]:
def uf1_scorer(estimator, X, y):
    """F1 scorer for one-class estimators, usable as ``scoring=`` in sklearn CV.

    Predictions equal to -1 are relabelled as 0 before scoring, so that they
    can be compared with the labels in `y`; all other predictions are left
    untouched.

    Parameters
    ----------
    estimator : fitted estimator exposing ``predict``
    X : samples to predict on
    y : reference labels for `X`

    Returns
    -------
    The value of ``f1_score(y, remapped_predictions)``.
    """
    predictions = estimator.predict(X)
    # relabel in place, on the array returned by `predict`
    is_outlier = predictions == -1
    predictions[is_outlier] = 0

    return f1_score(y, predictions)

# Overground Walking Classification Summary

This is a summary of the work-process for classifying overground gait from a lumbar accelerometer

## 0. Activity/Class balancing

Window length: 3s
Overlap: variable
Sampling Frequency: 50Hz (downsampled where necessary)

Loading data with a constant window for all activities didn't result in a nice distribution of activities, especially those which need to be captured for the classifier to work well, such as running, or stair ascent/descent.

In order to provide a better balance of classes, the spacing or overlap between windows was adjusted per activity to the following:

| Activity             | Overlap |
|----------------------|---------|
| jumping-rope         | 0.15    |
| stairs-descending    | 0.10    |
| stairs-ascending     | 0.10    |
| jumping              | 0.15    |
| lying                | 0.15    |
| elevator-ascending   | 0.15    |
| elevator-descending  | 0.15    |
| running              | 0.075   |
| sweeping             | 0.15    |
| standing             | 225     |
| running-treadmill    | 0.10    |
| cycling-50W          | 0.12    |
| cycling-100W         | 0.12    |
| walking-left         | 0.20    |
| walking-right        | 0.20    |
| walking-impaired     | 0.20    |
| walking              | 0.25    |
| sitting              | 400     |
| default              | 0.50    |

where a float indicates the % overlap between adjacent windows (ie 0.5 would be 50% overlap, or 75 samples between starts), and an integer indicates the number of samples between window starts (so 400 would exclude data of samples 151-400)

This results in the following distribution of positive/negative samples (stair ascending/descending in the negative class for now):

| Category    | Samples |
|-------------|---------|
| Total       | 105,029 |
| Walking     | 32,781  |
| Non-walking | 73,986  |
|-------------|---------|
| % walking   | 30.70%  |

and the following specific activity breakdown:

| Activity                 | Samples / Total   | % Samples |
|--------------------------|-------------------|-----------|
| sit-to-stand             |     30 / 105,029  |  0.00     |
| standing-assisted        |    401 / 105,029  |  0.00     |
| vacuuming                |    736 / 105,029  |  0.01     |
| lying                    |    739 / 105,029  |  0.01     |
| jumping-rope             |  1,373 / 105,029  |  0.01     |
| washing-dishes           |  1,538 / 105,029  |  0.01     |
| jumping                  |  1,869 / 105,029  |  0.02     |
| sleeping                 |  2,245 / 105,029  |  0.02     |
| elevator-descending      |  3,070 / 105,029  |  0.03     |
| elevator-ascending       |  3,119 / 105,029  |  0.03     |
| walking-left             |  3,774 / 105,029  |  0.04     |
| standing                 |  3,875 / 105,029  |  0.04     |
| walking-right            |  4,023 / 105,029  |  0.04     |
| sweeping                 |  4,090 / 105,029  |  0.04     |
| cycling-50W              |  6,251 / 105,029  |  0.06     |
| cycling-100W             |  6,266 / 105,029  |  0.06     |
| running                  |  6,764 / 105,029  |  0.06     |
| sitting                  |  6,920 / 105,029  |  0.07     |
| stairs-descending        |  7,336 / 105,029  |  0.07     |
| running-treadmill        |  7,415 / 105,029  |  0.07     |
| stairs-ascending         |  8,211 / 105,029  |  0.08     |
| walking-impaired         |  8,241 / 105,029  |  0.08     |
| walking                  | 16,743 / 105,029  |  0.16     |

Features were computed on the magnitude of the acceleration, computed per:
$$a_{mag} = \sqrt{a_x^2+a_y^2+a_z^2}$$

The following features were computed:

| Feature                      | Parameters | 
|------------------------------|------------|
| Mean                         |                                         |
| MeanCrossRate                |                                         |
| StdDev                       |                                         |
| Skewness                     |                                         |
| Kurtosis                     |                                         |
| Range                        |                                         |
| IQR                          |                                         |
| RMS                          |                                         |
| Autocorrelation              | lag=1, normalize=True                   |
| LinearSlope                  |                                         |
| SignalEntropy                |                                         |
| SampleEntropy                | m=4, r=1.0                              |
| PermutationEntropy           | order=3, delay=1, normalize=True        |
| ComplexityInvariantDistance  | normalize=True                          |
| RangeCountPercentage         | range_min=0, range_max=1.0              |
| RatioBeyondRSigma            | r=2.0                                   |
| JerkMetric                   | normalize=True                          |
| DimensionlessJerk            | log=True, signal_type='acceleration'    |
| SPARC                        |                                         |
| DominantFrequency            | low_cutoff=0.25, high_cutoff=5.0        |
| DominantFrequencyValue       | low_cutoff=0.25, high_cutoff=5.0        |
| PowerSpectralSum             | low_cutoff=0.25, high_cutoff=5.0        |
| SpectralFlatness             | low_cutoff=0.25, high_cutoff=5.0        |
| SpectralEntropy              | low_cutoff=0.25, high_cutoff=5.0        |
| DetailPower                  | wavelet='coif4', freq_band=[1.0, 3.0]   |
| DetailPowerRatio             | wavelet='coif4', freq_band=[1.0, 3.0]   |

<div class="alert alert-block alert-warning">
<b>TODO:</b> Explore effect of changing parameters. Could use PPScore to see effect on classification
</div>

## 1. Model Selection

Initial testing on binary classifiers SVM/RandomForest/XGBoost and one-class classifiers IsolationForest/OneClassSVM

Initial testing was done with stair ascent/descent as the __*negative*__ class

Overall performance showed that a binary classifier would achieve higher results. Initially dropping some features showed little/no change in performance, but as features were dropped based on feature performance in the classifiers this isn't surprising

### 1a: Random Forest

In [32]:
# Small random forest, cross-validated on the subject-wise folds.
# `random_state` is fixed so the reported F1 is reproducible after a kernel
# restart (the forest is otherwise seeded differently on every run).
RF = RandomForestClassifier(n_estimators=20, random_state=5)
cv_res = cross_val_score(
    RF, 
    feats, 
    labels, 
    scoring=make_scorer(f1_score), 
    cv=cv, 
    n_jobs=-1
)
print(f'Average F1: {np.mean(cv_res)*100:.2f}')

Average F1: 81.32


### 1b: SVM
SVM takes a while to run

Average F1: ~58.84

In [21]:
# RBF-kernel SVM on the same folds.
svm = SVC(kernel='rbf', C=1.0)
run = False  # the cross-validation below only executes when this is True
if run:
    cv_res = cross_val_score(
        svm, feats, labels,
        cv=cv,
        scoring=make_scorer(f1_score),
        n_jobs=-1,
    )
    print(f'Average F1: {cv_res.mean() * 100:.2f}')

Average F1: 58.84


### 1c. XGBoost
In the end showed similar performance to the random forest, but has the benefit of faster run times and better persistence/saving

In [33]:
# XGBoost with library-default hyper-parameters, scored by F1 on each fold.
clf = xgb.XGBClassifier()
cv_res = cross_val_score(
    clf, feats, labels,
    cv=cv,
    scoring=make_scorer(f1_score),
    n_jobs=-1,
)
print(f'Average F1: {cv_res.mean() * 100:.2f}')

Average F1: 84.01


### 1d: Isolation Forest

In [38]:
# One-class model: trained on the walking-only training masks (`ucv`) and
# scored with `uf1_scorer`, which maps the -1 predictions to 0 before F1.
# `random_state` is fixed so the reported F1 is reproducible after a kernel
# restart (the forest is otherwise seeded differently on every run).
IF = IsolationForest(n_estimators=20, random_state=5)
cv_res = cross_val_score(
    IF,
    feats,
    labels,
    scoring=uf1_scorer,
    cv=ucv,
    n_jobs=-1
)
print(f'Average F1: {np.mean(cv_res)*100:.2f}')

Average F1: 61.86


### 1e: One-class SVM
Also takes a while

Average F1: ~48.52

In [39]:
# One-class SVM, evaluated on the walking-only training masks (`ucv`).
usvc = OneClassSVM(kernel='rbf')
run = False  # the cross-validation below only executes when this is True
if run:
    cv_res = cross_val_score(
        usvc, feats, labels,
        cv=ucv,
        scoring=uf1_scorer,
        n_jobs=-1,
    )
    print(f'Average F1: {cv_res.mean() * 100:.2f}')

Average F1: 48.52


Given the large number of classes, it isn't surprising that the binary classifiers end up working better, given that they should be able to account for the feature space occupied by the other activities better

### Stair ascent/descent

The most confusion in the training results from the stair ascent/descent tasks, which is intuitive given that they are the most similar to walking, especially when using acceleration magnitude. Therefore, some tests including stairs in the positive class showed that the performance jumped about 10 points, up to $\sim 93\%$.  

The intent with lumping them together was to have cascaded models where another classifier, or DSP model would differentiate between normal overground walking and stair climbing.

#### 1f: XGBoost with stairs as positive class

From $\sim 84\%$ to $\sim 94\%$ with the inclusion of stairs

In [10]:
# Default XGBoost again, this time on the table where stairs are labelled positive.
# NOTE(review): `cv` holds row masks sized to `data`; using them here assumes
# `data_strs` has the same rows in the same order -- confirm.
clf = xgb.XGBClassifier()
cv_res = cross_val_score(
    clf, feats_strs, labels_strs,
    cv=cv,
    scoring=make_scorer(f1_score),
    n_jobs=-1,
)
print(f'Average F1: {cv_res.mean() * 100:.2f}')

Average F1: 94.65


### lazypredict

Next, I tried lazypredict, a python package that runs a bunch of classifiers without any optimization. It ran on 1 fold of the training/validation data, generating the below results:

![lazypredict fold 1 results](lazypredict_results.png "lazypredict results for first fold")

Looking at the results, not only did LightGBM work the best (only for one fold, but still), it is also quite fast. I did some testing, and the results are certainly no worse than other models, and might be slightly better, with very fast training/prediction times as well

## 2. Parameter selection

With several promising models, further testing was on which model features yielded the best results. This search was done using the scikit-learn RandomizedSearchCV, over several of the prominent parameters for each of the models chosen:

- RandomForest
- XGBoost
- LightGBM

note that the *mean_test_score* in the RFC results is accuracy, not F1 score

In [37]:
# Saved random-forest search results: show the five best-ranked candidates,
# keeping only the score and the searched-parameter columns.
rfc_rscv = pd.read_csv('rfc_cv_results_incl_stairs.csv', index_col=0)
cols = ['mean_test_score', *(name for name in rfc_rscv.columns if 'param_' in name)]
rfc_rscv.sort_values('rank_test_score')[cols].head(5)

Unnamed: 0,mean_test_score,param_criterion,param_max_depth,param_min_samples_leaf,param_min_samples_split,param_n_estimators
40,0.941335,gini,,4,20,22
8,0.940712,gini,16.0,1,20,25
13,0.93992,gini,16.0,2,2,99
69,0.939771,gini,16.0,1,20,47
70,0.93956,gini,,1,100,51


In [33]:
# Saved XGBoost search results: show the five best-ranked candidates by F1,
# keeping only the score and the searched-parameter columns.
xgb_rscv = pd.read_csv('xgbrf_cv_results_incl_stairs.csv')
cols = ['mean_test_F1', *(name for name in xgb_rscv.columns if 'param_' in name)]
xgb_rscv.sort_values('rank_test_F1')[cols].head(5)

Unnamed: 0,mean_test_F1,param_gamma,param_importance_type,param_learning_rate,param_max_depth,param_n_estimators,param_reg_alpha,param_reg_lambda,param_tree_method
62,0.937247,1.0,gain,0.002833,10,67,1.25,2.0,hist
29,0.936439,0.0,weight,0.029959,12,61,1.0,0.1,exact
77,0.936391,0.01,weight,1.5e-05,12,73,1.0,2.0,hist
85,0.935336,0.01,weight,0.003325,11,96,1.0,1.0,hist
2,0.935229,1.0,gain,0.231022,10,59,1.0,2.0,hist


In [38]:
# Saved LightGBM search results: show the five best-ranked candidates by F1,
# keeping only the score and the searched-parameter columns.
lgb_rscv = pd.read_csv('lgb_cv_results_incl_stairs.csv')
cols = ['mean_test_F1', *(name for name in lgb_rscv.columns if 'param_' in name)]
lgb_rscv.sort_values('rank_test_F1')[cols].head(5)

Unnamed: 0,mean_test_F1,param_boosting_type,param_learning_rate,param_max_depth,param_n_estimators,param_num_leaves
179,0.946718,goss,0.2,7,125,41
126,0.944939,dart,0.3,21,125,31
108,0.944558,dart,0.2,14,125,29
127,0.944139,dart,0.3,-1,125,26
199,0.944088,dart,0.3,21,125,30


While there is a minuscule advantage gained from using the picked parameters, only the learning rate was taken moving forward, as the performance of the default parameters is almost exactly the same

### Number of estimators

With LightGBM doing well (and its speed), the following is moving forwards with the __LightGBM being the classifier of choice__

While the RandomizedSearchCV showed that the number of estimators was best at 125, there is still a tradeoff, which should be analyzed, not only for speed but for storage space when the final model needs to be saved.

The results of testing the number of estimators are shown in the below graph (parameters: *learning_rate=0.2*):

![# estimator testing](gait_classifier/lightgbm_n-estimators_performance.png "Number of Estimator performance")

The results show that performance does keep increasing as estimators are added, but also that 75 estimators results in essentially equivalent performance, and the % change in score drops off for the most part after this.

## 3. Feature Elimination

Next, testing on which/whether or not features could be removed from the model, while maintaining sufficient performance.

Using the Recursive Feature Elimination CV from sklearn, on both the LightGBM model, and a single Decision Tree, similar results were obtained - essentially all of the features are necessary to maintain performance:

![RFECV results](gait_classifier/RFECV_results.png "RFECV results")

# Feature Parameters

At this point, since several of the features have adjustable parameters (and while most of them seemed to be based on optimal/original paper values, this is not guaranteed), the importance of adjusting these parameters was explored.  This first step was using Predictive Power Score to assess which features independently had the best performance towards classifying gait.

<img src="../feature_exploration/feature_parameter_pps/autocorrelation.png" alt="Autocorrelation" width="600"/>
<img src="../feature_exploration/feature_parameter_pps/sample_entropy.png" alt="Sample Entropy" width="600"/>
<img src="../feature_exploration/feature_parameter_pps/permutation_entropy.png" alt="Permutation Entropy" width="600"/>
<img src="../feature_exploration/feature_parameter_pps/range_count_percentage.png" alt="Range Count Percentage" width="600"/>
<img src="../feature_exploration/feature_parameter_pps/ratio_beyond_r_sigma.png" alt="Ratio Beyond R Sigma" width="600"/>
<img src="../feature_exploration/feature_parameter_pps/dominant_frequency.png" alt="Dominant Frequency" width="600"/>
<img src="../feature_exploration/feature_parameter_pps/power_spectral_sum.png" alt="Power Spectral Sum" width="600"/>
<img src="../feature_exploration/feature_parameter_pps/spectral_entropy.png" alt="Spectral Entropy" width="600"/>
<img src="../feature_exploration/feature_parameter_pps/spectral_flatness.png" alt="Spectral Flatness" width="600"/>
<img src="../feature_exploration/feature_parameter_pps/detail_power_ratio.png" alt="Detail Power Ratio" width="600"/>

After doing the above testing, did more testing of the LightGBM classifier with the parameters that looked more promising, and started exploring stairs back in the negative class again. The class distribution was also tweaked so that the positive and negative classes were more closely balanced, to the below distribution, that also overall has less overlap between windows:

| Activity             | Overlap |
|----------------------|---------|
| walking              | 0.40    |
| walking-impaired     | 0.20    |
| sitting              | 900     |
| standing             | 300     |
| stairs-ascending     | 0.30    |
| stairs-descending    | 0.30    |
| cycling-50W          | 0.30    |
| cycling-100W         | 0.30    |
| default              | 1.00    |


| Category    | Samples |
|-------------|---------|
| Total       | 43,163  |
| Walking     | 20,518  |
| Non-walking | 22,645  |
|-------------|---------|
| % walking   | 47.54%  |

and the following specific activity breakdown:

| Activity                 | Samples / Total  | % Samples |
|--------------------------|------------------|-----------|
| sit-to-stand             |     16 / 43,163  |  0.00     |
| standing-assisted        |    207 / 43,163  |  0.00     |
| jumping-rope             |    212 / 43,163  |  0.00     |
| vacuuming                |    311 / 43,163  |  0.01     |
| lying                    |    376 / 43,163  |  0.01     |
| elevator-descending      |    378 / 43,163  |  0.01     |
| elevator-ascending       |    475 / 43,163  |  0.01     |
| washing-dishes           |    491 / 43,163  |  0.01     |
| running                  |    541 / 43,163  |  0.01     |
| sweeping                 |    612 / 43,163  |  0.01     |
| running-treadmill        |    755 / 43,163  |  0.02     |
| washing-dishes           |    776 / 43,163  |  0.02     |
| walking-left             |    787 / 43,163  |  0.02     |
| walking-right            |    842 / 43,163  |  0.02     |
| sleeping                 |  1,126 / 43,163  |  0.03     |
| stairs-descending        |  2,477 / 43,163  |  0.06     |
| cycling-50W              |  2,509 / 43,163  |  0.06     |
| cycling-100W             |  2,515 / 43,163  |  0.06     |
| stairs-ascending         |  2,763 / 43,163  |  0.06     |
| standing                 |  2,953 / 43,163  |  0.07     |
| sitting                  |  3,152 / 43,163  |  0.07     |
| walking-impaired         |  8,241 / 43,163  |  0.19     |
| walking                  | 10,648 / 43,163  |  0.25     |

test against GaitPy existing classifier