# GMAC Optimization: Grid search of GMAC parameters (intra-subject)

The GMAC algorithm relies on the following parameters to map the raw acceleration
data to the arm-use space:
1. Pitch angle threshold. $\left( \theta_{th} \right)$
2. Acceleration magnitude threshold. $\left( a_{th} \right)$

### Getting the data

If you do not already have the data needed to run this notebook, download it from https://github.com/biorehab/upper-limb-use-assessment



### Standard modules

In [1]:
%reload_ext autoreload
%autoreload 2

In [2]:
import sys
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import pearsonr
import scipy
from scipy import signal
import pathlib
import itertools
import glob
import json

In [3]:
import seaborn as sns

In [4]:
from matplotlib.ticker import MaxNLocator

### Custom Modules

In [5]:
sys.path.append("../scripts")

import classification_algorithms as ca
import reduced_models as rm
import task_analysis as ta

import misc

In [6]:
import gmac

In [7]:
# Notebook-wide Matplotlib defaults, applied in one go:
#   - 'svg.fonttype' = 'none' leaves figure text as text in exported SVGs
#     instead of converting it to paths,
#   - the remaining keys select a light-weight sans-serif face with Helvetica
#     as the first-choice font.
plt.rcParams.update({
    'svg.fonttype': 'none',
    'font.family': 'sans-serif',
    'font.sans-serif': ['Helvetica'],
    'font.weight': 'light',
})


## Define notebook level constants

In [8]:
# Sampling period of the data in seconds (20 ms), i.e. a 50 Hz sampling rate.
dT = 0.02
# Sampling rate in Hz. Round before casting: int() truncates, so a reciprocal
# that lands just below an integer because of floating-point error would
# otherwise silently lose one sample per second.
Fs = int(round(1 / dT))

## Read the Controls and Patients Data

In [9]:
def _assign_segments_per_subject(limb_df):
    """
    Run misc.assign_segments on each subject's rows and stack the results.

    The rows of every unique subject in `limb_df` are passed to
    misc.assign_segments on their own (dur_th=1, notebook-level dT), and the
    per-subject outputs are concatenated row-wise in order of first
    appearance of each subject.
    """
    per_subject = [
        misc.assign_segments(limb_df[limb_df.subject == subject_id],
                             dur_th=1, dT=dT)
        for subject_id in limb_df.subject.unique()
    ]
    return pd.concat(per_subject, axis=0)


# Load the control recordings (left / right limb) and the patient recordings
# (affected / unaffected limb).
left, right = misc.read_data(subject_type='control')
aff, unaff = misc.read_data(subject_type='patient')

# Assign segments subject by subject, for each of the four limb data sets.
left, right, aff, unaff = (
    _assign_segments_per_subject(limb_df)
    for limb_df in (left, right, aff, unaff)
)

# Lookup table from limb key to its segmented data.
datadf = dict(left=left, right=right, aff=aff, unaff=unaff)

### Functions implementing the GMAC algorithm

In [10]:
def compute_confusion_matrix(actual: np.ndarray, estimated: np.ndarray) -> dict:
    """
    Count the confusion-matrix entries for two binary label sequences.

    Both inputs are converted to integer arrays and compared element-wise.
    A label of 1 is the positive class and 0 the negative class. Elements
    that are neither 0 nor 1 after the integer conversion are not counted in
    any cell (a sum-based test would otherwise count e.g. actual=2,
    estimated=0 as a true positive).

    Parameters
    ----------
    actual : array-like
        Reference (ground-truth) labels.
    estimated : array-like
        Predicted labels, same length as `actual`.

    Returns
    -------
    dict
        Counts stored under the keys "TN", "FP", "FN" and "TP", in that order.
    """
    actual = np.asarray(actual, dtype=int)
    estimated = np.asarray(estimated, dtype=int)

    # Explicit class masks keep each cell tied to one (actual, estimated) pair.
    actual_neg, actual_pos = actual == 0, actual == 1
    estimated_neg, estimated_pos = estimated == 0, estimated == 1

    return {
        "TN": np.sum(actual_neg & estimated_neg),  # both negative
        "FP": np.sum(actual_neg & estimated_pos),  # predicted use, none present
        "FN": np.sum(actual_pos & estimated_neg),  # missed use
        "TP": np.sum(actual_pos & estimated_pos),  # both positive
    }

## Dual Threshold

#### Parameter ranges for the grid search

In [11]:
# Root folder for the files written by this notebook.
basedir = "../data/output"

# GMAC outputs go to <basedir>/gmac; create the folder (and any missing
# parents) if it is not there yet.
outdir = pathlib.Path(basedir) / "gmac"
outdir.mkdir(parents=True, exist_ok=True)

# Candidate values for every GMAC parameter. Single-element lists hold that
# parameter fixed; the multi-valued ones span the grid.
# NOTE(review): p_th* / am_th* presumably are the pitch-angle and
# acceleration-magnitude thresholds (and their bands) - confirm against
# gmac.estimate_gmac2.
gmac_param_ranges = {
    "np": [int(Fs // 2)],
    "fc": [0.1],
    "nc": [1],
    "nam": [int(5*Fs)],
    "p_th": [*range(-90, 100, 10)],
    "p_th_band": [*range(0, 100, 20)],
    "am_th": [0, 0.1, 0.25, 0.5],
    "am_th_band": [0],
}

# Store the grid next to the results so that the parameter-combination
# indices saved later can be traced back to actual values.
with (outdir / "gmac_params.json").open("w") as params_file:
    params_file.write(json.dumps(gmac_param_ranges, indent=4))

#### Compute UL use
Do the same thing as the inter-subject model by computing the raw UL use 
signals for different subjects and different parameter combinations

In [12]:
# Column order of the per-subject raw output files.
dfcols = ["subject", "limb", "pcinx", "gmac", "gt"]

# Accelerometer columns handed to the GMAC estimator.
accl_cols = ['ax', 'ay', 'az']

# All parameter combinations of the grid. They do not depend on the limb or
# the subject, so the list is built once; its position index is what gets
# stored in the "pcinx" column.
param_combs = list(misc.generate_param_combinations_gmac(gmac_param_ranges))

# Number of all possible combinations
Ncombs = len(param_combs)

for limbkey in ["left", "right", "aff", "unaff"]:
    # Which limb?
    limbdf = datadf[limbkey]
    subjects = limbdf.subject.unique()

    for i, subj in enumerate(subjects):
        # Rows of this subject; selected once instead of once per combination.
        df = limbdf[limbdf.subject == subj]
        # Per-segment chunks, in the order produced by groupby('segment').
        # NOTE(review): the GMAC output is stacked in this order and paired
        # row-by-row with df.gnd below, which assumes the segments appear in
        # df in the same order and that no row lacks a segment label - confirm.
        segment_dfs = [_df for _, _df in df.groupby('segment')]
        # Ground truth as a column vector, shared by all combinations.
        _gt = df.gnd.values.reshape(-1, 1)

        # Collect one frame per combination and concatenate once at the end;
        # growing a DataFrame with pd.concat inside the loop re-copies all the
        # previously accumulated rows on every iteration.
        _frames = []
        for j, _pc in enumerate(param_combs):
            sys.stdout.write(f"\r{limbkey} {i:3d}, {j:3d} / {len(param_combs):3d}")
            # Compute gmac segment by segment and stitch the pieces together.
            _gmac = np.hstack([
                gmac.estimate_gmac2(_df[accl_cols].values, accl_farm_inx=0, Fs=Fs,
                                    params=_pc)
                for _df in segment_dfs
            ])
            # Raw data df: ground truth next to the GMAC output.
            _rawdf = pd.DataFrame(data=np.hstack((_gt, _gmac.reshape(-1, 1))),
                                  columns=["gt", "gmac"])
            _rawdf["subject"] = subj
            _rawdf["limb"] = limbkey
            _rawdf["pcinx"] = j
            _frames.append(_rawdf)

        # Assemble the subject's table in the documented column order.
        if _frames:
            gmac_raw_df = pd.concat(_frames, ignore_index=True)[dfcols]
        else:
            gmac_raw_df = pd.DataFrame(columns=dfcols)

        # Save data: one file per limb and subject.
        gmac_raw_df.to_csv(pathlib.Path(basedir, "gmac", f"raw_gmac_{limbkey}_{subj}.csv").as_posix(), index=False)

left   1, 307 / 380

KeyboardInterrupt: 

In [126]:
# Boolean training mask for one fold of a contiguous M-fold split
def get_inx(N, M, Ntest):
    """
    Return a length-N boolean array that is False on the held-out fold.

    The N positions are cut into M contiguous folds; fold `Ntest` covers the
    positions from Ntest * N // M (inclusive) to (Ntest + 1) * N // M
    (exclusive). Those positions are False in the result and every other
    position is True, so the mask selects the training samples and its
    negation selects the test samples.
    """
    positions = np.arange(N)
    fold_start = Ntest * N // M
    fold_stop = (Ntest + 1) * N // M
    return (positions < fold_start) | (positions >= fold_stop)
def _safe_ratio(numerator, denominator):
    """Return numerator / denominator, or NaN when the denominator is zero."""
    return numerator / denominator if denominator != 0 else np.nan


def _fold_performance_row(subject, limb, params, fold, split, actual, estimated):
    """
    Build one performance record for a single fold and data split.

    Parameters
    ----------
    subject, limb : identifiers copied into the record.
    params : dict
        Parameter combination the estimate was computed with.
    fold : int
        Fold index, stored under "segment".
    split : str
        "train" or "test", stored under "type".
    actual, estimated : array-like
        Ground-truth and estimated labels of the samples in this split.

    Returns
    -------
    dict
        Identifiers, parameters, the confusion counts, the counts divided by
        their total (keys suffixed with "r"), sensitivity, specificity and the
        Youden index. Ratios with a zero denominator are NaN.
    """
    counts = compute_confusion_matrix(actual, estimated)
    total = counts["TN"] + counts["FP"] + counts["FN"] + counts["TP"]
    sensitivity = _safe_ratio(counts["TP"], counts["TP"] + counts["FN"])
    specificity = _safe_ratio(counts["TN"], counts["TN"] + counts["FP"])
    return (
        {"subject": subject}
        | params
        | counts
        | {f"{_name}r": _safe_ratio(counts[_name], total)
           for _name in ("TN", "FP", "FN", "TP")}
        | {
            "sensitivity": sensitivity,
            "specificity": specificity,
            "youden": sensitivity + specificity - 1,
            "limb": limb,
            "segment": fold,
            "type": split,
        }
    )


# Number of contiguous folds each subject's recording is split into.
Kfolds = 10
# Column order of the performance table.
dfcols = (["subject"] + list(gmac_param_ranges.keys()) +
            ["limb"] + ["segment", "type"] +
            ["TN", "FP", "FN", "TP"] +
            ["TNr", "FPr", "FNr", "TPr"] +
            ["sensitivity", "specificity", "youden"])

# All parameter combinations; independent of limb and file, so built once.
# Their position index must match the "pcinx" column of the raw files.
param_combs = list(misc.generate_param_combinations_gmac(gmac_param_ranges))

# Records are gathered in a plain list and turned into a DataFrame once at the
# end; concatenating one-row frames in the innermost loop re-copies the whole
# table on every iteration.
_perf_rows = []
for limbkey in ["left", "right", "aff", "unaff"]:
    # Which limb?
    limbdf = datadf[limbkey]
    # Files for each limb, sorted so the row order does not depend on the
    # arbitrary order in which the file system lists them.
    lfiles = sorted(glob.glob(pathlib.Path(basedir, "gmac", f"raw_gmac_{limbkey}_*.csv").as_posix()))
    for i, _lf in enumerate(lfiles):
        # Read file
        _rawdf = pd.read_csv(_lf)
        # Subject name (each raw file holds a single subject).
        _subj = _rawdf.loc[0, "subject"]
        # Ground truth of that subject, taken from the in-memory limb data.
        _gnd = limbdf[limbdf.subject == _subj].gnd.values
        # Go through different parameter combinations
        for j, _pc in enumerate(param_combs):
            # GMAC output for this parameter combination
            _gmac = _rawdf.loc[_rawdf.pcinx == j, "gmac"].values
            for k in range(Kfolds):
                sys.stdout.write(f"\r{limbkey} {i:3d}, {j:3d} / {len(param_combs):3d}, {k:2d}")
                # True on the training samples, False on the held-out fold.
                _traininx = get_inx(len(_gmac), Kfolds, k)
                for _split, _mask in (("train", _traininx), ("test", ~_traininx)):
                    _perf_rows.append(_fold_performance_row(
                        _subj, limbkey, _pc, k, _split,
                        _gnd[_mask], _gmac[_mask]
                    ))

# Put the documented columns first, keeping any extra keys a parameter
# combination might carry; this also yields the right header when no rows
# were produced.
gmac_intra_perf_df = pd.DataFrame(_perf_rows)
gmac_intra_perf_df = gmac_intra_perf_df.reindex(
    columns=dfcols + [_c for _c in gmac_intra_perf_df.columns if _c not in dfcols]
)

# Save performance
gmac_intra_perf_df.to_csv(pathlib.Path(basedir, "gmac", "perf_gmac_intra.csv").as_posix(), index=False)

unaff   4, 379 / 380,  9