# GMAC Optimization: Grid search of all GMAC parameters

The GMAC algorithm has the following parameters required to map the raw acceleration
data to the arm-use space. These parameters are:
1. Pitch angle threshold. $\left( \theta_{th} \right)$
2. Acceleration magnitude threshold. $\left( a_{th} \right)$

### Getting the data

If you do not already have the data needed to run this notebook,
you can download it from https://github.com/biorehab/upper-limb-use-assessment



### Standard modules

In [1]:
# Load IPython's autoreload extension and select mode 2, so that edited
# modules are re-imported automatically before each cell is executed.
%reload_ext autoreload
%autoreload 2

In [2]:
import sys
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import pearsonr
import scipy
from scipy import signal
import pathlib
import itertools
import glob
import json

In [3]:
import seaborn as sns

In [4]:
from matplotlib.ticker import MaxNLocator

### Custom Modules

In [5]:
sys.path.append("../scripts")

import classification_algorithms as ca
import reduced_models as rm
import task_analysis as ta

import misc

In [6]:
import gmac

In [7]:
# Figure text settings, applied in one go: SVG font type 'none', and a
# light-weight Helvetica as the sans-serif font family.
plt.rcParams.update({
    'svg.fonttype': 'none',
    'font.family': 'sans-serif',
    'font.sans-serif': ['Helvetica'],
    'font.weight': 'light',
})


## Define notebook level constants

In [8]:
# Sampling period of the data is 20 ms (0.02 s), i.e. a 50 Hz sampling rate.
dT = 0.02
# round() instead of int(): int() truncates, so a reciprocal that lands just
# below a whole number because of floating-point error would be off by one.
Fs = round(1 / dT)

## Read the Controls and Patients Data

In [9]:
# Load the recordings: the control data comes back as a (left, right) pair
# and the patient data as an (aff, unaff) pair.
left, right = misc.read_data(subject_type='control')
aff, unaff = misc.read_data(subject_type='patient')

# Assign segments one subject at a time and stack the per-subject results
# back into a single frame per limb, keyed by limb name.
datadf = {}
for _limb, _raw in (("left", left), ("right", right),
                    ("aff", aff), ("unaff", unaff)):
    _per_subject = [
        misc.assign_segments(_raw[_raw.subject == subj], dur_th=1, dT=dT)
        for subj in _raw.subject.unique()
    ]
    datadf[_limb] = pd.concat(_per_subject)

# Keep the individual limb names bound to the segmented frames as well.
left, right = datadf["left"], datadf["right"]
aff, unaff = datadf["aff"], datadf["unaff"]

### Supporting functions

In [10]:
def compute_confusion_matrix(actual: np.ndarray, estimated: np.ndarray) -> dict:
    """
    Computes the components of the confusion matrix for binary labels.

    Parameters
    ----------
    actual : array-like
        Ground-truth labels; values are cast to int and expected to be 0 or 1
        (booleans are accepted).
    estimated : array-like
        Estimated labels, cast and interpreted the same way as ``actual`` and
        expected to have the same length.

    Returns
    -------
    dict
        Number of samples in each cell, keyed by "TN", "FP", "FN" and "TP".
        A sample whose label is neither 0 nor 1 is not counted in any cell.
    """
    actual = np.asarray(actual, dtype=int)
    estimated = np.asarray(estimated, dtype=int)

    # Explicit masks instead of sums of labels: a sum such as
    # `actual + estimated == 2` would also match the pair (2, 0).
    act_pos, act_neg = actual == 1, actual == 0
    est_pos, est_neg = estimated == 1, estimated == 0
    return {
        "TN": np.sum(act_neg & est_neg),
        "FP": np.sum(act_neg & est_pos),
        "FN": np.sum(act_pos & est_neg),
        "TP": np.sum(act_pos & est_pos),
    }

## Dual Threshold

#### Parameter ranges for the grid search

In [11]:
# Root folder for the outputs of this run; the GMAC results live in a
# "gmac" sub-folder, created together with any missing parents.
basedir = "../data/output-r1-full"
outdir = pathlib.Path(basedir) / "gmac"
outdir.mkdir(parents=True, exist_ok=True)

# Candidate values of every GMAC parameter explored by the grid search.
# "np" and "nam" take the values 1, Fs // 2 and Fs.
gmac_param_ranges = {
    "np": [int(n) for n in (1, Fs // 2, Fs)],
    "fc": [0.01, 0.1, 1],
    "nc": [2, 4],
    "nam": [int(n) for n in (1, Fs // 2, Fs)],
    "p_th": [*range(-90, 100, 10)],
    "p_th_band": [*range(0, 100, 20)],
    "am_th": [0, 0.1, 0.25],
    "am_th_band": [0],
}

# Store the grid next to the results so the run can be traced back to it.
with (outdir / "gmac_params.json").open("w") as f:
    f.write(json.dumps(gmac_param_ranges, indent=4))

In [12]:
# Grid search: for every limb and subject, run GMAC with every parameter
# combination, compare the output with the ground-truth labels (`gnd`) and
# collect confusion-matrix based performance measures in `gmac_perf_df`.
count_cols = ["TN", "FP", "FN", "TP"]
rate_cols = ["TNr", "FPr", "FNr", "TPr"]
dfcols = (["subject"] + list(gmac_param_ranges.keys()) +
          ["limb"] +
          count_cols +
          rate_cols +
          ["sensitivity", "specificity", "youden"])

# Accelerometer columns handed to the GMAC estimator.
accl_cols = ['ax', 'ay', 'az']

# All parameter combinations. They do not depend on the limb or the subject,
# so they are generated once, up front.
param_combs = list(misc.generate_param_combinations_gmac(gmac_param_ranges))
Ncombs = len(param_combs)

perf_file = pathlib.Path(basedir, "gmac", "perf_gmac.csv").as_posix()

gmac_perf_df = pd.DataFrame(columns=dfcols)
_limb_frames = []
for limbkey in ["left", "right", "aff", "unaff"]:
    # Which limb?
    limbdf = datadf[limbkey]
    subjects = limbdf.subject.unique()

    # One dict per (subject, parameter combination). Rows are gathered in a
    # list and turned into a frame once per limb; concatenating a one-row
    # frame on every iteration re-copies the whole result each time.
    _rows = []
    for i, subj in enumerate(subjects):
        # Subject data, segments and ground truth are the same for every
        # parameter combination, so they are prepared once per subject.
        df = limbdf[limbdf.subject == subj]
        # NOTE(review): groupby yields the segments in sorted key order; the
        # stacked GMAC output only lines up with `df.gnd` if the rows of `df`
        # are already ordered by segment - confirm.
        _segments = [_df for _, _df in df.groupby('segment')]
        _gnd = df.gnd.values

        # Go through all parameter combinations.
        for j, _pc in enumerate(param_combs):
            sys.stdout.write(f"\r{limbkey} {i:3d}, {j:3d} / {Ncombs:3d}")
            # Compute gmac segment by segment and stitch the pieces together.
            _gmac = np.hstack([
                gmac.estimate_gmac2(_df[accl_cols].values, accl_farm_inx=0, Fs=Fs,
                                    params=_pc)
                for _df in _segments
            ])
            # Compute performance
            _perf = compute_confusion_matrix(actual=_gnd, estimated=_gmac)
            _rows.append({"subject": subj} | _pc | _perf | {"limb": limbkey})

    if _rows:
        _limbperf = pd.DataFrame(_rows)

        # Confusion-matrix cells as a fraction of all samples of the row.
        _total = _limbperf[count_cols].sum(axis=1)
        for _cnt, _rate in zip(count_cols, rate_cols):
            _limbperf[_rate] = _limbperf[_cnt] / _total

        _limbperf["sensitivity"] = _limbperf.TP / (_limbperf.TP + _limbperf.FN)
        _limbperf["specificity"] = _limbperf.TN / (_limbperf.TN + _limbperf.FP)
        _limbperf["youden"] = _limbperf.sensitivity + _limbperf.specificity - 1

        # Declared columns first, then any additional key found in the rows.
        _extra = [c for c in _limbperf.columns if c not in dfcols]
        _limb_frames.append(_limbperf.reindex(columns=dfcols + _extra))
        gmac_perf_df = pd.concat(_limb_frames, ignore_index=True)

    # Save data. The file is rewritten after every limb, so the results of
    # the limbs completed so far are on disk if the search is interrupted.
    gmac_perf_df.to_csv(perf_file, index=False)

left   8, 2846 / 153900