# GMAC Pitch: Grid search of GMAC parameters

The pitch estimation for the GMAC algorithm has the following parameters:
1. Moving average window size for pitch estimation. $\left( n_{p} \right)$

### Getting the data

If you do not already have the data needed to run this notebook, download it from https://github.com/biorehab/upper-limb-use-assessment

### Standard modules

In [1]:
%reload_ext autoreload
%autoreload 2

In [13]:
import sys
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import pearsonr
import scipy
from scipy import signal
import pathlib
import itertools
import glob
import json

In [3]:
import seaborn as sns

### Custom Modules

In [4]:
sys.path.append("../scripts")

import classification_algorithms as ca
import reduced_models as rm
import task_analysis as ta

import misc

In [5]:
import gmac

In [6]:
# Notebook-wide matplotlib text settings, applied in a single update.
# 'svg.fonttype' = 'none' keeps text in exported SVGs as text elements
# instead of converting the glyphs to paths.
plt.rcParams.update({
    'svg.fonttype': 'none',
    'font.family': 'sans-serif',
    'font.sans-serif': ['Helvetica'],
    'font.weight': 'light',  # light font weight for all text
})


## Define notebook level constants

In [7]:
# Sampling period of the data is 20 ms (0.02 s), i.e. a sampling rate of 50 Hz.
dT = 0.02
# Round before casting: int() alone truncates, so a reciprocal that lands just
# below an integer because of floating-point error would lose one sample.
Fs = int(round(1 / dT))

## Read the Controls and Patients Data

In [8]:
def _segment_each_subject(rawdf):
    """Run misc.assign_segments on each subject's rows and stack the results.

    Subjects are processed in the order returned by `rawdf.subject.unique()`,
    and the per-subject outputs are concatenated row-wise in that order.
    """
    pieces = []
    for subj in rawdf.subject.unique():
        subj_rows = rawdf[rawdf.subject == subj]
        pieces.append(misc.assign_segments(subj_rows, dur_th=1, dT=dT))
    return pd.concat(pieces)


# Load the control-subject data (left, right) and the patient data (aff, unaff).
left, right = misc.read_data(subject_type='control')
aff, unaff = misc.read_data(subject_type='patient')

# Replace each raw frame by its segmented version.
left = _segment_each_subject(left)
right = _segment_each_subject(right)
aff = _segment_each_subject(aff)
unaff = _segment_each_subject(unaff)

# Lookup table from limb key to its segmented frame.
datadf = dict(left=left, right=right, aff=aff, unaff=unaff)

#### Parameter ranges for the grid search

In [15]:
# Everything produced by the pitch grid search goes under <basedir>/pitch;
# the directory (and any missing parents) is created on first run.
basedir = "../data/output"
outdir = pathlib.Path(basedir) / "pitch"
outdir.mkdir(parents=True, exist_ok=True)

# Candidate moving-average window sizes, given as fractions/multiples of Fs.
pitch_param_ranges = {
    "np": [int(_w) for _w in (1, Fs // 2, Fs, 2*Fs, 4*Fs, 8*Fs)],
}

# Store the searched grid alongside the results.
with (outdir / "pitch_params.json").open("w") as f:
    json.dump(pitch_param_ranges, f, indent=4)

# Grid size: with a single parameter this is just its number of values.
Ncombs = len(pitch_param_ranges["np"])

In [10]:
# Estimate pitch for every limb, subject and window size, and save one CSV per
# limb. Each CSV has the subject id, one estimate column per window size
# ("np<window>"), and the reference pitch taken from the data's "pitch" column.
accl_cols = ['ax', 'ay', 'az']
dfcols = ["subject"] + [f"np{int(_np)}" for _np in pitch_param_ranges["np"]] + ["pitch"]

for limbkey in ["left", "right", "aff", "unaff"]:
    # Which limb?
    limbdf = datadf[limbkey]
    subjects = limbdf.subject.unique()

    # Collect one frame per subject and concatenate once after the loop.
    # Growing a DataFrame with pd.concat inside the loop is quadratic, and
    # starting from an empty frame relies on pandas' deprecated handling of
    # empty entries when it determines the result dtypes.
    subj_frames = []

    for subj in subjects:
        # The subject's rows and segment labels do not depend on the window
        # size, so they are extracted once per subject.
        _sinx = limbdf.subject == subj
        subjdf = limbdf[_sinx]
        segs = subjdf.segment.unique()

        # One column of pitch estimates per window size.
        subj_pitch = np.zeros((len(subjdf), Ncombs))
        for _npinx, _np in enumerate(pitch_param_ranges["np"]):
            sys.stdout.write(f"\rEstimating pitch for subject {subj} {int(_np)}")
            sys.stdout.flush()
            # Pitch is estimated independently within each segment.
            # NOTE(review): farm_inx=0 presumably selects the 'ax' column as
            # the forearm axis - confirm against gmac.estimate_pitch.
            _pitch = [gmac.estimate_pitch(
                        accl=subjdf.loc[subjdf.segment == _seg, accl_cols].values,
                        farm_inx=0,
                        nwin=int(_np))
                      for _seg in segs]
            # Segments are stacked in order of first appearance.
            # NOTE(review): this lines up with the row-ordered reference pitch
            # below only if each segment's rows are contiguous - confirm
            # against misc.assign_segments.
            subj_pitch[:, _npinx] = np.hstack(_pitch)

        # Prepend the subject id and append the reference pitch column.
        subj_pitch = np.hstack((subj * np.ones((subj_pitch.shape[0], 1)),
                                subj_pitch,
                                subjdf.pitch.values.reshape(-1, 1)))
        subj_frames.append(pd.DataFrame(subj_pitch, columns=dfcols))

    # A limb without subjects still produces a header-only CSV.
    if subj_frames:
        pitch_df = pd.concat(subj_frames, ignore_index=True)
    else:
        pitch_df = pd.DataFrame(columns=dfcols)

    # Save data
    pitch_df.to_csv(pathlib.Path(outdir, f"raw_pitch_{limbkey}.csv"), index=False)

Estimating pitch for subject 2 100

Estimating pitch for subject 5 4000

## Choosing the best filter parameter for pitch estimation

In [11]:
# Read data
pitch_df = {
    lmb: pd.read_csv(pathlib.Path(outdir, f"raw_pitch_{lmb}.csv"))
    for lmb in ['left', 'right', 'aff', 'unaff']
}

In [12]:
# Compute the performance of the pitch estimation: one row per
# (limb, subject, window size) with the absolute-error statistic and the
# Pearson correlation between the estimated and the reference pitch.
# Rows are collected in a list and turned into a DataFrame once, instead of
# growing the frame with pd.concat on every iteration.
_records = []
for _lmb in ["left", "right", "aff", "unaff"]:
    _df = pitch_df[_lmb]
    for _subj in _df.subject.unique():
        _sinx = _df.subject == _subj
        # Reference pitch of this subject.
        _gt = _df.loc[_sinx, "pitch"].values
        for _np in pitch_param_ranges["np"]:
            _est = _df.loc[_sinx, f"np{_np}"].values
            # Median (50th percentile) absolute error.
            # NOTE(review): the previous comment said "95th percentile" while
            # the code has always used 50 - confirm which one is intended.
            _err = np.percentile(np.abs(_est - _gt), 50)
            # Correlation coefficient
            _corr = scipy.stats.pearsonr(_est, _gt)[0]
            _records.append((_subj, _np, _err, _corr, _lmb))

_limbdf = pd.DataFrame(_records, columns=["subject", "np", "err", "corr", "limb"])

# Save data
_limbdf.to_csv(pathlib.Path(outdir, "perf_pitch_est.csv"), index=False)