# Import and pre-process the EMG data

## NOTE: Raw data is not currently shared. This code is only provided for reference.

The preprocessing includes the following:
1. Import the data
2. Create the linear envelope:
   -  Band-pass filter
   - Rectification
   - Low-pass filter
3. Epoch the linear envelope into individual steps.
4. Normalize the linear envelope

In [None]:
import os

import numpy as np
import pandas as pd
import pyarrow.feather as feather
import ray
from scipy.io import loadmat

import EssentialEMGFuncs as ess

# Shut down any Ray session that is still around before starting a new one;
# the dashboard is bound to localhost only.
ray.shutdown()
ray.init(dashboard_host='127.0.0.1')

# Figures are written to disk only when this flag is switched on.
save_fig = False

# Subject and experiment configuration.
# An empty answer at the prompt falls back to subject "PS25".
subj = input("Enter the subject number : ") or "PS25"
conds = ['presit', 'postsit', 'LEI', 'LME', 'RME', 'REI']
data_path = './data/emg/'
subj_path = f'{data_path}{subj}/'

# Create the folder for raw figures if it is not there yet.
figs_dir = f'{subj_path}figs'
if not os.path.isdir(figs_dir):
    os.mkdir(figs_dir)

# Start and end of the perturbation period.
pert_start = 120
pert_end = 480
# Target rate handed to the resampling helper (presumably Hz — TODO confirm).
target_freq = 1000


In [None]:
# Import raw EMG files and resample. Import stepping events.
par_read_csv = ray.remote(pd.read_csv)

# Submit one Ray task per condition first, so that all CSV files are read at
# once; the dict holds the pending results until they are collected below.
raw_data = {
    c: par_read_csv.remote(subj_path + subj + '_' + c + '.csv') for c in conds
}

for c in conds:
    frame = ray.get(raw_data[c]).copy()
    # Keep the time stamps aside: the 'X[s]' column is removed together with
    # every other non-EMG column and re-attached under the name 'time'.
    timestamps = frame['X[s]']
    non_emg_cols = [col for col in frame.columns if 'EMG' not in col]
    frame.drop(columns=non_emg_cols, inplace=True)
    frame['time'] = timestamps
    frame.dropna(inplace=True)
    raw_data[c] = ess.resamp_dataframe(frame, target_freq)

# We also need the step times exported from MATLAB. They are only loaded for
# conds[2:], i.e. every condition except 'presit' and 'postsit'; those two
# keep an empty list as placeholder.
step_columns = ['step_index', 'type', 'start', 'pert', 'other', 'end']
step_time = {c: [] for c in conds}
for c in conds[2:]:
    mat_contents = loadmat(subj_path + 'times/' + subj + '_' + c + '_time.mat')
    step_time[c] = pd.DataFrame(data=mat_contents['strideTime'], columns=step_columns)


In [None]:
# Create linear envelopes
# One envelope is computed per condition from the resampled raw data.
linear_envelope = {c: [] for c in conds}
for c in conds:
    # The sampling rate is taken from target_freq (the rate the raw data was
    # resampled to) instead of a second hard-coded 1000, so the two cannot
    # drift apart when target_freq is changed.
    # Plotting the four steps of making the linear envelope is supported; turn
    # it on with make_fig=True. muscle_idx_to_plot and plot_time select what
    # is drawn (presumably a channel index and sample indices — TODO confirm).
    linear_envelope[c] = ess.create_envelope(
        raw_data[c], fs=target_freq, band_pass=(30, 200), low_pass=20,
        make_fig=False, muscle_idx_to_plot=10, plot_time=np.arange(110000, 130000))

In [None]:
# Create step-normalized profiles
# Linear envelopes can be normalized to the step time because they don't have
# any frequency value. Here, normalization does both epoching and warping.
# The assumption is that the step_time and the EMG time are synced.
par_normal_profile = ray.remote(ess.create_normal_profile)
target_length = 2000

# Conditions without step times ('presit', 'postsit') keep an empty placeholder.
normal_envelope = {c: [] for c in conds}

# Submit every condition to Ray before collecting anything, so the profiles
# are computed in parallel. The non-parallel equivalent would be
# ess.create_normal_profile(linear_envelope[c], step_time[c], target_length).
pending = {
    c: par_normal_profile.remote(linear_envelope[c], step_time[c], target_length)
    for c in conds[2:]
}
for c in conds[2:]:
    # Original author's note: .copy() releases the remote memory occupied by
    # the Ray object.
    normal_envelope[c] = ray.get(pending[c]).copy()

In [None]:
# Plot and save average plots
# The muscle names are read from the last condition's envelope. This is the
# same frame the previous code reached through a loop variable left over from
# an earlier cell; naming it explicitly makes the cell independent of
# execution history. All conditions are assumed to share the same channels —
# TODO confirm.
muscle_names = normal_envelope[conds[-1]].columns.get_level_values('muscle').unique().to_list()
# Short titles: the part of each channel name before the first ':', lower-cased.
muscle_titles = [name.split(':')[0].lower() for name in muscle_names]
# Consecutive channels are grouped two by two and keyed 0, 1, 2, ...; with an
# odd number of channels the last one stays unpaired. Built-in enumerate gives
# plain int keys without the np.int alias, which was removed from NumPy.
muscle_pairs = dict(enumerate(zip(muscle_names[:-1:2], muscle_names[1::2])))
muscle_pair_titles = dict(enumerate(zip(muscle_titles[:-1:2], muscle_titles[1::2])))

for c in conds[2:]:
    fig = ess.plot_mean(
        normal_envelope[c], muscle_pairs, titles=muscle_pair_titles, plot_CI=True, CI_method='bootstrap')
    fig.suptitle(subj + ' ' + c + ' : raw EMG over total strides')
    # These figures are intentionally not saved: having non-normalized plots
    # is not necessary.


## Grand normalization
We need to normalize the EMG signals by their average or their maximum. Because different muscles might be recruited most strongly in different tasks, I think a global normalization across tasks is more meaningful than a per-task normalization.

In [None]:
# Grand (across-task) mean normalization of the step-normalized envelopes.
# NOTE(review): dict.copy() is shallow, so gNormalized shares its DataFrames
# with normal_envelope; the division below therefore rescales
# normal_envelope[c] as well. Confirm this is intended before relying on
# normal_envelope holding non-normalized values afterwards.
gNormalized = normal_envelope.copy()
# The two sitting entries are left out of the normalization.
gNormalized.pop('presit', None)
gNormalized.pop('postsit', None)

# Put all remaining tasks side by side (only rows common to every task are
# kept), average the columns that share the same first-level label, then
# average over rows to get one value per muscle. The groupby form replaces
# DataFrame.mean(axis=1, level=0), whose `level` argument no longer exists in
# recent pandas.
concat_tasks = pd.concat(list(gNormalized.values()), axis=1, join='inner')
mean_activation = concat_tasks.T.groupby(level=0).mean().mean(axis=1)

for c in conds[2:]:
    # NOTE(review): the last entry of muscle_names is skipped and so is left
    # unscaled — confirm this is intentional.
    for m in muscle_names[:-1]:
        gNormalized[c][m] = gNormalized[c][m] / mean_activation[m]
    fig = ess.plot_mean(gNormalized[c], muscle_pairs, titles=muscle_pair_titles, plot_CI=True, CI_method='bootstrap')
    fig.suptitle(subj + ' ' + c + ' : Mean-Normalized EMG over total strides')
    if save_fig:
        fig.savefig(subj_path + 'figs/' + subj + '_meanNormalized-EMG-envelope_timeNormalized_' + c + '.png')
        fig.savefig(subj_path + 'figs/' + subj + '_meanNormalized-EMG-envelope_timeNormalized_' + c + '.pdf')


In [None]:
# Save raw_data, and normal envelope
# The linear envelope is not saved because it is not a data frame here.
for c in conds:
    raw_path = f'{subj_path}{subj}_{c}_raw.feather'
    raw_data[c].to_feather(raw_path)

for c in conds[2:]:
    envelope = normal_envelope[c]
    # feather does not accept non-string column names, so the labels of the
    # second column level are converted to strings for writing ...
    as_text = {t: str(t) for t in envelope.columns.levels[1]}
    envelope.rename(columns=as_text, inplace=True)
    feather.write_feather(
        envelope, f'{subj_path}{subj}_{c}_normalEnvelope.feather')
    # ... and converted back to integers afterwards, so the in-memory frame
    # keeps its original labels.
    as_number = {t: int(t) for t in envelope.columns.levels[1]}
    envelope.rename(columns=as_number, inplace=True)