In [None]:
import mne
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os
from ev_parser import gesture_parser
from functools import partial
from scipy import interpolate
from sklearn.preprocessing import StandardScaler

# Root directory of the project data: raw recordings are read from
# '<path_out>/hand_writing/<subj>/' and preprocessed CSVs are written below it.
# NOTE(review): hardcoded absolute path — adjust to the local machine before running.
path_out = 'C:/skoltech_hand_writing'

# Render matplotlib figures inside the notebook output.
%matplotlib inline

In [None]:
### SCRIPT "2_Gestures_Test_all.ipynb"
# Subject identifier = name of the subject's folder under '<path_out>/hand_writing/'
# (presumably of the form 'S102', as in `missing_epochs` below). Set before running.
subj = ''

# Raw recording files of the subject.
# os.listdir() returns entries in arbitrary order and also lists sub-directories
# (e.g. the 'preprocessed' folder this script itself creates in the same place),
# while the loop below maps list position -> recording number via filenames[code - 1].
# Keep regular files only and sort them so that this mapping is deterministic and
# stays the same when the notebook is re-run.
subj_dir = f'{path_out}/hand_writing/{subj}'
filenames = sorted(
    name for name in os.listdir(subj_dir)
    if os.path.isfile(os.path.join(subj_dir, name))
)

# Missing epochs (found empirically), defined in the following way:
#   {subj: [session_with_missing_epoch, id_of_missing_epoch]}
# The session number is compared against `code - 1` in the loop below,
# i.e. it is counted from 0 while `code` is counted from 1.
# Subjects without a missing epoch simply have no entry here.
missing_epochs = {'S102': [5, 15],
                  'S106': [2, 30],
                  'S107': [4, 40]}
for code in np.arange(1, 6):
    gesture_name = filenames[code - 1]
    skiprows=22

    df_gesture = pd.read_csv(f'{path_out}/hand_writing/{subj}/{gesture_name}', header=None, delim_whitespace=True, skiprows=skiprows)
    assert((df_gesture[0]=='[').all())
    df_gesture.drop(columns=[0], inplace=True)
    df_gesture['ts'] = df_gesture[1].str.replace(']', '').astype('float')
    df_gesture.rename(columns={2:'INPUT', 3:'TYPE', 4:'KEY', 5:'VALUE'}, inplace=True)
    df_gesture.ts -= df_gesture.ts.min()

    # Track RESET triggers
    df_gesture['Reset'] = False
    df_gesture.loc[df_gesture.query('KEY=="ABS_MT_TRACKING_ID" & VALUE=="7fffffff"').index, 'Reset']=True
    df_gesture['ResetCounter'] = np.cumsum(df_gesture.Reset)

    #### DIVIDING DATASET INTO TRIGGERS AND GESTURES:
    ### Triggers:::
    marks=df_gesture[(df_gesture['KEY']=="ABS_MT_TRACKING_ID")&(df_gesture['VALUE']=="7fffffff")][['INPUT','TYPE','ts','ResetCounter']]
    marks['Epoch']=(marks['ResetCounter']-1)//4
    marks['X']=-1
    marks['Y']=-1

    ### Gestures:::
    df_motion = df_gesture.query('(KEY == "ABS_MT_POSITION_X") or (KEY == "ABS_MT_POSITION_Y")').copy()
    df_motion.loc[:, 'MAGNITUDE'] = df_motion['VALUE'].apply(partial(int, base=16))
    df_motion['X'] = None
    df_motion['Y'] = None
    # fill in motion coordinates:
    mask = df_motion.KEY == 'ABS_MT_POSITION_X'
    df_motion.loc[mask, 'X'] = df_motion.loc[mask, 'MAGNITUDE']

    mask = df_motion.KEY == 'ABS_MT_POSITION_Y'
    df_motion.loc[mask, 'Y'] = df_motion.loc[mask, 'MAGNITUDE']
    df_motion.fillna(method='ffill', inplace=True)
    df_motion.dropna(inplace=True)
    df_motion.drop_duplicates(subset=[1], keep='last', inplace=True)
    df_motion.reset_index(drop=True, inplace=True)
    df_motion.drop(columns=[1, 'TYPE', 'KEY', 'VALUE', 'MAGNITUDE', 'Reset'], inplace=True)

    df_motion['Epoch'] = df_motion.ResetCounter//4
    prot_name = filenames[code - 1].split('.')[0].split('_')[-1]
    # for additional validation of the triggers
    df_protocol=pd.read_excel(f'8_Sessions/Digits_Tyumen_{code}.xlsx')

    # exclude excessive epochs from both datasets
    df_motion=df_motion[df_motion['Epoch']>-1]
    df_motion=df_motion[df_motion['Epoch']<=49]
    marks=marks[marks['Epoch']<=49]

    # comment if not needed:
    #=== TO PLOT MOTION TRAJECTORY ===
    %matplotlib inline
    trials_times=[]
    
    fig, axx = plt.subplots(10,5, sharex=True, sharey=True, figsize=(15,40) )
    for ia, (ep, group) in enumerate(df_motion.groupby('Epoch')):

        #extracting starting point of the trial and last point of trial
        trial_duration=round(group['ts'].values[-1]-group['ts'].values[0],2)
        trials_times.append(trial_duration)
        
        subtitle='№'+str(ep) +', '+str(df_protocol['Digit_text'].values[ia])+' ,'+str(trial_duration)+'s'
        
        
        color='r' if trial_duration>2 else 'b'
        
        #print(color)
        
        plt.sca(axx.reshape(-1)[ia])
        plt.plot(*(group.loc[:, ['X', 'Y']].values.T), color+'.', ms=1)
        #plt.ylabel(df_protocol.loc[ia, '0'])
        plt.axis(False) 
        plt.title(subtitle)


    plt.gca().invert_yaxis()

    plt.tight_layout()
    # =======================================

    ### Part to substiitute for the missing epochs in the data
    # just filling in the gap between two epochs with the mean time:
    # e.g., if the missing epoch is epoch number 15, I use time points from epoch 14 and 16, find mean between them
    # and add an epoch of 0 values to fill in the space and make the script work
    missed_epo_list = missing_epochs[subj]
    if code == missed_epo_list[0] + 1:
        missed_epo = missed_epo_list[1]
        dd = np.zeros((2, df_motion.shape[-1]))
        epo_15 = pd.DataFrame(data = dd, columns = df_motion.columns, index = np.arange(len(df_motion), len(df_motion) + len(dd)))
        epo_15.Epoch = missed_epo
        time_diff = (df_motion[df_motion.Epoch == missed_epo+1].ts.values[0] - df_motion[df_motion.Epoch == missed_epo - 1].ts.values[-1]) / 2
        epo_15.ts = df_motion[df_motion.Epoch == missed_epo + 1].ts.values[0] - time_diff

        df_motion_full = pd.concat([df_motion, epo_15], axis = 0)
        df_motion_full.reset_index(drop=True, inplace=True)
        df_motion_full = df_motion_full.sort_values(by = 'ts')

        trials_times=[]

        # PLOT TRAJECTORIES TO CHECK THE RESULTS: THERE SHOULD BE A BLANK SPACE WHERE THE MISSING EPOCH WAS
        fig, axx = plt.subplots(10,5, sharex=True, sharey=True, figsize=(15,40) )
        for ia, (ep, group) in enumerate(df_motion_full.groupby('Epoch')):

            #extracting starting point of the trial and last point of trial
            trial_duration=round(group['ts'].values[-1]-group['ts'].values[0],2)
            trials_times.append(trial_duration)
            
            subtitle='№'+str(ep) +', '+str(df_protocol['Digit_text'].values[ia])+' ,'+str(trial_duration)+'s'
            
            
            color='r' if trial_duration>2 else 'b'
            
            plt.sca(axx.reshape(-1)[ia])
            plt.plot(*(group.loc[:, ['X', 'Y']].values.T), color+'.', ms=1)
            plt.axis(False) 
            plt.title(subtitle)


        plt.gca().invert_yaxis()

        plt.tight_layout()
        # =====================================================================================================

        df_motion = df_motion_full

    os.makedirs(f'{path_out}/hand_writing/{subj}/preprocessed/Rec_{code}/', exist_ok=True)
    df_motion.to_csv(f'{path_out}/hand_writing/{subj}/preprocessed/Rec_{code}/trials_processed.csv', sep=';')
    marks.to_csv(f'{path_out}/hand_writing/{subj}/preprocessed/Rec_{code}/marks_processed.csv', sep=';')