# Analyze sessions in batch from Phase 1 of AdaDrive (work in progress)

In [1]:
import sys
from pathlib import Path

# setting path
sys.path.append('..')
import mne
import matplotlib
from mna.utils.rnapp_data_format import read_all_lslpresets, return_metadata_from_name, event_data_from_data
import pickle, os
import pandas as pd
import numpy as np
import seaborn as sns
from IPython.display import display
from scipy.io import savemat
import scipy
import matplotlib.pyplot as plt
from os import listdir
from os.path import isfile, join
from mna.utils.rnapp_data_format import read_all_lslpresets, return_metadata_from_name, event_data_from_data
import pickle
from statannotations.Annotator import Annotator
from collections import defaultdict
from scipy import stats
import mne
import glob 
import random
import re
import statsmodels.formula.api as smf
import statsmodels as sm
from scipy.stats import spearmanr
# matplotlib.use('Qt5Agg')
%matplotlib inline
plt.style.use('default')

from mna.utils.rnapp_data_format import read_all_files
# 1. Read an RN App-converted pkl file and create the metadata and data structures

# Aux functions, read files

In [2]:
# loop over the list of csv files
def read_motor_csvs():
    csv_files = glob.glob(os.path.join(output_dir, "ppid*_motor.csv"))
    all_dfs = None
    for f in csv_files:
        # read the csv file
        if not type(all_dfs)==pd.core.frame.DataFrame:
            all_dfs = pd.read_csv(f)
        else:
            all_dfs = pd.concat([all_dfs, pd.read_csv(f)], ignore_index=True)
    all_dfs = all_dfs[all_dfs.columns.drop(list(all_dfs.filter(regex='Unnamed')))]
    return all_dfs

def get_motor_epochs():
    epochs_files = glob.glob(os.path.join(output_dir, "**/*ica_epochs.pickle"), recursive=True)
    motor_epochs = []
    for each_file in epochs_files:
        motor_epochs.append(pickle.load(open(each_file, 'rb')))
    motor_epochs = mne.concatenate_epochs(motor_epochs)
    for col in ['ppid','session','block','number_in_block','trial']:
        motor_epochs.metadata[col] = motor_epochs.metadata[col].astype(int)
    return motor_epochs

def get_motor_intensity_info(input_df):
    
    def str_list_to_list(lst):
        str_single_space = re.sub("\s+", " ", lst.strip())
        str_no_brackets = re.sub("[\[\]]", "", lst)
        return [float(n) for n in str_no_brackets.split()]
    
    try:
        all_steer_events = input_df['post_steer_event_raw']
        all_steer_events_finalized = all_steer_events.apply(str_list_to_list)
    except:
        all_steer_events_finalized = input_df['post_steer_event_raw']
    norm_pos = lambda wheel_pos: np.asarray(wheel_pos)/np.asarray(wheel_pos[0])
    final_pos = lambda final_wheel_pos: np.asarray(final_wheel_pos[-1])-np.asarray(final_wheel_pos[0])

    norm_pos_df = all_steer_events_finalized.apply(norm_pos)
    final_pos_df = abs(all_steer_events_finalized.apply(final_pos))
    input_df["Steer_Wheel_Degree"] = abs(all_steer_events_finalized.apply(final_pos))
    all_dfs = []
    for sub in input_df.ppid.unique():
        sub_df = input_df[input_df.ppid==sub]
        sub_df["Steer_Wheel_Degree_Categorical"] = pd.qcut(sub_df.Steer_Wheel_Degree, 2, labels=["Low", "High"]) #2=High, 1 =Low
        sub_df["Steer_Wheel_Degree_Encoded"] = sub_df.Steer_Wheel_Degree_Categorical.replace({'High': 2, 'Low': 1})
        all_dfs.append(sub_df)
    return pd.concat(all_dfs).reset_index(drop=True)


def str_list_to_list(lst):
    str_single_space = re.sub("\s+", " ", lst.strip())
    str_no_brackets = re.sub("[\[\]]", "", lst)
    return [float(n) for n in str_no_brackets.split()]

In [3]:
output_dir = '../output/batch_analysis_non_baseline_non_averaged/'
remove_sessions = [(15,1),(22,1)]
rel_regions = {'premotor_regions': ['FC3', 'FC1', 'FCz', 'FC2', 'FC4'], 'dorsolateral_prefrontal': ['AF3', 'AFz', 'AF4'], 'intermediate_frontal': ['F3', 'F1', 'Fz', 'F2', 'F4']}
all_regions = sum(rel_regions.values(),[])

pupil_df = pd.read_csv(f"../output/pupil_exposure/participant_level_exposure_fits.csv")
trial_dfs = pd.read_csv(f"{output_dir}all_results.csv")
motor_dfs = read_motor_csvs()
motor_dfs['post_steer_event_raw'] = motor_dfs['post_steer_event_raw'].apply(str_list_to_list)
motor_epochs = get_motor_epochs()

Adding metadata with 77 columns
6905 matching events found
No baseline correction applied
Created an SSP operator (subspace dimension = 1)


  motor_epochs = mne.concatenate_epochs(motor_epochs)


# Clean up dfs

In [4]:
# seaborn
sns.set(font_scale=1.2)
sns.set_palette("tab10")
from mna.utils.batch_feature_extraction import clean_up_adadrive_trials

motor_outlier_cols = ['abs_sum_delta_steer_input']
cols_to_outlier_detect = ['bpm', 'sdnn', 'rmssd', 'pnn50']
experimental_cols = ['spoken_difficulty', 'trial_duration', 'density', 'trial_damage']
eye_cols = ['Left Pupil Diameter', "NSLR_count_Fixation", "NSLR_count_Saccade",
            'NSLR_mean_duration_Fixation', 'NSLR_mean_duration_Saccade',
            'NSLR_first_onset_Fixation', 'NSLR_first_onset_Saccade']
ecg_cols = ['bpm', 'sdnn', 'rmssd', 'pnn50']  # rmssd = parasympathetic
motor_cols = ['abs_sum_delta_steer_input', 'abs_sum_delta_brake_input', 'abs_sum_delta_throttle_input']


def clean_up_trials(input_df):
    all_dfs_final = clean_up_adadrive_trials(input_df.copy())
    # damage change
    all_dfs_final = all_dfs_final.sort_values(by=['ppid', 'session', 'block', 'trial'])
    # nan, outliers
    #for col in motor_outlier_cols:
    #    all_dfs_final[col] = all_dfs_final[col].mask(all_dfs_final[col].sub(all_dfs_final[col].mean()).div(all_dfs_final[col].std()).abs().gt(2))
    #all_dfs_final['abs_sum_delta_brake_input'] = all_dfs_final['abs_sum_delta_brake_input'].mask(all_dfs_final['abs_sum_delta_brake_input']>.1)

    all_dfs_final['NSLR_first_onset_Fixation'] = all_dfs_final['NSLR_first_onset_Fixation'] - all_dfs_final[
        'trial_start_time']
    all_dfs_final['NSLR_first_onset_Saccade'] = all_dfs_final['NSLR_first_onset_Saccade'] - all_dfs_final[
        'trial_start_time']

    all_dfs_final[
        'throttle_over_brake'] = all_dfs_final.abs_sum_delta_throttle_input / all_dfs_final.abs_sum_delta_brake_input
    return all_dfs_final


trial_dfs = clean_up_trials(trial_dfs)
trial_dfs = trial_dfs.loc[~trial_dfs.ppid_session.isin([f"{es[0]}_{es[1]}" for es in remove_sessions])]
motor_dfs = clean_up_trials(motor_dfs)

# luminance effect removal from pupil diameter
trial_dfs['Raw Left Pupil Diameter'] = trial_dfs['Left Pupil Diameter']
motor_dfs['Raw Left Pupil Diameter'] = motor_dfs['Left Pupil Diameter']
p_val_criteria = 0.05
for index, row in trial_dfs.reset_index(drop=True).iloc[1:].iterrows():
    last_ppid = trial_dfs.iloc[index - 1].ppid
    last_session = trial_dfs.iloc[index - 1].session
    last_trial = trial_dfs.iloc[index - 1].trial
    last_opacity = trial_dfs.iloc[index - 1].density
    if ((row.ppid == last_ppid) & (row.session == last_session) & (row.trial == last_trial + 1)):  # if continuous
        # if there is a significant effect of opacity on pupil
        if pupil_df.loc[pupil_df['sub'] == last_ppid, 'p_opacities'].values < p_val_criteria:
            this_opacity = row.density
            this_pupil_diameter = row['Left Pupil Diameter']
            weight = pupil_df.loc[pupil_df['sub'] == last_ppid, 'w_opacities']
            adjustment = (this_opacity - last_opacity) * weight
            trial_dfs.iloc[index, trial_dfs.columns.get_loc('Left Pupil Diameter')] -= adjustment
            motor_dfs.loc[(motor_dfs.ppid == last_ppid) & (motor_dfs.session == last_session) & (
                        motor_dfs.trial == last_trial + 1), 'Left Pupil Diameter'] -= adjustment  # update motor df too
            motor_epochs.metadata.loc[(motor_epochs.metadata.ppid == last_ppid) &
                                      (motor_epochs.metadata.session == last_session) &
                                      (
                                                  motor_epochs.metadata.trial == last_trial + 1), 'Left Pupil Diameter'] += adjustment  # update motor epochs too
# pupil bins
motor_dfs['pupil_bin'] = motor_dfs.groupby(['ppid'])['Left Pupil Diameter'].transform(
    lambda x: pd.qcut(x, 2, labels=['low', 'high']))
trial_dfs['pupil_bin'] = trial_dfs.groupby(['ppid'])['Left Pupil Diameter'].transform(
    lambda x: pd.qcut(x, 2, labels=['low', 'high']))
motor_epochs.metadata['pupil_bin'] = motor_epochs.metadata.groupby(['ppid'])['Left Pupil Diameter'].transform(
    lambda x: pd.qcut(x, 2, labels=['low', 'high']))
motor_dfs['pupil_bin_encoded'] = motor_dfs.groupby(['ppid'])['Left Pupil Diameter'].transform(
    lambda x: pd.qcut(x, 2, labels=[0, 1]))
trial_dfs['pupil_bin_encoded'] = trial_dfs.groupby(['ppid'])['Left Pupil Diameter'].transform(
    lambda x: pd.qcut(x, 2, labels=[0, 1]))
motor_epochs.metadata['pupil_bin_encoded'] = motor_epochs.metadata.groupby(['ppid'])['Left Pupil Diameter'].transform(
    lambda x: pd.qcut(x, 2, labels=[0, 1]))
preturn = 1000
motor_epochs.apply_baseline((-(preturn / 1000), -((preturn - 250) / 1000)))

# participant-level binning of motor data, replaces the session-level info already there
motor_dfs = get_motor_intensity_info(motor_dfs)
motor_epochs.metadata = get_motor_intensity_info(motor_epochs.metadata)

Applying baseline correction (mode: mean)
Replacing existing metadata with 79 columns


# Export to MATLAB

In [5]:
data_dir = "../data/"
lsl_dir = "../mna/LSLPresets/"
vid_dir = '../data/videos/'
output_path = '../output/matlab_exports/'
Path(output_path).mkdir(parents=True, exist_ok=True)

onlyfiles = [f for f in listdir(data_dir) if isfile(join(data_dir, f)) and '.pkl' in f]
file_to_sess = {f: (int(f.rsplit('Sbj_',1)[1].split('-')[0]),int(f.rsplit('Ssn_',1)[1].split('.')[0])) for f in onlyfiles}

In [6]:
selected_file = onlyfiles[0]
selected_pp_sess = file_to_sess[selected_file]
input_path = data_dir + selected_file # pick a random file, idx 26 and

print(f"input_path {input_path}")
metadata_jsons = read_all_lslpresets(path_to_jsonfiles=lsl_dir)
with open(input_path, 'rb') as handle:
    rns_data = pickle.load(handle)

for key in rns_data.keys():
    rns_data[key].append(return_metadata_from_name(key, metadata_jsons))

eeg_channel_names = mne.channels.make_standard_montage('biosemi64').ch_names
eeg_df = pd.DataFrame(rns_data['BioSemi'][0], columns=rns_data['BioSemi'][1],
                  index=rns_data['BioSemi'][2]['ChannelNames']).T
eeg_df = eeg_df.iloc[:,1:65]
eeg_df.columns = eeg_channel_names

motor_df = pd.DataFrame(rns_data['Unity_MotorInput'][0], columns=rns_data['Unity_MotorInput'][1],
                          index=rns_data['Unity_MotorInput'][2]['ChannelNames']).T

input_path ../data/09_24_2022_11_31_56-Exp_adadrive-Sbj_18-Ssn_3.dats.pkl


In [22]:
all_eeg_trials = []
all_motor_trials = []
sub_trials = trial_dfs[(trial_dfs.ppid == selected_pp_sess[0]) & (trial_dfs.session == selected_pp_sess[1])]
max_eeg_trial = int(sub_trials.trial_duration.min()*2048) # ensure same size
max_motor_trial = int(sub_trials.trial_duration.min()*40) # ensure same size

new_sample_rate = 100
for index,trial in sub_trials.iterrows():
    #sub_eeg_df = eeg_df[(eeg_df.index >= trial.trial_start_time) & (eeg_df.index <= trial.trial_end_time)].iloc[:max_eeg_trial]
    #sub_motor_df = motor_df[(motor_df.index >= trial.trial_start_time) & (motor_df.index <= trial.trial_end_time)].iloc[:max_motor_trial]
    sub_eeg_df = eeg_df[(eeg_df.index >= trial.trial_start_time) & (eeg_df.index <= trial.trial_end_time)]
    sub_motor_df = motor_df[(motor_df.index >= trial.trial_start_time) & (motor_df.index <= trial.trial_end_time)]

    # eeg resample
    secs = sub_eeg_df.shape[0]/2048 # Number of seconds in signal X
    samps = int(secs*new_sample_rate)     # Number of samples to downsample
    sub_eeg_df = scipy.signal.resample(sub_eeg_df, samps)

    # motor resample
    secs = sub_motor_df.shape[0]/40 # Number of seconds in signal X
    samps = int(secs*new_sample_rate)     # Number of samples to downsample
    sub_motor_df = scipy.signal.resample(sub_motor_df, samps)

    all_eeg_trials.append(sub_eeg_df)
    all_motor_trials.append(sub_motor_df)
#all_eeg_trials = np.array(all_eeg_trials)
#all_motor_trials = np.array(all_motor_trials)
all_eeg_trials_cat = np.concatenate(all_eeg_trials).T
all_motor_trials_cat = np.concatenate(all_motor_trials).T

In [23]:
mdic = {"eeg": all_eeg_trials, "motor": all_motor_trials, "label": f"{trial.ppid}_{trial.session}", "sample_rate": new_sample_rate}
savemat(f"{output_path}{trial.ppid}_{trial.session}.mat", mdic)

In [24]:
# Display the concatenated EEG array (one row per channel after the transpose above).
all_eeg_trials_cat

array([[-28552.11897898, -28568.03138624, -28538.09691621, ...,
        -29673.08280386, -29640.89760013, -29671.4593658 ],
       [-30849.58537421, -30836.43232117, -30826.29087197, ...,
        -32900.2297785 , -32874.96724528, -32875.70944564],
       [ -4415.75010314,  -4350.4570849 ,  -4378.53117744, ...,
        -10148.60620601, -10127.5791655 , -10131.15583926],
       ...,
       [-19380.8512941 , -19322.95840799, -19339.67256114, ...,
        -22131.57051793, -22109.4102402 , -22126.96601199],
       [-12233.20386159, -12210.70363604, -12215.99188763, ...,
        -10307.06067477, -10299.10520069, -10309.77446623],
       [-14985.12440857, -14928.26469337, -14941.61101949, ...,
        -16630.7690051 , -16599.53869508, -16626.49498982]])

In [None]:
# Display the per-trial list of resampled EEG arrays (one array per trial).
all_eeg_trials