# Imports

In [None]:
path_to_base_package = '../..'
import sys

# setting path
sys.path.append(f"{path_to_base_package}")

import os
import pandas as pd
from mna.sessions.eye_session import process_session_eye
from mna.sessions.eeg_session import process_session_eeg
from mna.sessions.motor_session import process_session_motor
from mna.sessions.ecg_session import process_session_ecg
from os import listdir
from os.path import isfile, join
from mna.utils.rnapp_data_format import read_all_lslpresets, return_metadata_from_name, event_data_from_data, read_event_data
import pickle
from multiprocessing import Pool
from functools import partial
from pivottablejs import pivot_ui
from mna.utils.rnapp_data_format import read_all_files

# Batch convert raw files into pickle

In [4]:
# One-off conversion of the raw recordings into pickle files.
# Leave this False unless the raw data changed: the conversion only needs to be done once.
# NOTE(review): both directories are absolute paths tied to one machine; they are kept
# as-is here, but consider deriving them from path_to_base_package.
reconvert_raw_files = False
if reconvert_raw_files:
    read_all_files(data_dir='/home/jupyter/raw_data/',
                   pickle_dir='/home/jupyter/mna/data/', save_pickle=True)

# Batch analysis

In [5]:
# --- Paths (relative to the package root set in the imports cell) ---
data_dir = f"{path_to_base_package}/data/"
timestamp_fixer_path = f"{data_dir}annotated/fit_timestamp_adjuster.pkl"
lsl_dir = f"{path_to_base_package}/mna/LSLPresets/"
output_dir = f"{path_to_base_package}/output/batch_analysis_non_baseline/"
# exist_ok avoids the check-then-create race of a separate isdir() test
os.makedirs(output_dir, exist_ok=True)

# --- Inputs ---
metadata_jsons = read_all_lslpresets(path_to_jsonfiles=lsl_dir)
# Every regular file in data_dir whose name contains '.pkl'.
# NOTE(review): listdir() order is arbitrary, yet later cells treat onlyfiles[0] as the
# ICA reference session — confirm the reference file really comes first.
onlyfiles = [f for f in listdir(data_dir) if isfile(join(data_dir, f)) and '.pkl' in f]
# Use a context manager so the file handle is closed once the object is loaded.
# features == 'processed_trial_duration',  'processed_trial_duration_1', 'lsl_timestamps'
with open(timestamp_fixer_path, 'rb') as ts_fixer_handle:
    ts_fixer = pickle.load(ts_fixer_handle)

# --- Session bookkeeping: (participant id, session number) pairs ---
interrupted_sessions = [(13,1), (22,1)]
remove_sessions = [(13,1),(15,1),(22,1)]  # passed to read_event_data as remove_id_sessions
reference_ica = "sbj20ssn03"  # "sbjXXssnYY" key of the session whose ICA is used as template

# --- Output switches ---
save_data_pkl = True # save data into pickle files
save_ica_plts = False # save ICA components plots
epoch_raw_eeg = False # epoching raw data

# --- Epoch timing ---
# NOTE(review): both values are divided by 1000 when used, so they look like milliseconds — confirm.
preturn = 1000
baseline_period = 250
rs = 64 # random seed

# determine whether we do motor analysis
motor_events = True # this determines whether we do baseline correction and average reference
if motor_events:
    set_baseline = None
    set_average_reference = False
else:
    # Baseline window is the baseline_period immediately preceding the pre-turn window.
    # Uses baseline_period instead of a second hard-coded 250 so the two cannot drift apart.
    set_baseline = (-((preturn + baseline_period) / 1000), -(preturn / 1000))
    set_average_reference = True

## Process file to extract features for all modalities

In [9]:
def process_file(ica_dict, each_file, overwrite=False):
    """Run the motor, ECG, eye and EEG pipelines on one pickled session file.

    Relies on the notebook-level configuration (data_dir, output_dir, reference_ica,
    metadata_jsons, remove_sessions, preturn, baseline_period, motor_events,
    set_baseline, set_average_reference, save_data_pkl).

    Parameters
    ----------
    ica_dict : dict
        Maps "sbjXXssnYY" keys to fitted ICA objects. It is updated in place with
        this session's ICA and is also part of the return value.
    each_file : str
        File name (not a path) inside data_dir. Subject and session numbers are
        parsed from the 'Sbj_<id>-Ssn_<no>.dats' portion of the name.
    overwrite : bool, default False
        When False, a session whose saved_files directory already exists is skipped.

    Returns
    -------
    tuple or None
        (post_processed_event_df, events_dict, ica_epochs_dict, ica_dict, eog_idx_dict),
        or None when the session is skipped or yields no events. The three *_dict
        containers other than ica_dict are returned empty; they are kept only so the
        shape of the return value stays stable.

    Raises
    ------
    RuntimeError
        If no reference ICA is available in ica_dict and this session is not the
        reference session itself.
    """
    # Returned for interface compatibility; nothing in this function fills them.
    ica_epochs_dict = {}
    eog_idx_dict = {}
    events_dict = {}
    input_path = data_dir + each_file

    # Parse ids out of the file name and left-pad single digits ("3" -> "03").
    sbj_id = each_file[each_file.find('Sbj_')+4:each_file.find('-Ssn')]
    ssn_no = each_file[each_file.find('Ssn_')+4:each_file.find('.dats')]
    sbj = "sbj" + ("0" + sbj_id if len(sbj_id) < 2 else sbj_id)
    ssn = "ssn" + ("0" + ssn_no if len(ssn_no) < 2 else ssn_no)
    session_id = sbj + ssn

    session_dir = f"{output_dir}/saved_files/{session_id}/"
    if not overwrite and os.path.exists(session_dir):
        print(session_id,'sbj+ssn already exists')
        return None

    # NOTE: pickle.load executes arbitrary code; only feed it files produced by this project.
    with open(input_path, 'rb') as handle:
        rns_data = pickle.load(handle)

    ## Add metadata to data
    for key in rns_data.keys():
        rns_data[key].append(return_metadata_from_name(key, metadata_jsons))

    event_df = read_event_data(rns_data, remove_id_sessions=remove_sessions)
    if event_df.empty:
        return None
    # .copy() so the column assignments below act on an independent frame rather than
    # on a filtered view (avoids SettingWithCopyWarning / silently lost writes).
    event_df = event_df[event_df.block_condition == 'voice'].copy()
    event_df['trial_damage'] = event_df.damage.diff().fillna(0)
    event_df['trial_duration'] = event_df.trial_end_time - event_df.trial_start_time

    # Missing spoken difficulty becomes its own category, then everything is encoded numerically.
    event_df['spoken_difficulty'] = event_df['spoken_difficulty'].fillna("unknown")
    event_df['spoken_difficulty_encoded'] = event_df.spoken_difficulty.replace(to_replace=['easy', 'hard', 'unknown'],
                                                                               value=[1, 2, 0])

    # motor
    post_processed_event_df, turns_df = process_session_motor(rns_data, event_df, motor_channel='Unity_MotorInput', plot_motor_result = False, plot_motor_snippet = 30, plot_frequency = 0, preturn=preturn)
    if motor_events:
        # In motor mode the per-turn table replaces the per-trial table for all later steps.
        post_processed_event_df = turns_df

    # ecg
    # NOTE(review): an earlier comment here mentioned a 60 second window, but
    # pretrial_period=30 is what is passed — confirm the intended window and units.
    post_processed_event_df = process_session_ecg(rns_data, post_processed_event_df,plot_frequency=0,plot_ecg_snippet=40,pretrial_period=30)

    # eye (only when the eye-tracking stream was recorded)
    if 'Unity_ViveSREyeTracking' in rns_data:
        post_processed_event_df = process_session_eye(rns_data, post_processed_event_df,detect_blink=True, pretrial_period=3, posttrial_period=0, plot_frequency=0, plot_eye_snippet=40, classifiers=['NSLR'], pupil_average_limit=False)

    # Resolve the ICA template: a previously fitted reference if available, no template
    # when this session IS the reference, and an explicit error otherwise (this case
    # used to surface as an UnboundLocalError at the EEG call).
    if reference_ica in ica_dict:
        ref_ica = ica_dict[reference_ica]
    elif session_id == reference_ica:
        ref_ica = None
    else:
        raise RuntimeError(
            f"Reference ICA '{reference_ica}' is not available in ica_dict and "
            f"'{session_id}' is not the reference session; process the reference session first."
        )

    # eeg
    # Epoch start in seconds: pre-turn window plus baseline period, both divided by 1000.
    # The previous divisor was `preturn`, which only matched while preturn == 1000.
    epoch_tmin = -((preturn + baseline_period) / 1000)
    post_processed_event_df, epochs, events, info, reject_log, ica, eog_idx = process_session_eeg(rns_data, post_processed_event_df, run_autoreject=True, run_ica=True, save_raw_eeg = False, sbj_session = session_id, plot_epochs = False, template_ica = ref_ica, analyze_pre_ica = False, average_reference=set_average_reference, eye_movement_removal=False, tmin=epoch_tmin, tmax=0, baseline = set_baseline, normalize_pow_freq = True, filter_events = False)
    ica_dict[session_id] = ica

    # Per-session CSV; motor runs get a "_motor" suffix so both variants can coexist.
    first_row = post_processed_event_df.iloc[0]
    csv_suffix = "_motor" if motor_events else ""
    post_processed_event_df.to_csv(f"{output_dir}ppid_{first_row.ppid}_session_{first_row.session}{csv_suffix}.csv")

    # save data for later use
    if save_data_pkl:
        pickle_dir = session_dir
        os.makedirs(os.path.dirname(pickle_dir), exist_ok=True)
        with open(f"{pickle_dir}events.pickle", 'wb') as handle_events:
            pickle.dump(events, handle_events, protocol=pickle.HIGHEST_PROTOCOL)
        with open(f"{pickle_dir}ica_epochs.pickle", 'wb') as handle_ica_eps:
            pickle.dump(epochs, handle_ica_eps, protocol=pickle.HIGHEST_PROTOCOL)
        # NOTE(review): this stores the whole ica_dict (every session fitted so far), not
        # just this session's `ica`. Left unchanged because readers of ica.pickle may
        # expect a dict — confirm which is intended.
        with open(f"{pickle_dir}ica.pickle", 'wb') as handle_ica:
            pickle.dump(ica_dict, handle_ica, protocol=pickle.HIGHEST_PROTOCOL)
        with open(f"{pickle_dir}eog_idx.pickle", 'wb') as handle_eog:
            pickle.dump(eog_idx, handle_eog, protocol=pickle.HIGHEST_PROTOCOL)

    return post_processed_event_df, events_dict, ica_epochs_dict, ica_dict, eog_idx_dict


## Set up the reference participant (20_3) for blink removal using ICA

In [None]:
results = []
# process_file accepts only (ica_dict, each_file, overwrite); the baseline and
# average-reference settings are read from the notebook-level configuration, so
# passing them positionally raised a TypeError.
# NOTE(review): onlyfiles[0] is assumed to be the reference session — confirm the file order.
result = process_file({}, onlyfiles[0], overwrite=True)
if result is None:
    # process_file returns None when the session has no events; fail with a clear message
    # instead of a "'NoneType' object is not subscriptable" error on the next line.
    raise RuntimeError(f"Reference session file '{onlyfiles[0]}' produced no result; cannot build the reference ICA.")
ica_dict = result[3]

Creating RawArray with float64 data, n_channels=89, n_times=3399947
    Range : 0 ... 3399946 =      0.000 ...  1660.130 secs
Ready.
EEG channel type selected for re-referencing
Applying average reference.
Applying a custom ('EEG',) reference.
Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 1 - 55 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 1.00
- Lower transition bandwidth: 1.00 Hz (-6 dB cutoff frequency: 0.50 Hz)
- Upper passband edge: 55.00 Hz
- Upper transition bandwidth: 13.75 Hz (-6 dB cutoff frequency: 61.88 Hz)
- Filter length: 6759 samples (3.300 sec)



[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.2s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    0.4s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:    0.5s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  64 out of  64 | elapsed:    7.6s finished


Fitting ICA to data using 64 channels (please be patient, this may take a while)
Selecting by number: 64 components


In [None]:
multi_process_files = False
if multi_process_files:
    # Worker processes receive a copy of ica_dict (it already holds the reference ICA).
    # Skipped or empty sessions come back as None and must be dropped before concatenation.
    with Pool(4) as p:
        results = [r for r in p.map(partial(process_file, ica_dict), onlyfiles) if r is not None]
else:
    # The first file was already handled by the reference-session cell.
    for onlyfile in onlyfiles[1:]:
        result = process_file(ica_dict, onlyfile)
        if result is not None:
            results.append(result)

if not results:
    # Same exception type pd.concat would raise on an empty list, with an actionable message.
    raise ValueError("No sessions were processed (all were skipped or empty); nothing to concatenate.")
# Element 0 of each result tuple is that session's post-processed event DataFrame.
all_dfs = pd.concat([r[0] for r in results], ignore_index=True)

# Save results and generate interactive PivotTable

In [None]:
# Write the combined table as CSV, Excel and an interactive pivot table.
# Motor runs use the "all_results_motor" stem, everything else "all_results".
results_stem = f"{output_dir}all_results_motor" if motor_events else f"{output_dir}all_results"
all_dfs.to_csv(f"{results_stem}.csv", index=False)
all_dfs.to_excel(f"{results_stem}.xlsx")
# Trailing semicolon keeps the notebook from rendering pivot_ui's return value.
pivot_ui(all_dfs, outfile_path=f"{results_stem}.html");