# Example preprocessing notebook

In this notebook we walk through the preprocessing of a single example patient. Some steps are patient-specific and may need to change for other patients, but the notebook should demonstrate how to use the different functions in the toolbox.

In [None]:
# Render matplotlib figures inline, below the cell that produces them.
%matplotlib inline
# Load (or reload) the autoreload extension; mode 2 re-imports modules before each
# cell executes, so edits to imported source files (e.g. the LFPAnalysis package
# imported below) take effect without restarting the kernel.
%reload_ext autoreload
%autoreload 2

In [None]:
import numpy as np
import mne
from glob import glob
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
import seaborn as sns
from scipy.stats import zscore, linregress
import pandas as pd
import h5py
from mne.preprocessing.bads import _find_outliers

In [None]:
from LFPAnalysis import lfp_preprocess_utils, sync_utils

## Load the data and clean up MNE structure

In [None]:
# Root directory for most un-archived data and results.
base_dir = '/sc/arion'

# Intermediate results are saved in the 'work' directory.
save_dir = f'{base_dir}/work/qasims01/MemoryBanditData/EMU/Subjects/MS007'

# Most of the raw data lives in the 'projects' directory.
behav_dir = f'{base_dir}/projects/guLab/Salman/EMU/MS007/behav/Day1'
neural_dir = f'{base_dir}/projects/guLab/Salman/EMU/MS007/neural/Day1'
anat_dir = f'{base_dir}/projects/guLab/Salman/EMU/MS007/anat'

# glob() returns matches in arbitrary, filesystem-dependent order. Sort them so
# that edf_files[0] (loaded in the next cell) is the same recording on every run.
edf_files = sorted(glob(f'{neural_dir}/*.edf'))




Try loading the data into memory.

In [None]:
# Fail early with a readable message instead of a bare IndexError when the
# glob over neural_dir matched no recordings.
if not edf_files:
    raise FileNotFoundError(f'No .edf files found in {neural_dir}')

# Read the first recording; preload=True loads the whole signal into memory.
MS007_data = mne.io.read_raw_edf(edf_files[0], preload=True)
# If you try to preload, it will kill the kernel (at mem=4000). Probably need to request more memory in Minerva (mem=8000 seems to work)

# # If not preloading: 
# raw_data = MS007_data.get_data()

In [None]:
# Optional manual inspection step, left disabled: uncomment the lines below to
# browse the raw traces interactively and mark channels as bad by hand.

# # this command sets the backend so that the plots are interactive
# get_ipython().run_line_magic('matplotlib', 'qt')

# # plot all channels so that irrelevant/noisy channels can be saved as bad
# MS007_data.plot(n_channels=50)

In [None]:
# Sanity check: plot the first samples of the first channel.
n_preview = 5000  # samples 0..4999, matching the figure title

# get_data() is the public accessor; unlike the private `_data` array it also
# works when the recording was opened without preloading.
preview = MS007_data.get_data(picks=[0], start=0, stop=n_preview)[0]

fig, ax = plt.subplots()
ax.plot(preview)
ax.set_title(f"Raw iEEG, electrode 0, samples 0-{n_preview - 1}")
ax.set_xlabel("Sample")
ax.set_ylabel("Amplitude")
plt.show()

In [None]:
# Sanity check the photodiode: look up the 'DC1' channel and plot a window of it
trig_ix = MS007_data.ch_names.index('DC1')
photodiode_window = MS007_data._data[trig_ix, 10000:50000]

fig, ax = plt.subplots()
ax.plot(photodiode_window)
ax.set_title("Photodiode")
plt.show()

In [None]:
# Read the electrode localization table (first '*labels.csv' found in anat_dir)
csv_files = glob(f'{anat_dir}/*labels.csv')
elec_locs = pd.read_csv(csv_files[0])

# The file sometimes carries empty extra columns, which pandas names 'Unnamed...';
# remove them and keep every other column in its original order.
unnamed_cols = list(elec_locs.filter(regex='Unnamed').columns)
elec_locs = elec_locs.drop(columns=unnamed_cols)


The electrode names read out of the edf file do not always match those 
in the pdf (used for localization). This could be an error by the tech who entered the labels, 
or an error in how MNE reads the labels. Usually there's a mix-up between lowercase 'l' and capital 'I'.

Sometimes there are electrodes in the pdf that are NOT in the MNE data structure... let's identify those as well. 


In [None]:
# Reconcile the channel names read from the EDF with the `label` column of the
# localization table. Per the notes in this notebook, new_mne_names is the
# corrected list of channel names for the MNE object (used for renaming below)
# and unmatched_seeg holds localization labels that have no counterpart in the
# MNE data.
# NOTE(review): unmatched_names is not used in the cells shown here — confirm
# its exact meaning against lfp_preprocess_utils.match_elec_names.
new_mne_names, unmatched_names, unmatched_seeg = lfp_preprocess_utils.match_elec_names(MS007_data.ch_names, elec_locs.label)




So we return a new list of channel names for the MNE data structure, as well as a list of channels in the localization csv that are not found in the MNE structure. Make sure that unmatched_seeg does not factor into any referencing schemes later - those channels are not in the MNE data.

In [None]:
# Pair every current channel name with its matched replacement (same order)
# and rename the channels of the MNE object accordingly
new_name_dict = dict(zip(MS007_data.ch_names, new_mne_names))
MS007_data.rename_channels(new_name_dict)

In [None]:
# Display the channel names after renaming to verify the result by eye.
MS007_data.ch_names

## Now process the channel data


In [None]:
# The recording contains surface EEG alongside the sEEG, so the two groups are
# collected separately. Names beginning with 'r' / 'l' are taken as the right /
# left sEEG contacts.
all_chan_names = MS007_data.ch_names
right_seeg_names = [name for name in all_chan_names if name.startswith('r')]
left_seeg_names = [name for name in all_chan_names if name.startswith('l')]

# Optional: tag the scalp EEG channels as well, in case they are looked at later.
eeg_names = [
    'fp1', 'f7', 't3', 't5', 'o1', 'f3', 'c3', 'p3',
    'fp2', 'f8', 't4', 't6', 'o2', 'f4', 'c4', 'p4',
    'fz', 'cz', 'pz',
]

n_seeg = len(left_seeg_names) + len(right_seeg_names)
n_eeg = len(eeg_names)
print(f'We have a total of {n_seeg} sEEG electrodes')
print(f'We have a total of {n_eeg} EEG electrodes')
# MS007_data.set_channel_types()

In [None]:
# Channel-name -> channel-type mappings, applied via set_channel_types() in the next cell.
sEEG_mapping_dict = {name: 'seeg' for name in left_seeg_names + right_seeg_names}
EEG_mapping_dict = {name: 'eeg' for name in eeg_names}
trig_mapping_dict = {'dc1': 'stim'}

# Drop every channel that is not sEEG, scalp EEG or the trigger channel.
# This must be a set *difference*: the previous symmetric difference (^) also
# returned names from the keep-list that are absent from the recording, and
# those names were then passed to drop_channels() as channels to remove.
keep_chans = set(eeg_names + left_seeg_names + right_seeg_names + ['dc1'])
drop_chans = [name for name in MS007_data.ch_names if name not in keep_chans]
MS007_data.drop_channels(drop_chans)

In [None]:
# Apply the channel-type mappings in order: sEEG, then scalp EEG, then the trigger.
for type_mapping in (sEEG_mapping_dict, EEG_mapping_dict):
    MS007_data.set_channel_types(type_mapping)
# Kept as the cell's final expression so its return value is still displayed.
MS007_data.set_channel_types(trig_mapping_dict)

In [None]:
# Build the montage from the MNI coordinates in the localization table.
# The coordinates are converted from mm to m (hence the division by 1000)!!
mni_coords_m = elec_locs[['mni_x', 'mni_y', 'mni_z']].to_numpy(dtype=float) / 1000
ch_pos = dict(zip(elec_locs.label, mni_coords_m))
montage = mne.channels.make_dig_montage(ch_pos=ch_pos, coord_frame='mni_tal')

# Labels are matched case-insensitively; channels without a position only warn.
MS007_data.set_montage(montage, match_case=False, on_missing='warn')


Denote bad channels

In [None]:
# Clean up the MNE data 

# Run the toolbox's bad-electrode detection on the recording, passing the sEEG
# name -> type mapping.
# NOTE(review): the detection criteria live in lfp_preprocess_utils.detect_bad_elecs
# and are not visible here — confirm them before trusting the result.
bads = lfp_preprocess_utils.detect_bad_elecs(MS007_data, 
                                             sEEG_mapping_dict)

# Record the flagged channels on the MNE object; the re-referencing step below
# reads them back from info['bads'].
MS007_data.info['bads'] = bads

## Re-referencing

Now, let's use the localization data to determine the gray vs. white matter electrodes. 
Then, let's re-reference each gray matter electrode to the closest and most low-amplitude white matter electrode. 

Make sure 'bad' electrodes are not used in the re-referencing. Same with unmatched seeg electrodes (not present in the mne data structure).

In [None]:
# Compute the white-matter re-referencing scheme from the recording, the
# localization table, the current bad channels and the localization labels that
# are missing from the MNE data.
# anode_list / cathode_list are passed to mne.set_bipolar_reference below;
# drop_wm_channels and oob_channels are dropped from the re-referenced data afterwards.
# NOTE(review): the selection rules are implemented in lfp_preprocess_utils.wm_ref — confirm there.
anode_list, cathode_list, drop_wm_channels, oob_channels = lfp_preprocess_utils.wm_ref(MS007_data, elec_locs, 
                                                       MS007_data.info['bads'], 
                                                       unmatched_seeg=unmatched_seeg)




In [None]:
# Re-reference each anode against its paired cathode. copy=True requests a new
# object, so the result is stored separately as MS007_data_reref.
MS007_data_reref = mne.set_bipolar_reference(MS007_data, 
                          anode=anode_list, 
                          cathode=cathode_list,
                          copy=True)

In [None]:
# Remove the channels that wm_ref returned as drop_wm_channels from the re-referenced data.
MS007_data_reref.drop_channels(drop_wm_channels)

In [None]:
# Remove the channels that wm_ref returned as oob_channels.
# NOTE(review): "oob" presumably means out-of-brain — confirm against wm_ref.
MS007_data_reref.drop_channels(oob_channels)

In [None]:
# Rebuild the sEEG name lists from the re-referenced data and tag those
# channels as sEEG again.
reref_chan_names = MS007_data_reref.ch_names
right_seeg_names = [name for name in reref_chan_names if name.startswith('r')]
left_seeg_names = [name for name in reref_chan_names if name.startswith('l')]
sEEG_mapping_dict = dict.fromkeys(left_seeg_names + right_seeg_names, 'seeg')
MS007_data_reref.set_channel_types(sEEG_mapping_dict)


In [None]:
# Repeat the bad-electrode detection, this time on the re-referenced data and
# with the rebuilt sEEG mapping.
bads = lfp_preprocess_utils.detect_bad_elecs(MS007_data_reref, 
                                             sEEG_mapping_dict)

In [None]:
# Record the flagged channels on the re-referenced MNE object.
MS007_data_reref.info['bads'] = bads

In [None]:
# Plot the re-referenced traces for visual inspection; the figure handle is kept in `fig`.
fig = MS007_data_reref.plot()

In [None]:
# Plot the supposedly bad channels, one panel per channel, so the automatic
# detection can be checked by eye.
bad_chans = list(MS007_data_reref.info['bads'])

if not bad_chans:
    # plt.subplots() cannot build a grid with zero rows, so there is nothing to draw.
    print('No bad channels to plot')
else:
    # squeeze=False always returns a 2-D array of axes; without it a single bad
    # channel yields a bare Axes object and indexing ax[ix] fails.
    f, ax = plt.subplots(len(bad_chans), 1, figsize=(10, len(bad_chans)), squeeze=False)
    ax = ax[:, 0]  # one column -> flatten to a 1-D array of axes
    for ix, chan in enumerate(bad_chans):
        # Channels requested explicitly by name are returned even when marked bad.
        trace = MS007_data_reref.get_data(picks=[chan])[0]
        ax[ix].plot(trace, linewidth=0.2, color='k')
        # Label each panel so the trace can be attributed to its channel.
        ax[ix].set_ylabel(chan, rotation=0, ha='right', va='center', fontsize=8)
    plt.show()

## Epoch and pre-process signals