In this notebook, we are going to load some of the data that we processed in the Condensed Notebook and perform within-trial analysis of the oscillation bursts using eBOSC, a sliding window PSD peak detector thresholded against the data. 

In [5]:
# Render matplotlib figures inline in the notebook output
%matplotlib inline
# (Re)load the autoreload extension and reload imported modules before each cell
# executes, so edits to local packages are picked up without restarting the kernel
%reload_ext autoreload
%autoreload 2

In [55]:
import numpy as np
import mne
import mne_connectivity
import pactools
from mne_connectivity import phase_slope_index, seed_target_indices, spectral_connectivity_epochs
from glob import glob
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
import seaborn as sns
from scipy.stats import zscore, linregress, ttest_ind, ttest_rel, ttest_1samp
import pandas as pd
from mne.preprocessing.bads import _find_outliers
import fooof
# Import plotting functions
from fooof.plts.spectra import plot_spectrum, plot_spectra
# Import the FOOOF object
from fooof import FOOOF
from fooof import FOOOFGroup
import os 
import joblib
import statsmodels.formula.api as smf
import statsmodels.api as sm
import pickle
from tqdm import tqdm
from IPython.display import clear_output
import warnings 

# I only want to see warnings once
warnings.filterwarnings('ignore')

from joblib import delayed, Parallel
from statsmodels.stats import multitest

Note: If you have installed this package in editable form on Minerva, you must append the local path! This is because Minerva requires that you point your package installs away from the local directory for space reasons, but editable packages have to be installed locally.

In [7]:
import sys

# Location of the editable LFPAnalysis install; it has to be put on the import path by hand
LFP_ANALYSIS_PATH = '/hpc/users/qasims01/resources/LFPAnalysis'

# Guard the append so re-running this cell does not stack duplicate entries on sys.path
if LFP_ANALYSIS_PATH not in sys.path:
    sys.path.append(LFP_ANALYSIS_PATH)

In [17]:
from LFPAnalysis import lfp_preprocess_utils, sync_utils, analysis_utils, nlx_utils, oscillation_utils, statistics_utils


In [18]:
import h5io

## First, get the behavior data

This comes from another notebook that uses a kernel and environment set up specifically for behavior.

In [12]:
# Root directory for most un-archived data and results
base_dir = '/sc/arion'
load_dir = f'{base_dir}/work/qasims01/MemoryBanditData/EMU'

# The demographics spreadsheet records who has run the task
demo_file = f'{base_dir}/projects/guLab/Salman/EMU/MemoryBanditsDatabase.xlsx'
demo_data = pd.read_excel(demo_file, engine='openpyxl')

# Keep rows flagged LongVersion == 1 and Minerva == 1,
# and leave out the Iowa ('UI') patients for now
subj_df = demo_data.loc[
    (demo_data['LongVersion'] == 1)
    & (demo_data['Minerva'] == 1)
    & (demo_data['Site'] != 'UI')
]

days = ['Day1', 'Day2']

# Pull the per-subject columns out as plain arrays
subj_ids, subj_formats, subj_sites, subj_days = (
    subj_df[column].values for column in ('MSSMCode', 'format', 'Site', 'Day2')
)

In [13]:
# load behavioral data
learn_df = pd.read_csv(f'{load_dir}/learn_df.csv')
combined_df = pd.read_csv(f'{load_dir}/combined_df.csv')

# Filter just to the participants in this notebook
learn_df = learn_df[learn_df.participant.isin(subj_ids)]
combined_df = combined_df[combined_df.participant.isin(subj_ids)]


# I enter the day information in stupidly 
combined_df.condition = combined_df.condition.apply(lambda x: 'Day1' if x=='day_1' else 'Day2' if x=='day_2' else np.nan)
combined_df['recog_time'] = combined_df['recog_time'].apply(lambda x: float(x) if (~pd.isna(x)) & (x!='None') else x)
combined_df['baseline_start_mem'] = combined_df['baseline_start_mem'].apply(lambda x: float(x) if (~pd.isna(x)) & (x!='None') else x)

# # add RPE to the combined df dataframe: 

# combined_df['rpe'] = np.nan
# for ix, row in learn_df.iterrows():
#     combined_df.loc[(combined_df.img_path==row.img_path) & (combined_df.participant==row.participant), 'rpe'] = row.rpe

In [36]:
# List the names available in oscillation_utils (quick look at what the module exposes)
dir(oscillation_utils)

['BOSC_detect',
 'BOSC_tf',
 'Path',
 '__builtins__',
 '__cached__',
 '__doc__',
 '__file__',
 '__loader__',
 '__name__',
 '__package__',
 '__spec__',
 'chi2',
 'compute_connectivity',
 'compute_eBOSC_parallel',
 'eBOSC_episode_create',
 'eBOSC_episode_postproc_fwhm',
 'eBOSC_episode_postproc_maxbias',
 'eBOSC_episode_rm_shoulder',
 'eBOSC_episode_sparsefreq',
 'eBOSC_getThresholds',
 'eBOSC_wrapper',
 'find_nearest_value',
 'getTimeFromFTmat',
 'get_project_root',
 'mne',
 'np',
 'numpy',
 'pd',
 'phase_slope_index',
 'seed_target_indices',
 'sio',
 'sm',
 'spectral_connectivity_epochs',
 'swap_time_blocks',
 'tqdm',
 'zscore']

# eBOSC

eBOSC allows us to identify transient bursts

After epoching, we can compute eBOSC using the same frequency range we used for FOOOF. Both methods rely on line-fitting procedures for the PSD, and these tend to break down when higher frequencies are included.

In [14]:
# eBOSC settings, built in two steps because the duration threshold depends on 'F'
cfg_eBOSC = {
    'F': np.linspace(1, 30, 44),            # frequency sampling
    'wavenumber': 6,                        # wavelet parameter (time-frequency tradeoff)
    'fsample': 500,                         # current sampling frequency of EEG data
    'pad.tfr_s': 0,                         # bi-lateral padding (s) following the wavelet transform to avoid edge artifacts
    'pad.detection_s': 0,                   # bi-lateral padding (s) following rhythm detection; 'shoulder' that accounts for the duration threshold
    'pad.background_s': 0,                  # padding of segments for the background (only avoiding edge artifacts)
    'threshold.excludePeak': np.array([]),  # lower/upper bound of frequencies (Hz) excluded during the background fit
}

cfg_eBOSC.update({
    # one duration threshold per frequency (previously: ncyc), as a 1 x n_freqs row of 3s
    'threshold.duration': 3 * np.ones((1, len(cfg_eBOSC['F']))),
    'threshold.percentile': .95,            # percentile of background fit for power threshold
    'postproc.use': 'no',                   # post-process rhythmic episodes, i.e. wavelet 'deconvolution' (default = 'no')
    'postproc.method': 'FWHM',              # deconvolution method (default = 'MaxBias', FWHM: 'FWHM')
    'postproc.edgeOnly': 'yes',             # deconvolve only at episode on- and offsets? (default = 'yes')
    'postproc.effSignal': 'PT',             # power deconvolution on whole signal or signal above power threshold
    'trial': [],                            # select trials (default: all; natural trial numbers, not zero-based)
    'trial_background': [],                 # select trials for the background (default: all; natural trial numbers)
})

In [53]:
# Events to analyze, each mapped to a pair of times relative to the event
# (presumably the analysis window in seconds — TODO confirm against compute_eBOSC_parallel)
ev_dict = {'feedback_start': [0, 1.5]}

# FOOOF settings: peak width limits, minimum peak height, peak threshold,
# maximum number of peaks, and the frequency range to fit
FOOOF_kwargs = {'peak_width_limits': [1, 8],
                'min_peak_height': 0.1,
                'peak_threshold': 1.,
                'max_n_peaks': 3,
                'freq_range': [1, 30]}

# Frequency bands of interest (Hz)
band_dict = {'delta': [1, 4],
             'theta': [4, 8],
             'alpha': [8, 13],
             'beta': [13, 30]}

# Trial conditions, written as query strings on the `reward` column
conditions = ['reward==0',
              'reward==1']

# Folder holding one sub-folder per subject; also handed to the worker as save_path
ephys_root = f'{base_dir}/projects/guLab/Salman/EphysAnalyses'

# NOTE(review): `elec_df` is not created in any cell shown in this notebook; it must
# already exist in the kernel before this cell runs — TODO load it explicitly.

# Only the first subject is processed here
progress_bar = tqdm(subj_ids[0:1], ascii=True, desc='Computing eBOSC')

for subj_id in progress_bar:
    # Load one person's data (Day1 only)
    for day in ['Day1']:
        load_path = f'{ephys_root}/{subj_id}/neural/{day}'
        for event in ev_dict.keys():
            # Make sure the output folders exist; exist_ok avoids the check-then-create race
            filepath = f'{ephys_root}/{subj_id}/scratch/eBOSC/{event}'
            for subdir in ('plots', 'dfs'):
                os.makedirs(f'{filepath}/{subdir}', exist_ok=True)

            # Read this event's epochs and attach the subject's learning-phase behavior as metadata
            mne_data_reref = mne.read_epochs(f'{load_path}/{event}-epo.fif', preload=True)
            mne_data_reref.metadata = learn_df[learn_df.participant==subj_id]

            # One job per channel. The loop variable `event` is passed (not a hard-coded
            # event name) so the call stays correct if more events are added to ev_dict.
            Parallel(n_jobs=-1, verbose=1)(
                delayed(oscillation_utils.compute_eBOSC_parallel)(
                    chan_name,
                    mne_data_reref,
                    subj_id,
                    elec_df,
                    event,
                    ev_dict,
                    conditions,
                    do_plot=True,
                    save_path=ephys_root,
                    do_save=True,
                    both_dfs=True,
                    **cfg_eBOSC)
                for chan_name in mne_data_reref.ch_names)



Computing eBOSC:   0%|          | 0/1 [00:00<?, ?it/s]

Reading /sc/arion/projects/guLab/Salman/EphysAnalyses/MS012/neural/Day1/feedback_start-epo.fif ...
    Found the data of interest:
        t =   -1500.00 ...    2500.00 ms
        0 CTF compensation matrices available
Adding metadata with 83 columns
80 matching events found
No baseline correction applied
0 projection items activated
Replacing existing metadata with 19 columns


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 10 concurrent workers.
  time_averaged_df = pd.DataFrame(eBOSC['detected'].groupby(['trial', 'frequency']).mean(numeric_only=False)).reset_index().drop(columns=['time'])
  time_averaged_df = pd.DataFrame(eBOSC['detected'].groupby(['trial', 'frequency']).mean(numeric_only=False)).reset_index().drop(columns=['time'])
  time_averaged_df = pd.DataFrame(eBOSC['detected'].groupby(['trial', 'frequency']).mean(numeric_only=False)).reset_index().drop(columns=['time'])
  time_averaged_df = pd.DataFrame(eBOSC['detected'].groupby(['trial', 'frequency']).mean(numeric_only=False)).reset_index().drop(columns=['time'])
  time_averaged_df = pd.DataFrame(eBOSC['detected'].groupby(['trial', 'frequency']).mean(numeric_only=False)).reset_index().drop(columns=['time'])
  time_averaged_df = pd.DataFrame(eBOSC['detected'].groupby(['trial', 'frequency']).mean(numeric_only=False)).reset_index().drop(columns=['time'])
  time_averaged_df = pd.DataFrame(eBOSC['

Channel: racas8-racas9; Nr. 1/1
NOT removing frequency peaks from the background
Channel: rals1-rals2; Nr. 1/1
NOT removing frequency peaks from the background
Channel: rals11-rals12; Nr. 1/1
NOT removing frequency peaks from the background
Channel: racas4-racas5; Nr. 1/1
NOT removing frequency peaks from the background
Channel: raglt5-raglt6; Nr. 1/1
NOT removing frequency peaks from the background
Channel: rals3-rals4; Nr. 1/1
NOT removing frequency peaks from the background
Channel: racas1-racas2; Nr. 1/1
NOT removing frequency peaks from the background
Channel: raglt2-raglt3; Nr. 1/1
NOT removing frequency peaks from the background
Channel: rals9-rals10; Nr. 1/1
NOT removing frequency peaks from the background
Channel: racas2-racas3; Nr. 1/1
NOT removing frequency peaks from the background
Channel: raglt6-raglt7; Nr. 1/1
NOT removing frequency peaks from the background
Channel: rals8-rals9; Nr. 1/1
NOT removing frequency peaks from the background
Channel: racas6-racas7; Nr. 1/1
NOT


KeyboardInterrupt

