In [2]:
# General
import sys
import os
import os.path as op
from collections import OrderedDict as od
from importlib import reload
from glob import glob
import warnings

# Scientific
import numpy as np
import pandas as pd
import xarray
pd.options.display.max_rows = 200
pd.options.display.max_columns = 999

# Personal
sys.path.append('/home1/dscho/code/general')
sys.path.append('/home1/dscho/code/from_others')
sys.path.append('/home1/dscho/code/projects')
sys.path.append('/home1/dscho/code/projects/manning_replication')
# sys.path.append('/home1/dscho/code/projects/unit_activity_and_hpc_theta')
from cluster_helper.cluster import cluster_view
# from CMLDask import CMLDask
# from dask.distributed import wait, as_completed, progress
from helper_funcs import *
import data_io as dio
import array_operations as aop
# import phase_locking
# import manning_analysis
from unit_activity_and_hpc_theta import phase_locking as phlock
from time_cells import eeg_preproc, spike_preproc
from phase_locking import spectral_analysis, unit_lfp_comparison



# Process Goldmine microwire EEG

In [8]:
def process_eeg_parallel(subj_sess_chan):
    """Preprocess one Goldmine microwire channel; written to run on a cluster engine.

    Parameters
    ----------
    subj_sess_chan : str
        Identifier of the form '<subj>_<sess>_<chan>'. It is split on '_' into
        exactly three fields, so any other number of underscores raises
        ValueError before any processing starts.

    Returns
    -------
    None
        On success and on failure alike. A failure inside
        eeg_preproc.process_eeg is not re-raised; its formatted traceback is
        written to
        /home1/dscho/logs/TryExceptError-process_eeg_parallel-<subj_sess_chan>.
    """
    subj, sess, chan = subj_sess_chan.split('_')
    subj_sess = '{}_{}'.format(subj, sess)

    # Function-local imports; NOTE(review): presumably needed because the
    # function body is executed in a fresh engine process -- confirm.
    import sys
    import traceback
    sys.path.append('/home1/dscho/code/projects')
    from time_cells import eeg_preproc

    # Settings forwarded unchanged to eeg_preproc.process_eeg.
    l_freq = 0.1
    h_freq = 80
    notch_freqs = [60]
    downsample_to = 1000
    save_output = True
    overwrite = False

    try:
        # The returned (eeg, sr) pair is discarded; with save_output=True the
        # result is presumably written to disk by process_eeg -- TODO confirm.
        eeg_preproc.process_eeg(subj_sess,
                                chan=chan,
                                downsample_to=downsample_to,
                                l_freq=l_freq,
                                h_freq=h_freq,
                                notch_freqs=notch_freqs,
                                save_output=save_output,
                                overwrite=overwrite,
                                verbose=False)
    except Exception:
        # Log the full stack trace: str(sys.exc_info()) would only record the
        # repr of the traceback object. open(..., 'w') creates the file, so no
        # shell 'touch' is required.
        errf = '/home1/dscho/logs/TryExceptError-process_eeg_parallel-{}'.format(subj_sess_chan)
        with open(errf, 'w') as f:
            f.write(traceback.format_exc())
    return None

In [9]:
# A session name is the part of each events-file basename that precedes the first '-'.
sessions = np.unique([
    op.basename(events_file).split('-')[0]
    for events_file in glob(op.join('/data7', 'goldmine', 'analysis', 'events', '*.pkl'))
])
# The subject ID is the part of a session name that precedes the first '_'.
print('{} subjects, {} sessions'.format(
    len({name.split('_')[0] for name in sessions}),
    len(sessions)))

10 subjects, 12 sessions


In [4]:
# Split the raw channel files of every session into those that already have a
# processed .pkl and those that still need one.
processed_chans = []
chans_to_process = []
for subj_sess in sessions:
    subj, sess = subj_sess.split('_')
    micro_lfp_dir = op.join('/data7/goldmine', 'data', subj, sess, 'micro_lfps')
    proc_lfp_dir = op.join(micro_lfp_dir, 'sr1000_bandpass0.1-80_notch60')
    eeg_chan_files = glob(op.join(micro_lfp_dir, 'CSC*.mat'))
    processed_chans.extend(glob(op.join(proc_lfp_dir, 'CSC*.pkl')))
    # Channel ID = raw file basename with 'CSC' and '.mat' removed.
    chan_ids = [str_replace(op.basename(raw_file), {'CSC': '', '.mat': ''})
                for raw_file in eeg_chan_files]
    chans_to_process.extend(
        '{}_{}'.format(subj_sess, chan_id)
        for chan_id in chan_ids
        if not op.exists(op.join(proc_lfp_dir, 'CSC{}.pkl'.format(chan_id))))

for message, chan_list in (('{} EEG channels processed', processed_chans),
                           ('{} EEG channels still to process', chans_to_process)):
    print(message.format(len(chan_list)))

8 EEG channels processed
898 EEG channels still to process


In [10]:
# timer = Timer()
# eeg, sr = process_eeg_parallel(chans_to_process[0])

# print(sr, eeg.shape)
# print(timer)

In [16]:
# Preprocess every outstanding Goldmine channel on the SGE cluster and time the run.
start_time = time()

# Parallel processing
n_ops = len(chans_to_process)
print('Running code for {} operations.\n'.format(n_ops))
if n_ops > 0:
    # Request one engine per job, capped at 240; int() keeps num_jobs a plain
    # Python integer rather than a numpy scalar.
    with cluster_view(scheduler="sge", queue="RAM.q", num_jobs=int(min(n_ops, 240)), cores_per_job=24, retries=2) as view:
        output = view.map(process_eeg_parallel, chans_to_process)
else:
    # Nothing to do: skip the cluster request instead of asking for zero engines.
    output = []

print('Done in {:.1f}s'.format(time() - start_time))

# Process YCab EEG

In [14]:
def process_eeg_ycab_parallel(subj_df):
    """Preprocess every channel listed in a YCab dataframe; written to run on a cluster engine.

    Parameters
    ----------
    subj_df : pandas.DataFrame
        One row per channel. The columns read here are 'subj_sess', 'chan' and
        'raw_lfp_file' (path to a raw float32 binary file).

    Returns
    -------
    None
        On success and on failure alike. The first channel that fails aborts
        the remaining rows; the formatted traceback is written to
        /home1/dscho/logs/TryExceptError-process_eeg_ycab_parallel-<subj_sess>_<chan>
        (or ...-unknown when the failure happens before any row is read).
    """
    # Function-local imports; NOTE(review): presumably needed because the
    # function body is executed in a fresh engine process -- confirm.
    import sys
    import traceback
    import numpy as np
    sys.path.append('/home1/dscho/code/projects')
    from time_cells import eeg_preproc

    # Settings forwarded unchanged to eeg_preproc.process_eeg.
    convert_v_to_muv = True
    downsample_to = 1000
    l_freq = 0.1
    h_freq = 80
    notch_freqs = [60]
    data_dir = '/scratch/dscho/ycab/eeg'
    save_output = True
    overwrite = True
    sr_in = 2000  # sampling rate passed along with the raw data

    # Bound before the try block so the except handler can always name the log
    # file, even when the failure happens before the first row is read.
    subj_sess_chan = 'unknown'
    try:
        for idx, subj_df_row in subj_df.iterrows():
            subj_sess = subj_df_row['subj_sess']
            chan = subj_df_row['chan']
            subj_sess_chan = '{}_{}'.format(subj_sess, chan)
            # Raw samples are stored as float32 and promoted to float64.
            eeg_in = np.fromfile(subj_df_row['raw_lfp_file'], dtype='float32').astype(np.float64)
            # The returned (eeg, sr) pair is discarded; with save_output=True
            # the result is presumably written under data_dir -- TODO confirm.
            eeg_preproc.process_eeg(subj_sess,
                                    chan,
                                    eeg=eeg_in,
                                    sr=sr_in,
                                    convert_v_to_muv=convert_v_to_muv,
                                    downsample_to=downsample_to,
                                    l_freq=l_freq,
                                    h_freq=h_freq,
                                    notch_freqs=notch_freqs,
                                    data_dir=data_dir,
                                    save_output=save_output,
                                    overwrite=overwrite,
                                    verbose=False)
    except Exception:
        # Log the full stack trace: str(sys.exc_info()) would only record the
        # repr of the traceback object. open(..., 'w') creates the file, so no
        # shell 'touch' is required.
        errf = '/home1/dscho/logs/TryExceptError-process_eeg_ycab_parallel-{}'.format(subj_sess_chan)
        with open(errf, 'w') as f:
            f.write(traceback.format_exc())
    return None

In [20]:
# Load the subject dataframe (each row = 1 channel).
subj_df = phlock.get_subj_df()

# Keep every subject other than U367 that has at least one channel whose
# location label ends in 'H'.
subj_df_no_u367 = subj_df.query("(subj!='U367')")
idx = np.where(subj_df_no_u367['location'].apply(lambda loc: loc[-1]=='H'))[0]
subjs = subj_df_no_u367.iloc[idx, :]['subj'].unique()
subj_df = subj_df.query("(subj=={})".format(subjs.tolist())).reset_index(drop=True)

# Fix session naming in both session columns ('env' -> 'ses', '1a' -> '1').
for sess_col in ('subj_sess', 'sess'):
    subj_df[sess_col] = subj_df[sess_col].apply(lambda name: str_replace(name, {'env': 'ses', '1a': '1'}))
yc_subjs = np.sort(subj_df['subj'].unique())
yc_sessions = np.sort(subj_df['subj_sess'].unique())

# Add paths to the processed LFP channels.
data_dir = '/scratch/dscho/ycab/eeg'


def _proc_lfp_path(row):
    """Return the processed-LFP pickle path for one channel row."""
    return op.join(data_dir, row['subj'], row['sess'], 'micro_lfps',
                   'V-to-muV_sr1000_bandpass0.1-80_notch60',
                   'CSC{}.pkl'.format(row['chan']))


subj_df['proc_lfp_file'] = subj_df.apply(_proc_lfp_path, axis=1)

print('{} subjects and {} sessions'.format(len(yc_subjs), len(yc_sessions)))
print('subj_df: {}'.format(subj_df.shape))

18 subjects and 43 sessions
subj_df: (2696, 14)


In [21]:
# A session counts as processed only when every one of its channels has a
# processed LFP file on disk.
processed_sessions = []
sessions_to_process = []
for sess_name, sess_df in subj_df.groupby('subj_sess'):
    all_chans_saved = np.all(sess_df['proc_lfp_file'].apply(op.exists))
    (processed_sessions if all_chans_saved else sessions_to_process).append(sess_df)

for message, sess_list in (('{} sessions processed', processed_sessions),
                           ('{} sessions still to process', sessions_to_process)):
    print(message.format(len(sess_list)))

43 sessions processed
0 sessions still to process


In [112]:
# Time a single session locally before launching the full cluster run.
timer = Timer()

# sessions_to_process is empty once every session has been processed, so
# guard the [0] lookup instead of raising IndexError.
if len(sessions_to_process) > 0:
    _subj_df = sessions_to_process[0]
    print('{}'.format(_subj_df.iloc[0]['subj_sess']))
    _ = process_eeg_ycab_parallel(_subj_df)
else:
    print('No sessions left to process.')

print(timer)

U369_ses2, chan1
Notch filtering at [60] Hz
eeg: (825808,), sr: 1000.0 Hz
Ran in 1.0s


In [16]:
# Parallel processing: one job per YCab session that still has unprocessed channels.
n_ops = len(sessions_to_process)
print('Running code for {} operations.\n'.format(n_ops))
if n_ops > 0:
    # Request one engine per job, capped at 10; int() keeps num_jobs a plain
    # Python integer rather than a numpy scalar.
    with cluster_view(scheduler="sge", queue="RAM.q", num_jobs=int(min(n_ops, 10)), cores_per_job=8, retries=2) as view:
        output = view.map(process_eeg_ycab_parallel, sessions_to_process)
else:
    # Nothing to do: skip the cluster request instead of asking for zero engines.
    output = []

# Process Goldmine phase and power

In [17]:
def save_spectral_fooof_p_episode_parallel(info):
    """Run the Goldmine spectral pipeline for one session/region; written for a cluster engine.

    Steps, in order: load event-locked EEG, compute wavelet power and phase,
    fit FOOOF models to the power, and run P-episode. Each step is called with
    save_output=True and overwrite=True.

    Parameters
    ----------
    info : dict
        Must provide 'subj_sess' (str), 'roi' (str) and 'game_states'
        (sequence of str). Outputs go under the 'delay' directories when
        'Delay' is a substring of game_states[0], otherwise under 'nav'.

    Returns
    -------
    dict or None
        On success, a dict with keys 'event_eeg', 'power', 'phase', 'fg' and
        'osc_mask'. On failure, None: the exception is not re-raised and its
        formatted traceback is written to
        /home1/dscho/logs/TryExceptError-save_spectral_fooof_p_episode_parallel-<subj_sess>-<roi>-<game_states joined by '_'>
        (or ...-unknown when `info` itself could not be read).
    """
    import sys
    import traceback
    import os.path as op
    import numpy as np
    # Not referenced below. NOTE(review): presumably imported so that a missing
    # fooof install fails immediately rather than midway -- confirm.
    from fooof import FOOOF, FOOOFGroup
    from fooof.objs.utils import combine_fooofs
    sys.path.append('/home1/dscho/code/projects')
    from time_cells import eeg_preproc, spectral_analysis

    # Bound before the try block so the except handler can always name the log
    # file, even when `info` is missing a key.
    job_label = 'unknown'
    try:
        subj_sess = info['subj_sess']
        roi = info['roi']
        game_states = info['game_states']
        job_label = '{}-{}-{}'.format(subj_sess, roi, '_'.join(game_states))
        gs_cat = 'delay' if 'Delay' in game_states[0] else 'nav'

        # Settings shared by every step below.
        freqs = np.arange(1, 31)
        save_output = True
        overwrite = True
        verbose = True

        # -----------------------
        # Load EEG for each channel and event.
        event_eeg = eeg_preproc.load_event_eeg(subj_sess,
                                               regions=[roi],
                                               game_states=game_states,
                                               buffer=2500,
                                               l_freq=0.1,
                                               h_freq=80,
                                               notch_freqs=[60],
                                               chan_exclusion_thresh=2,
                                               verbose=verbose)
        # Collapse (gameState, trial) into one 'event' dimension and order the
        # dimensions as event x chan x time.
        event_eeg = event_eeg[roi].stack(event=('gameState', 'trial')).transpose('event', 'chan', 'time')

        # -----------------------
        # Save power and phase at each timepoint, for each channel and event.
        power, phase = spectral_analysis.timefreq_wavelet(event_eeg,
                                                          freqs,
                                                          clip_buffer=False,
                                                          n_cycles=5,
                                                          zero_mean=True,
                                                          log_power=False,
                                                          output_dir=op.join('/scratch/dscho/goldmine', gs_cat, 'spectral'),
                                                          save_output=save_output,
                                                          overwrite=overwrite,
                                                          verbose=verbose)

        # -----------------------
        # Run FOOOF on spectral powers for each channel and event.
        fg = spectral_analysis.run_fooof(subj_sess,
                                         roi,
                                         power=power,
                                         freqs=freqs,
                                         peak_width_limits=(1, 8),
                                         min_peak_height=0.2,
                                         max_n_peaks=4,
                                         peak_threshold=2,
                                         aperiodic_mode='fixed',
                                         output_dir=op.join('/home1/dscho/projects/unit_activity_and_hpc_theta/data2/goldmine',
                                                            gs_cat, 'fooof'),
                                         save_output=save_output,
                                         overwrite=overwrite,
                                         verbose=verbose)

        # -----------------------
        # Run P-episode.
        # NOTE(review): power is not passed in here; run_p_episode presumably
        # locates its input from subj_sess/roi -- confirm.
        osc_mask = spectral_analysis.run_p_episode(subj_sess,
                                                   roi,
                                                   freqs=freqs,
                                                   cycle_thresh=3,
                                                   thresh_req='mean',
                                                   chi2_pctl=0.95,
                                                   output_dir=op.join('/scratch/dscho/goldmine', gs_cat, 'p_episode'),
                                                   save_output=save_output,
                                                   overwrite=overwrite,
                                                   verbose=verbose)

        # -----------------------
        output = {'event_eeg': event_eeg,
                  'power': power,
                  'phase': phase,
                  'fg': fg,
                  'osc_mask': osc_mask}
        return output
    except Exception:
        # Log the full stack trace: str(sys.exc_info()) would only record the
        # repr of the traceback object. open(..., 'w') creates the file, so no
        # shell 'touch' is required.
        errf = '/home1/dscho/logs/TryExceptError-save_spectral_fooof_p_episode_parallel-{}'.format(job_label)
        with open(errf, 'w') as f:
            f.write(traceback.format_exc())
        return None

In [10]:
# A session name is the part of each events-file basename that precedes the first '-'.
sessions = np.unique([
    op.basename(events_file).split('-')[0]
    for events_file in glob(op.join('/data7', 'goldmine', 'analysis', 'events', '*.pkl'))
])
# The subject ID is the part of a session name that precedes the first '_'.
print('{} subjects, {} sessions'.format(
    len({name.split('_')[0] for name in sessions}),
    len(sessions)))

10 subjects, 12 sessions


In [18]:
# Build the work list: one job description per (session, region) pair.
overwrite = False
# data_dir = '/home1/dscho/projects/unit_activity_and_hpc_theta/data2/goldmine/delay'
data_dir = '/home1/dscho/projects/unit_activity_and_hpc_theta/data2/goldmine/nav'

# -----------------------
# The last component of data_dir selects which game states are analysed.
game_states = ['Delay1', 'Delay2'] if op.basename(data_dir) == 'delay' else ['Encoding', 'Retrieval']

processed = []
to_process = []
for subj_sess in sessions:
    mont = spike_preproc.get_montage(subj_sess)
    for roi in mont.keys():
        subj_sess_roi = '{}-{}'.format(subj_sess, roi)
        basename = '{}.pkl'.format(subj_sess_roi)
        output_files = (op.join(data_dir, 'spectral', 'power', basename),
                        op.join(data_dir, 'spectral', 'phase', basename),
                        op.join(data_dir, 'fooof', basename.replace('.pkl', '.json')),
                        op.join(data_dir, 'p_episode', basename))
        info = {'subj_sess': subj_sess,
                'roi': roi,
                'game_states': game_states}
        # A region is done only when all four outputs exist and overwrite is off.
        if overwrite or not all(op.exists(f) for f in output_files):
            to_process.append(info)
        else:
            processed.append(info)

print('{}/{} regions to process'.format(len(to_process), len(processed) + len(to_process)))

1/113 regions to process


In [19]:
# Time a single region locally before launching the full cluster run.
timer = Timer()

ii = 0
# to_process is empty once every region has been processed (overwrite=False),
# so guard the lookup instead of raising IndexError.
if ii < len(to_process):
    print(to_process[ii])
    _ = save_spectral_fooof_p_episode_parallel(to_process[ii])
else:
    print('No regions left to process.')

print(timer)

{'subj_sess': 'U527_ses0', 'roi': 'LMH', 'game_states': ['Encoding', 'Retrieval']}
U527_ses0
---------
Kept 8/8 (100%) channels across 1 regions
Ran in 4.6s

Ran in 4.8s


In [21]:
# Run the Goldmine spectral pipeline for every outstanding region and time the run.
start_time = time()

# Parallel processing
n_ops = len(to_process)
print('Running code for {} operations.\n'.format(n_ops))
if n_ops > 0:
    # Request one engine per job, capped at 6; int() keeps num_jobs a plain
    # Python integer rather than a numpy scalar.
    with cluster_view(scheduler="sge", queue="RAM.q", num_jobs=int(min(n_ops, 6)), cores_per_job=24, retries=2) as view:
        output = view.map(save_spectral_fooof_p_episode_parallel, to_process)
else:
    # Nothing to do: skip the cluster request instead of asking for zero engines.
    output = []

print('Done in {:.1f}s'.format(time() - start_time))

# Process YCab phase and power

In [188]:
def save_spectral_fooof_p_episode_ycab_parallel(data_in):
    """Run the YCab spectral pipeline for one session/region; written for a cluster engine.

    Steps, in order: load the continuous EEG for the region, cut it into
    fixed-length buffered epochs, compute wavelet power and phase, fit FOOOF
    models to the power, and run P-episode. Each analysis step is called with
    save_output=True and overwrite=True.

    Parameters
    ----------
    data_in : dict
        Must provide 'subj_sess' (str of the form '<subj>_<sess>'), 'roi'
        (region name) and 'mont' (montage forwarded to
        eeg_preproc.load_time_eeg).

    Returns
    -------
    dict or tuple
        On success, a dict with keys 'power', 'phase', 'fg' and 'osc_mask'.
        On failure, a 3-tuple (exception type, exception instance, formatted
        traceback text). The traceback is returned as text because traceback
        objects cannot be pickled, which would prevent the result from being
        sent back from an engine. The same text is written to
        /home1/dscho/logs/TryExceptError-save_spectral_fooof_p_episode_ycab_parallel-<subj_sess>-<roi>
        (or ...-unknown when `data_in` itself could not be read).
    """
    import sys
    import traceback
    import os.path as op
    import numpy as np
    import xarray
    # Not referenced below. NOTE(review): presumably imported so that a missing
    # fooof install fails immediately rather than midway -- confirm.
    from fooof import FOOOF, FOOOFGroup
    from fooof.objs.utils import combine_fooofs
    sys.path.append('/home1/dscho/code/general')
    import array_operations as aop
    sys.path.append('/home1/dscho/code/projects')
    from time_cells import eeg_preproc, spectral_analysis

    # Bound before the try block so the except handler can always name the log
    # file, even when `data_in` is missing a key.
    job_label = 'unknown'
    try:
        subj_sess = data_in['subj_sess']
        roi = data_in['roi']
        job_label = '{}-{}'.format(subj_sess, roi)
        subj, sess = subj_sess.split('_')
        mont = data_in['mont']

        # Settings shared by the analysis steps below.
        freqs = np.arange(1, 31)
        save_output = True
        overwrite = True

        # -----------------------
        # Load the continuous EEG for every channel in the region.
        sr = 1000
        session_cut = 60000  # cut this many samples from start and end of the session
        event_len = 30000  # samples per epoch, not counting the buffer
        buffer = 2500  # extra samples kept on each side of every epoch
        chan_exclusion_thresh = 2
        time_eeg = eeg_preproc.load_time_eeg(subj_sess,
                                             mont=mont,
                                             regions=[roi],
                                             l_freq=0.1,
                                             h_freq=80,
                                             notch_freqs=[60],
                                             chan_exclusion_thresh=chan_exclusion_thresh,
                                             verbose=True,
                                             output_dir=op.join('/scratch/dscho/ycab/eeg', subj, sess, 'micro_lfps',
                                                                'V-to-muV_sr1000_bandpass0.1-80_notch60'))
        time_eeg = time_eeg[roi]  # indexed below as a chan x time table
        chans = time_eeg.index.values

        # -----------------------
        # Cut the session into consecutive epochs of event_len samples.
        # start_cut samples are cut from session start, on top of session_cut,
        # so the remaining span is a whole number of epochs.
        n_samples = time_eeg.shape[1]
        start_cut = max(n_samples - (2 * session_cut), 0) % event_len
        epoch_edges = np.arange(session_cut + start_cut,
                                n_samples - session_cut + 1,
                                event_len)
        if epoch_edges.size < 2:
            raise ValueError('Session has {} samples; too short for a {}-sample epoch after '
                             'cutting {} samples from each end'
                             .format(n_samples, event_len, session_cut))
        # Assumes rolling_window(x, 2) yields consecutive (start, stop) pairs
        # -- TODO confirm against array_operations.
        start_stop = aop.rolling_window(epoch_edges, 2)
        event_idx = np.array([np.arange(start - buffer, stop + buffer)
                              for (start, stop) in start_stop.tolist()])
        # chan x trial x time -> trial x chan x time, then add a leading
        # axis: gameState x trial x chan x time
        event_eeg = np.swapaxes(time_eeg.values[:, event_idx], 0, 1)[None, :, :, :]
        event_eeg = xarray.DataArray(event_eeg,
                                     name=(subj_sess, roi),
                                     coords=[('gameState', ['YCab']),
                                             ('trial', np.arange(1, event_eeg.shape[1]+1)),
                                             ('chan', chans),
                                             ('time', np.arange(event_eeg.shape[-1]))],
                                     dims=['gameState', 'trial', 'chan', 'time'],
                                     attrs={'sr': sr,
                                            'buffer': buffer,
                                            'chan_exclusion_thresh': chan_exclusion_thresh,
                                            'session_cut': session_cut,
                                            'start_cut': start_cut})
        # Collapse (gameState, trial) into one 'event' dimension and order the
        # dimensions as event x chan x time.
        event_eeg = event_eeg.stack(event=('gameState', 'trial')).transpose('event', 'chan', 'time')

        # -----------------------
        # Save power and phase at each timepoint, for each channel and event.
        power, phase = spectral_analysis.timefreq_wavelet(event_eeg,
                                                          freqs,
                                                          clip_buffer=False,
                                                          n_cycles=5,
                                                          zero_mean=True,
                                                          log_power=False,
                                                          output_dir='/scratch/dscho/ycab/spectral',
                                                          save_output=save_output,
                                                          overwrite=overwrite,
                                                          verbose=True)

        # -----------------------
        # Run FOOOF on spectral powers for each channel and event.
        fg = spectral_analysis.run_fooof(subj_sess,
                                         roi,
                                         power=power,
                                         freqs=freqs,
                                         peak_width_limits=(1, 8),
                                         min_peak_height=0.2,
                                         max_n_peaks=4,
                                         peak_threshold=2,
                                         aperiodic_mode='fixed',
                                         output_dir='/scratch/dscho/ycab/fooof',
                                         save_output=save_output,
                                         overwrite=overwrite,
                                         verbose=False)

        # -----------------------
        # Run P-episode.
        # NOTE(review): power is not passed in here; run_p_episode presumably
        # locates its input from subj_sess/roi -- confirm.
        osc_mask = spectral_analysis.run_p_episode(subj_sess,
                                                   roi,
                                                   freqs=freqs,
                                                   cycle_thresh=3,
                                                   thresh_req='mean',
                                                   chi2_pctl=0.95,
                                                   output_dir='/scratch/dscho/ycab/p_episode',
                                                   save_output=save_output,
                                                   overwrite=overwrite,
                                                   verbose=True)
        data_out = {'power': power,
                    'phase': phase,
                    'fg': fg,
                    'osc_mask': osc_mask}
        return data_out
    except Exception:
        exc_type, exc_value = sys.exc_info()[:2]
        tb_text = traceback.format_exc()
        # Log the full stack trace: str(sys.exc_info()) would only record the
        # repr of the traceback object. open(..., 'w') creates the file, so no
        # shell 'touch' is required.
        errf = '/home1/dscho/logs/TryExceptError-save_spectral_fooof_p_episode_ycab_parallel-{}'.format(job_label)
        with open(errf, 'w') as f:
            f.write(tb_text)
        return (exc_type, exc_value, tb_text)

In [106]:
# Load the subject dataframe (each row = 1 channel).
subj_df = phlock.get_subj_df()

# Keep every subject other than U367 that has at least one channel whose
# location label ends in 'H'.
subj_df_no_u367 = subj_df.query("(subj!='U367')")
idx = np.where(subj_df_no_u367['location'].apply(lambda loc: loc[-1]=='H'))[0]
subjs = subj_df_no_u367.iloc[idx, :]['subj'].unique()
subj_df = subj_df.query("(subj=={})".format(subjs.tolist())).reset_index(drop=True)

# Fix session naming in both session columns ('env' -> 'ses', '1a' -> '1').
for sess_col in ('subj_sess', 'sess'):
    subj_df[sess_col] = subj_df[sess_col].apply(lambda name: str_replace(name, {'env': 'ses', '1a': '1'}))
yc_subjs = np.sort(subj_df['subj'].unique())
yc_sessions = np.sort(subj_df['subj_sess'].unique())

# Add paths to the processed LFP channels.
data_dir = '/scratch/dscho/ycab/eeg'


def _proc_lfp_path(row):
    """Return the processed-LFP pickle path for one channel row."""
    return op.join(data_dir, row['subj'], row['sess'], 'micro_lfps',
                   'V-to-muV_sr1000_bandpass0.1-80_notch60',
                   'CSC{}.pkl'.format(row['chan']))


subj_df['proc_lfp_file'] = subj_df.apply(_proc_lfp_path, axis=1)

print('{} subjects and {} sessions'.format(len(yc_subjs), len(yc_sessions)))
print('subj_df: {}'.format(subj_df.shape))

18 subjects and 43 sessions
subj_df: (2696, 14)


In [186]:
# Build the work list: one job description per (session, region) pair.
overwrite = True
data_dir = '/scratch/dscho/ycab'
processed = []
to_process = []
for subj_sess in yc_sessions:
    # Montage for this session: location -> integer array of its channels.
    mont = (subj_df.query("(subj_sess=='{}')".format(subj_sess))
                   .groupby(['location'])['chan']
                   .apply(lambda chan_col: np.array([int(chan) for chan in chan_col])))
    for roi in mont.keys():
        subj_sess_roi = '{}-{}'.format(subj_sess, roi)
        basename = '{}.pkl'.format(subj_sess_roi)
        output_files = (op.join(data_dir, 'spectral', 'power', basename),
                        op.join(data_dir, 'spectral', 'phase', basename),
                        op.join(data_dir, 'fooof', basename.replace('.pkl', '.json')),
                        op.join(data_dir, 'p_episode', basename))
        # A region is done only when all four outputs exist and overwrite is off.
        if overwrite or not all(op.exists(f) for f in output_files):
            to_process.append({'subj_sess': subj_sess,
                               'roi': roi,
                               'mont': mont})
        else:
            processed.append(subj_sess_roi)

print('{}/{} regions to process'.format(len(to_process), len(processed) + len(to_process)))

336/336 regions to process


In [47]:
# Time a single region locally before launching the full cluster run.
timer = Timer()

# to_process can be empty (e.g. overwrite=False with every output present),
# so guard the [0] lookup instead of raising IndexError.
if len(to_process) > 0:
    data_in = to_process[0]
    print('{}-{}'.format(data_in['subj_sess'], data_in['roi']), end='\n'*2)
    data_out = save_spectral_fooof_p_episode_ycab_parallel(data_in)
else:
    print('No regions left to process.')

print(timer)

U369_ses1-LA

U369_ses1
---------
Kept 8/8 (100%) channels across 1 regions
Ran in 1.8s

Saved /scratch/dscho/ycab/spectral/power/U369_ses1-LA.pkl
Saved /scratch/dscho/ycab/spectral/phase/U369_ses1-LA.pkl
Saved /scratch/dscho/ycab/p_episode/U369_ses1-LA.pkl
Ran in 115.0s


In [None]:
# Run the YCab spectral pipeline for every outstanding region and time the run.
start_time = time()

# Parallel processing
n_ops = len(to_process)
print('Running code for {} operations.\n'.format(n_ops))
if n_ops > 0:
    # Request one engine per job, capped at 12; int() keeps num_jobs a plain
    # Python integer rather than a numpy scalar.
    with cluster_view(scheduler="sge", queue="RAM.q", num_jobs=int(min(n_ops, 12)), cores_per_job=12, retries=2) as view:
        output = view.map(save_spectral_fooof_p_episode_ycab_parallel, to_process)
else:
    # Nothing to do: skip the cluster request instead of asking for zero engines.
    output = []

print('Done in {:.1f}s'.format(time() - start_time))

Running code for 336 operations.

12 Engines running


ERROR:asyncio:Exception in callback BaseAsyncIOLoop._handle_events(56, 1)
handle: <Handle BaseAsyncIOLoop._handle_events(56, 1)>
Traceback (most recent call last):
  File "/home1/dscho/anaconda3/envs/memlab/lib/python3.6/asyncio/events.py", line 145, in _run
    self._callback(*self._args)
  File "/home1/dscho/anaconda3/envs/memlab/lib/python3.6/site-packages/tornado/platform/asyncio.py", line 117, in _handle_events
    handler_func(fileobj, events)
  File "/home1/dscho/anaconda3/envs/memlab/lib/python3.6/site-packages/tornado/stack_context.py", line 276, in null_wrapper
    return fn(*args, **kwargs)
  File "/home1/dscho/anaconda3/envs/memlab/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 450, in _handle_events
    self._handle_recv()
  File "/home1/dscho/anaconda3/envs/memlab/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 480, in _handle_recv
    self._run_callback(callback, msg)
  File "/home1/dscho/anaconda3/envs/memlab/lib/python3.6/site-packages/zmq/

In [15]:
# Clip the buffer out of every saved power, phase and P-episode file (in place)
# to reduce file sizes.
timer = Timer()
freqs = np.arange(1, 31)  # NOTE(review): not used in this cell; frequencies are not constrained here
# data_dir = '/home1/dscho/projects/unit_activity_and_hpc_theta/data2'
data_dir = '/scratch/dscho/ycab'
power_files = glob(op.join(data_dir, 'spectral', 'power', 'U*.pkl'))
phase_files = glob(op.join(data_dir, 'spectral', 'phase', 'U*.pkl'))
osc_mask_files = glob(op.join(data_dir, 'p_episode', 'U*.pkl'))
all_files = phase_files + power_files + osc_mask_files
print('{} files'.format(len(all_files)))

for iFile, fpath in enumerate(all_files):
    dat = dio.open_pickle(fpath)
    # Clip only files that have a buffer and are not already marked as clipped;
    # without this guard a re-run would trim real data from clipped files.
    if (dat.attrs.get('buffer', 0) > 0) and (not dat.attrs.get('clip_buffer', False)):
        # .loc slices by label and includes the stop label; this assumes the
        # time coordinate is labelled 0..N-1 -- TODO confirm for every file.
        dat = dat.loc[:, :, :, dat.buffer:dat.time.size-dat.buffer-1]
        # Always record the clip, including for files saved without the flag.
        dat.attrs['clip_buffer'] = True
        dio.save_pickle(dat, fpath, verbose=False)

print(timer)

1008 files
Ran in 14391.3s


# Clip out buffer and resave files

In [26]:
def clip_the_buffer_parallel(fpath):
    """Remove the buffer from one saved data file and resave it in place.

    Parameters
    ----------
    fpath : str
        Path to a pickle that data_io.open_pickle can load. The object is
        expected to have four dimensions with 'time' last, and to carry a
        'buffer' entry in its attrs (xarray-style); files without a positive
        buffer, or already flagged clip_buffer=True, are left untouched.

    Returns
    -------
    None
        On success and on failure alike. A failure is not re-raised; its
        formatted traceback is written to
        /home1/dscho/logs/TryExceptError-clip_the_buffer_parallel-<parent dir>-<file stem>.
    """
    from time import sleep
    import random
    # Random delay of up to two minutes before touching the file.
    # NOTE(review): presumably staggers disk access across engines -- confirm.
    sleep(random.randint(0, 120))
    import sys
    import traceback
    sys.path.append('/home1/dscho/code/general')
    import data_io as dio

    try:
        dat = dio.open_pickle(fpath)
        # A missing 'clip_buffer' flag is treated as "not clipped yet".
        if (dat.attrs.get('buffer', 0) > 0) and (not dat.attrs.get('clip_buffer', False)):
            # .loc slices by label and includes the stop label; this assumes
            # the time coordinate is labelled 0..N-1 -- TODO confirm.
            dat = dat.loc[:, :, :, dat.buffer:dat.time.size-dat.buffer-1]
            dat.attrs['clip_buffer'] = True
            dio.save_pickle(dat, fpath, verbose=False)
    except Exception:
        # Name the log after the file that actually failed. The notebook-level
        # all_files list is not defined on an engine and would always point at
        # its first entry anyway.
        errf = ('/home1/dscho/logs/TryExceptError-clip_the_buffer_parallel-{}'
                .format('-'.join(fpath.split('/')[-2:]).replace('.pkl', '')))
        with open(errf, 'w') as f:
            f.write(traceback.format_exc())
    return None

In [4]:
# List every saved power, phase and P-episode file whose buffer should be clipped.
# data_dir = '/scratch/dscho/ycab'
# data_dir = '/scratch/dscho/goldmine/delay'
data_dir = '/scratch/dscho/goldmine/nav'
power_files, phase_files, osc_mask_files = (
    glob(op.join(data_dir, *subdirs, 'U*.pkl'))
    for subdirs in (('spectral', 'power'), ('spectral', 'phase'), ('p_episode',)))
all_files = phase_files + power_files + osc_mask_files
print('{} files'.format(len(all_files)))

# for iFile, fpath in enumerate(all_files):
#     dat = dio.open_pickle(fpath)
#     if (dat.buffer > 0) & (not dat.clip_buffer):
#         dat = dat.loc[:, :, :, dat.buffer:dat.time.size-dat.buffer-1]
#         dat.attrs['clip_buffer'] = True
#         dio.save_pickle(dat, fpath, verbose=True)

339 files
Ran in 0.1s


In [28]:
# Parallel processing: one job per file whose buffer should be clipped.
n_ops = len(all_files)
print('Running code for {} operations.\n'.format(n_ops))
if n_ops > 0:
    # Request one engine per job, capped at 12; int() keeps num_jobs a plain
    # Python integer rather than a numpy scalar.
    with cluster_view(scheduler="sge", queue="RAM.q", num_jobs=int(min(n_ops, 12)), cores_per_job=8, retries=2) as view:
        output = view.map(clip_the_buffer_parallel, all_files)
else:
    # Nothing to do: skip the cluster request instead of asking for zero engines.
    output = []

# Run Goldmine phase-locking

In [3]:
def unit_to_lfp_phase_locking_parallel(unit):
    """Cluster worker: run Goldmine unit-to-LFP phase-locking for one unit.

    Parameters
    ----------
    unit : mapping (e.g. one row of the `spikes` dataframe)
        Passed through to `unit_lfp_comparison.unit_to_lfp_phase_locking`.
        Its 'subj_sess', 'chan', and 'unit' entries name the error log.

    Returns
    -------
    None
        On success and on failure alike. On failure the traceback is written
        to /home1/dscho/logs/TryExceptError-unit_to_lfp_phase_locking_parallel-
        <subj_sess>-<chan>-<unit>.
    """
    import sys
    import traceback
    sys.path.append('/home1/dscho/code/projects')
    from phase_locking import unit_lfp_comparison

    try:
        expmt = 'goldmine'
        game_states = ['Encoding', 'Retrieval']
        n_rois = 8
        keep_same_hem = [True]
        keep_edges = ['hpc-local', 'hpc-hpc', 'hpc-ctx', 'ctx-local', 'ctx-hpc', 'ctx-ctx']
        exclude_gen_rois = ['Other']
        mask_phase = True
        n_perm = 1000
        data_dir = '/home1/dscho/projects/unit_activity_and_hpc_theta/data2/goldmine/nav'
        save_output = True
        overwrite = True

        _ = unit_lfp_comparison.unit_to_lfp_phase_locking(unit,
                                                          expmt=expmt,
                                                          game_states=game_states,
                                                          n_rois=n_rois,
                                                          keep_same_hem=keep_same_hem,
                                                          keep_edges=keep_edges,
                                                          exclude_gen_rois=exclude_gen_rois,
                                                          mask_phase=mask_phase,
                                                          n_perm=n_perm,
                                                          data_dir=data_dir,
                                                          save_output=save_output,
                                                          overwrite=overwrite)
        return None
    except Exception:
        # Catch Exception rather than everything, so KeyboardInterrupt and
        # SystemExit can still stop the engine.
        errf = ('/home1/dscho/logs/TryExceptError-unit_to_lfp_phase_locking_parallel-{}'
                .format('{}-{}-{}'.format(unit['subj_sess'], unit['chan'], unit['unit'])))
        # open(..., 'w') creates the file, so no shell `touch` is needed.
        with open(errf, 'w') as f:
            f.write(traceback.format_exc() + '\n')

In [4]:
# Load spikes for every Goldmine unit, then drop units whose roi_gen is in
# exclude_gen_rois and renumber the rows.
fr_thresh = 0.2
nspike_thresh = 0 # 400
n_rois = 8
expmt = 'goldmine'
game_states = ['Encoding', 'Retrieval']
exclude_gen_rois = ['Other']

spikes = (
    unit_lfp_comparison
    .load_all_unit_spikes(fr_thresh=fr_thresh,
                          nspike_thresh=nspike_thresh,
                          n_rois=n_rois,
                          expmt=expmt,
                          game_states=game_states)
    .query("(roi_gen!={})".format(exclude_gen_rois))
    .reset_index(drop=True)
)

print('spikes: {}'.format(spikes.shape))

10 subjects, 12 sessions
spikes: (448, 10)
Ran in 30.6s
spikes: (402, 10)


In [15]:
# Find units to process: keep each unit whose output pickle is missing from
# output_dir (or every unit when overwrite is set).
data_dir = '/home1/dscho/projects/unit_activity_and_hpc_theta/data2/goldmine/nav'
output_dir = op.join(data_dir, 'phase_locking')
overwrite = 0
process_idx = []
for idx, unit in spikes.iterrows():
    basename = '{}-{}-{}.pkl'.format(unit['subj_sess'], unit['chan'], unit['unit'])
    # Skip units that already have saved output, unless overwriting.
    if not overwrite and op.exists(op.join(output_dir, basename)):
        continue
    process_idx.append(idx)

_spikes = spikes.loc[process_idx, :]
print('{}/{} units to process'.format(len(_spikes), len(spikes)))

402/402 units to process


In [22]:
# # Serial processing.
# timer = Timer()

# game_states = ['Encoding', 'Retrieval']
# n_rois = 8
# keep_same_hem = [True]
# keep_edges=['hpc-local', 'hpc-hpc', 'hpc-ctx', 'ctx-local', 'ctx-hpc', 'ctx-ctx']
# exclude_gen_rois = ['Other']
# mask_phase = True
# n_perm = 10
# data_dir = '/home1/dscho/projects/unit_activity_and_hpc_theta/data2/goldmine/nav'
# save_output = True
# overwrite = True

# unit = spikes.iloc[-1]
# # unit = spikes.loc[spikes.query("(subj_sess=='U518_ses0') & (chan==11) & (unit==1)").index[0]]
# pl_mrls = unit_lfp_comparison.unit_to_lfp_phase_locking(unit,
#                                                         game_states=game_states,
#                                                         n_rois=n_rois,
#                                                         keep_same_hem=keep_same_hem,
#                                                         keep_edges=keep_edges,
#                                                         exclude_gen_rois=exclude_gen_rois,
#                                                         mask_phase=mask_phase,
#                                                         n_perm=n_perm,
#                                                         data_dir=data_dir,
#                                                         save_output=save_output,
#                                                         overwrite=overwrite)

# print(timer)

Saved /home1/dscho/projects/unit_activity_and_hpc_theta/data2/goldmine/nav/phase_locking/U540_ses0-56-1.pkl
pl_mrls: (1, 29)
Ran in 11.4s
Ran in 11.6s


In [None]:
# Parallel processing: one unit_to_lfp_phase_locking_parallel call per unit
# row, spread over at most 25 SGE jobs.
n_ops = len(_spikes)
print('Running code for {} operations.\n'.format(n_ops))
with cluster_view(scheduler="sge",
                  queue="RAM.q",
                  num_jobs=np.min((n_ops, 25)),
                  cores_per_job=4,
                  retries=2) as view:
    output = view.map(unit_to_lfp_phase_locking_parallel,
                      [row for _, row in _spikes.iterrows()])

Running code for 402 operations.

25 Engines running


## osc2mask, unmatched

In [35]:
def unit_to_lfp_phase_locking_osc2mask_parallel(unit):
    """Cluster worker: run Goldmine osc2mask phase-locking for one unit.

    Parameters
    ----------
    unit : mapping (e.g. one row of the `spikes` dataframe)
        Passed through to
        `unit_lfp_comparison.unit_to_lfp_phase_locking_osc2mask`.
        Its 'subj_sess', 'chan', and 'unit' entries name the error log.

    Returns
    -------
    None
        On success and on failure alike. On failure the traceback is written
        to /home1/dscho/logs/TryExceptError-
        unit_to_lfp_phase_locking_osc2mask_parallel-<subj_sess>-<chan>-<unit>.
    """
    import sys
    import traceback
    sys.path.append('/home1/dscho/code/projects')
    from phase_locking import unit_lfp_comparison

    try:
        expmt = 'goldmine'
        game_states = ['Encoding', 'Retrieval']
        n_rois = 8
        keep_same_hem = [True]
        keep_edges = ['hpc-hpc', 'hpc-ctx', 'ctx-hpc', 'ctx-ctx']
        exclude_gen_rois = ['Other']
        n_perm = 1000
        data_dir = '/home1/dscho/projects/unit_activity_and_hpc_theta/data2/goldmine/nav'
        save_output = True
        overwrite = True

        _ = unit_lfp_comparison.unit_to_lfp_phase_locking_osc2mask(unit,
                                                                   expmt=expmt,
                                                                   game_states=game_states,
                                                                   n_rois=n_rois,
                                                                   keep_same_hem=keep_same_hem,
                                                                   keep_edges=keep_edges,
                                                                   exclude_gen_rois=exclude_gen_rois,
                                                                   n_perm=n_perm,
                                                                   data_dir=data_dir,
                                                                   save_output=save_output,
                                                                   overwrite=overwrite)
        return None
    except Exception:
        # Catch Exception rather than everything, so KeyboardInterrupt and
        # SystemExit can still stop the engine.
        errf = ('/home1/dscho/logs/TryExceptError-unit_to_lfp_phase_locking_osc2mask_parallel-{}'
                .format('{}-{}-{}'.format(unit['subj_sess'], unit['chan'], unit['unit'])))
        # open(..., 'w') creates the file, so no shell `touch` is needed.
        with open(errf, 'w') as f:
            f.write(traceback.format_exc() + '\n')

In [36]:
# Find units to process: keep each unit whose output pickle is missing from
# the osc2mask output_dir (or every unit when overwrite is set).
data_dir = '/home1/dscho/projects/unit_activity_and_hpc_theta/data2/goldmine/nav'
output_dir = op.join(data_dir, 'phase_locking', 'osc2mask')
overwrite = 0
process_idx = []
for idx, unit in spikes.iterrows():
    basename = '{}-{}-{}.pkl'.format(unit['subj_sess'], unit['chan'], unit['unit'])
    # Skip units that already have saved output, unless overwriting.
    if not overwrite and op.exists(op.join(output_dir, basename)):
        continue
    process_idx.append(idx)

_spikes = spikes.loc[process_idx, :]
print('{}/{} units to process'.format(len(_spikes), len(spikes)))

402/402 units to process


In [48]:
# Serial processing: run the osc2mask analysis on a single example unit with
# a small permutation count (n_perm = 10).
unit = spikes.query("(subj_sess=='U540_ses0') & (chan==18) & (unit==1)").iloc[0]
expmt = 'goldmine'
game_states = ['Encoding', 'Retrieval']
n_rois = 8
keep_same_hem = [True]
keep_edges = ['hpc-hpc', 'hpc-ctx', 'ctx-hpc', 'ctx-ctx']
exclude_gen_rois = ['Other']
n_perm = 10
data_dir = '/home1/dscho/projects/unit_activity_and_hpc_theta/data2/goldmine/nav'
save_output = True
overwrite = True

pl_mrls = unit_lfp_comparison.unit_to_lfp_phase_locking_osc2mask(
    unit,
    expmt=expmt,
    game_states=game_states,
    n_rois=n_rois,
    keep_same_hem=keep_same_hem,
    keep_edges=keep_edges,
    exclude_gen_rois=exclude_gen_rois,
    n_perm=n_perm,
    data_dir=data_dir,
    save_output=save_output,
    overwrite=overwrite,
)

In [50]:
# Parallel processing: one unit_to_lfp_phase_locking_osc2mask_parallel call
# per unit row, spread over at most 50 SGE jobs.
n_ops = len(_spikes)
print('Running code for {} operations.\n'.format(n_ops))
with cluster_view(scheduler="sge",
                  queue="RAM.q",
                  num_jobs=np.min((n_ops, 50)),
                  cores_per_job=4,
                  retries=2) as view:
    output = view.map(unit_to_lfp_phase_locking_osc2mask_parallel,
                      [row for _, row in _spikes.iterrows()])

## osc2mask, matched

In [61]:
def unit_to_lfp_phase_locking_osc2mask_matched_parallel(unit):
    """Cluster worker: Goldmine osc2mask phase-locking with match_spikes=True.

    Output goes to <data_dir>/phase_locking/osc2mask_matched.

    Parameters
    ----------
    unit : mapping (e.g. one row of the `spikes` dataframe)
        Passed through to
        `unit_lfp_comparison.unit_to_lfp_phase_locking_osc2mask`.
        Its 'subj_sess', 'chan', and 'unit' entries name the error log.

    Returns
    -------
    None
        On success and on failure alike. On failure the traceback is written
        to /home1/dscho/logs/TryExceptError-
        unit_to_lfp_phase_locking_osc2mask_matched_parallel-<subj_sess>-<chan>-<unit>.
    """
    import sys
    import os
    import traceback
    sys.path.append('/home1/dscho/code/projects')
    from phase_locking import unit_lfp_comparison

    try:
        expmt = 'goldmine'
        game_states = ['Encoding', 'Retrieval']
        n_rois = 8
        keep_same_hem = [True]
        keep_edges = ['hpc-hpc', 'hpc-ctx', 'ctx-hpc', 'ctx-ctx']
        exclude_gen_rois = ['Other']
        match_spikes = True
        n_perm = 1000
        data_dir = '/home1/dscho/projects/unit_activity_and_hpc_theta/data2/goldmine/nav'
        output_dir = os.path.join(data_dir, 'phase_locking', 'osc2mask_matched')
        save_output = True
        overwrite = True

        _ = unit_lfp_comparison.unit_to_lfp_phase_locking_osc2mask(unit,
                                                                   expmt=expmt,
                                                                   game_states=game_states,
                                                                   n_rois=n_rois,
                                                                   keep_same_hem=keep_same_hem,
                                                                   keep_edges=keep_edges,
                                                                   exclude_gen_rois=exclude_gen_rois,
                                                                   match_spikes=match_spikes,
                                                                   n_perm=n_perm,
                                                                   data_dir=data_dir,
                                                                   output_dir=output_dir,
                                                                   save_output=save_output,
                                                                   overwrite=overwrite)
        return None
    except Exception:
        # Catch Exception rather than everything, so KeyboardInterrupt and
        # SystemExit can still stop the engine.
        errf = ('/home1/dscho/logs/TryExceptError-unit_to_lfp_phase_locking_osc2mask_matched_parallel-{}'
                .format('{}-{}-{}'.format(unit['subj_sess'], unit['chan'], unit['unit'])))
        # open(..., 'w') creates the file, so no shell `touch` is needed.
        with open(errf, 'w') as f:
            f.write(traceback.format_exc() + '\n')

In [54]:
# Find units to process: keep each unit whose output pickle is missing from
# the osc2mask_matched output_dir (or every unit when overwrite is set).
data_dir = '/home1/dscho/projects/unit_activity_and_hpc_theta/data2/goldmine/nav'
output_dir = op.join(data_dir, 'phase_locking', 'osc2mask_matched')
overwrite = 0
process_idx = []
for idx, unit in spikes.iterrows():
    basename = '{}-{}-{}.pkl'.format(unit['subj_sess'], unit['chan'], unit['unit'])
    # Skip units that already have saved output, unless overwriting.
    if not overwrite and op.exists(op.join(output_dir, basename)):
        continue
    process_idx.append(idx)

_spikes = spikes.loc[process_idx, :]
print('{}/{} units to process'.format(len(_spikes), len(spikes)))

402/402 units to process


In [60]:
# Serial processing: run the spike-matched osc2mask analysis on a single
# example unit with a small permutation count (n_perm = 10).
unit = spikes.query("(subj_sess=='U540_ses0') & (chan==18) & (unit==1)").iloc[0]
expmt = 'goldmine'
game_states = ['Encoding', 'Retrieval']
n_rois = 8
keep_same_hem = [True]
keep_edges = ['hpc-hpc', 'hpc-ctx', 'ctx-hpc', 'ctx-ctx']
exclude_gen_rois = ['Other']
match_spikes = True
n_perm = 10
data_dir = '/home1/dscho/projects/unit_activity_and_hpc_theta/data2/goldmine/nav'
output_dir = op.join(data_dir, 'phase_locking', 'osc2mask_matched')
save_output = True
overwrite = False

pl_mrls = unit_lfp_comparison.unit_to_lfp_phase_locking_osc2mask(
    unit,
    expmt=expmt,
    game_states=game_states,
    n_rois=n_rois,
    keep_same_hem=keep_same_hem,
    keep_edges=keep_edges,
    exclude_gen_rois=exclude_gen_rois,
    match_spikes=match_spikes,
    n_perm=n_perm,
    data_dir=data_dir,
    output_dir=output_dir,
    save_output=save_output,
    overwrite=overwrite,
)

In [None]:
# Parallel processing: one unit_to_lfp_phase_locking_osc2mask_matched_parallel
# call per unit row, spread over at most 50 SGE jobs.
n_ops = len(_spikes)
print('Running code for {} operations.\n'.format(n_ops))
with cluster_view(scheduler="sge",
                  queue="RAM.q",
                  num_jobs=np.min((n_ops, 50)),
                  cores_per_job=4,
                  retries=2) as view:
    output = view.map(unit_to_lfp_phase_locking_osc2mask_matched_parallel,
                      [row for _, row in _spikes.iterrows()])

Running code for 402 operations.

50 Engines running


# Run YCab phase-locking

In [13]:
def unit_to_lfp_phase_locking_ycab_parallel(unit):
    """Cluster worker: run YCab unit-to-LFP phase-locking for one unit.

    Uses 10000 permutations and writes to <data_dir>/phase_locking/10000perm.

    Parameters
    ----------
    unit : mapping (e.g. one row of the `spikes` dataframe)
        Passed through to `unit_lfp_comparison.unit_to_lfp_phase_locking`.
        Its 'subj_sess', 'chan', and 'unit' entries name the error log.

    Returns
    -------
    None
        On success and on failure alike. On failure the traceback is written
        to /home1/dscho/logs/TryExceptError-
        unit_to_lfp_phase_locking_ycab_parallel-<subj_sess>-<chan>-<unit>.
    """
    import sys
    import os
    import traceback
    sys.path.append('/home1/dscho/code/projects')
    from phase_locking import unit_lfp_comparison

    try:
        expmt = 'ycab'
        game_states = ['YCab']
        n_rois = 8
        keep_same_hem = [True]
        keep_edges = ['hpc-local', 'ctx-local', 'ctx-hpc'] #['hpc-local', 'hpc-hpc', 'hpc-ctx', 'ctx-local', 'ctx-hpc', 'ctx-ctx']
        exclude_gen_rois = ['Other']
        mask_phase = True
        n_perm = 10000
        data_dir = '/home1/dscho/projects/unit_activity_and_hpc_theta/data2/ycab'
        output_dir = os.path.join(data_dir, 'phase_locking', '10000perm')
        save_output = True
        overwrite = True

        _ = unit_lfp_comparison.unit_to_lfp_phase_locking(unit,
                                                          expmt=expmt,
                                                          game_states=game_states,
                                                          n_rois=n_rois,
                                                          keep_same_hem=keep_same_hem,
                                                          keep_edges=keep_edges,
                                                          exclude_gen_rois=exclude_gen_rois,
                                                          mask_phase=mask_phase,
                                                          n_perm=n_perm,
                                                          data_dir=data_dir,
                                                          output_dir=output_dir,
                                                          save_output=save_output,
                                                          overwrite=overwrite)
        return None
    except Exception:
        # Catch Exception rather than everything, so KeyboardInterrupt and
        # SystemExit can still stop the engine.
        errf = ('/home1/dscho/logs/TryExceptError-unit_to_lfp_phase_locking_ycab_parallel-{}'
                .format('{}-{}-{}'.format(unit['subj_sess'], unit['chan'], unit['unit'])))
        # open(..., 'w') creates the file, so no shell `touch` is needed.
        with open(errf, 'w') as f:
            f.write(traceback.format_exc() + '\n')

In [6]:
# Load the subject dataframe (each row = 1 channel)
subj_df = phlock.get_subj_df()
# Among all subjects except U367, find rows whose location string ends in 'H'
# (presumably hippocampal channels -- TODO confirm), then keep every row
# belonging to a subject that has at least one such channel.
idx = np.where(subj_df.query("(subj!='U367')")['location'].apply(lambda x: x[-1]=='H'))[0]
subjs = subj_df.query("(subj!='U367')").iloc[idx, :]['subj'].unique()
subj_df = subj_df.query("(subj=={})".format(subjs.tolist())).reset_index(drop=True)

# Fix session naming.
# Replaces 'env' with 'ses' and '1a' with '1' in both session columns
# (str_replace comes from helper_funcs; exact semantics not shown here).
subj_df['subj_sess'] = subj_df['subj_sess'].apply(lambda x: str_replace(x, {'env': 'ses', '1a': '1'}))
subj_df['sess'] = subj_df['sess'].apply(lambda x: str_replace(x, {'env': 'ses', '1a': '1'}))
yc_subjs = np.sort(subj_df['subj'].unique())
yc_sessions = np.sort(subj_df['subj_sess'].unique())

# Add paths to the processed LFP channels.
# Path layout: <data_dir>/<subj>/<sess>/micro_lfps/<preproc tag>/CSC<chan>.pkl
data_dir = '/scratch/dscho/ycab/eeg'
subj_df['proc_lfp_file'] = subj_df.apply(lambda x: op.join(data_dir, x['subj'], x['sess'], 'micro_lfps',
                                                           'V-to-muV_sr1000_bandpass0.1-80_notch60',
                                                           'CSC{}.pkl'.format(x['chan'])), axis=1)

print('{} subjects and {} sessions'.format(len(yc_subjs), len(yc_sessions)))
print('subj_df: {}'.format(subj_df.shape))

18 subjects and 43 sessions
subj_df: (2696, 14)


In [9]:
# Load spikes for every YCab unit, then drop units whose roi_gen is in
# exclude_gen_rois and renumber the rows.
fr_thresh = 0.2
nspike_thresh = 0 # 400
n_rois = 8
expmt = 'ycab'
exclude_gen_rois = ['Other']

spikes = (
    unit_lfp_comparison
    .load_all_unit_spikes(fr_thresh=fr_thresh,
                          nspike_thresh=nspike_thresh,
                          n_rois=n_rois,
                          expmt=expmt)
    .query("(roi_gen!={})".format(exclude_gen_rois))
    .reset_index(drop=True)
)

print('spikes: {}'.format(spikes.shape))

spikes: (1633, 10)
Ran in 11.9s
spikes: (1538, 10)


In [10]:
# Find units to process: keep each unit whose output pickle is missing from
# the 10000perm output_dir (or every unit when overwrite is set).
data_dir = '/home1/dscho/projects/unit_activity_and_hpc_theta/data2/ycab'
output_dir = op.join(data_dir, 'phase_locking', '10000perm')
overwrite = False
process_idx = []
for idx, unit in spikes.iterrows():
    basename = '{}-{}-{}.pkl'.format(unit['subj_sess'], unit['chan'], unit['unit'])
    # Skip units that already have saved output, unless overwriting.
    if not overwrite and op.exists(op.join(output_dir, basename)):
        continue
    process_idx.append(idx)

_spikes = spikes.loc[process_idx, :]
print('{}/{} units to process'.format(len(_spikes), len(spikes)))

1538/1538 units to process


In [291]:
# # Serial processing.
# timer = Timer()

# expmt = 'ycab'
# game_states = ['YCab']
# n_rois = 8
# keep_same_hem = [True]
# keep_edges=['hpc-local', 'hpc-hpc', 'hpc-ctx', 'ctx-local', 'ctx-hpc', 'ctx-ctx']
# exclude_gen_rois = ['Other']
# mask_phase = True
# n_perm = 1000
# data_dir = '/home1/dscho/projects/unit_activity_and_hpc_theta/data2/ycab'
# save_output = True
# overwrite = True

# unit = spikes.iloc[0]
# pl_mrls = unit_lfp_comparison.unit_to_lfp_phase_locking(unit,
#                                                         expmt=expmt,
#                                                         game_states=game_states,
#                                                         n_rois=n_rois,
#                                                         keep_same_hem=keep_same_hem,
#                                                         keep_edges=keep_edges,
#                                                         exclude_gen_rois=exclude_gen_rois,
#                                                         mask_phase=mask_phase,
#                                                         n_perm=n_perm,
#                                                         data_dir=data_dir,
#                                                         save_output=save_output,
#                                                         overwrite=overwrite)

# print(timer)

In [None]:
# Parallel processing: one unit_to_lfp_phase_locking_ycab_parallel call per
# unit row, spread over at most 50 SGE jobs.
n_ops = len(_spikes)
print('Running code for {} operations.\n'.format(n_ops))
with cluster_view(scheduler="sge",
                  queue="RAM.q",
                  num_jobs=np.min((n_ops, 50)),
                  cores_per_job=4,
                  retries=2) as view:
    output = view.map(unit_to_lfp_phase_locking_ycab_parallel,
                      [row for _, row in _spikes.iterrows()])

Running code for 1538 operations.

50 Engines running


In [252]:
# Load the older phase-locking dataframe (original manuscript).
# NOTE(review): this cell edits pl_df in place (insert/drop), so re-running it
# without reloading will hit the already-inserted columns.
pl_df = phlock.load_pl_df(drop_repeat_connections=False)
# Apply the same session renaming used for subj_df ('env' -> 'ses', '1a' -> '1').
pl_df['subj_sess'] = pl_df['subj_sess'].apply(lambda x: str_replace(x, {'env': 'ses', '1a': '1'}))
# unit_chan = unit_chan_ind + 1, placed right after unit_chan_ind
# (presumably converting a 0-based index to a 1-based channel number -- TODO confirm).
pl_df.insert(pl_df.columns.tolist().index('unit_chan_ind')+1, 'unit_chan', pl_df['unit_chan_ind'] + 1)
# Unit identifier in the same '<subj_sess>-<chan>-<unit>' form used for output file names.
pl_df['subj_sess_unit'] = pl_df.apply(lambda x: '{}-{}-{}'.format(x['subj_sess'], x['unit_chan'], x['unit']), axis=1)
# Columns to discard; only those actually present in pl_df are dropped below.
drop_cols = ['sess', 'subj_unit_chan', 'unit_chan_ind', 'unit_hem',
             'unit_roi', 'unit_is_hpc', 'lfp_hem', 'lfp_roi', 'lfp_is_hpc',
             'same_chan', 'same_roi', 'both_hpc', 'unit_hemroi2', 'lfp_hemroi2',
             'unit_roi2', 'lfp_roi2', 'same_roi2', 'unit_nsamp_spikes', 'spike_inds',
             'bs_mrls', 'tl_mrls', 'bs_mrls_z', 'tl_mrls_z', 'bs_ind_z', 'tl_locked_freq_z',
             'tl_locked_time_z', 'tl_locked_mrl_z', 'pref_phase', 'phase_offsets',
             'pref_phase_tl_locked_time_freq_z', 'phase_offsets_tl_locked_time_freq_z',
             'unit_roi3', 'roi', 'roi_unit_to_lfp', 'pl_freq', 'pl_strength',
             'pl_time_shift', 'pl_latency']
pl_df.drop(columns=[col for col in drop_cols if col in pl_df], inplace=True)
# Map each hemroi label to a general ROI: the first character is stripped
# before the lookup (presumably the hemisphere prefix -- TODO confirm).
n_rois = 6
roi_map = spike_preproc.roi_mapping(n_rois)
pl_df.insert(pl_df.columns.tolist().index('unit_hemroi')+1, 'unit_roi_gen', pl_df['unit_hemroi'].apply(lambda x: roi_map[x[1:]]))
pl_df.insert(pl_df.columns.tolist().index('lfp_hemroi')+1, 'lfp_roi_gen', pl_df['lfp_hemroi'].apply(lambda x: roi_map[x[1:]]))
# 16 values in half-octave steps, from 2**-1 = 0.5 up to 2**6.5 (~90.5);
# locked_freq_ind_z indexes into this array and the result is rounded to 1 decimal.
freqs = np.array([2**((i/2) - 1) for i in range(16)])
pl_df.insert(pl_df.columns.tolist().index('locked_freq_ind_z')+1, 'locked_freq_z',
             pl_df['locked_freq_ind_z'].apply(lambda x: np.round(freqs[x], 1)))
# Keep only the four listed edge types, ordered by unit then edge.
pl_df = (pl_df.query("(edge==['ctx-local', 'hpc-local', 'ctx-hpc', 'hpc-hpc'])")
              .sort_values(['subj_sess_unit', 'edge'])
              .reset_index(drop=True))
print('pl_df: {}'.format(pl_df.shape))

pl_df: (4672, 22)


## osc2mask, unmatched

In [328]:
def unit_to_lfp_phase_locking_osc2mask_ycab_parallel(unit):
    """Cluster worker: run YCab osc2mask phase-locking for one unit.

    Parameters
    ----------
    unit : mapping (e.g. one row of the `spikes` dataframe)
        Passed through to
        `unit_lfp_comparison.unit_to_lfp_phase_locking_osc2mask`.
        Its 'subj_sess', 'chan', and 'unit' entries name the error log.

    Returns
    -------
    None
        On success and on failure alike. On failure the traceback is written
        to /home1/dscho/logs/TryExceptError-
        unit_to_lfp_phase_locking_osc2mask_ycab_parallel-<subj_sess>-<chan>-<unit>.
    """
    import sys
    import traceback
    sys.path.append('/home1/dscho/code/projects')
    from phase_locking import unit_lfp_comparison

    try:
        expmt = 'ycab'
        game_states = ['YCab']
        n_rois = 8
        keep_same_hem = [True]
        keep_edges = ['hpc-hpc', 'hpc-ctx', 'ctx-hpc', 'ctx-ctx']
        exclude_gen_rois = ['Other']
        n_perm = 1000
        data_dir = '/home1/dscho/projects/unit_activity_and_hpc_theta/data2/ycab'
        save_output = True
        overwrite = True

        _ = unit_lfp_comparison.unit_to_lfp_phase_locking_osc2mask(unit,
                                                                   expmt=expmt,
                                                                   game_states=game_states,
                                                                   n_rois=n_rois,
                                                                   keep_same_hem=keep_same_hem,
                                                                   keep_edges=keep_edges,
                                                                   exclude_gen_rois=exclude_gen_rois,
                                                                   n_perm=n_perm,
                                                                   data_dir=data_dir,
                                                                   save_output=save_output,
                                                                   overwrite=overwrite)
        return None
    except Exception:
        # Catch Exception rather than everything, so KeyboardInterrupt and
        # SystemExit can still stop the engine.
        errf = ('/home1/dscho/logs/TryExceptError-unit_to_lfp_phase_locking_osc2mask_ycab_parallel-{}'
                .format('{}-{}-{}'.format(unit['subj_sess'], unit['chan'], unit['unit'])))
        # open(..., 'w') creates the file, so no shell `touch` is needed.
        with open(errf, 'w') as f:
            f.write(traceback.format_exc() + '\n')

In [331]:
# Find units to process: keep each unit whose output pickle is missing from
# the osc2mask output_dir (or every unit when overwrite is set).
data_dir = '/home1/dscho/projects/unit_activity_and_hpc_theta/data2/ycab'
output_dir = op.join(data_dir, 'phase_locking', 'osc2mask')
overwrite = 0
process_idx = []
for idx, unit in spikes.iterrows():
    basename = '{}-{}-{}.pkl'.format(unit['subj_sess'], unit['chan'], unit['unit'])
    # Skip units that already have saved output, unless overwriting.
    if not overwrite and op.exists(op.join(output_dir, basename)):
        continue
    process_idx.append(idx)

_spikes = spikes.loc[process_idx, :]
print('{}/{} units to process'.format(len(_spikes), len(spikes)))

0/1315 units to process


In [None]:
# Parallel processing: one unit_to_lfp_phase_locking_osc2mask_ycab_parallel
# call per unit row, spread over at most 75 SGE jobs.
n_ops = len(_spikes)
print('Running code for {} operations.\n'.format(n_ops))
with cluster_view(scheduler="sge",
                  queue="RAM.q",
                  num_jobs=np.min((n_ops, 75)),
                  cores_per_job=4,
                  retries=2) as view:
    output = view.map(unit_to_lfp_phase_locking_osc2mask_ycab_parallel,
                      [row for _, row in _spikes.iterrows()])

Running code for 1315 operations.

6 Engines running


## osc2mask, matched

In [7]:
def unit_to_lfp_phase_locking_osc2mask_ycab_matched_parallel(unit):
    """Cluster worker: YCab osc2mask phase-locking with match_spikes=True.

    Output goes to <data_dir>/phase_locking/osc2mask_matched.

    Parameters
    ----------
    unit : mapping (e.g. one row of the `spikes` dataframe)
        Passed through to
        `unit_lfp_comparison.unit_to_lfp_phase_locking_osc2mask`.
        Its 'subj_sess', 'chan', and 'unit' entries name the error log.

    Returns
    -------
    None
        On success and on failure alike. On failure the traceback is written
        to /home1/dscho/logs/TryExceptError-
        unit_to_lfp_phase_locking_osc2mask_ycab_matched_parallel-<subj_sess>-<chan>-<unit>.
    """
    import sys
    import os
    import traceback
    sys.path.append('/home1/dscho/code/projects')
    from phase_locking import unit_lfp_comparison

    try:
        expmt = 'ycab'
        game_states = ['YCab']
        n_rois = 8
        keep_same_hem = [True]
        keep_edges = ['hpc-hpc', 'hpc-ctx', 'ctx-hpc', 'ctx-ctx']
        exclude_gen_rois = ['Other']
        match_spikes = True
        n_perm = 1000
        data_dir = '/home1/dscho/projects/unit_activity_and_hpc_theta/data2/ycab'
        output_dir = os.path.join(data_dir, 'phase_locking', 'osc2mask_matched')
        save_output = True
        overwrite = True

        _ = unit_lfp_comparison.unit_to_lfp_phase_locking_osc2mask(unit,
                                                                   expmt=expmt,
                                                                   game_states=game_states,
                                                                   n_rois=n_rois,
                                                                   keep_same_hem=keep_same_hem,
                                                                   keep_edges=keep_edges,
                                                                   exclude_gen_rois=exclude_gen_rois,
                                                                   match_spikes=match_spikes,
                                                                   n_perm=n_perm,
                                                                   data_dir=data_dir,
                                                                   output_dir=output_dir,
                                                                   save_output=save_output,
                                                                   overwrite=overwrite)
        return None
    except Exception:
        # Catch Exception rather than everything, so KeyboardInterrupt and
        # SystemExit can still stop the engine.
        errf = ('/home1/dscho/logs/TryExceptError-unit_to_lfp_phase_locking_osc2mask_ycab_matched_parallel-{}'
                .format('{}-{}-{}'.format(unit['subj_sess'], unit['chan'], unit['unit'])))
        # open(..., 'w') creates the file, so no shell `touch` is needed.
        with open(errf, 'w') as f:
            f.write(traceback.format_exc() + '\n')

In [10]:
# Find units to process: keep each unit whose output pickle is missing from
# the osc2mask_matched output_dir (or every unit when overwrite is set).
data_dir = '/home1/dscho/projects/unit_activity_and_hpc_theta/data2/ycab'
output_dir = op.join(data_dir, 'phase_locking', 'osc2mask_matched')
overwrite = 0
process_idx = []
for idx, unit in spikes.iterrows():
    basename = '{}-{}-{}.pkl'.format(unit['subj_sess'], unit['chan'], unit['unit'])
    # Skip units that already have saved output, unless overwriting.
    if not overwrite and op.exists(op.join(output_dir, basename)):
        continue
    process_idx.append(idx)

_spikes = spikes.loc[process_idx, :]
print('{}/{} units to process'.format(len(_spikes), len(spikes)))

0/1538 units to process


In [None]:
# Parallel processing: one
# unit_to_lfp_phase_locking_osc2mask_ycab_matched_parallel call per unit row,
# spread over at most 75 SGE jobs.
n_ops = len(_spikes)
print('Running code for {} operations.\n'.format(n_ops))
with cluster_view(scheduler="sge",
                  queue="RAM.q",
                  num_jobs=np.min((n_ops, 75)),
                  cores_per_job=4,
                  retries=2) as view:
    output = view.map(unit_to_lfp_phase_locking_osc2mask_ycab_matched_parallel,
                      [row for _, row in _spikes.iterrows()])

Running code for 1538 operations.

7 Engines running


# Save YCab event_times

In [216]:
# Save event_times for each yellow cab session.
# Each session is cut into consecutive, non-overlapping windows of event_len
# samples; each window becomes one 'YCab' row with its start/stop sample index.
save_output = True
overwrite = False

timer = Timer()

for subj_sess in yc_sessions:
    # Load the event_times dataframe if it exists.
    # NOTE(review): the loaded dataframe is not used inside this loop; the
    # session is simply skipped when its file already exists.
    output_f = op.join('/home1/dscho/projects/unit_activity_and_hpc_theta/data2/ycab/events',
                       '{}-event_times.pkl'.format(subj_sess))
    if op.exists(output_f) and not overwrite:
        event_times = dio.open_pickle(output_f)
        continue

    # Load EEG for each channel and event.
    subj, sess = subj_sess.split('_')
    # Series indexed by location, each value an int array of that location's channels.
    mont = (subj_df.query("(subj_sess=='{}')".format(subj_sess))
                   .groupby(['location'])['chan']
                   .apply(lambda x: np.array([int(chan) for chan in x])))
    # Only the first location is loaded; its EEG is used solely for the
    # number of samples (columns) in the session.
    roi = mont.keys()[0]
    regions = [roi]
    sr = 1000
    session_cut = 60000 # cut this many samples from start and end of the session
    event_len = 30000
    buffer = 2500
    l_freq = 0.1
    h_freq = 80
    notch_freqs = [60]
    chan_exclusion_thresh = 2
    output_dir = op.join('/scratch/dscho/ycab/eeg', subj, sess, 'micro_lfps',
                         'V-to-muV_sr1000_bandpass0.1-80_notch60')
    verbose = False
    
    time_eeg = eeg_preproc.load_time_eeg(subj_sess,
                                         mont=mont,
                                         regions=regions,
                                         l_freq=l_freq,
                                         h_freq=h_freq,
                                         notch_freqs=notch_freqs,
                                         chan_exclusion_thresh=chan_exclusion_thresh,
                                         verbose=verbose,
                                         output_dir=output_dir)
    time_eeg = time_eeg[roi]
    
    # Break the session into non-overlapping events.
    # start_cut is the remainder left after trimming session_cut samples from
    # both ends and dividing by the event length; it is added to the start so
    # the remaining span holds a whole number of events.
    chans = time_eeg.index.values
    split_every = int(event_len)
    start_cut = time_eeg.iloc[:, session_cut:time_eeg.shape[1]-session_cut].values.shape[1] % split_every
    n_splits = int(time_eeg.iloc[:, session_cut+start_cut:-session_cut].shape[1] / split_every)
    # Event boundaries every split_every samples; rolling_window(..., 2)
    # presumably yields consecutive (start, stop) pairs -- TODO confirm.
    start_stop = aop.rolling_window(np.arange(session_cut+start_cut,
                                              time_eeg.shape[1]-session_cut+1,
                                              split_every), 2)
    
    # Create the event_times dataframe.
    # Trials are numbered from 1 and all share the 'YCab' gameState.
    event_times = []
    for iTrial, (start, stop) in enumerate(start_stop):
        event_times.append([iTrial+1, 'YCab', start, stop])
    cols = ['trial', 'gameState', 'start_time', 'stop_time']
    event_times = pd.DataFrame(event_times, columns=cols)
    
    # Save the dataframe.
    if save_output:
        dio.save_pickle(event_times, output_f)
        
print(timer)