# Preprocess acoustic data and sync to neural data, one session at a time

This notebook is a modified version of the *1-preprocess_acoustics* notebook in the chronic ephys processing pipeline.

If *1-preprocess_acoustics* exits with errors, this notebook allows you to make manual adjustments

Common errors include:
- data streams cannot be synced (e.g., neural and audio data streams are of different lengths)
- TTL events were skipped (i.e., the machine clock malfunctioned or SpikeGLX crashed and the data streams terminated at different moments)

Use the environment **songproc** to run this notebook

In [1]:
%matplotlib widget
import numpy as np
import pandas as pd
import os
import pickle
from scipy.io import wavfile
import traceback

import sys
sys.path.append('/mnt/cube/lo/envs/ceciestunepipe')
from ceciestunepipe.file import bcistructure as et
from ceciestunepipe.util.sound import boutsearch as bs
from ceciestunepipe.pipeline import searchbout as sb
from ceciestunepipe.util import stimutil as su
from ceciestunepipe.util import sglxutil as sglu
from ceciestunepipe.util import sglxsync as sy
from ceciestunepipe.mods import sglxsync_debug as syd
from ceciestunepipe.util.spikeextractors.extractors.spikeglxrecordingextractor import spikeglxrecordingextractor as sglex
from ceciestunepipe.util import oeutil as oeu
from ceciestunepipe.mods import preproc_sglx, preproc_oe

import logging
logger = logging.getLogger()
# Make this cell idempotent: detach every handler already attached to the root
# logger before adding ours. Without this, each re-run of the cell stacks one more
# handler and every log record is printed once per handler.
while logger.handlers:
    logger.removeHandler(logger.handlers[0])
handler = logging.StreamHandler()
formatter = logging.Formatter(
        '%(asctime)s %(name)-12s %(levelname)-8s %(message)s')
handler.setFormatter(formatter)
logger.addHandler(handler)
logger.setLevel(logging.INFO) # INFO prints the full readout; switch to logging.WARNING for a quieter run

  if StrictVersion(h5py.__version__) > '2.10.0':
2024-10-29 10:14:08,032 root         INFO     Running on pakhi.ucsd.edu


h5py version > 2.10.0. Some extractors might not work properly. It is recommended to downgrade to version 2.10.0: 
>>> pip install h5py==2.10.0


## Set parameters

In [2]:
## default bout detection parameters that work well for zebra finches
hparams = dict(
    # --- spectrogram ---
    num_freq=1024,        # number of channels in the spectrogram
    preemphasis=0.97,
    frame_shift_ms=5,     # fft step size
    frame_length_ms=10,   # fft frame length -- FRAME SAMPLES < NUM_FREQ!!!
    min_level_db=-55,     # minimum threshold (db) for computing the spectrogram
    ref_level_db=110,     # reference level (db) for computing the spectrogram
    sample_rate=None,     # sample rate of your data

    # --- mel filter ---
    mel_filter=False,     # whether a mel filter is applied
    num_mels=1024,        # number of channels in the mel-spectrogram
    fmin=300,             # low frequency cutoff for the mel filter
    fmax=12000,           # high frequency cutoff for the mel filter

    # --- spectrogram inversion ---
    max_iters=200,
    griffin_lim_iters=20,
    power=1.5,

    # --- bout searching: output file names ---
    bout_auto_file='bout_auto.pickle',        # automatically found bouts
    bout_sync_file='bout_sync.pickle',        # synchronized automatic bouts
    stim_sync_file='stim_sync.pickle',        # synchronized stim (stim sessions only)
    bout_curated_file='bout_curated.pickle',  # manually curated bouts

    # --- automatic bout detection; applies when deep_bout_search = False ---
    read_wav_fun=bs.read_npy_chan,    # loader for the wav-like stream (returns fs, ndarray)
    file_order_fun=bs.sess_file_id,   # extracts the file ID within the session
    min_segment=20,                   # minimum supra-threshold length to count as a 'syllable' (ms)
    min_silence=3000,                 # minimum gap between syllable groups to split bouts (ms)
    min_bout=500,                     # minimum bout duration (ms)
    peak_thresh_rms=0.55,             # threshold (rms) for peak acceptance
    thresh_rms=0.25,                  # threshold for detection of syllables
    mean_syl_rms_thresh=0.3,          # acceptance threshold for mean rms across the syllable (relative to rms of the file)
    max_bout=180000,                  # bouts longer than this are excluded (ms)
    l_p_r_thresh=100,                 # threshold for n of len_ms/peaks (typically about 2-3 syllable spans)
    waveform_edges=1000,              # ms kept before and after the bout edges for the waveform sample
)

## other processing parameters
n_jobs = 1                 # n_jobs for deriving bout info (errors when increased)
mic_file_ext = 'npy'       # npy method more efficient than wav
force_preprocess = False   # skip preprocessing for previously failed epochs
deep_bout_search = True    # detect bouts using deep search -- see ceciestunepipe.mods.bout_detection_mf

In [3]:
# parameters describing the single session handled by this notebook
sess_par = dict(
    bird='z_c7r3_24',
    sess='2024-10-27',
    stim_sess=[],                                  # sessions where stimuli were presented
    mic_list=['microphone_M', 'microphone_F'],     # mics of interest, by signal name in rig.json
    adc_list=[],                                   # adc channels of interest
    stim_list=[],                                  # adc channels carrying the stimulus
    nidq_ttl_list=[],                              # TTL signals from the nidq digital inputs to extract (besides the 'sync')
    ref_stream='ap_0',                             # stream everything is synchronized to (sglx only, oe already synced)
    trial_tag_chan=2,                              # sglx: tag channel in the stimulus wave (this should come from meta et al.)
    on_signal=1,                                   # sglx: whether signal on is hi or lo
    sort='sort_0',                                 # sort index
    ephys_software='sglx',
)

## Preprocess and synchronize recordings

In [4]:
# build the experiment structure for this bird / session
exp_struct = et.get_exp_struct(
    sess_par['bird'],
    sess_par['sess'],
    sort=sess_par['sort'],
    ephys_software=sess_par['ephys_software'],
)

In [5]:
##### preprocess acoustics #####
# Dispatch to the preprocessing routine matching the acquisition software.
# NOTE(review): force_redo is hard-coded to True and the `force_preprocess` setting
# defined with the other processing parameters is not consulted -- confirm this is intended.
if sess_par['ephys_software'] == 'sglx':
    preproc_sglx.preprocess_session(sess_par,force_redo=True)
elif sess_par['ephys_software'] == 'oe':
    preproc_oe.preprocess_session(sess_par,force_redo=True)
else:
    # fail loudly instead of printing 'done.' without having processed anything
    raise ValueError("unsupported ephys_software {!r}: expected 'sglx' or 'oe'".format(sess_par['ephys_software']))
print('done.')

2024-10-29 10:14:08,084 root         INFO     pre-process all runs of sess 2024-10-27
2024-10-29 10:14:08,084 root         INFO     pre-process all runs of sess 2024-10-27
2024-10-29 10:14:08,084 root         INFO     pre-process all runs of sess 2024-10-27
2024-10-29 10:14:08,109 ceciestunepipe.file.bcistructure INFO     {'folders': {'bird': '/mnt/cube/chronic_ephys/raw/z_c7r3_24', 'raw': '/mnt/cube/chronic_ephys/raw/z_c7r3_24/2024-10-27', 'sglx': '/mnt/cube/chronic_ephys/raw/z_c7r3_24/2024-10-27/sglx', 'processed': '/mnt/cube/chronic_ephys/proc/z_c7r3_24/2024-10-27/sglx', 'derived': '/mnt/cube/chronic_ephys/der/z_c7r3_24/2024-10-27/sglx', 'tmp': '/mnt/cube/chronic_ephys/tmp/tmp'}, 'files': {'par': '/mnt/cube/chronic_ephys/raw/z_c7r3_24/2024-10-27/sglx/experiment.json', 'set': '/mnt/cube/chronic_ephys/raw/z_c7r3_24/2024-10-27/sglx/settings.isf', 'rig': '/mnt/cube/chronic_ephys/raw/z_c7r3_24/2024-10-27/sglx/rig.json', 'dat_mic': '/mnt/cube/chronic_ephys/proc/z_c7r3_24/2024-10-27/sglx/d

derived data folder exists..
preprocessing..


2024-10-29 10:14:08,236 root         INFO     found epochs: ['0658_g0', '0959_g0', '1300_g0', '1527_g0']
2024-10-29 10:14:08,236 root         INFO     found epochs: ['0658_g0', '0959_g0', '1300_g0', '1527_g0']
2024-10-29 10:14:08,236 root         INFO     found epochs: ['0658_g0', '0959_g0', '1300_g0', '1527_g0']
2024-10-29 10:14:08,239 ceciestunepipe.util.spikeextractors.preprocess INFO     PREPROCESSING sess 2024-10-27 | epoch 0658_g0
2024-10-29 10:14:08,239 ceciestunepipe.util.spikeextractors.preprocess INFO     PREPROCESSING sess 2024-10-27 | epoch 0658_g0
2024-10-29 10:14:08,239 ceciestunepipe.util.spikeextractors.preprocess INFO     PREPROCESSING sess 2024-10-27 | epoch 0658_g0
2024-10-29 10:14:08,241 ceciestunepipe.util.spikeextractors.preprocess INFO     getting extractors
2024-10-29 10:14:08,241 ceciestunepipe.util.spikeextractors.preprocess INFO     getting extractors
2024-10-29 10:14:08,241 ceciestunepipe.util.spikeextractors.preprocess INFO     getting extractors
2024-10-29

done.


In [6]:
###### derive bout information #####
# search the whole session for bouts, using the parameters set at the top of the notebook
sess_bout_pd = sb.get_all_day_bouts(
    sess_par,
    hparams,
    n_jobs=n_jobs,
    ephys_software=sess_par['ephys_software'],
    file_ext=mic_file_ext,
    deep_search=deep_bout_search,
)

2024-10-29 11:07:10,141 ceciestunepipe.pipeline.searchbout INFO     Will search for bouts through all session z_c7r3_24, 2024-10-27
2024-10-29 11:07:10,141 ceciestunepipe.pipeline.searchbout INFO     Will search for bouts through all session z_c7r3_24, 2024-10-27
2024-10-29 11:07:10,141 ceciestunepipe.pipeline.searchbout INFO     Will search for bouts through all session z_c7r3_24, 2024-10-27
2024-10-29 11:07:10,223 ceciestunepipe.pipeline.searchbout INFO     getting npy files from /mnt/cube/chronic_ephys/der/z_c7r3_24/2024-10-27/sglx
2024-10-29 11:07:10,223 ceciestunepipe.pipeline.searchbout INFO     getting npy files from /mnt/cube/chronic_ephys/der/z_c7r3_24/2024-10-27/sglx
2024-10-29 11:07:10,223 ceciestunepipe.pipeline.searchbout INFO     getting npy files from /mnt/cube/chronic_ephys/der/z_c7r3_24/2024-10-27/sglx
2024-10-29 11:07:10,226 ceciestunepipe.pipeline.searchbout INFO     Found 4 files
2024-10-29 11:07:10,226 ceciestunepipe.pipeline.searchbout INFO     Found 4 files
2024-

  0%|          | 0/9 [00:00<?, ?it/s]

2024-10-29 11:18:41,384 ceciestunepipe.mods.bout_detection_mf INFO     Getting bouts for long file /mnt/cube/chronic_ephys/der/z_c7r3_24/2024-10-27/sglx/0959_g0/wav_mic.npy
2024-10-29 11:18:41,384 ceciestunepipe.mods.bout_detection_mf INFO     Getting bouts for long file /mnt/cube/chronic_ephys/der/z_c7r3_24/2024-10-27/sglx/0959_g0/wav_mic.npy
2024-10-29 11:18:41,384 ceciestunepipe.mods.bout_detection_mf INFO     Getting bouts for long file /mnt/cube/chronic_ephys/der/z_c7r3_24/2024-10-27/sglx/0959_g0/wav_mic.npy
2024-10-29 11:18:46,829 ceciestunepipe.mods.bout_detection_mf INFO     loading male and female mic streams -- will perform bout detection on male mic stream only
2024-10-29 11:18:46,829 ceciestunepipe.mods.bout_detection_mf INFO     loading male and female mic streams -- will perform bout detection on male mic stream only
2024-10-29 11:18:46,829 ceciestunepipe.mods.bout_detection_mf INFO     loading male and female mic streams -- will perform bout detection on male mic stream 

  0%|          | 0/9 [00:00<?, ?it/s]

2024-10-29 11:30:36,906 ceciestunepipe.mods.bout_detection_mf INFO     Getting bouts for long file /mnt/cube/chronic_ephys/der/z_c7r3_24/2024-10-27/sglx/1300_g0/wav_mic.npy
2024-10-29 11:30:36,906 ceciestunepipe.mods.bout_detection_mf INFO     Getting bouts for long file /mnt/cube/chronic_ephys/der/z_c7r3_24/2024-10-27/sglx/1300_g0/wav_mic.npy
2024-10-29 11:30:36,906 ceciestunepipe.mods.bout_detection_mf INFO     Getting bouts for long file /mnt/cube/chronic_ephys/der/z_c7r3_24/2024-10-27/sglx/1300_g0/wav_mic.npy
2024-10-29 11:30:43,919 ceciestunepipe.mods.bout_detection_mf INFO     loading male and female mic streams -- will perform bout detection on male mic stream only
2024-10-29 11:30:43,919 ceciestunepipe.mods.bout_detection_mf INFO     loading male and female mic streams -- will perform bout detection on male mic stream only
2024-10-29 11:30:43,919 ceciestunepipe.mods.bout_detection_mf INFO     loading male and female mic streams -- will perform bout detection on male mic stream 

  0%|          | 0/8 [00:00<?, ?it/s]

2024-10-29 11:41:19,903 ceciestunepipe.mods.bout_detection_mf INFO     Getting bouts for long file /mnt/cube/chronic_ephys/der/z_c7r3_24/2024-10-27/sglx/1527_g0/wav_mic.npy
2024-10-29 11:41:19,903 ceciestunepipe.mods.bout_detection_mf INFO     Getting bouts for long file /mnt/cube/chronic_ephys/der/z_c7r3_24/2024-10-27/sglx/1527_g0/wav_mic.npy
2024-10-29 11:41:19,903 ceciestunepipe.mods.bout_detection_mf INFO     Getting bouts for long file /mnt/cube/chronic_ephys/der/z_c7r3_24/2024-10-27/sglx/1527_g0/wav_mic.npy
2024-10-29 11:41:20,238 ceciestunepipe.mods.bout_detection_mf INFO     loading male and female mic streams -- will perform bout detection on male mic stream only
2024-10-29 11:41:20,238 ceciestunepipe.mods.bout_detection_mf INFO     loading male and female mic streams -- will perform bout detection on male mic stream only
2024-10-29 11:41:20,238 ceciestunepipe.mods.bout_detection_mf INFO     loading male and female mic streams -- will perform bout detection on male mic stream 

  0%|          | 0/2 [00:00<?, ?it/s]

2024-10-29 11:44:14,497 ceciestunepipe.pipeline.searchbout INFO     could not get rate from file /mnt/cube/chronic_ephys/der/z_c7r3_24/2024-10-27/sglx/0658_g0/wav_mic.npy
2024-10-29 11:44:14,497 ceciestunepipe.pipeline.searchbout INFO     could not get rate from file /mnt/cube/chronic_ephys/der/z_c7r3_24/2024-10-27/sglx/0658_g0/wav_mic.npy
2024-10-29 11:44:14,497 ceciestunepipe.pipeline.searchbout INFO     could not get rate from file /mnt/cube/chronic_ephys/der/z_c7r3_24/2024-10-27/sglx/0658_g0/wav_mic.npy
2024-10-29 11:44:14,502 ceciestunepipe.pipeline.searchbout INFO     could not get rate from file /mnt/cube/chronic_ephys/der/z_c7r3_24/2024-10-27/sglx/0959_g0/wav_mic.npy
2024-10-29 11:44:14,502 ceciestunepipe.pipeline.searchbout INFO     could not get rate from file /mnt/cube/chronic_ephys/der/z_c7r3_24/2024-10-27/sglx/0959_g0/wav_mic.npy
2024-10-29 11:44:14,502 ceciestunepipe.pipeline.searchbout INFO     could not get rate from file /mnt/cube/chronic_ephys/der/z_c7r3_24/2024-10-27

#### Process a single epoch at a time, taking care of individual errors

In [7]:
# accumulator for the synced bout data frame of every processed epoch
bout_syn_pd_all = list()
# stim sessions also accumulate the synced trial data frame of every epoch
if len(sess_par['stim_sess']) != 0:
    trial_syn_pd_all = list()

In [8]:
# get epochs
# sess_epochs is reused further down: it is the list the per-epoch sync loop iterates
# over and the list of epoch names written to the final preprocessing log
sess_epochs = et.list_ephys_epochs(sess_par)
print(sess_epochs)

2024-10-29 11:46:56,718 ceciestunepipe.file.bcistructure INFO     {'folders': {'bird': '/mnt/cube/chronic_ephys/raw/z_c7r3_24', 'raw': '/mnt/cube/chronic_ephys/raw/z_c7r3_24/2024-10-27', 'sglx': '/mnt/cube/chronic_ephys/raw/z_c7r3_24/2024-10-27/sglx', 'processed': '/mnt/cube/chronic_ephys/proc/z_c7r3_24/2024-10-27/sglx', 'derived': '/mnt/cube/chronic_ephys/der/z_c7r3_24/2024-10-27/sglx', 'tmp': '/mnt/cube/chronic_ephys/tmp/tmp'}, 'files': {'par': '/mnt/cube/chronic_ephys/raw/z_c7r3_24/2024-10-27/sglx/experiment.json', 'set': '/mnt/cube/chronic_ephys/raw/z_c7r3_24/2024-10-27/sglx/settings.isf', 'rig': '/mnt/cube/chronic_ephys/raw/z_c7r3_24/2024-10-27/sglx/rig.json', 'dat_mic': '/mnt/cube/chronic_ephys/proc/z_c7r3_24/2024-10-27/sglx/dat_mic.mat', 'dat_ap': '/mnt/cube/chronic_ephys/proc/z_c7r3_24/2024-10-27/sglx/dat_ap.mat', 'allevents': '/mnt/cube/chronic_ephys/proc/z_c7r3_24/2024-10-27/sglx/dat_all.pkl', 'wav_mic': '/mnt/cube/chronic_ephys/der/z_c7r3_24/2024-10-27/sglx/wav_mic.wav'}}
20

['0658_g0', '0959_g0', '1300_g0', '1527_g0']


### Debugging: check that all streams are the same length (address ttl events error)

In [23]:
# Load the sync pattern of every raw stream of one epoch and print how long each
# stream is, so that streams that terminated at different moments can be spotted.
sess_par['epoch'] = '1300_g0' # problematic epoch
epoch_struct = et.sgl_struct(sess_par,sess_par['epoch'],ephys_software=sess_par['ephys_software'])

# get epoch files
sgl_folders, sgl_files = sglu.sgl_file_struct(epoch_struct['folders']['sglx'])
run_meta_files = {k:v[0] for k,v in sgl_files.items()}
run_recordings = {k:sglex.SpikeGLXRecordingExtractor(sglu.get_data_meta_path(v)[0]) for k,v in run_meta_files.items()}

# streams come from the raw recording extractors only ('wav' is deliberately left out here)
all_streams = list(run_recordings.keys())
# get sync pattern
all_syn_dict = {k:sy.get_syn_pattern(run_recordings,epoch_struct,k,force=False) for k in all_streams}

for stream in all_syn_dict.keys():
    n_samples = np.shape(all_syn_dict[stream]['t_0'])[0]
    # round to whole seconds first, then split into min:sec; rounding the seconds
    # part on its own could print an invalid ':60'
    end_min, end_sec = divmod(int(round(n_samples/all_syn_dict[stream]['s_f'])), 60)
    print(stream+' recording ends at '+str(end_min)+':'+f"{end_sec:02d}")
    print('n samples:',n_samples,'\n')

2024-10-29 12:53:03,246 ceciestunepipe.util.sglxutil INFO     no ['lf_0'] file found.
2024-10-29 12:53:03,246 ceciestunepipe.util.sglxutil INFO     no ['lf_0'] file found.
2024-10-29 12:53:03,246 ceciestunepipe.util.sglxutil INFO     no ['lf_0'] file found.
2024-10-29 12:53:03,259 ceciestunepipe.util.sglxsync INFO     getting syn patterns for nidq
2024-10-29 12:53:03,259 ceciestunepipe.util.sglxsync INFO     getting syn patterns for nidq
2024-10-29 12:53:03,259 ceciestunepipe.util.sglxsync INFO     getting syn patterns for nidq
2024-10-29 12:53:03,260 ceciestunepipe.util.sglxsync INFO     loading syn_dict from /mnt/cube/chronic_ephys/der/z_c7r3_24/2024-10-27/sglx/1300_g0/nidq_sync_dict.pkl
2024-10-29 12:53:03,260 ceciestunepipe.util.sglxsync INFO     loading syn_dict from /mnt/cube/chronic_ephys/der/z_c7r3_24/2024-10-27/sglx/1300_g0/nidq_sync_dict.pkl
2024-10-29 12:53:03,260 ceciestunepipe.util.sglxsync INFO     loading syn_dict from /mnt/cube/chronic_ephys/der/z_c7r3_24/2024-10-27/sgl

nidq recording ends at 146:48
n samples: 352309729 

ap_0 recording ends at 140:30
n samples: 252888276 



In [24]:
## debugging: check if streams have skipped heartbeats
def check_skipped(one_syn_dict: dict, round_ms=50):
    """Report irregularities in the sync-event ('heartbeat') pattern of one stream.

    Two checks are run and their findings are printed:

    1. the intervals between consecutive events, rounded to the nearest
       ``round_ms`` milliseconds, should all be identical;
    2. the second row of the event array should repeat with period two,
       i.e. every other entry should be the same.

    Parameters
    ----------
    one_syn_dict : dict
        Sync dictionary of a single stream. Two entries are read:
        ``'evt_arr'``, a 2-row array whose first row indexes into ``'t_0'``
        (the second row presumably holds the edge signs -- confirm), and
        ``'t_0'``, the time stamp array (differences are multiplied by 1000 and
        reported as ms, so the values are presumably in seconds).
    round_ms : int, optional
        Resolution (ms) to which the inter-event intervals are rounded
        before they are compared. Default 50.

    Returns
    -------
    bool
        True when neither check found a problem, i.e. exactly when
        'No skipped heartbeats' is printed.
    """
    no_skips = True
    evt_arr = one_syn_dict['evt_arr']
    evt_t = one_syn_dict['t_0'][evt_arr[0]]
    print('Event array has {} events'.format(evt_arr.size//2))

    # inter-event intervals in ms, rounded to the nearest round_ms
    evt_diff = (np.round(np.diff(evt_t)*1000/round_ms)*round_ms).astype(int)
    evt_period_ms = np.unique(evt_diff)
    if evt_period_ms.size > 1:
        no_skips = False
        # most frequent interval; np.unique is used rather than np.bincount so that
        # negative intervals (time stamps going backwards) are reported instead of raising
        period_values, period_counts = np.unique(evt_diff, return_counts=True)
        typical_period = period_values[np.argmax(period_counts)]
        bad_periods = np.where(evt_diff != typical_period)[0]
        print('More than 1 different periods detected: {}'.format(evt_period_ms))
        print('Most periods equal to {} -- bad periods:'.format(typical_period))
        for bp in bad_periods:
            print('evt_diff['+str(bp)+']='+str(evt_diff[bp]))

    # check that the diff between every other edge is zero
    period_diff = np.hstack([np.diff(evt_arr[1][1:][::2]), np.diff(evt_arr[1][::2])])
    if not np.all(period_diff == 0):
        no_skips = False
        print('Difference between corresponding periodic edges is not zero: {}'.format(np.unique(period_diff)))

    if no_skips:
        print('No skipped heartbeats')
    return no_skips

# run the heartbeat check on every stream, leaving a blank line between reports
for stream in all_syn_dict:
    print('Checking for skipped heartbeats in',stream,'stream:')
    check_skipped(all_syn_dict[stream])
    print()

Checking for skipped heartbeats in nidq stream:
Event array has 17615 events
No skipped heartbeats

Checking for skipped heartbeats in ap_0 stream:
Event array has 16860 events
No skipped heartbeats



#### Record epochs with mismatched streams and which stream is shortest

In [11]:
# epoch -> (streams are mismatched?, shortest stream); the stream name is only
# given for mismatched epochs. Every epoch starts out as "not mismatched" ...
mismatched_streams = dict.fromkeys(['0658_g0', '0959_g0', '1300_g0', '1527_g0'], (False,))
# ... and the problematic ones are flagged individually
mismatched_streams['1300_g0'] = (True, 'ap_0')

In [12]:
# loop through epochs:
# for every epoch, sync all streams to sess_par['ref_stream'], sync the automatically
# detected bouts (and, for stim sessions, the stimulus trials) and save the results in
# the epoch's derived folder. Per-epoch data frames are appended to bout_syn_pd_all /
# trial_syn_pd_all, so re-running this cell appends the same epochs again.
epoch_list = sess_epochs # process all epochs

for this_epoch in epoch_list:

    sess_par['epoch'] = this_epoch
    epoch_struct = et.sgl_struct(sess_par,sess_par['epoch'],ephys_software=sess_par['ephys_software'])

    ##### synchronization - sglx #####
    print(sess_par['bird'],sess_par['sess'],this_epoch,'syncing..')
    if sess_par['ephys_software'] == 'sglx':
        # get epoch files
        sgl_folders, sgl_files = sglu.sgl_file_struct(epoch_struct['folders']['sglx'])
        run_meta_files = {k:v[0] for k,v in sgl_files.items()}
        run_recordings = {k:sglex.SpikeGLXRecordingExtractor(sglu.get_data_meta_path(v)[0]) for k,v in run_meta_files.items()}

        # get streams, from raw recording extractors and preprocessed data
        all_streams = list(run_recordings.keys()) + ['wav'] ### might want to just remove this
        # get sync pattern
        all_syn_dict = {k:sy.get_syn_pattern(run_recordings,epoch_struct,k,force=False) for k in all_streams}
        # run sync
        if mismatched_streams[this_epoch][0]:
            # mismatched epochs go through the debug module; the function has to be
            # qualified with `syd.` because no bare `sync_all_mismatched_streams` is
            # defined in this notebook (NameError on a fresh kernel)
            syd.sync_all_mismatched_streams(all_syn_dict,sess_par['ref_stream'],mismatched_streams[this_epoch][1],force=False)
        else:
            sy.sync_all(all_syn_dict,sess_par['ref_stream'],force=False)

        # load bouts (note: this rebinds the notebook-level `hparams` to what load_bouts returns)
        hparams, bout_pd = sb.load_bouts(sess_par['bird'],sess_par['sess'],'', derived_folder='bouts_sglx',bout_file_key='bout_auto_file')
        # keep only epoch bouts
        logger.info('bouts from this epoch {}'.format(sess_par['epoch']))
        drop_condition = ~bout_pd['file'].str.contains(sess_par['epoch'])
        bout_pd.drop(bout_pd[drop_condition].index, inplace=True)
        bout_pd.reset_index(drop=True, inplace=True)
        # sync bouts to spike time base
        if mismatched_streams[this_epoch][0]:
            bout_dict, bout_syn_pd = syd.bout_dict_from_pd_mismatched_streams(bout_pd,all_syn_dict,s_f_key='wav')
        else:
            bout_dict, bout_syn_pd = sy.bout_dict_from_pd(bout_pd,all_syn_dict,s_f_key='wav')
        # store epoch synced bout info
        bout_syn_pd['bird'] = sess_par['bird']
        bout_syn_pd['sess'] = sess_par['sess']
        bout_syn_pd['epoch'] = sess_par['epoch']
        bout_syn_pd_all.append(bout_syn_pd)
        # save synced bouts
        bout_dict_path = os.path.join(epoch_struct['folders']['derived'],'bout_dict_ap0.pkl')
        with open(bout_dict_path, 'wb') as handle:
            pickle.dump(bout_dict, handle)
        bout_pd_path = os.path.join(epoch_struct['folders']['derived'],'bout_pd_ap0.pkl')
        # NOTE(review): bout_pd (not bout_syn_pd) is pickled here -- presumably
        # bout_dict_from_pd updates bout_pd in place; confirm against sglxsync
        bout_pd.to_pickle(bout_pd_path)
        logger.info('saved syncronized bout dict and pandas dataframe to {}, {}'.format(bout_dict_path, bout_pd_path))

        if len(sess_par['stim_sess']) > 0:
            # syn_ttl comes from the digital pin, syn_sine_ttl from the sine
            event_name = 'wav_stim'
            ttl_ev_name = event_name + '_sync_sine_ttl'
            # get the events npy file
            npy_stim_path = os.path.join(epoch_struct['folders']['derived'],ttl_ev_name + '_evt.npy')
            stream_stim_path = os.path.join(epoch_struct['folders']['derived'],event_name + '.npy')
            trial_ttl = np.load(npy_stim_path)
            # epoch may not have trials - if so ttl file will be empty
            if len(trial_ttl) > 0:
                trial_stream = np.load(stream_stim_path,mmap_mode='r')
                # get sampling frequency
                stim_s_f = int(all_syn_dict['nidq']['s_f'])
                # load the stimulus name - frequency tag dictionary
                stim_tags_dict = preproc_sglx.load_stim_tags_dict(sess_par['stim_sess'],sess_par['bird'])
                # get trial tagged dataframe
                trial_tagged_pd = su.get_trials_pd(trial_ttl, trial_stream, stim_s_f,on_signal=sess_par['on_signal'],
                                                   tag_chan=sess_par['trial_tag_chan'],stim_tags_dict=stim_tags_dict,
                                                   trial_is_onof=True)
                # sync stim
                trial_dict, trial_syn_pd = sy.trial_syn_from_pd(trial_tagged_pd,all_syn_dict,s_f_key='nidq')
                # store epoch synced stim info
                trial_syn_pd['bird'] = sess_par['bird']
                trial_syn_pd['sess'] = sess_par['sess']
                trial_syn_pd['epoch'] = this_epoch
                trial_syn_pd_all.append(trial_syn_pd)
                # save synced stim
                stim_dict_path = os.path.join(epoch_struct['folders']['derived'],'stim_dict_ap0.pkl')
                stim_pd_path = os.path.join(epoch_struct['folders']['derived'],'stim_pd_ap0.pkl')
                with open(stim_dict_path,'wb') as handle:
                    pickle.dump(trial_dict,handle)
                trial_syn_pd.to_pickle(stim_pd_path)
                logger.info('saved syncronized stim dict and pandas dataframe to {}, {}'.format(stim_dict_path, stim_pd_path))

    ###### synchronization - oe #####
    elif sess_par['ephys_software'] == 'oe':
        # get epoch files
        run_recordings = {'oeb':preproc_oe.get_oe_cont_recording(exp_struct,this_epoch)}

        # make an all_syn_dict
        mic_file_name = os.path.join(exp_struct['folders']['derived'],this_epoch,'wav_mic-npy_meta.pickle')
        with open(mic_file_name, 'rb') as handle:
            wav_mic_meta = pickle.load(handle)
        all_syn_dict = {'wav': {'s_f':wav_mic_meta['s_f']},
                       'ap_0': {'s_f':run_recordings['oeb'].get_sampling_frequency()},
                       'nidq': {'s_f':run_recordings['oeb'].get_sampling_frequency()}}
        # make bouts pandas file for this session - match sglx format, streams already synced
        bout_oe_struct = et.get_exp_struct(sess_par['bird'],sess_par['sess'],sort=sess_par['sort'],ephys_software='bouts_oe')
        bout_pd_path = os.path.join(bout_oe_struct['folders']['derived'], 'bout_auto.pickle')
        bout_syn_pd = pd.read_pickle(bout_pd_path)
        bout_dict = preproc_oe.bout_dict_from_pd(bout_syn_pd,all_syn_dict)
        # store epoch synced bout info
        bout_syn_pd['bird'] = sess_par['bird']
        bout_syn_pd['sess'] = sess_par['sess']
        bout_syn_pd['epoch'] = this_epoch
        bout_syn_pd_all.append(bout_syn_pd)
        # save synced bouts
        bout_dict_path = os.path.join(epoch_struct['folders']['derived'],'bout_dict_oe.pkl')
        bout_pd_path = os.path.join(epoch_struct['folders']['derived'],'bout_pd_oe.pkl')
        with open(bout_dict_path,'wb') as handle:
            pickle.dump(bout_dict,handle)
        bout_syn_pd.to_pickle(bout_pd_path)

        if len(sess_par['stim_sess']) > 0:
            # this epoch name - get recording events path
            raw_folder = exp_struct['folders']['oe']
            epoch_path = os.path.join(raw_folder,this_epoch)
            node_path = preproc_oe.get_default_node(exp_struct,this_epoch)
            rec_path = preproc_oe.get_default_recording(node_path)
            events_path = os.path.join(rec_path,'events/Network_Events-102.0/TEXT_group_1/')
            # load stim labels / onsets
            stim_labels = np.load(os.path.join(events_path,'text.npy'))
            stim_onsets = np.load(os.path.join(events_path,'timestamps.npy'))

            # get stim onsets and offsets
            stim_on_all = []
            stim_off_all = []
            stim_proc_path_all = []
            stim_exp_path_all = []
            stim_map_dir_all = []
            stim_id_all = []
            # loop through stim
            for stim_i in range(len(stim_labels)):
                this_stim_label = stim_labels[stim_i].astype('str')
                this_stim_onset = stim_onsets[stim_i]
                if this_stim_label[:4] == 'stim':
                    stim_exp_file = this_stim_label[5:]
                    # get stim preprocessing directory
                    # NOTE(review): `stim_map_dict` is not defined in the visible part of this
                    # notebook; it has to exist before an oe stim session can be processed
                    stim_file_split = stim_exp_file.split('/')
                    stim_map_i = np.where([stim_file_split[i] in list(stim_map_dict.keys()) for i in range(len(stim_file_split))])[0][0]
                    stim_map_dir = stim_map_dict[stim_file_split[stim_map_i]]
                    # get remaining stim file path - identical for experiment and preprocessing
                    remaining_stim_file = '/'.join(stim_file_split[stim_map_i+1:])
                    # processing file location
                    stim_file = os.path.join(stim_map_dir,remaining_stim_file)
                    # load stim and get length
                    sf,this_wav = wavfile.read(stim_file,mmap=True)
                    stim_len = this_wav.shape[0]/sf
                    # get length of stim in samples - round up
                    stim_samp_len = int(np.ceil(stim_len * bout_dict['s_f']))
                    # get stim on / off
                    stim_on_all.append(this_stim_onset)
                    stim_off_all.append(this_stim_onset+stim_samp_len)
                    stim_proc_path_all.append(stim_file)
                    stim_exp_path_all.append(stim_exp_file)
                    stim_map_dir_all.append(stim_map_dir)
                    stim_id_all.append(remaining_stim_file)

            # make into a pd - oe already synced
            stim_on_all_np = np.array(stim_on_all).astype('int')
            stim_off_all_np = np.array(stim_off_all).astype('int')
            stim_on_all_np_ms = 1000*(stim_on_all_np/bout_dict['s_f'])
            stim_off_all_np_ms = 1000*(stim_off_all_np/bout_dict['s_f'])
            trial_syn_pd = pd.DataFrame(np.vstack([stim_on_all_np,
                                                stim_off_all_np,
                                                stim_on_all_np_ms,
                                                stim_off_all_np_ms,
                                                stim_off_all_np_ms-stim_on_all_np_ms,
                                                stim_proc_path_all,
                                                stim_exp_path_all,
                                                stim_map_dir_all,
                                                stim_id_all]).T,
            columns=['start_sample','end_sample','start_ms','end_ms','len_ms',
                     'proc_file','exp_file','map_dir','stim_id'])
            # stacking numbers together with paths turns every column into strings,
            # so the numeric columns are cast back (end_ms included, like its siblings)
            trial_syn_pd['start_sample'] = trial_syn_pd['start_sample'].astype('int')
            trial_syn_pd['end_sample'] = trial_syn_pd['end_sample'].astype('int')
            trial_syn_pd['start_ms'] = trial_syn_pd['start_ms'].astype('float')
            trial_syn_pd['end_ms'] = trial_syn_pd['end_ms'].astype('float')
            trial_syn_pd['len_ms'] = trial_syn_pd['len_ms'].astype('float')
            # store epoch synced stim info
            trial_syn_pd['bird'] = sess_par['bird']
            trial_syn_pd['sess'] = sess_par['sess']
            trial_syn_pd['epoch'] = this_epoch
            trial_syn_pd_all.append(trial_syn_pd)
            trial_dict = {
                's_f': all_syn_dict['wav']['s_f'],
                'ap_0':all_syn_dict['ap_0']['s_f'],
                'nidq':all_syn_dict['nidq']['s_f'],
                'start_ms':trial_syn_pd['start_ms'],
                'len_ms':trial_syn_pd['len_ms'],
                'start_sample':trial_syn_pd['start_sample'],
                'end_sample':trial_syn_pd['end_sample'],
                'proc_file':trial_syn_pd['proc_file'],
                'exp_file':trial_syn_pd['exp_file'],
                'map_dir':trial_syn_pd['map_dir'],
                'stim_id':trial_syn_pd['stim_id']}
            # save synced stim
            stim_dict_path = os.path.join(epoch_struct['folders']['derived'],'stim_dict_ap0.pkl')
            stim_pd_path = os.path.join(epoch_struct['folders']['derived'],'stim_pd_ap0.pkl')
            with open(stim_dict_path,'wb') as handle:
                pickle.dump(trial_dict,handle)
            trial_syn_pd.to_pickle(stim_pd_path)
            logger.info('saved syncronized stim dict and pandas dataframe to {}, {}'.format(stim_dict_path, stim_pd_path))

print('done.')

2024-10-29 11:47:04,566 ceciestunepipe.util.sglxutil INFO     no ['lf_0'] file found.
2024-10-29 11:47:04,566 ceciestunepipe.util.sglxutil INFO     no ['lf_0'] file found.
2024-10-29 11:47:04,566 ceciestunepipe.util.sglxutil INFO     no ['lf_0'] file found.


z_c7r3_24 2024-10-27 0658_g0 syncing..


2024-10-29 11:47:04,602 ceciestunepipe.util.sglxsync INFO     getting syn patterns for nidq
2024-10-29 11:47:04,602 ceciestunepipe.util.sglxsync INFO     getting syn patterns for nidq
2024-10-29 11:47:04,602 ceciestunepipe.util.sglxsync INFO     getting syn patterns for nidq
2024-10-29 11:47:04,606 ceciestunepipe.util.sglxsync INFO     loading syn_dict from /mnt/cube/chronic_ephys/der/z_c7r3_24/2024-10-27/sglx/0658_g0/nidq_sync_dict.pkl
2024-10-29 11:47:04,606 ceciestunepipe.util.sglxsync INFO     loading syn_dict from /mnt/cube/chronic_ephys/der/z_c7r3_24/2024-10-27/sglx/0658_g0/nidq_sync_dict.pkl
2024-10-29 11:47:04,606 ceciestunepipe.util.sglxsync INFO     loading syn_dict from /mnt/cube/chronic_ephys/der/z_c7r3_24/2024-10-27/sglx/0658_g0/nidq_sync_dict.pkl
2024-10-29 11:47:35,808 ceciestunepipe.util.sglxsync INFO     getting syn patterns for ap_0
2024-10-29 11:47:35,808 ceciestunepipe.util.sglxsync INFO     getting syn patterns for ap_0
2024-10-29 11:47:35,808 ceciestunepipe.util.s

z_c7r3_24 2024-10-27 0959_g0 syncing..


2024-10-29 12:16:08,139 ceciestunepipe.util.sglxsync INFO     getting syn patterns for ap_0
2024-10-29 12:16:08,139 ceciestunepipe.util.sglxsync INFO     getting syn patterns for ap_0
2024-10-29 12:16:08,139 ceciestunepipe.util.sglxsync INFO     getting syn patterns for ap_0
2024-10-29 12:16:08,153 ceciestunepipe.util.sglxsync INFO     loading syn_dict from /mnt/cube/chronic_ephys/der/z_c7r3_24/2024-10-27/sglx/0959_g0/ap_0_sync_dict.pkl
2024-10-29 12:16:08,153 ceciestunepipe.util.sglxsync INFO     loading syn_dict from /mnt/cube/chronic_ephys/der/z_c7r3_24/2024-10-27/sglx/0959_g0/ap_0_sync_dict.pkl
2024-10-29 12:16:08,153 ceciestunepipe.util.sglxsync INFO     loading syn_dict from /mnt/cube/chronic_ephys/der/z_c7r3_24/2024-10-27/sglx/0959_g0/ap_0_sync_dict.pkl
2024-10-29 12:16:08,171 ceciestunepipe.util.sglxsync INFO     getting syn patterns for wav
2024-10-29 12:16:08,171 ceciestunepipe.util.sglxsync INFO     getting syn patterns for wav
2024-10-29 12:16:08,171 ceciestunepipe.util.sgl

z_c7r3_24 2024-10-27 1300_g0 syncing..


2024-10-29 12:27:33,664 ceciestunepipe.util.sglxsync_debug INFO     syncing all times to ap_0
2024-10-29 12:27:33,664 ceciestunepipe.util.sglxsync_debug INFO     syncing all times to ap_0
2024-10-29 12:27:33,664 ceciestunepipe.util.sglxsync_debug INFO     syncing all times to ap_0
2024-10-29 12:27:33,666 ceciestunepipe.util.sglxsync_debug INFO      sync nidq...
2024-10-29 12:27:33,666 ceciestunepipe.util.sglxsync_debug INFO      sync nidq...
2024-10-29 12:27:33,666 ceciestunepipe.util.sglxsync_debug INFO      sync nidq...
2024-10-29 12:27:33,668 ceciestunepipe.util.sglxsync_debug INFO       t_prime file /mnt/cube/chronic_ephys/der/z_c7r3_24/2024-10-27/sglx/1300_g0/nidq-tp.npy not found or forced computation, getting the events
2024-10-29 12:27:33,668 ceciestunepipe.util.sglxsync_debug INFO       t_prime file /mnt/cube/chronic_ephys/der/z_c7r3_24/2024-10-27/sglx/1300_g0/nidq-tp.npy not found or forced computation, getting the events
2024-10-29 12:27:33,668 ceciestunepipe.util.sglxsync_d

ValueError: Sign of first edge transition of pattern and target dont match

#### After handling all errors, save outputs and log preprocessing complete

In [None]:
# concatenate list of synced bout data frames from each epoch and save
bout_syn_pd_all_cat = pd.concat(bout_syn_pd_all)
sb.save_auto_bouts(bout_syn_pd_all_cat,sess_par,hparams,software=sess_par['ephys_software'],bout_file_key='bout_sync_file')

# stim sess save the all sync epoch stim data frame as well
if len(sess_par['stim_sess']) > 0:
    trial_syn_pd_all_cat = pd.concat(trial_syn_pd_all)
    sb.save_auto_bouts(trial_syn_pd_all_cat,sess_par,hparams,software=sess_par['ephys_software'],bout_file_key='stim_sync_file')

# log preprocessing complete without error
log_dir = os.path.join('/mnt/cube/chronic_ephys/log', sess_par['bird'], sess_par['sess'])
# create the log folder when it is missing, otherwise open() below raises FileNotFoundError
os.makedirs(log_dir, exist_ok=True)
with open(os.path.join(log_dir,'preprocessing.log'), 'w') as f:
    f.write(sess_par['bird']+' '+sess_par['sess']+' preprocessing complete without error\nEpochs '+', '.join(sess_epochs)+' processed\n')

#### To look up lengths of recordings to stitch them together in 2-curate_acoustics

In [40]:
# Look up the number of ap_0 samples of one epoch.
sess_par['epoch'] = '1235_g0'
epoch_struct = et.sgl_struct(sess_par,sess_par['epoch'],ephys_software=sess_par['ephys_software'])
# build the recording extractors for THIS epoch instead of reusing whatever
# run_recordings / all_streams were left in the kernel by a previously processed epoch
sgl_folders, sgl_files = sglu.sgl_file_struct(epoch_struct['folders']['sglx'])
run_meta_files = {k:v[0] for k,v in sgl_files.items()}
run_recordings = {k:sglex.SpikeGLXRecordingExtractor(sglu.get_data_meta_path(v)[0]) for k,v in run_meta_files.items()}
all_syn_dict = {k:sy.get_syn_pattern(run_recordings,epoch_struct,k,force=False) for k in run_recordings}
print('n samples:',np.shape(all_syn_dict['ap_0']['t_0'])[0])

n samples: 14681207


In [41]:
# ap_0 sampling rate ('s_f') stored in the sync dict of the epoch loaded in the previous cell
print('sampling rate:',all_syn_dict['ap_0']['s_f'])

sampling rate: 29999.844262295082
