In [3]:
import numpy as np
import pandas as pd
import os
import pickle
from scipy.io import wavfile
import sys
sys.path.append('/mnt/cube/tsmcpher/code/')
from ceciestunepipe.file import bcistructure as et
from ceciestunepipe.util import sglxutil as sglu
from ceciestunepipe.util import sglxsync as sy
from ceciestunepipe.util import stimutil as su
from ceciestunepipe.util.spikeextractors.extractors.spikeglxrecordingextractor import spikeglxrecordingextractor as sglex
from ceciestunepipe.pipeline import searchbout as sb
from ceciestunepipe.tsm import preproc_sglx
from ceciestunepipe.util import oeutil as oeu
from ceciestunepipe.tsm import preproc_oe
import logging
# Configure the root logger to emit INFO-and-above records to a stream handler
# using a "time  logger-name  level  message" layout.
logger = logging.getLogger()
formatter = logging.Formatter(
        '%(asctime)s %(name)-12s %(levelname)-8s %(message)s')
# Re-running this cell must not stack another StreamHandler on the root logger:
# every extra handler prints each record one more time. Reuse the handler this
# cell installed earlier (recognised by its marker attribute) when it exists.
handler = next((h for h in logger.handlers
                if getattr(h, '_notebook_stream_handler', False)), None)
if handler is None:
    handler = logging.StreamHandler()
    handler._notebook_stream_handler = True
    logger.addHandler(handler)
handler.setFormatter(formatter)
logger.setLevel(logging.INFO)

h5py version > 2.10.0. Some extractors might not work properly. It is recommended to downgrade to version 2.10.0: 
>>> pip install h5py==2.10.0


2023-09-05 12:25:43,905 root         INFO     Running on pakhi.ucsd.edu


In [4]:
# Placeholder switch, presumably meant to force already-computed outputs to be regenerated.
# Not implemented yet: the processing loop in this notebook hardcodes force=False
# in its sync calls and never reads this flag.
force_redo = False

In [5]:
# Recordings to process, keyed by bird id. Each bird maps to a list of session
# configurations; every configuration carries:
#   sess_par_list - sessions that share this configuration
#   stim_sess     - stimulus sessions, if stimuli are played (sglx only)
#   sort          - label for this sort instance
#   software      - acquisition software, 'sglx' or 'oe'
bird_rec_dict = {
    's_b1555_22': [
        dict(sess_par_list=['2022-04-29'], stim_sess=[], sort=0, software='sglx'),
        dict(sess_par_list=['2022-08-17'], stim_sess=[], sort=0, software='oe'),
    ],
}

In [6]:
%%time
# Sync bouts (and, where configured, stimulus trials) for every bird / session / epoch
# listed in bird_rec_dict, and pickle the results into each epoch's 'derived' folder.
#
# Keys read from each session configuration:
#   'sess_par_list'  - sessions sharing this configuration
#   'sort'           - label for this sort instance
#   'software'       - 'sglx' or 'oe'
#   'stim_sess'      - optional (sglx only); when empty or missing, stimulus syncing is skipped
#   'ref_stream'     - optional (sglx only); stream passed to sy.sync_all as the reference.
#                      Defaults to 'ap_0' -- assumed from the '*_ap0.pkl' output names, TODO confirm
#   'on_signal', 'trial_tag_chan' - required only when 'stim_sess' is non-empty
# loop through all birds / recordings
for this_bird, sess_all in bird_rec_dict.items():
    # loop through the session configurations of this bird
    for this_sess_config in sess_all:
        # loop through sessions
        for this_sess in this_sess_config['sess_par_list']:
            # which software
            this_software = this_sess_config['software']
            # build session parameter dictionary
            sess_par = {'bird':this_bird,
                        'sess':this_sess,
                        'sort':this_sess_config['sort']}
            
            if this_software == 'sglx':
                # These settings live in the session configuration; sess_par only holds
                # bird / sess / sort (and later epoch), so looking them up there raised KeyError.
                ref_stream = this_sess_config.get('ref_stream','ap_0')
                stim_sess = this_sess_config.get('stim_sess',[])
                # get epochs
                sess_epochs = et.list_sgl_epochs(sess_par)
                # loop through epochs
                for this_epoch in sess_epochs:
                    # add to session parameter dictionary
                    sess_par['epoch'] = this_epoch
                    # get epoch files
                    exp_struct = et.sgl_struct(sess_par,this_epoch)
                    sgl_folders, sgl_files = sglu.sgl_file_struct(exp_struct['folders']['sglx'])
                    # keep the first file of each stream
                    run_meta_files = {k:v[0] for k,v in sgl_files.items()}
                    run_recordings = {k:sglex.SpikeGLXRecordingExtractor(sglu.get_data_meta_path(v)[0]) for k,v in run_meta_files.items()}
                    print(this_bird,this_sess,this_epoch,'syncing..')

                    # get streams, from raw recording extractors and preprocessed data
                    all_streams = list(run_recordings.keys()) + ['wav']
                    # get sync pattern
                    all_syn_dict = {k:sy.get_syn_pattern(run_recordings,exp_struct,k,force=False) for k in all_streams}
                    # run sync
                    sy.sync_all(all_syn_dict,ref_stream,force=False)

                    # load bouts
                    hparams, bout_pd = sb.load_bouts(sess_par['bird'],sess_par['sess'],'', derived_folder='bouts_sglx',bout_file_key='bout_curated_file')
                    # keep only curated bouts
                    logger.info('filtering only good bouts from this epoch {}'.format(sess_par['epoch']))
                    # drop bouts whose file is not from this epoch, or that failed curation
                    drop_condition = (~bout_pd['file'].str.contains(sess_par['epoch'])) | (bout_pd['bout_check']==False)
                    bout_pd.drop(bout_pd[drop_condition].index, inplace=True)
                    bout_pd.reset_index(drop=True, inplace=True)
                    # sync bouts to spike time base
                    bout_dict, bout_syn_pd = sy.bout_dict_from_pd(bout_pd,all_syn_dict,s_f_key='wav')
                    
                    # save synced bouts
                    bout_dict_path = os.path.join(exp_struct['folders']['derived'], 'bout_dict_ap0.pkl')
                    bout_pd_path = os.path.join(exp_struct['folders']['derived'], 'bout_pd_ap0.pkl')
                    with open(bout_dict_path, 'wb') as handle:
                        pickle.dump(bout_dict, handle)
                    # NOTE(review): bout_pd is pickled here, not the bout_syn_pd returned above
                    # (the stim section below pickles trial_syn_pd) -- confirm this is intended.
                    bout_pd.to_pickle(bout_pd_path)
                    logger.info('saved syncronized bout dict and pandas dataframe to {}, {}'.format(bout_dict_path, bout_pd_path))

                    # Stimulus trials are only synced when this configuration declares stimulus
                    # sessions; otherwise there are no stimulus event files to load.
                    if stim_sess:
                        # syn_ttl comes from the digital pin, syn_sine_ttl from the sine
                        event_name = 'wav_stim'
                        ttl_ev_name = event_name + '_sync_sine_ttl' 
                        # get the events npy file
                        npy_stim_path = os.path.join(exp_struct['folders']['derived'],ttl_ev_name + '_evt.npy')
                        stream_stim_path = os.path.join(exp_struct['folders']['derived'],event_name + '.npy')
                        trial_ttl = np.load(npy_stim_path)
                        # memory-map the stream instead of reading it all into RAM
                        trial_stream = np.load(stream_stim_path,mmap_mode='r')
                        # get sampling frequency
                        stim_s_f = int(all_syn_dict['nidq']['s_f'])
                        # load the stimulus name - frequency tag dictionary
                        stim_tags_dict = preproc_sglx.load_stim_tags_dict(stim_sess)
                        # get trial tagged dataframe
                        trial_tagged_pd = su.get_trials_pd(trial_ttl, trial_stream, stim_s_f,on_signal=this_sess_config['on_signal'],
                                                           tag_chan=this_sess_config['trial_tag_chan'],stim_tags_dict=stim_tags_dict,
                                                           trial_is_onof=True)
                        # sync stim
                        trial_dict, trial_syn_pd = sy.trial_syn_from_pd(trial_tagged_pd,all_syn_dict,s_f_key='nidq')
                        
                        # save synced stim
                        stim_dict_path = os.path.join(exp_struct['folders']['derived'],'stim_dict_ap0.pkl')
                        stim_pd_path = os.path.join(exp_struct['folders']['derived'],'stim_pd_ap0.pkl')
                        with open(stim_dict_path,'wb') as handle:
                            pickle.dump(trial_dict,handle)
                        trial_syn_pd.to_pickle(stim_pd_path)
                        logger.info('saved syncronized stim dict and pandas dataframe to {}, {}'.format(stim_dict_path, stim_pd_path))
                
            elif this_software == 'oe':
                # get epochs
                exp_struct = et.get_exp_struct(sess_par['bird'],sess_par['sess'],sort=sess_par['sort'],ephys_software=this_software)
                sess_epochs = oeu.list_oe_epochs(exp_struct)
                # loop through epochs
                for this_epoch in sess_epochs:
                    # add to session parameter dictionary
                    sess_par['epoch'] = this_epoch
                    # get epoch files
                    oe_exp_struct = et.sgl_struct(sess_par,this_epoch,ephys_software=this_software)
                    run_recordings = {'oeb':preproc_oe.get_oe_cont_recording(exp_struct,this_epoch)}
                    print(this_bird,this_sess,this_epoch,'syncing..')
                    
                    # make an all_syn_dict
                    mic_file_name = os.path.join(exp_struct['folders']['derived'],this_epoch,'wav_mic.wav')
                    # only the sampling rate is used below; mmap avoids loading the audio
                    wav_s_f, mic_stream = wavfile.read(mic_file_name, mmap=True)
                    # 'ap_0' and 'nidq' both take the sampling rate of the single oe recording
                    all_syn_dict = {'wav': {'s_f': wav_s_f}, 
                                   'ap_0': {'s_f': run_recordings['oeb'].get_sampling_frequency()},
                                   'nidq': {'s_f': run_recordings['oeb'].get_sampling_frequency()}}

                    # make bouts pandas file for this session - match sglx format, streams already synced
                    bout_pd = et.get_epoch_bout_pd(sess_par,only_curated=True,software='oe')
                    bout_dict = preproc_oe.bout_dict_from_pd(bout_pd,all_syn_dict)

                    # save
                    bout_dict_path = os.path.join(oe_exp_struct['folders']['derived'],'bout_dict_oe.pkl')
                    bout_pd_path = os.path.join(oe_exp_struct['folders']['derived'],'bout_pd_oe.pkl')
                    with open(bout_dict_path,'wb') as handle:
                        pickle.dump(bout_dict,handle)
                    bout_pd.to_pickle(bout_pd_path)
            
            else:
                # sess_par has no 'software' key, so report the value read from the configuration
                print(this_software,'not implemented..')

2023-09-05 12:25:43,962 ceciestunepipe.file.bcistructure INFO     {'folders': {'bird': '/mnt/sphere/speech_bci/raw_data/s_b1555_22', 'raw': '/mnt/sphere/speech_bci/raw_data/s_b1555_22/2022-04-29', 'sglx': '/mnt/sphere/speech_bci/raw_data/s_b1555_22/2022-04-29/sglx', 'kwik': '/scratch/chronic_ephys/s_b1555_22/sglx/kwik/2022-04-29', 'processed': '/mnt/sphere/chronic_ephys/proc/s_b1555_22/2022-04-29/sglx', 'derived': '/mnt/sphere/chronic_ephys/der/s_b1555_22/2022-04-29/sglx', 'tmp': '/scratch/chronic_ephys/tmp', 'msort': '/scratch/chronic_ephys/s_b1555_22/sglx/msort/2022-04-29', 'ksort': '/scratch/chronic_ephys/s_b1555_22/sglx/ksort/2022-04-29/0', 'sort': '/mnt/sphere/chronic_ephys/der/s_b1555_22/2022-04-29/sglx/0'}, 'files': {'par': '/scratch/chronic_ephys/s_b1555_22/sglx/ksort/2022-04-29/0/params.json', 'set': '/mnt/sphere/speech_bci/raw_data/s_b1555_22/2022-04-29/sglx/settings.isf', 'rig': '/mnt/sphere/speech_bci/raw_data/s_b1555_22/2022-04-29/sglx/rig.json', 'kwd': '/scratch/chronic_e

2023-09-05 12:25:44,116 ceciestunepipe.util.spikeextractors.extractors.readSGLX INFO     Extracting digital on imec stream: there is only one channel, if any (SYN) and will be converted from an int16 channel 
2023-09-05 12:25:44,116 ceciestunepipe.util.spikeextractors.extractors.readSGLX INFO     Extracting digital on imec stream: there is only one channel, if any (SYN) and will be converted from an int16 channel 
2023-09-05 12:25:44,119 ceciestunepipe.util.spikeextractors.extractors.readSGLX INFO     allocated array for syn channel of size (452822823,)
2023-09-05 12:25:44,119 ceciestunepipe.util.spikeextractors.extractors.readSGLX INFO     allocated array for syn channel of size (452822823,)
2023-09-05 12:25:44,119 ceciestunepipe.util.spikeextractors.extractors.readSGLX INFO     allocated array for syn channel of size (452822823,)


s_b1555_22 2022-04-29 0644_g0 syncing..


2023-09-05 13:10:07,989 ceciestunepipe.util.spikeextractors.extractors.readSGLX INFO     Threshold for logical hi is 0
2023-09-05 13:10:07,989 ceciestunepipe.util.spikeextractors.extractors.readSGLX INFO     Threshold for logical hi is 0
2023-09-05 13:10:07,989 ceciestunepipe.util.spikeextractors.extractors.readSGLX INFO     Threshold for logical hi is 0
2023-09-05 13:10:18,713 ceciestunepipe.util.sglxsync INFO     saving events array to /mnt/sphere/chronic_ephys/der/s_b1555_22/2022-04-29/sglx/0644_g0/ap_0_sync_evt.npy
2023-09-05 13:10:18,713 ceciestunepipe.util.sglxsync INFO     saving events array to /mnt/sphere/chronic_ephys/der/s_b1555_22/2022-04-29/sglx/0644_g0/ap_0_sync_evt.npy
2023-09-05 13:10:18,713 ceciestunepipe.util.sglxsync INFO     saving events array to /mnt/sphere/chronic_ephys/der/s_b1555_22/2022-04-29/sglx/0644_g0/ap_0_sync_evt.npy
2023-09-05 13:10:18,894 ceciestunepipe.util.sglxsync INFO     saving t_0 array to /mnt/sphere/chronic_ephys/der/s_b1555_22/2022-04-29/sglx/

KeyError: 'wav'