# Curate acoustic data

Notebook within the chronic ephys processing pipeline
- 1-preprocess_acoustics
- **2-curate_acoustics**
- 3-sort_spikes
- 4-curate_spikes

Use the environment **songproc** to run this notebook

(currently using environment spikesort)

In [1]:
%matplotlib notebook
import os
import pickle
import sys

import numpy as np
import pandas as pd
import IPython.display as ipd

# make the local ceciestunepipe checkout importable before importing from it
sys.path.append('/mnt/cube/lo/envs/ceciestunepipe')
from ceciestunepipe.file import bcistructure as et
from ceciestunepipe.util.sound import boutsearch as bs
from ceciestunepipe.pipeline import searchbout as sb
from ceciestunepipe.mods import curate_bouts as cb

## Set parameters

In [2]:
# session parameters: the settings a user edits before running the notebook
sess_par = dict(
    bird='z_c5o30_23',       # bird id
    sess='2023-06-15',       # session date
    ephys_software='sglx',   # recording software, sglx or oe
    stim_sess=False,         # right now only for oe, empty if no stim, stim_map_dict if stim
    trim_bouts=True,         # manually trim bouts after curation
    sort='ksort_lo',         # sort index
)

**TODO:** the folder-path construction below still needs to be updated for the zebra finch song acquisition pipeline.

In [3]:
# look up the experiment structure for this bird/session, then build the bouts folder path
exp_struct = et.get_exp_struct(
    sess_par['bird'],
    sess_par['sess'],
    ephys_software=sess_par['ephys_software'],
)
# NOTE(review): the [:-2] slice drops only the last two characters of the processed path;
# with ephys_software='sglx' the displayed result ends in 'sgbouts_sglx' - confirm this is intended
bouts_folder = (
    exp_struct['folders']['processed'][:-2]
    + 'bouts_{}'.format(sess_par['ephys_software'])
)
# display the resulting path
bouts_folder

'/mnt/cube/chronic_ephys/proc/z_c5o30_23/2023-06-15/sgbouts_sglx'

In [3]:
# directories / files
exp_struct = et.get_exp_struct(
    sess_par['bird'], sess_par['sess'], ephys_software=sess_par['ephys_software'])
# NOTE(review): the [:-2] slice drops only the last two characters of the processed path;
# with ephys_software='sglx' this produces a folder ending in 'sgbouts_sglx' - confirm intended
bouts_folder = (exp_struct['folders']['processed'][:-2]
                + 'bouts_{}'.format(sess_par['ephys_software']))
os.makedirs(bouts_folder, exist_ok=True)

# load bouts (returns the hyper-parameters together with the bout data frame)
hparams, bout_pd = sb.load_bouts(
    sess_par['bird'], sess_par['sess'], '',
    derived_folder='bouts_{}'.format(sess_par['ephys_software']),
    bout_file_key='bout_sync_file',
)

# get recording sample rate and the per-epoch bout dictionaries
# (the helper is expected to verify the rate is the same for all epochs)
recording_sample_rate, bout_dicts_all = cb.epoch_bout_dict_sample_rate_check(bout_pd, sess_par)

if not sess_par['stim_sess']:
    # no stim removal: work on a copy so the loaded frame stays untouched
    bout_pd_updated = bout_pd.copy()
    print('all bouts:',len(bout_pd))
else:
    # stim session: remove stim that overlap with bouts
    bout_pd_updated = cb.remove_stim_bouts(bout_pd, sess_par)
    print('all bouts:',len(bout_pd),' | post stim bout removal:',len(bout_pd_updated))

all bouts: 506


## Curate bouts:
Review the results of the automatic bout detection algorithm and remove any false bout detections.

In [14]:
# (re)set the 'confusing' and 'is_call' columns to False on every bout before curation;
# assign() returns a new frame, so re-running this cell overwrites those two columns again
bout_pd_updated = bout_pd_updated.assign(confusing=False, is_call=False)
# preview the first rows
bout_pd_updated.head(3)

Unnamed: 0,start_ms,end_ms,start_sample,end_sample,p_step,rms_p,peak_p,bout_check,file,len_ms,...,confusing,valid_waveform,valid,spectrogram,start_ms_ap_0,start_sample_ap_0,start_sample_naive,bird,sess,epoch
0,3065,10415,122600,416600,"[64.35645374070694, 97.02539678231847, 130.721...",14.247057,170.172134,False,/mnt/cube/chronic_ephys/der/z_c5o30_23/2023-06...,7350,...,True,True,True,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",3065.081426,91953,122600,z_c5o30_23,2023-06-15,0913_g0
1,19015,22950,760600,918000,"[8.845089109538069, 8.882323528797626, 5.25552...",14.247057,89.510078,False,/mnt/cube/chronic_ephys/der/z_c5o30_23/2023-06...,3935,...,True,True,True,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",19015.348734,570462,760600,z_c5o30_23,2023-06-15,0913_g0
2,877965,932345,35118600,37293800,"[4.38612237740919, 13.952502006390048, 6.29569...",14.247057,423.474875,False,/mnt/cube/chronic_ephys/der/z_c5o30_23/2023-06...,54380,...,True,True,True,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",877979.319275,26339433,35118600,z_c5o30_23,2023-06-15,0913_g0


In [None]:
# build the bout curation viewer from the bout table and the recording sample rate;
# later cells read the curated table back from viz_bout.bouts_pd
viz_bout = cb.VizBout(bout_pd_updated,recording_sample_rate)

#### Option to listen to bout

In [13]:
# positional row (iloc) of the bout to play back
idx = 10 # change index to listen to different bouts
# play that bout's 'waveform' column at the recording sample rate
ipd.Audio(bout_pd_updated.iloc[idx]['waveform'],rate=recording_sample_rate)

In [None]:
# summarise the curated bouts held by the viewer (show_hist=False is passed to skip the histogram)
bpd = cb.sess_bout_summary(viz_bout.bouts_pd,show_hist=False)
# print the summary produced by cb.give_summary for the same table
print(cb.give_summary(viz_bout.bouts_pd))

## Trim bouts:
Now that you have only the true bouts, trim the start and end points so they align precisely with bout onset and offset. This will improve the performance of syllable segmentation and clustering in later steps.

In [None]:
# trimming is optional and controlled by sess_par['trim_bouts']
if not sess_par['trim_bouts']:
    print('skipping bout trimming..')
else:
    # build the trimming tool over the curated bouts; its crop_min / crop_max are read later
    trim_bouts = cb.TrimBout(
        viz_bout.bouts_pd,
        recording_sample_rate,
        hparams['waveform_edges'],
    )

In [None]:
# generate final bout df
# Both branches bind `bouts_pd_final`, the name the save cell reads.
if sess_par['trim_bouts']:
    # pull out trim values
    start_s = trim_bouts.crop_min
    end_s = trim_bouts.crop_max
    # apply the trim values to the same frame that was handed to cb.TrimBout
    # NOTE(review): assumes crop_min / crop_max are aligned row-for-row with viz_bout.bouts_pd - confirm
    # NOTE(review): TrimBout was built with recording_sample_rate while this call passes
    # hparams['sample_rate'] - confirm the two agree
    bouts_pd_final = cb.update_trimmed_bouts(viz_bout.bouts_pd, start_s, end_s,
                                             hparams['sample_rate'], hparams['waveform_edges'])
else:
    print('skipping bout trimming..')
    # no trimming requested: the curated table is the final table
    bouts_pd_final = viz_bout.bouts_pd.copy()

## Save curated acoustics

In [None]:
# save curated/trimmed bout df for the whole session
sess_bouts_curated_file = os.path.join(bouts_folder,'bout_curated.pickle')
bouts_pd_final.to_pickle(sess_bouts_curated_file)
sb.save_auto_bouts(bouts_pd_final,sess_par,hparams,software=sess_par['ephys_software'],bout_file_key='bout_curated_file')

# loop through epochs - save curated data frames and dictionaries for each
# NOTE(review): bout_dicts_all is indexed by position in the sorted unique epochs; this assumes
# cb.epoch_bout_dict_sample_rate_check returned the dictionaries in that same order - confirm
for i,this_epoch in enumerate(np.unique(bouts_pd_final.epoch)):
    epoch_struct = et.sgl_struct(sess_par,this_epoch,ephys_software=sess_par['ephys_software'])
    # get epoch bouts: keep the rows whose source file path contains the epoch name.
    # A boolean mask is used instead of dropping by index label, so rows are selected
    # correctly even if index labels repeat; regex=False matches the epoch name literally.
    in_epoch = bouts_pd_final['file'].str.contains(this_epoch, regex=False)
    this_epoch_bouts_pd_final = bouts_pd_final[in_epoch].reset_index(drop=True)
    # get bout dictionary
    this_bout_dict = bout_dicts_all[i]
    # update bout dictionary if necessary: copy every column whose name contains 'trim'
    if sess_par['trim_bouts']:
        epoch_columns = this_epoch_bouts_pd_final.columns
        trim_keys = list(epoch_columns[epoch_columns.str.contains('trim')])
        for this_key in trim_keys:
            this_bout_dict[this_key] = this_epoch_bouts_pd_final[this_key]
    # save curated bout dataframe and dictionary into the epoch's derived folder
    bout_dict_path = os.path.join(epoch_struct['folders']['derived'],'bout_dict_ap0_curated.pkl')
    with open(bout_dict_path, 'wb') as handle:
        pickle.dump(this_bout_dict, handle)
    bout_pd_path = os.path.join(epoch_struct['folders']['derived'],'bout_pd_ap0_curated.pkl')
    this_epoch_bouts_pd_final.to_pickle(bout_pd_path)