In [1]:
from datetime import datetime, date
from uuid import uuid4
from dateutil.tz import tzlocal
from ast import literal_eval

import numpy as np
import pandas as pd
import os
import warnings
import librosa
import pickle

from pynwb import NWBHDF5IO, NWBFile, TimeSeries
from pynwb.file import Subject
from pynwb.behavior import (
    BehavioralEpochs,
    BehavioralEvents,
    BehavioralTimeSeries,
    CompassDirection,
    EyeTracking,
    Position,
    PupilTracking,
    SpatialSeries,
)
from pynwb.epoch import TimeIntervals
from pynwb.image import ImageSeries

from scipy.io import wavfile
import wave

from ndx_manoli_meta import AssayMetadata
from nwb_utils import *
from vak_utils import *
from frequency_stats_utils import *

In [2]:
# ----- USER PARAMETERS -----
# NWB metadata shared by the whole cohort
cohort_tag = 'ScnVoc'
GT = 'Scn2a'
sdate = date(2024,3,14) # i.e. 20240314
session_description = (
    f'Vocal recordings from cohort: {cohort_tag} with genotype: {GT} '
    f'starting on date: {sdate}.'
)
lab = 'Manoli @ UCSF'

# snippet windowing parameters (handed to snippets_from_whole_recording)
winlen = 2
overlap = 0.25

# set parameters for making toml files
defaults_fname = 'vak_defaults.pkl' # move this to a central location
# toml_name = f'{this_rec}-predict.toml'

# frequency bounds handed to merge_window_overlaps so that annotations are
# treated as full boxes (presumably Hz, matching the Raven column names -- confirm)
minfreq = 15000
maxfreq = 65000

# set up pathing: one sub-directory per pipeline stage, all under rootpath
rootpath = 'M:\\scn2a-paper-GWJSNH'
nwbfile_path, analysis_path, acq_path, pred_path = (
    os.path.join(rootpath, f'{cohort_tag}_{stage}')
    for stage in ('nwb', 'analysis', 'acquisition', 'predictions')
)
paths = [nwbfile_path, analysis_path, acq_path, pred_path]
for pathi in paths:
    # create the stage directory unless it is already there
    os.makedirs(pathi, exist_ok=True)
        
# load up stored defaults for writing toml files
# NOTE(review): pickle.load can execute arbitrary code -- only point this at a trusted file.
# The context manager closes the handle as soon as the defaults are in memory.
with open(os.path.join('M:\\vocalizations',defaults_fname),'rb') as defaults_file:
    defaults = pickle.load(defaults_file)

# metadata table (read from analysis_path when the NWB files are built)
# metafile = f'metadata_{cohort_tag}.csv'
metafile = 'metadata_ScnVoc_20250101.csv'

# -------- frequency calculation parameters ----------
# padding at the start (spad) and end (epad) of each snippet, in seconds
spad = epad = 0

# parameters for Welch PSD
NFFT, noverl = 512, 400
pad = 0.05
cmap = 'jet'
cmin, cmax = -60, 30
n_mels = n_mfcc = 256 # the mel-band count is tied to the MFCC count
fmin, fmax = 20000, 80000
psdwin = 'hann'

# parameters for mel spec
mel_nfft, mel_winlen = 1024, 900

# false positive thresholding
pwrmin = 0.9

# peak identification parameters
rel_height_param = 0.98 # point on the peak at which to calculate the bandwidth
stdmx = 1
prominence_factor = 2.5 # used to calculate how high frequency peaks need to be

# contour thresholding parameters
thresh, grndmeanfac = 0.25, 1.5

# spectrogram smoothing parameters
wiener_kernel = (4, 4)

# contour fit smoothing parameters
# NOTE(review): `sm` is not imported in this notebook's import cell; presumably
# one of the star imports provides it -- confirm.
lowess = sm.nonparametric.lowess
fr, it = 0.2, 4

# mel frequencies spanning fmin..fmax with n_mels bands
mf = librosa.mel_frequencies(n_mels=n_mels, fmin=fmin, fmax=fmax)

# names and descriptions of stats, in order
# Each stat is declared next to its description so the two lists cannot drift
# apart. The previous stand-alone `descriptions` list carried 17 entries for 16
# stats: an extra 'number of signal peaks identified in contours' line sat
# after the numberPeaks description and shifted every later description by one.
_stat_specs = [
    ('callBool', 'use call: true for include in analysis, false for false positive'),
    ('startTime', 'time of call start'),
    ('duration', 'duration of call in s'),
    ('f0', 'peak frequency of lowest frequency band identified by Welch PSD'),
    ('numberFreqBands', 'number of frequency peaks identified from Welch PSD'),
    ('numberPeaks', 'number of peaks identified in contours'),
    ('startFreq', 'frequency at beginning of longest contour'),
    ('endFreq', 'frequency at end of longest contour'),
    ('minFreq', 'lowest contour frequency reached in longest contour'),
    ('maxFreq', 'highest contour frequency reached in longest contour'),
    ('absMinFreq', 'lowest contour frequency reached in call'),
    ('absMaxFreq', 'highest contour frequency reached in call'),
    ('sinuosity', 'sinuosity of longest contour'),
    ('timeToMaxPower', 'time into call of highest power in s'),
    ('frequencyAtMaxPower', 'frequency at max power in Hz'),
    ('totalContourLength', 'total number of bins with above threshold contour'),
]
allstats = [stat_name for stat_name, stat_desc in _stat_specs]
descriptions = [stat_desc for stat_name, stat_desc in _stat_specs]

# which call time data to use from the NWB file
# (the frequency-stats cell further down reassigns use_annos before reading it)
use_annos = 'calls_vak_merge_5ms' # corresponds to all merged calls but not adjusted for time

In [3]:
# ----- Generate metadata object based on metadata read in from table -----

# Per-assay constants: assay type -> (assay_type__description, assay_type__divided).
_ASSAY_VARIANTS = {
    'introduction': ('Standard introduction (vocal series).', False),
    'divided': ('Vocal recording in home cage with animals separated by a barrier.', True),
    'mating': ('Standard timed mating (vocal series).', False),
    'finteract1': ('Vocal recording in home cage with animals freely interacting.', False),
    'finteract2': ('Vocal recording in home cage with animals freely interacting.', False),
    'dividerhalf': ('Vocal recording in home cage with animals divided for 15 mins then freely interacting for 15 mins.', True),
    'PPT': ('Standard PPT (vocal series).', False),
}


def make_metadata_object(atype,excl,dur,room,timeline,etho,exp,timeline_comp,
                         colors,pID,pGT,dpostpair,sID=None,sGT=None,lane=None,chamber=None):
    """Build the AssayMetadata object for one recording session.

    Parameters
    ----------
    atype : assay type; must be one of the keys of _ASSAY_VARIANTS.
    excl : passed as ``exclude_flag``.
    dur : passed as ``duration``.
    room : passed as ``room``.
    timeline : passed as ``timeline``.
    etho : passed as ``ethogram``.
    exp : passed as ``experimenter`` (not passed for 'introduction', see below).
    timeline_comp : passed as ``timeline_complete``.
    colors : passed as ``colors``.
    pID, pGT : partner ID and genotype.
    dpostpair : passed as ``assay_type__days_post_pairing``.
    sID, sGT, lane, chamber : stranger ID/genotype, PPT lane and partner
        chamber; only forwarded when ``atype`` is 'PPT'.

    Returns
    -------
    AssayMetadata

    Raises
    ------
    ValueError
        If ``atype`` is not a known assay type. Previously this case fell
        through every ``case`` and surfaced as an UnboundLocalError on return.
    """
    if atype not in _ASSAY_VARIANTS:
        raise ValueError(
            f'Unknown assay type {atype!r}; expected one of {sorted(_ASSAY_VARIANTS)}.'
        )
    description, divided = _ASSAY_VARIANTS[atype]

    fields = dict(
        assay_type=atype,
        exclude_flag=excl,
        duration=dur,
        room=room,
        timeline=timeline,
        ethogram=etho,
        timeline_complete=timeline_comp,
        colors=colors,
        assay_type__partner_ID=pID,
        assay_type__partner_GT=pGT,
        assay_type__description=description,
        assay_type__days_post_pairing=dpostpair,
        assay_type__divided=divided,
    )

    # NOTE(review): the 'introduction' branch had `experimenter=exp` commented
    # out; that omission is kept here unchanged -- confirm whether it is intended.
    if atype != 'introduction':
        fields['experimenter'] = exp

    # only the PPT assay carries stranger / lane / chamber information
    if atype == 'PPT':
        fields.update(
            assay_type__stranger_ID=sID,
            assay_type__stranger_GT=sGT,
            assay_type__PPT_lane=lane,
            assay_type__partner_chamber=chamber,
        )

    return AssayMetadata(**fields)

In [5]:
# ----- Loop over metadata table and write files -----
# For every row of the metadata table, build an NWB file holding the session
# metadata, the subject, the lab metadata object and external links to the raw
# video (and audio, when present). Files that already exist are left untouched.

# whether to write NWB files to disk yet
write_NWB_to_disk = True

# load metadata
meta = pd.read_csv(os.path.join(analysis_path,metafile),sep=',')
meta.FocalColor = meta.FocalColor.apply(literal_eval) # convert the colors to real arrays

# -- loop over metadata
for i, ptag in enumerate(meta.PairTag):
    assay_type = meta.AssayType[i]
    nwbfilename = f'{ptag}_{assay_type}.nwb'
    print(nwbfilename)

    # check if file already exists; never overwrite
    wfullpath = os.path.join(nwbfile_path,nwbfilename)
    if os.path.exists(wfullpath):
        continue

    # figure out partner info first, so a malformed row is skipped before
    # anything is built from it
    focal_sex = meta.FocalSex[i]
    if focal_sex=='F':
        pID = meta.MaleID[i]
        pGT = meta.MaleGT[i]
    elif focal_sex=='M':
        pID = meta.FemaleID[i]
        # the partner of a male focal animal is the female; this used to read
        # MaleGT (assumes the table has a FemaleGT column alongside FemaleID)
        pGT = meta.FemaleGT[i]
    else:
        # without this skip the row would reuse pID/pGT from the previous
        # iteration (or fail with a NameError on the first one)
        print(f'Focal sex is neither F nor M; something is wrong with {ptag}. Skipping this session.')
        continue

    # get session specific metadata
    thisdate = str(meta.RecDate[i])
    pairdate = str(meta.PairDate[i])

    # set up recording time... it would be nice to get actual video data for the times
    datepieces = get_date_from_block(thisdate)
    rtime = meta.RecTime[i]
    timepieces = rtime.split(':')
    sess_start = datetime(datepieces[0],datepieces[1],datepieces[2],int(timepieces[0]),int(timepieces[1]),0,0,tzlocal())

    session_description = f'Vocal annotations from pair {ptag} in a(n) {assay_type} assay.'

    # calculate days post pairing (dates are YYYYMMDD strings)
    rdate = date(int(thisdate[0:4]),int(thisdate[4:6]),int(thisdate[6:]))
    pdate = date(int(pairdate[0:4]),int(pairdate[4:6]),int(pairdate[6:]))
    dpp = rdate-pdate

    # make NWB file
    nwbfile = NWBFile(
        session_description=session_description,
        identifier = str(uuid4()),
        session_start_time = sess_start,
        lab=lab,
        experimenter=meta.RanBy[i],
        session_id = nwbfilename[0:-4], # check this
    )

    # add subject info
    nwbfile.subject = Subject(
        subject_id = meta.FocalID[i],
        species = 'Microtus ochrogaster',
        sex = focal_sex,
        genotype = meta.FocalGT[i]
    )

    # get assay duration
    duration = float(meta.AssayDuration[i])

    # Make lab metadata object; only PPT assays carry stranger/lane/chamber info
    ppt_kwargs = {}
    if assay_type=='PPT':
        ppt_kwargs = dict(
            sID=meta.StrangerID[i],
            sGT=meta.StrangerGT[i],
            lane=int(meta.PPTlane[i]),
            chamber=meta.PartnerChamber[i],
        )
    metaObj = make_metadata_object(assay_type,False,duration,meta.AssayRoom[i],meta.Timeline[i],
                                   meta.Ethogram[i],meta.RanBy[i],meta.FullTimeline[i],meta.FocalColor[i],pID,pGT,dpp.days,
                                   **ppt_kwargs)

    # Add the lab metadata object to the NWBFile
    nwbfile.add_lab_meta_data(lab_meta_data=metaObj)

    # Add video file, stored as a path relative to the NWB directory
    vid_path = os.path.join(meta.VideoPath[i],meta.VideoFile[i])
    vid_rel_path = os.path.relpath(vid_path,nwbfile_path)

    video_ext_file = ImageSeries(
        name='behaviorVideo',
        description='Raw original video.',
        unit='n.a.',
        external_file=[vid_rel_path],
        format='external',
        starting_time=meta.VideoAssayStart[i],
        rate=25.0,
    )

    # add to NWB file
    nwbfile.add_acquisition(video_ext_file)

    # get session specific audio data
    if isinstance(meta.AudioFile[i], str): # a missing entry is read in as a non-string (NaN)
        aud_path = os.path.join(meta.AudioPath[i],meta.AudioFile[i])
        rel_path = os.path.relpath(aud_path,nwbfile_path)
        with wave.open(aud_path, "rb") as wave_file: # find sample rate
            sampling_rate = wave_file.getframerate()
        Fs = float(sampling_rate)

        # set up acquisition object
        aud_ext_file = ImageSeries(
            name='behaviorAudio',
            description='Raw freefield audio',
            unit='n.a.',
            external_file=[rel_path],
            format='external',
            starting_time=meta.AudioAssayStart[i],
            rate=Fs,
        )

        # add to NWB file
        nwbfile.add_acquisition(aud_ext_file)

    # NOTE(review): debugging hook that keeps the 13th file's object around for
    # inspection; kept because later cells may rely on `testfile`.
    if i==12:
        testfile = nwbfile

    # write file to disk
    if write_NWB_to_disk:
        with NWBHDF5IO(wfullpath, "w") as io:
            io.write(nwbfile)

ScnVoc17_introduction.nwb




ScnVoc18_introduction.nwb
ScnVoc19_introduction.nwb
ScnVoc20_introduction.nwb
ScnVoc21_introduction.nwb
ScnVoc22_introduction.nwb


In [17]:
# ----- WRITE SNIPPET FILES -----
# For every NWB file of this cohort, cut the linked audio recording into
# windowed snippets and write them to <nwbfile_path>/snippets/<recording stem>.

alltags = meta.PairTag.to_list()

# get all files whose name starts with a known pair tag
# (nothing here filters by assay type, so PPT files are included too)
allnwb = [each for each in os.listdir(nwbfile_path) if each.split('_')[0] in alltags]

# check if snippet directory exists, and make a warning if it does.
# Done once, before the loop, so the warning reflects the state on disk
# rather than the directory created by an earlier iteration.
snipdir = os.path.join(nwbfile_path,'snippets')
if os.path.isdir(snipdir):
    warnings.warn("directory snippets already exists in your NWB path")
else:
    os.makedirs(snipdir)

for fname in allnwb:
    # open the file read-only just long enough to pull out the audio path;
    # the context manager releases the HDF5 handle so later cells can reopen it
    with NWBHDF5IO(os.path.join(nwbfile_path,fname), mode="r") as io:
        nwbfile = io.read()
        if 'behaviorAudio' not in nwbfile.acquisition:
            # files are written without audio when the metadata row has no audio file
            warnings.warn(f"no behaviorAudio in {fname}; not writing anything")
            continue
        audrelpath = nwbfile.acquisition['behaviorAudio'].external_file[0]

    # get audio path and the recording stem (file name without extension)
    audpath = os.path.normpath(os.path.join(nwbfile_path,audrelpath))
    recstem = os.path.splitext(os.path.basename(audpath))[0]

    # make data directory
    datadir = os.path.join(snipdir,recstem)
    if os.path.isdir(datadir):
        warnings.warn(f"directory {recstem} already exists in snippets; not writing anything")
        continue
    os.makedirs(datadir)

    # write snippets
    # NOTE(review): newer librosa releases rename `filename` to `path` -- confirm
    # against the installed version before upgrading.
    reclen = librosa.get_duration(filename=audpath)
    print(f'Calculating snippet edges for {recstem}...')
    starts,ends = snippets_from_whole_recording(winlen,overlap,reclen)
    sample_rate, samples = wavfile.read(audpath)
    print(f'Writing snippets for {recstem}...')
    write_snippets_from_times(starts,ends,recstem,datadir,samples,sample_rate)
    print('\n')
    # I want to extend the data spec to include intermediate processing stages, but right now I will just rely
    # on the directories getting created in the right spot and infer them in later stage



Calculating snippet edges for ScnVoc17_day1...
Writing snippets for ScnVoc17_day1...


Calculating snippet edges for ScnVoc18_day1...
Writing snippets for ScnVoc18_day1...


Calculating snippet edges for ScnVoc19_day1...
Writing snippets for ScnVoc19_day1...


Calculating snippet edges for ScnVoc20_day1...
Writing snippets for ScnVoc20_day1...


Calculating snippet edges for ScnVoc21_day1...
Writing snippets for ScnVoc21_day1...


Calculating snippet edges for ScnVoc22_day1...
Writing snippets for ScnVoc22_day1...




In [18]:
# ----- WRITE VAK TOML FILES -----
# One "<recording>-predict.toml" per NWB file, written to analysis_path and
# filled from the pickled `defaults` plus the per-recording directories.

alltoml = [] # keep track of toml files for writing bat file

for fname in allnwb:
    # open the file read-only just long enough to pull out the audio path;
    # the context manager closes the HDF5 handle again
    with NWBHDF5IO(os.path.join(nwbfile_path,fname), mode="r") as io:
        nwbfile = io.read()
        if 'behaviorAudio' not in nwbfile.acquisition:
            print(f'No behaviorAudio in {fname}; no toml file written.')
            continue
        audrelpath = nwbfile.acquisition['behaviorAudio'].external_file[0]

    # get audio path and recording stem
    audpath = os.path.normpath(os.path.join(nwbfile_path,audrelpath))
    this_rec = os.path.splitext(os.path.basename(audpath))[0]

    # set up variables for writing toml (forward slashes inside the toml strings)
    data_dir = os.path.join(nwbfile_path,'snippets',this_rec).replace('\\','/')
    output_dir = os.path.join(pred_path,this_rec)
    os.makedirs(output_dir, exist_ok=True)
    output_dir = output_dir.replace('\\','/')
    annot_csv_filename = f'{this_rec}.annot.csv'
    toml_name = f'{this_rec}-predict.toml'
    alltoml.append(toml_name)

    # NOTE(review): values are interpolated with str(); a Python bool in
    # `defaults` (e.g. majority_vote) would be written as True/False, which is
    # not valid TOML -- confirm what the pickle holds.
    toml_lines = [
        '[PREP]',
        f'data_dir = "{data_dir}"',
        f'output_dir = "{output_dir}"',
        f"audio_format = \"{defaults['audio_format']}\"",
        '',
        '[SPECT_PARAMS]',
        f"fft_size = {defaults['fft_size']}",
        f"step_size = {defaults['step_size']}",
        '',
        '[PREDICT]',
        f"checkpoint_path = \"{defaults['checkpoint_path']}\"",
        f"labelmap_path = \"{defaults['labelmap_path']}\"",
        f"models = \"{defaults['models']}\"",
        f"batch_size = {defaults['batch_size']}",
        f"num_workers = {defaults['num_workers']}",
        f"device = \"{defaults['device']}\"",
        f"spect_scaler_path = \"{defaults['spect_scaler_path']}\"",
        f'output_dir = "{output_dir}"',
        f'annot_csv_filename = "{annot_csv_filename}"',
        f"majority_vote = {defaults['majority_vote']}",
        f"min_segment_dur = {defaults['min_segment_dur']}",
        '',
        '[TweetyNet.optimizer]',
        f"lr = {defaults['lr']}",
    ]

    # write toml file; the with-block closes it, no explicit close needed
    with open(os.path.join(analysis_path,toml_name),'w') as f:
        f.write('\n'.join(toml_lines) + '\n')

In [20]:
# ----- WRITE BAT FILE WITH TOML COMMANDS -----
# All "vak prep" lines come first, then all "vak predict" lines, one per toml
# file. The toml names are written without a directory.
bat_name = f'{cohort_tag}-predict.bat'
with open(os.path.join(analysis_path,bat_name),'w') as f:
    for command in ('prep','predict'):
        for toml in alltoml:
            f.write(f"vak {command} {toml}\n")
# the with-block has already closed the file; no explicit close needed

In [22]:
# ----- WRITE RAW OVERLAPS -----
# For every NWB file: convert the vak annotation csv to Raven format, merge
# overlapping windows, and store the merged boxes in the NWB file as the
# 'calls_vak_rawmerge' interval table (unless that table is already there).

# get all files whose name starts with a known pair tag
# (nothing here filters by assay type, so PPT files are included too)
allnwb = [each for each in os.listdir(nwbfile_path) if each.split('_')[0] in alltags]

for fname in allnwb:
    # open in r+ so the intervals can be appended; the context manager makes
    # sure the handle is closed on every path, including the early exits below
    with NWBHDF5IO(os.path.join(nwbfile_path,fname), mode="r+") as io:
        nwbfile = io.read()
        if 'behaviorAudio' not in nwbfile.acquisition:
            print(f'No behaviorAudio in {fname}; moving on.')
            continue

        # get audio path and recording stem
        audrelpath = nwbfile.acquisition['behaviorAudio'].external_file[0]
        audpath = os.path.normpath(os.path.join(nwbfile_path,audrelpath))
        recstem = os.path.splitext(os.path.basename(audpath))[0]
        vak_raw_path = os.path.join(pred_path,recstem)
        vak_raw_file = f'{recstem}.annot.csv'
        raven_file = f'{recstem}.raven.txt'
        write_raven_from_annotation(vak_raw_path,vak_raw_file,vak_raw_path,raven_file)

        # write new file with merged overlaps
        merge_file = f'{recstem}.merged.raven.txt'
        merge_window_overlaps(vak_raw_path,vak_raw_file,vak_raw_path,merge_file,minfreq,maxfreq)

        # format merged overlaps to put in NWB
        anno = pd.read_csv(os.path.join(vak_raw_path,merge_file),sep='\t').rename(columns={
            'Begin Time (s)':'begin_time',
            'End Time (s)':'end_time',
            'Low Freq (Hz)':'low_freq',
            'High Freq (Hz)':'high_freq',
        })

        if len(anno.begin_time)==0:
            print(f'No annotations for file {merge_file}; moving on.')
            continue

        # explicit membership test instead of a bare try/except, so unrelated
        # errors are no longer mistaken for "table missing"
        if 'calls_vak_rawmerge' in nwbfile.intervals:
            print(f'Field calls_vak_rawmerge already exists for file {fname}; moving on.')
            continue

        print(f'Merging annotations for file {fname}...')

        call_intervals = TimeIntervals(
            name="calls_vak_rawmerge",
            description="Intervals when a call was annotated; overlaps found and merged; no frequency information.",
        )
        call_intervals.add_column(name="low_freq", description="bottom of call box")
        call_intervals.add_column(name="high_freq", description="top of call box")

        # one row per merged annotation box
        for start, end, lowf, highf in zip(anno.begin_time,anno.end_time,anno.low_freq,anno.high_freq):
            call_intervals.add_row(start_time=start,stop_time=end,low_freq=lowf,high_freq=highf)

        # write call annotation object to NWB file
        nwbfile.add_time_intervals(call_intervals)
        io.write(nwbfile)

Merging annotations for file ScnVoc17_introduction.nwb...
Merging annotations for file ScnVoc18_introduction.nwb...
Merging annotations for file ScnVoc19_introduction.nwb...
Merging annotations for file ScnVoc20_introduction.nwb...
Merging annotations for file ScnVoc21_introduction.nwb...
Merging annotations for file ScnVoc22_introduction.nwb...


In [23]:
# ----- ADD TIME BASED FILTERING TO PIPELINE -----
# Keep only the merged calls that lie inside the assay window
# (audio starting_time, starting_time + assay_len), shift them so the assay
# starts at 0, and store them as the 'calls_assay_time' interval table.

assay_len = 1800

for fname in allnwb:
    # open in r+ to append; the context manager closes the handle on every
    # path (previously it was only closed after a successful write)
    with NWBHDF5IO(os.path.join(nwbfile_path,fname), mode="r+") as io:
        nwbfile = io.read()

        # get audio path and recording stem
        audrelpath = nwbfile.acquisition['behaviorAudio'].external_file[0]
        audpath = os.path.normpath(os.path.join(nwbfile_path,audrelpath))
        recstem = os.path.splitext(os.path.basename(audpath))[0]

        sts = nwbfile.acquisition['behaviorAudio'].starting_time

        if 'calls_assay_time' in nwbfile.intervals:
            print(f'Field calls_assay_time already exists for file {fname}; moving on.')
            continue

        if 'calls_vak_rawmerge' not in nwbfile.intervals:
            # the raw-merge step writes nothing for recordings without annotations
            print(f'No calls_vak_rawmerge table in file {fname}; moving on.')
            continue

        print(f'Trimming and adjusting annotations for {recstem}...')

        # load up annotation times from NWB file
        raw_calls = nwbfile.intervals['calls_vak_rawmerge']
        raw_starts = np.asarray(raw_calls.start_time[:])
        raw_ends = np.asarray(raw_calls.stop_time[:])
        raw_high = np.asarray(raw_calls.high_freq[:])
        raw_low = np.asarray(raw_calls.low_freq[:])

        # adjust times to fit within analysis window; the same mask is applied
        # to the frequency columns so each kept call keeps its own box
        # (they used to be indexed by position in the *filtered* arrays)
        in_assay = np.logical_and(raw_starts>sts,raw_ends<(sts+assay_len))
        adj_starts = raw_starts[in_assay] - sts
        adj_ends = raw_ends[in_assay] - sts
        adj_low = raw_low[in_assay]
        adj_high = raw_high[in_assay]

        print(f'Went from {raw_starts.size} annotations to {adj_starts.size} annotations.')

        call_intervals = TimeIntervals(
            name="calls_assay_time",
            description='Call intervals; overlaps merged; no freq info; adjusted to fit in assay time.')

        call_intervals.add_column(name="low_freq", description="bottom of call box")
        call_intervals.add_column(name="high_freq", description="top of call box")

        for start, end, lowf, highf in zip(adj_starts,adj_ends,adj_low,adj_high):
            call_intervals.add_row(start_time=start,stop_time=end,low_freq=lowf,high_freq=highf)

        # write call annotation object to NWB file
        nwbfile.add_time_intervals(call_intervals)
        io.write(nwbfile)

Trimming and adjusting annotations for ScnVoc17_day1...
Went from 7689 annotations to 7264 annotations.
Trimming and adjusting annotations for ScnVoc18_day1...
Went from 5975 annotations to 5797 annotations.
Trimming and adjusting annotations for ScnVoc19_day1...
Went from 703 annotations to 439 annotations.
Trimming and adjusting annotations for ScnVoc20_day1...
Went from 3064 annotations to 2736 annotations.
Trimming and adjusting annotations for ScnVoc21_day1...
Went from 2494 annotations to 2214 annotations.
Trimming and adjusting annotations for ScnVoc22_day1...
Went from 4906 annotations to 4730 annotations.


In [24]:
# ----- RUN CONTOUR FREQUENCY STATS CALCULATIONS AND ADD TO NWB FILES -----
# For every NWB file: cut one audio snippet per annotated call, compute the
# per-call frequency stats, and store them in three interval tables
# (unfiltered, time adjusted, and time adjusted + filtered).

# no 5 ms merge for Scn2a?
use_annos = 'calls_vak_rawmerge'

# names of the tables written by this cell; used to make the cell re-runnable
stats_tables = ('freq_stats_unfiltered','freq_stats_timeAdj','freq_stats_filtered')


def _freq_stats_intervals(name, description, stats_df):
    """Build one TimeIntervals table holding the rows of stats_df.

    Adds a column for allstats[0] and for every entry of allstats[2:]
    (allstats[1] gets no column of its own here), each with the description
    at the same index, then fills the table through
    add_frequency_data_to_NWBintervals and returns its result.
    """
    table = TimeIntervals(name=name, description=description)
    table.add_column(name=allstats[0],description=descriptions[0])
    for statname, desc in zip(allstats[2:],descriptions[2:]):
        table.add_column(name=statname,description=desc)
    return add_frequency_data_to_NWBintervals(stats_df,table)


for fname in allnwb:

    # open in r+ to append; the context manager closes the handle even when
    # one of the calculations below raises
    with NWBHDF5IO(os.path.join(nwbfile_path,fname), mode="r+") as io:
        nwbfile = io.read()

        # adding a table twice fails, so skip files that were already processed
        if any(tbl in nwbfile.intervals for tbl in stats_tables):
            print(f'Frequency stats already exist for file {fname}; moving on.')
            continue
        if use_annos not in nwbfile.intervals:
            print(f'No {use_annos} table in file {fname}; moving on.')
            continue

        # get audio path and recording stem
        audrelpath = nwbfile.acquisition['behaviorAudio'].external_file[0]
        audpath = os.path.normpath(os.path.join(nwbfile_path,audrelpath))
        recstem = os.path.splitext(os.path.basename(audpath))[0]

        print(f'Running contour extractions and stats for {recstem}...')

        sts = nwbfile.acquisition['behaviorAudio'].starting_time

        # load up annotation times from NWB file
        starts = nwbfile.intervals[use_annos].start_time[:]
        ends = nwbfile.intervals[use_annos].stop_time[:]

        # load up audio data with the reader this notebook imports explicitly
        # (`wav` was only reachable through a star import)
        fs, auddat = wavfile.read(audpath)

        # generate snippets to work on
        allsnips = generate_all_snippets(fs,auddat,starts,ends,spad,epad)

        # set up data storage table: one row per call, one column per stat
        nrcalls = len(allsnips)
        nrstats = len(allstats)
        alldat = nanarray(nrcalls,nrstats)

        # loop over snippets and do calculations
        for k,snip in enumerate(allsnips):

            # log start time and duration in the table
            alldat[k,1] = starts[k]
            alldat[k,2] = ends[k]-starts[k]

            # calculate spectrogram and contours
            usecall,spec,contx,conty,f0,frqbands,tmax,ftmax = frequency_process_call(
                snip,fs,psdwin,NFFT,noverl,fmin,fmax,pwrmin,prominence_factor,rel_height_param,
                mel_nfft,n_mels,mel_winlen,wiener_kernel,mf,fr,it,grndmeanfac)

            # if the call is not a false positive, extract stats based on generated contours
            if usecall:
                flatx,flaty = flatten_contours_across_bands(contx,conty)
                line = get_frequency_stats_from_contours(flatx,flaty,fs,mel_winlen,mel_nfft,tmax,ftmax)
                alldat[k,0] = line[0,0]
                alldat[k,3] = f0
                alldat[k,4] = frqbands
                alldat[k,5:] = line[0,1:]
            else:
                alldat[k,0] = False

        # save calculations to NWB file

        # convert to dataframe
        df = pd.DataFrame(data=alldat,columns=allstats)

        # log unfiltered version
        unfiltered_freq_stats = _freq_stats_intervals(
            "freq_stats_unfiltered",
            'Frequency stats for merged calls with no assay time adjustments or filtering of false positives.',
            df)
        nwbfile.add_time_intervals(unfiltered_freq_stats)

        # log time adjusted version: start times relative to the audio starting_time
        dfct = df.copy()
        dfct.startTime = dfct.startTime - sts
        timeadj_freq_stats = _freq_stats_intervals(
            "freq_stats_timeAdj",
            'Frequency stats for merged calls with assay time adjustments but no calls removed.',
            dfct)
        nwbfile.add_time_intervals(timeadj_freq_stats)

        # make a version with false positives and excess times filtered out
        dfctfilt = dfct.loc[(dfct['startTime']>0) & (dfct['startTime']<assay_len) & (dfct['callBool']==True)]
        filt_freq_stats = _freq_stats_intervals(
            "freq_stats_filtered",
            'Frequency stats for merged calls by assay time and with false positives and out of bounds calls removed.',
            dfctfilt)
        nwbfile.add_time_intervals(filt_freq_stats)

        io.write(nwbfile)

Running contour extractions and stats for ScnVoc17_day1...


  return f(*args, **kwargs)


Running contour extractions and stats for ScnVoc18_day1...


  return f(*args, **kwargs)
  res *= (1 - noise / lVar)
  res *= (1 - noise / lVar)


Running contour extractions and stats for ScnVoc19_day1...


  return f(*args, **kwargs)


Running contour extractions and stats for ScnVoc20_day1...


  return f(*args, **kwargs)


Running contour extractions and stats for ScnVoc21_day1...


  return f(*args, **kwargs)


Running contour extractions and stats for ScnVoc22_day1...


  return f(*args, **kwargs)
  res *= (1 - noise / lVar)
  res *= (1 - noise / lVar)
