In [1]:
import os
from boxsdk import OAuth2, Client
import re
import warnings

from datetime import datetime, date
from uuid import uuid4
from dateutil.tz import tzlocal
from ast import literal_eval

import numpy as np
import pandas as pd
import warnings
import librosa
import pickle

from pynwb import NWBHDF5IO, NWBFile, TimeSeries
from pynwb.file import Subject
from pynwb.epoch import TimeIntervals
from pynwb.image import ImageSeries
from ndx_manoli_meta import AssayMetadata

from nwb_utils import get_date_from_block
from box_utils import *
from behavior_error_checks import *

In [2]:
# ----- User parameters -----
# Labels and locations that identify this assay's files.
directory_keyword = 'Intros'                 # label used in the assay directory names
metadata_file = 'metadata_intros_v4.csv'     # CSV holding the initial per-pair metadata
boris_keyword = 'Intro'
score_path = os.path.join('Scn2a_X1Behavior_Nov22', 'Aggregated_Events')
assayregex = 'I|i'

# Plot colours, one RGB triple per entry; fill and outline tables are parallel.
fillcols = [
    [160, 146, 95],
    [245, 201, 39],
    [89, 91, 125],
    [63, 78, 245],
]
linecols = [
    [96, 87, 57],
    [147, 120, 23],
    [53, 54, 75],
    [33, 41, 131],
]

In [5]:
# ----- NWB set up -----
#
# Builds one NWB file per row of the metadata table:
# 1. Set up user parameters
# 2. Loop over metadata file
# 3. Get and set up metadata
# 4. Set up behavior table
# 5. Write nwb file
#
# Side products used by later cells: `multifile_log`, the overlap lists
# (`ol_*`) and the gap lists (`g_*`).

# -- set parameters
lab = "Manoli @ UCSF"
exclude_flag = False
# path to storage for NWB files
nwbfile_path = os.path.join('M:\\','scn2a-paper-GWJSNH','NWB-files')
# whether to write NWB files to disk yet
write_NWB_to_disk = False

# load metadata
meta = pd.read_csv(metadata_file)
meta.FocalColor = meta.FocalColor.apply(literal_eval) # convert the colors to real arrays

# keep track of overloaded files (score files that reference >1 media file)
multifile_log = []

# keep track of annotation overlaps
ol_files = []
ol_behav = []
ol_start = []
ol_end = []

# keep track of annotation gaps
g_files = []
g_b1 = []
g_b2 = []
g_end = []
g_start = []

# -- loop over metadata
# `i` is used as a label into the metadata columns; this relies on the default
# RangeIndex produced by pd.read_csv above.
for i, ptag in enumerate(meta.PairTag):
    assay_type = meta.AssayType[i]
    session_id = f'{ptag}_{assay_type}'
    nwbfilename = f'{session_id}.nwb'
    print(nwbfilename)

    # skip pairs whose NWB file has already been written
    wfullpath = os.path.join(nwbfile_path, nwbfilename)
    if os.path.exists(wfullpath):
        continue

    score_dir = meta.ScorePath[i]
    score_file = meta.ScoreFile[i]

    # get session specific metadata
    thisdate = str(meta.RecDate[i])

    # set up recording time... it would be nice to get actual video data for the times
    datepieces = get_date_from_block(thisdate)
    timepieces = meta.RecTime[i].split(':')
    sess_start = datetime(datepieces[0], datepieces[1], datepieces[2],
                          int(timepieces[0]), int(timepieces[1]), 0, 0, tzlocal())

    session_description = f'Behavioral annotations from pair {ptag} in a(n) {assay_type} assay.'

    # make NWB file
    nwbfile = NWBFile(
        session_description=session_description,
        identifier=str(uuid4()),
        session_start_time=sess_start,
        lab=lab,
        experimenter=meta.RanBy[i],
        session_id=session_id,  # file name without the .nwb extension
    )

    # add subject info
    focal_sex = meta.FocalSex[i]
    nwbfile.subject = Subject(
        subject_id=meta.FocalID[i],
        species='Microtus ochrogaster',
        sex=focal_sex,
        genotype=meta.FocalGT[i],
    )

    # get assay duration
    duration = float(meta.AssayDuration[i])

    # figure out partner info: the partner is the animal of the opposite sex
    if focal_sex == 'F':
        pID = meta.MaleID[i]
        pGT = meta.MaleGT[i]
    elif focal_sex == 'M':
        pID = meta.FemaleID[i]
        # Partner of a male focal animal is the female, so use her genotype
        # (previously this read MaleGT, i.e. the focal animal's own genotype).
        pGT = meta.FemaleGT[i]
    else:
        # Without a valid focal sex the partner cannot be identified; skip the
        # pair instead of continuing with undefined or stale partner values.
        warnings.warn(f'Focal sex is neither F nor M; something is wrong with {ptag}. Skipping this pair.')
        continue

    # TODO convert these to date objects and make sure this works
    # figure out time since pairing
    # NOTE(review): computed on the raw column values and not used further below.
    days = meta.RecDate[i] - meta.PairDate[i]

    # add lab metadata
    metaObj = AssayMetadata(
        assay_type=assay_type,
        exclude_flag=exclude_flag,
        duration=duration,
        room=str(meta.AssayRoom[i]),
        timeline=str(meta.Timeline[i]),
        ethogram=str(meta.Ethogram[i]),
        experimenter=str(meta.ScoredBy[i]),
        timeline_complete=meta.FullTimeline[i],
        colors=meta.FocalColor[i],
        introduction=os.path.join(score_dir, score_file),
        introduction__partner_ID=str(pID),
        introduction__partner_GT=str(pGT),
    )

    # attach the lab-specific assay metadata to the NWB file
    nwbfile.add_lab_meta_data(lab_meta_data=metaObj)

    # TODO Add video file
    video_ext_file = ImageSeries(
        name='behaviorVideo',
        description='Raw original video.',
        unit='n.a.',
        external_file=[os.path.join(meta.VideoPath[i], meta.VideoFile[i])],
        format='external',
        starting_time=0.0,
        rate=25.0,
    )

    nwbfile.add_acquisition(video_ext_file)

    # --- Add annotated behavior data

    # Make sure these files are downloaded with the same path configuration as listed in the metadata
    # Or adjust here as I did with telling the code to go up a level
    scoretab = pd.read_csv(os.path.join('..', score_dir, score_file))  # load up csv of annotations

    # change problematic column names (spaces/parentheses block attribute access)
    scoretab = scoretab.rename(columns={
        'Start (s)': 'start',
        'Stop (s)': 'end',
        'Duration (s)': 'duration',
        'Behavior type': 'behavior_type',
    })

    # --- Check for problems in the annotation file

    # check for extraneous media files
    media_files = np.unique(scoretab['Media file'])
    if len(media_files) > 1:
        warnings.warn(f'Scored csv {score_file} contains events for multiple media files.')
        multifile_log.append(score_file)
        for fp in media_files:
            print(fp)
        # NOTE(review): the events of such a file are still added to the NWB
        # file below; only the duration/overlap/gap checks are skipped.
    else:
        # check for total event duration violations (5% tolerance either way)
        totdur = np.sum(scoretab.duration)
        if totdur > 1.05*duration:
            warnings.warn(f'Total events duration of {score_file} exceeds the assay duration.')
        elif totdur < 0.95*duration:
            warnings.warn(f'Total events duration of {score_file} does not meet the assay duration.')

        # check for overlapping events and check for unscored gaps between events
        # NOTE(review): the recorded run of this cell ended with `KeyError: 424`
        # right after a truncated gap message, presumably raised inside this
        # helper; verify against behavior_error_checks.
        ols, ole, olb, gs, ge, gb1, gb2 = check_for_event_interactions(scoretab, True)

        # log overlaps
        for o_start, o_end, o_behav in zip(ols, ole, olb):
            ol_files.append(score_file)
            ol_behav.append(o_behav)
            ol_start.append(o_start)
            ol_end.append(o_end)

        # log gaps
        for gap_start, gap_end, behav_before, behav_after in zip(gs, ge, gb1, gb2):
            g_files.append(score_file)
            g_b1.append(behav_before)
            g_b2.append(behav_after)
            g_end.append(gap_end)
            g_start.append(gap_start)

    # make NWB object corresponding to the annotation table
    behavior_intervals = TimeIntervals(name="annotated_behavior",
        description="Intervals of scored behavior.")

    behavior_intervals.add_column(name="behavior", description="The annotation from the ethogram.")
    behavior_intervals.add_column(name="duration", description="Duration of the behavior.")
    behavior_intervals.add_column(name="atype", description="Point or state event.")

    # populate table; iterate the columns together so the metadata row index
    # `i` is not shadowed by an inner loop counter
    for start, end, behav, atype, dur in zip(scoretab.start, scoretab.end, scoretab.Behavior,
                                             scoretab.behavior_type, scoretab.duration):
        behavior_intervals.add_row(start_time=start, stop_time=end, behavior=behav,
                                   atype=atype, duration=dur)

    # add to NWB file
    nwbfile.add_time_intervals(behavior_intervals)

    # write file to disk
    if write_NWB_to_disk:
        with NWBHDF5IO(wfullpath, "w") as io:
            io.write(nwbfile)

Nov22_Pair1_introduction.nwb
Nov22_Pair15_introduction.nwb
Gap violation for behavior Investigate ending at 1401.436 and next behavior Huddle starting at 1401.935.
Nov22_Pair16_introduction.nwb
Nov22_Pair17_introduction.nwb
Nov22_Pair18_introduction.nwb
Nov22_Pair19_introduction.nwb
Nov22_Pair2_introduction.nwb
Nov22_Pair21_introduction.nwb
Nov22_Pair22_introduction.nwb
Nov22_Pair23_introduction.nwb
Nov22_Pair24_introduction.nwb
Gap violation for behavior Investigate ending at 50.367 and next behavior No interaction starting at 50.452.
Nov22_Pair25_introduction.nwb
Nov22_Pair26_introduction.nwb
Gap violation for behavior Sniff ending at 458.872 and next behavior No interaction starting at 459.12.
Nov22_Pair27_introduction.nwb
Gap violation for behavior Sniff ending at 428.418 and next behavior No interaction starting at 428.42.
Gap violation for behavior Sniff ending at 572.286 and next behavior Investigate starting at 572.29.
Gap violation for behavior No interaction ending at 770.879

KeyError: 424

In [6]:
# --- Write annotation violation logs for review
# Both logs are named after the assay type of the first metadata row.

# overlaps: one row per overlapping event
ol_log = {
    'file': ol_files,
    'behavior': ol_behav,
    'start_time': ol_start,
    'end_time': ol_end,
}
ol_tab = pd.DataFrame(data=ol_log)
ol_tab.to_csv(f'overlap_log_{meta.AssayType[0]}.csv', index=False)

# gaps: one row per unscored gap, with its length (next start minus previous end)
gap_durs = [nxt_start - prev_end for nxt_start, prev_end in zip(g_start, g_end)]
gap_log = {
    'file': g_files,
    'behavior1': g_b1,
    'behavior1_end': g_end,
    'behavior2': g_b2,
    'behavior2_start': g_start,
    'gap_duration': gap_durs,
}
g_tab = pd.DataFrame(data=gap_log)
g_tab.to_csv(f'gap_log_{meta.AssayType[0]}.csv', index=False)