In [57]:
import os
from boxsdk import OAuth2, Client
import re
import warnings

from datetime import datetime, date
from uuid import uuid4
from dateutil.tz import tzlocal
from ast import literal_eval

import numpy as np
import pandas as pd
import warnings
import librosa
import pickle

from pynwb import NWBHDF5IO, NWBFile, TimeSeries
from pynwb.file import Subject
from pynwb.epoch import TimeIntervals
from pynwb.image import ImageSeries
from ndx_manoli_meta import AssayMetadata

from nwb_utils import get_date_from_block

In [2]:
# ----- Support functions -----

def visit_all_dirs_files(usedir,full_list,curr_path,client):
    '''Recursively collect the path of every file below a Box folder.

    Parameters
    ----------
    usedir : folder id passed to ``client.folder(folder_id=...)``.
    full_list : list that the file paths are appended to (modified in place).
    curr_path : list of path components leading to ``usedir``; it is extended
        while descending into sub-folders and restored before returning.
    client : object exposing ``folder(folder_id=...).get_items()`` whose items
        have ``type``, ``name`` and ``id`` attributes.

    Returns
    -------
    ``full_list`` with one ``os.path.join``-ed path appended per file found.
    '''

    # get all the items in the current folder
    for item in client.folder(folder_id=usedir).get_items():
        if item.type == 'folder': # recursion if a folder is found
            curr_path.append(item.name) # add directory to path
            # the client must be forwarded, otherwise the recursive call fails on the first sub-folder
            full_list = visit_all_dirs_files(item.id,full_list,curr_path,client)
            curr_path.pop() # clean the folder back off the path when going up a level
        else: # keep track of all the paths when there are files
            # joining in one call also works when curr_path is empty
            full_list.append(os.path.join(*curr_path,item.name))

    return full_list

def find_in_list(lst, item):
    '''Return the indices of the strings in ``lst`` that contain ``item`` as a tag.

    A string matches when ``item`` (taken literally, case-insensitively) is
    immediately followed by ``_``, ``-`` or the letter ``i``/``I``. Requiring
    one of those characters keeps e.g. ``Pair1`` from matching ``Pair14``.
    '''
    # The hyphen is escaped: written unescaped between two '|' characters it is
    # parsed as a character range and a literal '-' is never matched.
    pattern = re.compile(re.escape(item) + r'[_\-i]', re.IGNORECASE)
    return [i for i, x in enumerate(lst) if pattern.search(x)]

In [3]:
# ---- Get login credentials from environment variables -----

# - note this needs to be adjusted for each authorized user... ask Nerissa about getting and setting this information
# - developer access tokens expire quickly, so this probably needs to be set at startup and perhaps even while working
# - never paste a token into the notebook itself: the file (and its history) is shared, so set the
#   'box_access_token' environment variable instead, even for short-lived tokens
client_id = os.environ.get('box_client_id')
client_secret = os.environ.get('box_client_secret')
access_token = os.environ.get('box_access_token')

# fail early with a readable message instead of an opaque authentication error later on
missing = [name for name, value in (('box_client_id', client_id),
                                    ('box_client_secret', client_secret),
                                    ('box_access_token', access_token)) if not value]
if missing:
    raise RuntimeError(f'Missing Box credentials in environment variables: {", ".join(missing)}')


# ----- Open a connection to the Box server -----

auth = OAuth2(
    client_id=client_id, # put these in environmental variables
    client_secret=client_secret,
    access_token=access_token,
)
client = Client(auth)

# ----- Work on directory structure -----

# get the list of items in the whole Scn2a folder
items = client.folder(folder_id='196168550606').get_items()

# list contents
for item in items:
    print(f'{item.type.capitalize()} {item.id} is named "{item.name}"')

Folder 238576018931 is named "Aggregated_Events"
Folder 202955487033 is named "April2023_ShortCoHab_Females"
Folder 226890392440 is named "BorisFiles"
Folder 196172227913 is named "Female Intros"
Folder 196173015061 is named "Female PPTs"
Folder 196170422974 is named "Female RI"
Folder 196169230497 is named "Female SepReunion"
Folder 196171174766 is named "Female TMs"
Folder 214178119262 is named "June2023_ControlBehaviors"
Folder 196166676922 is named "Male Intros"
Folder 196172624162 is named "Male PPTs"
Folder 196171675763 is named "Male RI"
Folder 196172896956 is named "Male SepReunion"
Folder 196172739371 is named "Male TMs"
Folder 248394520156 is named "Naive_Choice"
Folder 229587753245 is named "Oct2023_JuvenileBehavior"
File 1177298175085 is named "Scn2aX1Nov22_Key.xlsx"


In [4]:
# ----- Figure out which directories have the assay of interest

assaykeyword = 'Intros' # set up for intros
projectDir = '196168550606'
rootpath = ['Scn2a_X1Behavior_Nov22']

# ids of the top-level project folders whose name contains the assay keyword
usedirs = [item.id
           for item in client.folder(folder_id=projectDir).get_items()
           if assaykeyword in item.name]

# print(usedirs)

# --- collect all the paths and filenames

allpaths = []

for usedir in usedirs:

    # add starting folder to curr_path
    udnm = client.folder(folder_id=usedir).get(fields=['name']).name
    curr_path = rootpath + [udnm]

    # search for files; visit_all_dirs_files requires the Box client as its fourth argument
    thesepaths = visit_all_dirs_files(usedir,[],curr_path,client)

    # consolidate
    allpaths += thesepaths

print(allpaths)

['Scn2a_X1Behavior_Nov22\\Female Intros\\01032023\\Nov22_Pair13_Intro_WIN_20230103_13_45_14_Pro.mp4', 'Scn2a_X1Behavior_Nov22\\Female Intros\\01032023\\Nov22_Pair14-Intro_WIN_20230103_13_45_36_Pro.mp4', 'Scn2a_X1Behavior_Nov22\\Female Intros\\01032023\\Nov22_Pair15_Intro.mov', 'Scn2a_X1Behavior_Nov22\\Female Intros\\01032023\\Nov22_Pair16_Intro_WIN_20230103_13_46_48_Pro.mp4', 'Scn2a_X1Behavior_Nov22\\Female Intros\\01242023\\Nov22_Pair21_Intro_2023-01-24 13-25-51.mp4', 'Scn2a_X1Behavior_Nov22\\Female Intros\\01242023\\Nov22_Pair22_Intro_2023-01-24 13-25-52.mp4', 'Scn2a_X1Behavior_Nov22\\Female Intros\\01242023\\Nov22_Pair23_Intro_2023-01-24 13-25-55.mp4', 'Scn2a_X1Behavior_Nov22\\Female Intros\\02072023\\Nov22_Pair28_Intro_2023-02-07 13-18-12.mp4', 'Scn2a_X1Behavior_Nov22\\Female Intros\\02072023\\Nov22_Pair29_Intro_2023-02-07 13-18-14.mp4', 'Scn2a_X1Behavior_Nov22\\Female Intros\\02072023\\Nov22_Pair30_Intro_2023-02-07 13-18-15.mp4', 'Scn2a_X1Behavior_Nov22\\Female Intros\\02072023\\N

In [5]:
# ----- Get a list of just file names to hunt through -----
# strip the directory part from every collected path, keeping the order of allpaths
justfiles = [os.path.basename(pth) for pth in allpaths]

In [6]:
# ----- Use metadata table to find expected files and add those columns to the table -----

# load up metadata
meta = pd.read_csv('metadata_intros_v1.csv')

# For every pair tag, find the position of its video in justfiles.
# A None placeholder is stored when there is not exactly one candidate, so that
# fileIndex always has one entry per metadata row and rows cannot shift.
fileIndex = []
for tag in meta.PairTag:
    match = find_in_list(justfiles,tag)
    if len(match)==1:
        fileIndex.append(match[0])
    else:
        print(f'No unique match for {tag}: found {len(match)} candidate file(s).')
        fileIndex.append(None)

# use fileIndex to look up file names and paths (None where no unique video was found)
metafiles = [None if idx is None else justfiles[idx] for idx in fileIndex]
metapaths = [None if idx is None else '\\'+os.path.dirname(allpaths[idx]) for idx in fileIndex]

# add files and paths to the metadata table
# (bracket assignment creates the column if the CSV did not already contain it)
meta['VideoFile'] = metafiles
meta['VideoPath'] = metapaths

In [7]:
# ----- Get BORIS scored aggregated events file names -----
keyword = "Intro"
items = client.folder(folder_id='238576018931').get_items()

# keep the name of every item in the aggregated-events folder that contains the keyword
aggfiles = [entry.name for entry in items if keyword in entry.name]

print(aggfiles)
print(len(aggfiles))

['Nov22_Pair15_Intro.csv', 'Nov22_Pair16_Intro.csv', 'Nov22_Pair17_Intro.csv', 'Nov22_Pair18_Intro.csv', 'Nov22_Pair19_Intro.csv', 'Nov22_Pair1_Intro.csv', 'Nov22_Pair21_Intro.csv', 'Nov22_Pair22_Intro.csv', 'Nov22_Pair23_Intro.csv', 'Nov22_Pair24_Intro.csv', 'Nov22_Pair25_Intro.csv', 'Nov22_Pair26_Intro.csv', 'Nov22_Pair27_Intro.csv', 'Nov22_Pair28_Intro.csv', 'Nov22_Pair29_Intro.csv', 'Nov22_Pair2_Intro.csv', 'Nov22_Pair30_Intro.csv', 'Nov22_Pair31_Intro.csv', 'Nov22_Pair32_Intro.csv', 'Nov22_Pair33_Intro.csv', 'Nov22_Pair34_Intro.csv', 'Nov22_Pair35_Intro.csv', 'Nov22_Pair36_Intro.csv', 'Nov22_Pair37_Intro.csv', 'Nov22_Pair38_Intro.csv', 'Nov22_Pair39_Intro.csv', 'Nov22_Pair3_Intro.csv', 'Nov22_Pair40_Intro.csv', 'Nov22_Pair41_Intro.csv', 'Nov22_Pair42_Intro.csv', 'Nov22_Pair43_Intro.csv', 'Nov22_Pair44_Intro.csv', 'Nov22_Pair46_Intro.csv', 'Nov22_Pair47_Intro.csv', 'Nov22_Pair48_Intro.csv', 'Nov22_Pair49_Intro.csv', 'Nov22_Pair4_Intro.csv', 'Nov22_Pair50_Intro.csv', 'Nov22_Pair51_I

In [24]:
# --- add Boris csvs to metadata table
# match pair tags; keep exactly one entry per metadata row so the column stays aligned
ordered_agg = []
for tag in meta.PairTag:
    hits = [fname for fname in aggfiles if fname.startswith(tag+'_')]
    if len(hits)==1:
        ordered_agg.append(hits[0])
    else:
        print(f'No unique scored file for {tag}: found {len(hits)} candidate file(s).')
        ordered_agg.append(None)

meta['ScoreFile'] = ordered_agg

# add path: one entry per metadata row (not per file found on Box)
score_path = os.path.join('Scn2a_X1Behavior_Nov22','Aggregated_Events')
score_path_arr = [score_path]*len(meta)
meta['ScorePath'] = score_path_arr

# --- write metadata table
# index=False keeps the row index out of the file; otherwise every reload adds an 'Unnamed: 0' column
meta.to_csv('metadata_intros_v2.csv',index=False)

In [22]:
# preview the first rows of the metadata table
meta.head()

Unnamed: 0,PairTag,AssayType,RecDate,VideoFile,VideoPath,ScoreFile,ScorePath,FemaleID,FemaleGT,FemaleFam,...,Timeline,Ethogram,RanBy,ScoredBy,FullTimeline,FocalColor,StrangerID,StrangerGT,PPTlane,PartnerChamber
0,Nov22_Pair1,introduction,11/14/2022,Nov22_Pair1Intro_WIN_20221114_12_47_25_Pro.mp4,\Scn2a_X1Behavior_Nov22\Female Intros\11142022,Nov22_Pair1_Intro.csv,Scn2a_X1Behavior_Nov22\Aggregated_Events,B8002,Het,,...,,,Gina Williams,Josh Steighner,True,,,,,
1,Nov22_Pair15,introduction,1/3/2023,Nov22_Pair15_Intro.mov,\Scn2a_X1Behavior_Nov22\Female Intros\01032023,Nov22_Pair15_Intro.csv,Scn2a_X1Behavior_Nov22\Aggregated_Events,B6614,Het,,...,,,Gina Williams,Josh Steighner,True,,,,,
2,Nov22_Pair16,introduction,1/3/2023,Nov22_Pair16_Intro_WIN_20230103_13_46_48_Pro.mp4,\Scn2a_X1Behavior_Nov22\Female Intros\01032023,Nov22_Pair16_Intro.csv,Scn2a_X1Behavior_Nov22\Aggregated_Events,B6615,WT,,...,,,Gina Williams,Josh Steighner,True,,,,,
3,Nov22_Pair17,introduction,1/4/2023,Nov22_Pair17_Intro_WIN_20230104_12_24_30_Pro.mp4,\Scn2a_X1Behavior_Nov22\Male Intros\01042023,Nov22_Pair17_Intro.csv,Scn2a_X1Behavior_Nov22\Aggregated_Events,,,,...,,,Gina Williams,Josh Steighner,True,,,,,
4,Nov22_Pair18,introduction,1/4/2023,Nov22_Pair18_Intro_WIN_20230104_12_25_08_Pro.mp4,\Scn2a_X1Behavior_Nov22\Male Intros\01042023,Nov22_Pair18_Intro.csv,Scn2a_X1Behavior_Nov22\Aggregated_Events,,,,...,,,Gina Williams,Josh Steighner,True,,,,,


In [None]:
# set up pipeline for other assays

# sketch out pipeline for adding times from aggregated events to NWB file

In [None]:
# ----- Figure out which directories have the assay of interest

# The keyword is matched against the top-level Box folder names; the folder listing printed
# earlier in this notebook shows "Female TMs" / "Male TMs" and nothing containing 'Timed_Matings'.
videokeyword = 'TMs'
assaymeta = 'metadata_timedMatings_v1.csv' # which metadata file to load up
boriskeyword = 'TM' # how the Boris csv files are tagged

projectDir = '196168550606' # the Box directory for the whole project
rootpath = ['Scn2a_X1Behavior_Nov22']
borisbox = '238576018931'
borispath = 'Aggregated_Events' # folder name as listed on Box (underscore, not a space)

metaoutfile = 'metadata_timedMatings_v2.csv'

# ids of the top-level project folders whose name contains the video keyword
usedirs = [item.id
           for item in client.folder(folder_id=projectDir).get_items()
           if videokeyword in item.name]

# print(usedirs)

# --- collect all the paths and filenames

allpaths = []

for usedir in usedirs:

    # add starting folder to curr_path
    udnm = client.folder(folder_id=usedir).get(fields=['name']).name
    curr_path = rootpath + [udnm]

    # search for files; visit_all_dirs_files requires the Box client as its fourth argument
    thesepaths = visit_all_dirs_files(usedir,[],curr_path,client)

    # consolidate
    allpaths += thesepaths

print(allpaths)

# ----- Get a list of just file names to hunt through -----
justfiles = [os.path.basename(pth) for pth in allpaths]

# ----- Use metadata table to find expected files and add those columns to the table -----

# load up metadata
meta = pd.read_csv(assaymeta)

# For every pair tag, find the position of its video in justfiles.
# A None placeholder keeps one entry per metadata row when there is not exactly one candidate.
fileIndex = []
for tag in meta.PairTag:
    match = find_in_list(justfiles,tag)
    if len(match)==1:
        fileIndex.append(match[0])
    else:
        print(f'No unique match for {tag}: found {len(match)} candidate file(s).')
        fileIndex.append(None)

# use fileIndex to look up file names and paths (None where no unique video was found)
metafiles = [None if idx is None else justfiles[idx] for idx in fileIndex]
metapaths = [None if idx is None else '\\'+os.path.dirname(allpaths[idx]) for idx in fileIndex]

# add files and paths to the metadata table
meta['VideoFile'] = metafiles
meta['VideoPath'] = metapaths

# ----- Get BORIS scored aggregated events file names -----
aggfiles = [item.name
            for item in client.folder(folder_id=borisbox).get_items()
            if boriskeyword in item.name]

print(aggfiles)
print(len(aggfiles))

# --- add Boris csvs to metadata table
# match pair tags; keep exactly one entry per metadata row so the column stays aligned
ordered_agg = []
for tag in meta.PairTag:
    hits = [fname for fname in aggfiles if fname.startswith(tag+'_')]
    if len(hits)==1:
        ordered_agg.append(hits[0])
    else:
        print(f'No unique scored file for {tag}: found {len(hits)} candidate file(s).')
        ordered_agg.append(None)

meta['ScoreFile'] = ordered_agg

# add path: one entry per metadata row (not per file found on Box)
score_path = os.path.join(rootpath[0],borispath)
score_path_arr = [score_path]*len(meta)
meta['ScorePath'] = score_path_arr

# --- write metadata table
# index=False keeps the row index out of the file; otherwise every reload adds an 'Unnamed: 0' column
meta.to_csv(metaoutfile,index=False)

In [21]:
# ----- Fix bad date formatting -----
# dx = meta.pop('Unnamed: 0')

# rewrite every month/day/year recording date as a yyyymmdd string,
# left-padding single-character months and days with a zero
newrecdates = []
for olddate in meta.RecDate:
    pieces = olddate.split('/')
    yr, mn, dy = pieces[2], pieces[0], pieces[1]
    mn = '0'+mn if len(mn)==1 else mn
    dy = '0'+dy if len(dy)==1 else dy
    newrecdates.append(f'{yr}{mn}{dy}')

In [24]:
# same month/day/year -> yyyymmdd conversion as for the recording dates, applied to the pairing dates
newpairdates = []
for olddate in meta.PairDate:
    fields = olddate.split('/')
    year, month, day = fields[2], fields[0], fields[1]
    if len(month)==1:
        month = '0'+month
    if len(day)==1:
        day = '0'+day
    newpairdates.append(''.join((year,month,day)))

In [26]:
# overwrite the existing date columns with the reformatted yyyymmdd strings
meta['RecDate'] = newrecdates
meta['PairDate'] = newpairdates

In [28]:
# index=False keeps the row index out of the file; otherwise every reload adds an 'Unnamed: 0' column
meta.to_csv('metadata_intros_v3.csv',index=False)

In [54]:
# ----- Add colors to meta file so they go in the NWB metadata -----
fillcols = [[160, 146, 95],[245, 201, 39],[89, 91, 125],[63, 78, 245]]
linecols = [[96, 87, 57],[147, 120, 23],[53, 54, 75],[33, 41, 131]]

# Palette row per animal:
#   0 = focal sex 'F' and genotype 'WT'      1 = focal sex 'F', any other genotype
#   2 = any other sex and genotype 'WT'      3 = any other sex, any other genotype
# meta.FocalColor
allcols = []
for i in range(len(meta.PairTag)):
    sex_offset = 0 if meta.FocalSex[i]=='F' else 2
    gt_offset = 0 if meta.FocalGT[i]=='WT' else 1
    pick = sex_offset + gt_offset
    allcols.append([fillcols[pick],linecols[pick]]) # [fill color, line color]

meta.FocalColor=allcols

In [56]:
# index=False keeps the row index out of the file; otherwise every reload adds an 'Unnamed: 0' column
meta.to_csv('metadata_intros_v4.csv',index=False)

In [90]:
# ----- NWB skeleton -----

# 1. Set up user parameters
# 2. Loop over metadata file
# 3. Get and set up metadata
# 4. Set up behavior table
# 5. Write nwb file

# NOTE: check_for_event_interactions is defined in a cell further down this notebook;
# that cell has to be run before this one.

def _yyyymmdd_to_date(value):
    '''Convert a yyyymmdd value (str, int or float as read back from CSV) to a date; None if missing or malformed.'''
    if pd.isna(value):
        return None
    try:
        # a column containing NaN is read back as float, e.g. 20230103.0
        return datetime.strptime(str(value).split('.')[0], '%Y%m%d').date()
    except ValueError:
        return None

# -- set parameters
lab = "Manoli @ UCSF"
exclude_flag = False
write_NWB_to_disk = True # set to False for a dry run that only produces the violation logs
# path to storage for NWB files
nwbfile_path = os.path.join('M:\\','scn2a-paper-GWJSNH','NWB-files')

# load metadata
meta = pd.read_csv('metadata_intros_v4.csv')
meta.FocalColor = meta.FocalColor.apply(literal_eval) # convert the colors to real arrays

# keep track of overloaded files
multifile_log = []

# keep track of annotation overlaps
ol_files = []
ol_behav = []
ol_start = []
ol_end = []

# keep track of annotation gaps
g_files = []
g_b1 = []
g_b2 = []
g_end = []
g_start = []

# -- loop over metadata
for i, ptag in enumerate(meta.PairTag):
    assay_type = meta.AssayType[i]
    nwbfilename = f'{ptag}_{assay_type}.nwb'
    print(nwbfilename)

    # skip pairs whose NWB file already exists
    wfullpath = os.path.join(nwbfile_path,nwbfilename)
    if os.path.exists(wfullpath):
        continue

    # figure out partner info (the partner is the animal of the opposite sex)
    if meta.FocalSex[i]=='F':
        pID = meta.MaleID[i]
        pGT = meta.MaleGT[i]
    elif meta.FocalSex[i]=='M':
        pID = meta.FemaleID[i]
        pGT = meta.FemaleGT[i]
    else:
        # without a valid focal sex the partner cannot be identified; skip the pair
        # rather than reuse the previous pair's partner or fail on an undefined name
        print(f'Focal sex is neither F nor M; something is wrong with {ptag}.')
        continue

    # get session specific metadata
    thisdate = str(meta.RecDate[i])

    # set up recording time... it would be nice to get actual video data for the times
    datepieces = get_date_from_block(thisdate)
    sess_start = datetime(datepieces[0],datepieces[1],datepieces[2],12,0,0,0,tzlocal())

    session_description = f'Behavioral annotations from pair {ptag} in a(n) {assay_type} assay.'

    # make NWB file
    nwbfile = NWBFile(
        session_description=session_description,
        identifier = str(uuid4()),
        session_start_time = sess_start,
        lab=lab,
        experimenter=meta.RanBy[i],
        session_id = os.path.splitext(nwbfilename)[0], # file name without the .nwb extension
    )

    # add subject info
    nwbfile.subject = Subject(
        subject_id = meta.FocalID[i],
        species = 'Microtus ochrogaster',
        sex = meta.FocalSex[i],
        genotype = meta.FocalGT[i]
    )

    # get assay duration
    duration = float(meta.AssayDuration[i])

    # figure out time since pairing: the dates are stored as yyyymmdd, so they must be parsed
    # before subtracting (subtracting the raw numbers does not give days)
    # TODO days is not stored in the NWB file yet
    rec_day = _yyyymmdd_to_date(meta.RecDate[i])
    pair_day = _yyyymmdd_to_date(meta.PairDate[i])
    days = (rec_day - pair_day).days if rec_day is not None and pair_day is not None else None

    # add lab metadata
    metaObj = AssayMetadata(
                    assay_type=assay_type,
                    exclude_flag=exclude_flag,
                    duration=duration,
                    room=str(meta.AssayRoom[i]),
                    timeline=str(meta.Timeline[i]),
                    ethogram=str(meta.Ethogram[i]),
                    experimenter=str(meta.ScoredBy[i]),
                    timeline_complete=meta.FullTimeline[i],
                    colors=meta.FocalColor[i],
                    introduction=os.path.join(meta.ScorePath[i],meta.ScoreFile[i]),
                    introduction__partner_ID=str(pID),
                    introduction__partner_GT=str(pGT),
                    )

    # Add the lab metadata object to the NWBFile
    nwbfile.add_lab_meta_data(lab_meta_data=metaObj)

    # TODO Add video file
    video_ext_file = ImageSeries(
        name='behaviorVideo',
        description='Raw original video.',
        unit='n.a.',
        external_file=[os.path.join(meta.VideoPath[i],meta.VideoFile[i])],
        format='external',
        starting_time=0.0,
        rate=25.0,
    )

    nwbfile.add_acquisition(video_ext_file)

    # --- Add annotated behavior data

    # Make sure these files are downloaded with the same path configuration as listed in the metadata
    # Or adjust here as I did with telling the code to go up a level
    scoretab = pd.read_csv(os.path.join('..',meta.ScorePath[i],meta.ScoreFile[i])) # load up csv of annotations

    # change problematic column names
    scoretab.rename(columns={'Start (s)':'start',
                             'Stop (s)':'end',
                             'Duration (s)':'duration',
                             'Behavior type':'behavior_type'}, inplace=True)

    # --- Check for problems in the annotation file

    # check for extraneous media files
    media_files = np.unique(scoretab['Media file'])
    if len(media_files)>1:
        warnings.warn(f'Scored csv {meta.ScoreFile[i]} contains events for multiple media files.')
        multifile_log.append(meta.ScoreFile[i])
        for fp in media_files:
            print(fp)

    else:

        # check for total event duration violations
        totdur = np.sum(scoretab.duration)
        if totdur>1.05*duration:
            warnings.warn(f'Total events duration of {meta.ScoreFile[i]} exceeds the assay duration.')
        elif totdur<0.95*duration:
            warnings.warn(f'Total events duration of {meta.ScoreFile[i]} does not meet the assay duration.')

        # check for overlapping events and check for unscored gaps between events
        ols,ole,olb,gs,ge,gb1,gb2 = check_for_event_interactions(scoretab,True)

        # log overlaps
        for j,ol in enumerate(ols):
            ol_files.append(meta.ScoreFile[i])
            ol_behav.append(olb[j])
            ol_start.append(ol)
            ol_end.append(ole[j])

        # log gaps
        for j, gp in enumerate(gs):
            g_files.append(meta.ScoreFile[i])
            g_b1.append(gb1[j])
            g_b2.append(gb2[j])
            g_end.append(ge[j])
            g_start.append(gp)

    # make NWB object corresponding to the annotation table
    behavior_intervals = TimeIntervals(name="annotated_behavior",
        description="Intervals of scored behavior.")

    behavior_intervals.add_column(name="behavior", description="The annotation from the ethogram.")
    behavior_intervals.add_column(name="duration", description="Duration of the behavior.")
    behavior_intervals.add_column(name="atype", description="Point or state event.")

    # populate table; the row counter gets its own name so it cannot clobber the metadata index i
    for row, start in enumerate(scoretab.start):
        behavior_intervals.add_row(start_time=start,
                                   stop_time=scoretab.end[row],
                                   behavior=scoretab.Behavior[row],
                                   atype=scoretab.behavior_type[row],
                                   duration=scoretab.duration[row])

    # add to NWB file
    nwbfile.add_time_intervals(behavior_intervals)

    # write file to disk
    if write_NWB_to_disk:
        with NWBHDF5IO(wfullpath, "w") as io:
            io.write(nwbfile)

# --- Write annotation violation logs for review
# name the logs after the assay type of the last metadata row, independent of any loop counter
log_assay = meta.AssayType.iloc[-1] if len(meta) else 'unknown'

ol_log = {'file':ol_files,'behavior':ol_behav,'start_time':ol_start,'end_time':ol_end}
ol_tab = pd.DataFrame(data=ol_log)
ol_tab.to_csv(f'overlap_log_{log_assay}.csv',index=False)

# a gap lasts from the end of one behavior to the start of the next
gap_durs = [st-en for st, en in zip(g_start,g_end)]
gap_log = {'file':g_files,'behavior1':g_b1,'behavior1_end':g_end,'behavior2':g_b2,'behavior2_start':g_start,
           'gap_duration':gap_durs}
g_tab = pd.DataFrame(data=gap_log)
g_tab.to_csv(f'gap_log_{log_assay}.csv',index=False)

Nov22_Pair1_introduction.nwb




Nov22_Pair15_introduction.nwb
Gap violation for behavior Investigate ending at 1401.436 and next behavior Huddle starting at 1401.935.




Nov22_Pair16_introduction.nwb




Nov22_Pair17_introduction.nwb




Nov22_Pair18_introduction.nwb




Nov22_Pair19_introduction.nwb




Nov22_Pair2_introduction.nwb




Nov22_Pair21_introduction.nwb




Nov22_Pair22_introduction.nwb
Gap violation for behavior No interaction ending at 519.788 and next behavior Sniff starting at 522.037.




Nov22_Pair23_introduction.nwb
Gap violation for behavior Investigate ending at 899.036 and next behavior No interaction starting at 899.789.
Gap violation for behavior No interaction ending at 1185.702 and next behavior Investigate starting at 1186.705.




Nov22_Pair24_introduction.nwb
Gap violation for behavior Investigate ending at 50.367 and next behavior No interaction starting at 50.452.
Gap violation for behavior Sniff ending at 52.12 and next behavior Investigate starting at 55.121.




Nov22_Pair25_introduction.nwb
Gap violation for behavior No interaction ending at 509.455 and next behavior Investigate starting at 510.951.
Gap violation for behavior Investigate ending at 1520.873 and next behavior Sniff starting at 1523.124.




Nov22_Pair26_introduction.nwb
Gap violation for behavior Sniff ending at 458.872 and next behavior No interaction starting at 459.12.
Gap violation for behavior Sniff ending at 592.867 and next behavior No interaction starting at 593.868.
Gap violation for behavior No interaction ending at 618.786 and next behavior Sniff starting at 620.035.




Nov22_Pair27_introduction.nwb
Gap violation for behavior Sniff ending at 428.418 and next behavior No interaction starting at 428.42.
Gap violation for behavior Sniff ending at 572.286 and next behavior Investigate starting at 572.29.
Gap violation for behavior No interaction ending at 770.879 and next behavior Sniff starting at 771.128.




Nov22_Pair28_introduction.nwb
Overlap violation for behavior No interaction starting at 539.206 and ending at 539.622.
Gap violation for behavior No interaction ending at 539.198 and next behavior No interaction starting at 539.206.
Gap violation for behavior Sniff ending at 539.621 and next behavior Sniff starting at 540.122.




Nov22_Pair29_introduction.nwb




Nov22_Pair3_introduction.nwb




Nov22_Pair30_introduction.nwb
Gap violation for behavior Sniff ending at 309.286 and next behavior Investigate starting at 309.288.
Gap violation for behavior No interaction ending at 1606.456 and next behavior No interaction starting at 1610.707.
Gap violation for behavior No interaction ending at 1621.206 and next behavior Sniff starting at 1625.209.




Nov22_Pair31_introduction.nwb




Nov22_Pair32_introduction.nwb




Nov22_Pair33_introduction.nwb




Nov22_Pair34_introduction.nwb
Overlap violation for behavior Huddle starting at 1594.873 and ending at 1641.874.




Nov22_Pair35_introduction.nwb




Nov22_Pair36_introduction.nwb
Overlap violation for behavior No interaction starting at 306.119 and ending at 306.534.
Gap violation for behavior No interaction ending at 306.036 and next behavior No interaction starting at 306.119.
Gap violation for behavior Investigate ending at 306.533 and next behavior Investigate starting at 307.286.




Nov22_Pair37_introduction.nwb




Nov22_Pair38_introduction.nwb




Nov22_Pair39_introduction.nwb
Gap violation for behavior No interaction ending at 313.871 and next behavior Defensive rear starting at 340.453.




Nov22_Pair4_introduction.nwb
Gap violation for behavior Sniff ending at 571.886 and next behavior No interaction starting at 571.905.




Nov22_Pair40_introduction.nwb




Nov22_Pair41_introduction.nwb




Nov22_Pair42_introduction.nwb




Nov22_Pair43_introduction.nwb
Gap violation for behavior Investigate ending at 1074.708 and next behavior Huddle starting at 1076.208.




Nov22_Pair44_introduction.nwb




Nov22_Pair46_introduction.nwb




Nov22_Pair47_introduction.nwb




Nov22_Pair48_introduction.nwb




Nov22_Pair49_introduction.nwb
Gap violation for behavior No interaction ending at 1077.872 and next behavior Investigate starting at 1077.877.




Nov22_Pair5_introduction.nwb




Nov22_Pair50_introduction.nwb




Nov22_Pair51_introduction.nwb




Nov22_Pair52_introduction.nwb




Nov22_Pair56_introduction.nwb
Gap violation for behavior Investigate ending at 330.37 and next behavior Defensive rear starting at 330.623.




Nov22_Pair57_introduction.nwb




Nov22_Pair58_introduction.nwb




Nov22_Pair59_introduction.nwb




Nov22_Pair6_introduction.nwb




Nov22_Pair60_introduction.nwb
Gap violation for behavior No interaction ending at 1019.874 and next behavior Sniff starting at 1021.041.




Nov22_Pair61_introduction.nwb




Nov22_Pair62_introduction.nwb




Nov22_Pair63_introduction.nwb
Gap violation for behavior No interaction ending at 1220.374 and next behavior Defensive rear starting at 1221.871.




Nov22_Pair64_introduction.nwb
Gap violation for behavior No interaction ending at 1798.639 and next behavior Huddle starting at 1798.89.




Nov22_Pair7_introduction.nwb




Nov22_Pair8_introduction.nwb




In [89]:
# table of overlap violations, one row per overlapping state event
ol_tab = pd.DataFrame(data={'file':ol_files,'behavior':ol_behav,'start_time':ol_start,'end_time':ol_end})
ol_log = ol_tab.to_dict(orient='list') if False else {'file':ol_files,'behavior':ol_behav,'start_time':ol_start,'end_time':ol_end}
ol_tab.to_csv('overlap_log_intros.csv',index=False)

# a gap lasts from the end of the first behavior to the start of the next one
gap_durs = [next_start-prev_end for next_start, prev_end in zip(g_start,g_end[:len(g_start)])]
gap_log = {
    'file':g_files,
    'behavior1':g_b1,
    'behavior1_end':g_end,
    'behavior2':g_b2,
    'behavior2_start':g_start,
    'gap_duration':gap_durs,
}
g_tab = pd.DataFrame(data=gap_log)
g_tab.to_csv('gap_log_intros.csv',index=False)

In [88]:
# display the table of overlap violations
ol_tab

Unnamed: 0,file,behavior,start_time,end_time
0,Nov22_Pair28_Intro.csv,No interaction,539.206,539.622
1,Nov22_Pair34_Intro.csv,Huddle,1594.873,1641.874
2,Nov22_Pair36_Intro.csv,No interaction,306.119,306.534


In [69]:
# --- functions to check for overlapping state events and gaps between state events

def check_for_overlapping_states(starts,ends,behaviors):
    '''Looks for overlapping times in state events.

    Each event is compared with the one that follows it (by position); an
    overlap is reported when an event ends after its successor starts.

    Parameters
    ----------
    starts, ends, behaviors : equally long pandas Series holding the start time,
        end time and label of each event. Only positional access is used, so a
        filtered Series with a non-contiguous index is fine.

    Returns
    -------
    (violation_s, violation_e, violation_b) : lists with the start time, end
        time and label of every event that overlaps its successor.
    '''

    violation_s = []
    violation_e = []
    violation_b = []

    # every event except the last one has a successor, so check len-1 pairs
    for i in range(len(starts)-1):
        if ends.iloc[i]>starts.iloc[i+1]:
            violation_s.append(starts.iloc[i])
            violation_e.append(ends.iloc[i])
            violation_b.append(behaviors.iloc[i])

    return violation_s, violation_e, violation_b

def check_for_state_gaps(starts,ends,behaviors):
    '''Looks for gaps between state events.

    Each event is compared with the one that follows it (by position); a gap is
    reported when the successor starts more than 0.001 after the event ends
    (difference rounded to 3 decimals).

    Parameters
    ----------
    starts, ends, behaviors : equally long pandas Series holding the start time,
        end time and label of each event. Only positional access is used.

    Returns
    -------
    (violation_s, violation_e, violation_b1, violation_b2) : lists with, for
        every gap, the start time of the later event, the end time of the
        earlier event, the earlier label and the later label.
    '''

    violation_s = []
    violation_e = []
    violation_b1 = []
    violation_b2 = []

    # every event except the last one has a successor, so check len-1 pairs
    for i in range(len(starts)-1):
        if round(starts.iloc[i+1]-ends.iloc[i],3)>0.001:
            violation_s.append(starts.iloc[i+1])
            violation_e.append(ends.iloc[i])
            violation_b1.append(behaviors.iloc[i])
            violation_b2.append(behaviors.iloc[i+1])

    return violation_s, violation_e, violation_b1, violation_b2

def check_for_event_interactions(scoretab,verbose):
    '''Check a table of behavioral annotations for gap and overlap violations.

    Only rows whose ``behavior_type`` equals 'STATE' are examined. The start,
    end and Behavior columns of those rows are handed to
    check_for_overlapping_states and check_for_state_gaps. When ``verbose`` is
    truthy, one line is printed per violation.

    Returns the three overlap lists followed by the four gap lists, as one tuple.
    '''

    is_state = scoretab.behavior_type=='STATE'

    state_starts = scoretab.start[is_state]
    state_ends = scoretab.end[is_state]
    state_labels = scoretab.Behavior[is_state]

    overlaps = check_for_overlapping_states(state_starts,state_ends,state_labels)
    gaps = check_for_state_gaps(state_starts,state_ends,state_labels)

    if verbose:

        for o_start, o_end, o_label in zip(*overlaps):
            print(f'Overlap violation for behavior {o_label} starting at {o_start} and ending at {o_end}.')

        for g_next_start, g_prev_end, g_prev_label, g_next_label in zip(*gaps):
            print(f'Gap violation for behavior {g_prev_label} ending at {g_prev_end} and next behavior {g_next_label} starting at {g_next_start}.')

    return (*overlaps, *gaps)

In [61]:
# Scratch cell: inspect one FocalColor entry.
# NOTE(review): literal_eval is already imported in the first cell; this import is redundant.
from ast import literal_eval
# df['col2'] = df['col2'].apply(literal_eval)
# meta.FocalColor = meta.FocalColor.apply(literal_eval)
# NOTE(review): relies on `i` left over from a loop in another cell — which row this shows depends on run order.
meta.FocalColor[i]

[[245, 201, 39], [147, 120, 23]]

In [50]:
# Scratch cell: display whatever `nwbfile` currently refers to.
# NOTE(review): presumably the last NWBFile built by the conversion loop — depends on run order.
nwbfile

Unnamed: 0_level_0,start_time,stop_time,behavior,duration,atype
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,261.022,261.022,Start assay,,POINT
1,261.023,264.56,No interaction,3.537,STATE
2,264.561,265.31,Sniff,0.749,STATE
3,265.311,266.308,No interaction,0.997,STATE


In [38]:
# Scratch cell: look at the FocalColor value stored for the row labelled 0
meta.FocalColor[0]

nan

In [19]:
# ---- figure out score file checks
# load one annotation csv (one directory level up from the notebook) to try the checks on
scorefile = 'Nov22_Pair28_Intro.csv'
scorepath = os.path.join('Scn2a_X1Behavior_Nov22','Aggregated_Events')
annotation_csv = os.path.join('..',scorepath,scorefile)
scoretab = pd.read_csv(annotation_csv) # load up csv of annotations

In [23]:
# number of distinct values in the 'Media file' column of the loaded annotation table
distinct_media = np.unique(scoretab['Media file'])
print(len(distinct_media))

12


In [None]:
# --- organizing NWB intervals concept

# NOTE: `thisScoreFile` is not defined anywhere in this notebook; set it to the path of an
# annotation csv before running this cell.
scoretab = pd.read_csv(thisScoreFile) # load up csv of annotations

# change problematic column names
scoretab.rename(columns={'Start (s)':'start'}, inplace=True)
scoretab.rename(columns={'Stop (s)':'end'}, inplace=True)
scoretab.rename(columns={'Duration (s)':'duration'}, inplace=True)
scoretab.rename(columns={'Behavior type':'behavior_type'}, inplace=True)

# make NWB object corresponding to the annotation table
behavior_intervals = TimeIntervals(
    name="annotated_behavior",
    description="Intervals of scored behavior.",
    )

# these statements are top-level: the extra indentation they used to carry is a syntax error
behavior_intervals.add_column(name="behavior", description="The annotation from the ethogram.")
behavior_intervals.add_column(name="duration", description="Duration of the behavior.")
behavior_intervals.add_column(name="atype", description="Point or state event.")

# one interval row per annotation row
for i, start in enumerate(scoretab.start):
    end = scoretab.end[i]
    behav = scoretab.Behavior[i]
    atype = scoretab.behavior_type[i]
    dur = scoretab.duration[i]
    # rows go into the table created above (there is no `call_intervals` object in this notebook)
    behavior_intervals.add_row(start_time=start,stop_time=end,behavior=behav,atype=atype,duration=dur)