In [1]:
import pandas as pd
import math
import numpy as np
import os
import glob
# Frame rate used to convert frame counts into seconds in the analysis below
# (assumed to match the recorded videos — TODO confirm for each recording setup)
frames_per_second = 30

In [2]:
# Directory where the behavior videos are located
video_directory = '01_Raw_videos'
# Directory holding the per-video feature dataframes (read when the videos are analysed)
dataframe_directory = '08_Feature_added_dataframe'
# Output directory name (not used by the cells visible in this part of the notebook)
output_directory = '07_Processed_videos'
file_location = os.path.join(video_directory, '*.mp4')


def video_stem(path):
    """Return the file name of `path` without its directory and final extension.

    Only the last extension is removed, so a name that happens to contain
    '.mp4' in the middle (e.g. 'clip.mp4_copy.mp4') is not truncated.
    """
    return os.path.splitext(os.path.basename(path))[0]


# List every video to be processed. glob returns files in arbitrary order, so the
# paths are sorted to make the processing order identical on every run.
filenames = sorted(glob.glob(file_location))

# Name used for each video's outputs, derived from the input file name
videoname = [video_stem(f) for f in filenames]
videoname

['Gal_35_father_retreive_trial_1']

In [35]:
# Summarise every video into one row of `Data`: the retrieval latency of each infant,
# the time the adult spends in the nest with each infant, and the adult's total nest time.
#
# Rows of each feature dataframe are addressed by position, i.e. row j is treated as
# frame j (assumes one row per frame with a 0..n-1 index — TODO confirm for new data).

# A 'middle_head' point whose likelihood is below this value counts as "not detected";
# above it counts as "detected" (a value exactly equal to the cutoff counts as neither).
LIKELIHOOD_CUTOFF = .011
# Smallest displacement between a disappearance and the matching reappearance that is
# accepted as a retrieval (same units as the x/y tracking columns).
RETRIEVAL_MIN_DISTANCE = 100


def infant_names(animals):
    """Return the top-level column labels that contain 'Infant', in column order."""
    top_level = animals.columns.get_level_values(0).unique()
    return [name for name in top_level if 'Infant' in name]


def nest_frame_counts(animals, infants):
    """Count frames the adult is in the nest, alone and together with each infant.

    Returns a tuple ``(with_infant, adult_total)`` where ``with_infant`` maps each
    infant name to the number of frames in which the adult body and all four infant
    points (head, middle_head, middle_tail, tail) are flagged 1 in 'In_Nest', and
    ``adult_total`` is the number of frames in which the adult body is flagged 1.
    """
    adult_in_nest = animals[('In_Nest', 'Adult', 'body')] == 1
    with_infant = {}
    for infant in infants:
        together = adult_in_nest
        for part in ('head', 'middle_head', 'middle_tail', 'tail'):
            together = together & (animals[('In_Nest', infant, part)] == 1)
        # int() keeps plain Python numbers in the printed/stored results
        with_infant[infant] = int(together.sum())
    return with_infant, int(adult_in_nest.sum())


def first_appearance_frame(x_values):
    """Return the first frame j whose x is missing while frame j + 1 has a value.

    Returns 0 when there is no such frame (the point is present from the first
    frame, or never present at all).
    """
    missing = pd.isnull(x_values)
    for j in range(len(x_values) - 1):
        if missing[j] and not missing[j + 1]:
            return j
    return 0


def tracking_gaps(likelihood, cutoff):
    """Find where a point drops below `cutoff` and where it comes back above it.

    For every interior frame j below the cutoff: j - 1 is recorded as a
    disappearance if it is above the cutoff, and j + 1 as a reappearance if it is
    above the cutoff. The first and last frames are never tested themselves because
    they lack a neighbour on one side.

    Returns ``(disappear, reappear)``; disappearances that have no reappearance at
    the same list position are dropped from the end.
    """
    disappear, reappear = [], []
    for j in range(1, len(likelihood) - 1):
        if likelihood[j] < cutoff:
            if likelihood[j - 1] > cutoff:
                disappear.append(j - 1)
            if likelihood[j + 1] > cutoff:
                reappear.append(j + 1)
    # NOTE(review): the two lists are paired by position. If a point starts the video
    # below the cutoff, its first reappearance has no matching disappearance and all
    # pairs are shifted by one — confirm against the fill-in-points output.
    del disappear[len(reappear):]
    return disappear, reappear


def retrieval_latency(x_values, y_values, disappear, reappear, appear_frame,
                      fps, min_distance):
    """Return the retrieval latency in seconds, or NaN if no retrieval is detected.

    The gap with the largest displacement between the disappearance and reappearance
    positions is taken as the retrieval. If that displacement is below `min_distance`
    the adult probably did not retrieve the infant and NaN is returned. Otherwise the
    latency is the time from `appear_frame` to the frame of that disappearance.
    """
    best_distance = 0
    best_frame = None
    for gone, back in zip(disappear, reappear):
        distance = math.hypot(x_values[back] - x_values[gone],
                              y_values[back] - y_values[gone])
        if distance > best_distance:
            best_distance = distance
            best_frame = gone
    if best_frame is None or best_distance < min_distance:
        return np.nan
    return (best_frame - appear_frame) / fps


# Column names seen so far: the video name plus every infant name encountered
columns_name = ['Video Name']
infants = []
# One dict per video; the dataframe is built once after the loop
rows = []

# Loop through all the videos
for x in videoname:

    # Import the h5 file for the feature dataframe and call it Animals
    hdf_file = os.path.join(dataframe_directory, x)
    hdf_var = sorted(glob.glob(hdf_file + '*'))
    if not hdf_var:
        raise FileNotFoundError('No feature dataframe found matching ' + hdf_file + '*')
    Animals = pd.read_hdf(hdf_var[0])

    # Infant names (Infant1, Infant2, etc) are collected afresh for each video so that
    # names from a previous video are never looked up in a dataframe that lacks them
    infants = infant_names(Animals)
    for i in infants:
        if i not in columns_name:
            columns_name.append(i)

    # Time (in seconds) the adult spends in the nest with each infant and in total
    frames_with_infant, just_adult = nest_frame_counts(Animals, infants)
    nest_times = {'with_' + i: frames_with_infant[i] / frames_per_second for i in infants}
    nest_times['just_adult'] = just_adult / frames_per_second
    print(nest_times)

    # Per-infant appearance frame, disappearance frames and reappearance frames
    columns_appear = []
    columns_disappear = []
    columns_reappear = []
    latencies = {}
    for i in infants:
        # Pull the needed columns out once instead of looking up single cells per frame
        head_x = Animals[(i, 'middle_head', 'x')].to_numpy()
        head_y = Animals[(i, 'middle_head', 'y')].to_numpy()
        likelihood = Animals[(i, 'middle_head', 'likelihood')].to_numpy()

        appear = first_appearance_frame(head_x)
        disappear, reappear = tracking_gaps(likelihood, LIKELIHOOD_CUTOFF)

        columns_appear.append(appear)
        columns_disappear.append(disappear)
        columns_reappear.append(reappear)

        latencies['Latency_' + i] = retrieval_latency(
            head_x, head_y, disappear, reappear, appear,
            frames_per_second, RETRIEVAL_MIN_DISTANCE)

    print(latencies)

    # Build this video's row: latencies first, then nest times, then the adult total
    new_row = {'Video Name': x}
    for i in infants:
        new_row['Latency ' + i] = latencies['Latency_' + i]
    for i in infants:
        new_row['Nest Time Adult With ' + i] = nest_times['with_' + i]
    new_row['Total Adult Nest Time'] = nest_times['just_adult']
    rows.append(new_row)

# Build the results table in one go; with no videos keep the empty 'Video Name' frame
Data = pd.DataFrame(rows) if rows else pd.DataFrame(columns=columns_name)

  if Animals.at[j, (i, 'middle_head', 'likelihood')] < .011:


{'with_Infant1': 215.03333333333333, 'with_Infant2': 239.33333333333334, 'just_adult': 284.4}
{'Latency_Infant1': 98.1, 'Latency_Infant2': 72.3}


In [36]:
# Display the summary table: one row per video with its latency and nest-time columns
Data

Unnamed: 0,Video Name,Latency Infant1,Latency Infant2,Nest Time Adult With Infant1,Nest Time Adult With Infant2,Total Adult Nest Time
0,Gal_35_father_retreive_trial_1,98.1,72.3,215.033333,239.333333,284.4
