In [None]:
from videolabeler import utils as vl
import numpy as np
import cv2
from tqdm.notebook import tqdm
import pandas as pd
import os
import random

In [None]:
#CSV of labels that every later cell works from
#NOTE(review): absolute, machine-specific path -- adjust when running elsewhere
input_file = '/ariel/science/mmiller/data/imu_net/tiff_to_label/master_labels_con_test.csv'

#index_col=0: the first CSV column becomes the DataFrame index instead of a data column
input_df = pd.read_csv(input_file, index_col=0)
#preview the first rows to sanity-check the load
input_df.head()

In [None]:
#every distinct animal_id found in the label table
animals = input_df.animal_id.unique()
print(animals)

#how many frames are there in each recording
#(number of distinct frame values among the rows of each animal)
rec_dict = {
    animal_name: len(input_df.loc[input_df.animal_id == animal_name, 'frame'].unique())
    for animal_name in animals
}

output_df = pd.DataFrame()

print(rec_dict)

## Preprocessing

In [None]:
def label_exists(col):
    """Custom aggregate function: return the first non-NA label found in ``col``.

    It is applied to each column of a group of rows that are merged into one row.
    If the same labeler labeled twice (eg after being served two consecutive
    batches), their first label is the one that is kept.

    Returns None when ``col`` is empty or every entry in it is NA.
    """
    return next((value for value in col if not pd.isna(value)), None)

#merge every set of rows sharing the same (frame, animal_id) pair into a single row;
#within each merged row, every remaining column takes its first non-NA value (see label_exists)
input_df = (
    input_df
    .groupby(by=['frame', 'animal_id'])
    .agg(label_exists)
    .reset_index()
)
#displayed sorted by frame for inspection only -- the sorted copy is not assigned back
input_df.sort_values('frame')

In [None]:
#destination of the merged label table (one row per frame/animal_id pair after the groupby step)
#NOTE(review): the file name mentions V3154 only, but input_df is not filtered to a single animal
#in the visible cells -- confirm the name is intended
output = '/ariel/science/mmiller/data/imu_net/tiff_to_label/master_labels_V3154_merged.csv'

#to_csv is called with its defaults, so the DataFrame index is written as the first column
input_df.to_csv(output)

## Consensus generation

In [None]:
#labeler columns are recognised purely by name length: every column whose name is exactly
#two characters long is treated as a labeler (presumably labeler initials -- TODO confirm)
labelers = [column_name for column_name in input_df.columns if len(column_name) == 2]

In [None]:
#Consensus generation: for every frame of every recording, pick ONE labeler and use their label.
#When several labelers labeled the same frame, the labeler is chosen in this order:
#  1. the first labeler whose closest unlabeled (nan) frame is MIN_NAN_GAP..MAX_NAN_GAP frames away
#  2. otherwise whoever supplied the previous frame's label, if they labeled this frame too
#  3. otherwise a random labeler among those who labeled the frame
#Frames that nobody labeled (or that have no row in input_df) get a None label and None labeler,
#so the per-frame lists always stay aligned with the frame numbers.

#bounds (in frames) on the distance between the current frame and a labeler's closest nan frame
#presumably n_overlap/2 and n_overlap for batches that overlap by 50 frames -- TODO confirm
MIN_NAN_GAP = 25
MAX_NAN_GAP = 50

#one consensus DataFrame per recording; combined once after the loop
animal_dfs = []

#for each recording
for animal in ['V3-154', 'V3-163', 'DCMStr41']:
    con_labels = []
    labeler_ids = []

    #rows of this recording only (filtered once here instead of once per frame)
    animal_rows = input_df.loc[input_df.animal_id == animal]

    #for each labeler, make a list of which frames have nan values so we can find the closest nan easily later
    nan_dict = {col: animal_rows.loc[animal_rows[col].isna()].frame.values for col in labelers}

    num_frames = rec_dict[animal]

    #NOTE: assumes the frames of a recording are numbered 0..num_frames-1
    for frame in range(num_frames):
        labels = animal_rows.loc[animal_rows.frame == frame]

        #for each frame, drop labelers who didn't label it
        labels = labels[labelers].dropna(axis=1)

        #nobody labeled this frame, or the frame has no row at all: store a placeholder so that
        #con_labels / labeler_ids keep one entry per frame (otherwise building animal_df fails)
        if labels.shape[0] == 0 or labels.shape[1] == 0:
            con_labels.append(None)
            labeler_ids.append(None)
            continue

        #if no overlap, simply use the existing label
        if labels.shape[1] == 1:
            labeler = labels.columns[0]

        #if there is overlap
        else:
            labeler = None

            #The normal overlap case: for each labeler, check if they're the 1st labeler of 1st half or 2nd labeler of 2nd
            for col in labels.columns:
                nan_frames = nan_dict[col]

                #a labeler without any nan frame in this recording has no closest nan to measure against
                if len(nan_frames) == 0:
                    continue

                closest_nan_gap = min(abs(nan_frame - frame) for nan_frame in nan_frames)

                #gap of more than n_overlap/2 between current frame and closest nan means
                #this labeler is either the first labeler of first half or second labeler of second half
                if MIN_NAN_GAP <= closest_nan_gap <= MAX_NAN_GAP:
                    labeler = col
                    break

            if labeler is None:
                #if you didn't meet the criteria set after going through all the columns,
                #(i.e due to total overlaps and edge cases etc), just pick the label of whoever labeled the frame before
                #this is so we don't have chunks of low-consensus windows due to randomly interleaving labelers
                #(labeler_ids is checked for emptiness first: indexing [-1] on an empty list raises IndexError)
                previous_labeler = labeler_ids[-1] if labeler_ids else None
                if previous_labeler is not None and previous_labeler in labels.columns:
                    labeler = previous_labeler

                #if you fail even that (ie because it's the start of a batch), just go random.
                #Only V3-154 should ever have to reach this
                #NOTE: no seed is set in the visible cells, so this pick can differ between runs
                else:
                    labeler = random.choice(list(labels.columns))

        #finally, add your labels
        con_labels.append(labels[labeler].iloc[0])
        labeler_ids.append(labeler)

    animal_df = pd.DataFrame({'animal_id': [animal]*num_frames, 'frame': np.arange(num_frames),
                              'label': con_labels, 'labeler': labeler_ids})
    animal_dfs.append(animal_df)

#DataFrame.append was removed in pandas 2.0; a single concat also avoids re-copying on every iteration.
#As with append, the index is not reset, so it restarts at 0 for each recording.
output_df = pd.concat(animal_dfs)

In [None]:
#animal_df is reassigned on every pass of the consensus loop, so this previews only the
#last recording that was processed (use output_df to look at all recordings)
animal_df.head()

In [None]:
#save the consensus table of all processed recordings
#to_csv is called with its defaults, so the index is written as the first column; the index is not
#reset when the per-recording frames are combined, so it restarts at 0 for each recording
output_df.to_csv('/ariel/science/mmiller/data/imu_net/tiff_to_label/all-consensus-test.csv')

## Spot check whatever we made

In [None]:
#directory passed to the inspection helper as its first argument
video_dir = '/ariel/science/mmiller/data/imu_net/tiff_to_label/V3-154/'
#label CSV passed to the inspection helper as its second argument
#NOTE(review): this is not the file written by the consensus cell ('all-consensus-test.csv') --
#confirm this is the file that is meant to be spot-checked
labels_file = '/ariel/science/mmiller/data/imu_net/tiff_to_label/V3-154-consensus-new-method.csv'

#run the videolabeler inspection helper 10 times with the same arguments
#presumably each call shows a different window of frames -- TODO confirm in videolabeler.utils
for i in range(10):
    vl.window_and_inspect_tiff(video_dir, labels_file)