# Purpose
to help us figure out how consistently our labels are across humans

# Imports

In [12]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import os, sys, glob, math
import itertools 

# Functions

In [13]:
def load_and_reformat(location):
    df = pd.read_csv(location,header=[0,1,2,3],index_col=[0,1,2])
    new_df=df.melt(value_vars=list(df.columns[list(np.arange(0,len(df.columns)))]),ignore_index=False)
    return new_df

In [14]:
def get_all_com(df_sub,scorer,list_of_mice):
    df_of_mice=pd.DataFrame(columns=['mouse','com_x','com_y'])
    for mouse in list_of_mice:
        good_flag, com_x, com_y=get_com(df_sub[df_sub.individuals==mouse])
        if good_flag > 0:
            df_of_mice=pd.concat([df_of_mice,pd.DataFrame(columns=df_of_mice.columns,data=[[mouse,com_x,com_y]])],axis=0)
    num_of_mice=len(np.unique(df_of_mice['mouse']))
    return df_of_mice, num_of_mice

In [15]:
def get_com(df):
    if sum(~np.isnan(list(df.value))) > 0:
        com_x=np.nanmedian(df[df.coords=='x'].value)
        com_y=np.nanmedian(df[df.coords=='y'].value)
        good_flag=1
    else:
        good_flag=0
        com_x=0
        com_y=0
    return good_flag, com_x,com_y

In [16]:
def closest(lst, n): 
    lst = np.asarray(lst)
    idx = (np.abs(lst - n)).argmin()
    return lst[idx], idx

In [17]:
def get_individual_for_frame(df,name,frame):
    df_i=df[df.scorer==name]
    df_i=df_i[df_i.level_2==frame]
    bp=np.unique(df_i.bodyparts)
    if len(bp) > 9:
        bp=np.unique(df_i.bodyparts)[[0,3,4,5,6,8,9,10,11]]
    return df_i, bp

In [18]:
def get_summary_from_mouse(ground_truth_subset,closest_match_subset):
    ptp=0
    pfp=0
    pfn=0
    body_parts=np.array(['body_center', 'left_ear', 'left_hip', 'left_side', 'nose', 'right_ear', 'right_hip', 'right_side', 'tail_base'], dtype=object)
    # for body_part in np.unique(bodyparts):
    bp_vals=[]
    for body_part in body_parts:
            print(body_part)
            lps_bp=ground_truth_subset[ground_truth_subset.bodyparts==body_part]
            ann_bp=closest_match_subset[closest_match_subset.bodyparts==body_part]
            ds=math.dist([list(lps_bp[lps_bp.coords=='x'].value)[0],list(ann_bp[ann_bp.coords=='y'].value)[0]],[list(ann_bp[ann_bp.coords=='x'].value)[0],list(lps_bp[lps_bp.coords=='y'].value)[0]])
            print('ds is {}'.format(ds))
            this_mouse_dists=[]
            # if no one annotated this part, skip, else
            if np.isnan(list(lps_bp[lps_bp.coords=='x'].value)[0])==False:
                # if LPS annotated
                # but annotator didn't, mark as false negative
                if np.isnan(list(ann_bp[ann_bp.coords=='x'].value)[0]):
                    pfn+=1
                # if LPS annotated and annotator did, mark as true positive and get dist
                else: 
                    ptp+=1
                    this_mouse_dists.append(ds)
            elif not np.isnan(list(ann_bp[ann_bp.coords=='x'].value)[0]):
                pfp+=1
                # if LPS didn't annotate but annotator did, mark as false positive
            # otherwise no one annotated, so skip this part
            bp_vals.append(np.nanmean(this_mouse_dists))
    bp_val=np.nanmean(bp_vals)
    return bp_val, pfp, pfn, ptp

# Use case

# Procedure

In [23]:
# load data
dates = ["0529_24B_27-20220114T211328Z-001\\0529_24B_27","0529_25B_23"]
base=str("C:\\Users\\dennise\\Downloads")
file_names=['CollectedData_nishan.csv','CollectedData_ejd.csv','CollectedData_LPS.csv','CollectedData_Jessica.csv']

list_of_mice=['mouse1','mouse2','mouse3','mouse4','mouse5','mouse6']
names=['LPS','nishan','Jessica','ejd']
ground_truth_scorer='LPS'
body_parts=np.array(['body_center', 'left_ear', 'left_hip', 'left_side', 'nose',
       'right_ear', 'right_hip', 'right_side', 'tail_base'], dtype=object)


# get data from each date
date=dates[0]
print(date)
df=load_and_reformat(os.path.join(base,date,file_names[0]))
print(file_names[0])
for name in file_names[1:]:
    print(file_names)
    df=pd.concat([df,load_and_reformat(os.path.join(base,date,name))],axis=0)
df=df.reset_index()
df.head()


date=dates[1]
print(date)
df2=load_and_reformat(os.path.join(base,date,file_names[0]))
for name in file_names[1:]:
    df2=pd.concat([df2,load_and_reformat(os.path.join(base,date,name))],axis=0)
df2=df2.reset_index()

0529_24B_27-20220114T211328Z-001\0529_24B_27
CollectedData_nishan.csv
['CollectedData_nishan.csv', 'CollectedData_ejd.csv', 'CollectedData_LPS.csv', 'CollectedData_Jessica.csv']
['CollectedData_nishan.csv', 'CollectedData_ejd.csv', 'CollectedData_LPS.csv', 'CollectedData_Jessica.csv']
['CollectedData_nishan.csv', 'CollectedData_ejd.csv', 'CollectedData_LPS.csv', 'CollectedData_Jessica.csv']
0529_25B_23


In [119]:
# make an empty df to store results
df_agg=pd.DataFrame(columns=['frame','scorer','total_mice','tp','fp','ptp','pfp','pfn','px'])

# for each frame
n=-1
for frame in np.unique(df.level_2):
    n+=1
    print('on frame {}, {} of {}'.format(frame,n,len(np.unique(df.level_2))))
    # get ground truth, in this case LPS data
    df_sub=df[df.level_2==frame]
    for scorer in names:
        print(scorer)
        df_sub_scorer=df_sub[df_sub.scorer==scorer]
        df_sub_com, l=get_all_com(df_sub_scorer,scorer,list_of_mice)
        df_sub_com=df_sub_com.reset_index()
        if scorer==ground_truth_scorer:
            ground_truth=df_sub_scorer
            ground_com, num_of_mice = get_all_com(ground_truth,ground_truth_scorer,list_of_mice)
            ground_com=ground_com.reset_index()
        else:
            # compare # of mice
            distance=10000000
            # get the closest mouse
            for idx in ground_com.index:
                print('idx is {}'.format(idx))
                for i in df_sub_com.index:
                    print('i is {}'.format(i))
                    new_dist=math.dist([df_sub_com.com_x[i],df_sub_com.com_y[i]],[ground_com.com_x[0],ground_com.com_y[0]])
                    if new_dist < distance:
                        distance=new_dist
                        good_i=i
                print('best match for {} is {}'.format(idx,good_i))
                ground_coords=ground_truth[ground_truth.individuals==ground_com.mouse[idx]]
                # use good_i to get the closest mouse
                closest_coords=df_sub_scorer[df_sub_scorer.individuals==df_sub_com.mouse[good_i]]
                # get data for these
                px, pfp, pfn, ptp = get_summary_from_mouse(ground_coords,closest_coords)
            
            if num_of_mice < l:
                print('LPS annotated fewer mice')
                tp=num_of_mice
                fp=l-num_of_mice      
            elif num_of_mice > l:
                print('LPS annotated more mice')
                tp=l
                fp=0
            else:
                tp=l
                fp=0
                print('both annotated same # of mice')
            
            # for each scorer, save frame data to df_agg
            df_agg=pd.concat([df_agg,pd.DataFrame(columns=['frame','scorer','total_mice','tp','fp','ptp','pfp','pfn','px'],data=[[frame,scorer,num_of_mice,tp,fp,ptp,pfp,pfn,px]])])
                
            
        

on frame img000.png, 0 of 9
LPS
nishan
idx is 0
i is 0
best match for 0 is 0
body_center
ds is 1.701921786232093
left_ear
ds is 2.5761591831918067
left_hip
ds is 4.325544660414647
left_side
ds is 1.5876355210895736
nose
ds is 1.4148693443709652
right_ear
ds is 2.439748863451734
right_hip
ds is 2.350318161113047
right_side
ds is 2.824557946384717
tail_base
ds is nan
both annotated same # of mice
Jessica
idx is 0
i is 0
best match for 0 is 0
body_center
ds is 2.3468300730991727
left_ear
ds is 1.860711042391747
left_hip
ds is nan
left_side
ds is 5.115127677912072
nose
ds is nan
right_ear
ds is 2.3668436110798905
right_hip
ds is nan
right_side
ds is 4.2163058626944
tail_base
ds is nan
both annotated same # of mice
ejd
idx is 0
i is 0
best match for 0 is 0
body_center
ds is 6.429943523581705
left_ear
ds is 1.3172600133942907
left_hip
ds is nan
left_side
ds is 3.4717492286388385
nose
ds is 0.92737717853939
right_ear
ds is 2.9749522124112366
right_hip
ds is nan
right_side
ds is 1.722550846327

  bp_vals.append(np.nanmean(this_mouse_dists))



LPS
nishan
idx is 0
i is 0
best match for 0 is 0
body_center
ds is 0.9716965967832056
left_ear
ds is 1.7913033244438978
left_hip
ds is 2.953540508045728
left_side
ds is nan
nose
ds is nan
right_ear
ds is 1.0030480365295609
right_hip
ds is 3.177907963097699
right_side
ds is nan
tail_base
ds is nan
both annotated same # of mice
Jessica
idx is 0
i is 0
best match for 0 is 0
body_center
ds is 3.334664904521771
left_ear
ds is 1.1786772204345475
left_hip
ds is nan
left_side
ds is nan
nose
ds is nan
right_ear
ds is 3.556345210213021
right_hip
ds is 8.206778963671173
right_side
ds is nan
tail_base
ds is nan
both annotated same # of mice
ejd
idx is 0
i is 0
best match for 0 is 0
body_center
ds is 4.4955807542027975
left_ear
ds is 3.126352621439515
left_hip
ds is 2.091723096809451
left_side
ds is 5.249847864437201
nose
ds is nan
right_ear
ds is 1.4313439786806859
right_hip
ds is 2.161480181307558
right_side
ds is 5.133852859595068
tail_base
ds is nan
both annotated same # of mice
on frame img12

In [121]:
np.mean(df_agg[df_agg.scorer=='nishan'].px)

5.038947790092253

In [122]:
np.mean(df_agg[df_agg.scorer=='Jessica'].px)

4.950611142352708

In [123]:
np.mean(df_agg[df_agg.scorer=='ejd'].px)

5.08816445806895