# Idea

Examine the tracking accuracy of the experiments.
Load the raw (before post-processing) trajectories, and examine the amount of missing data.

# Imports

In [2]:
import numpy as np
import h5py
import pandas as pd
import time
import scipy.ndimage
import glob
from multiprocessing import Pool
import matplotlib.pyplot as plt
%load_ext autoreload
import os
%autoreload 2
import sys
%matplotlib widget

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Load the tracking data

In [3]:
main_load_folder = '/path/to/fighting_data/tracking_results/'

# ----------------------#

loadpaths = glob.glob(os.path.join(main_load_folder, '*.h5'))
loadpaths.sort()

# parse the exp names
expNames = [path.split('/')[-1][:23] for path in loadpaths]

# load the raw trajectories of each experiment
raw_trajectories = []
for path in loadpaths:
    with h5py.File(path, 'r') as hf:
        tracks_3D_raw = hf['tracks_3D_raw'][:]
    print(tracks_3D_raw.shape)
    raw_trajectories.append(tracks_3D_raw)

# Generating the tracking accuracy table

In [4]:
t0 = time.time()


# an arbitrary enough threshold to say if a gap is above or below
nan_gap_THRESH = 20 #frames


# ==============================================#

nan_frame_percentage_list = []
largest_nan_gaps = []
num_nan_gaps_above_THRESH_list = []
gap_means = []
gap_quant_99s = []
gap_quant_95s = []
numFrames_list = []

for expIdx, exp_name in enumerate(expNames):
    trks = np.copy(raw_trajectories[expIdx])
    
    numFrames = trks.shape[0]
    no_data_at_all = np.zeros((numFrames,))

    for fIdx in range(numFrames):
        if np.all(np.isnan(trks[fIdx])):
            no_data_at_all[fIdx] = 1

    regions = scipy.ndimage.find_objects(scipy.ndimage.label(no_data_at_all)[0])
    nan_gap_sizes = np.array([np.sum(no_data_at_all[r]) for r in regions])
    num_nan_frames = np.sum(no_data_at_all)
    nan_frame_percentage = (num_nan_frames/numFrames)*100

    # find the largest gap size
    largest_nan_gap = int(np.max(nan_gap_sizes))

    # find the number of gaps above a threshold size
    nan_gap_THRESH = 20 #frames
    num_nan_gaps_above_THRESH = nan_gap_sizes[nan_gap_sizes>nan_gap_THRESH].shape[0]

    # other stats
    mew = np.mean(nan_gap_sizes)
    quant_99 = np.quantile(nan_gap_sizes, 0.99)
    quant_95 = np.quantile(nan_gap_sizes, 0.95)
    
    # record
    nan_frame_percentage_list.append(nan_frame_percentage)
    largest_nan_gaps.append(largest_nan_gap)
    num_nan_gaps_above_THRESH_list.append(num_nan_gaps_above_THRESH)
    gap_means.append(mew)
    gap_quant_95s.append(quant_95)
    gap_quant_99s.append(quant_99)
    numFrames_list.append(numFrames)


    
# ------------------------------------#

# build a pd dataframe to hold the information

column_names = ["numFrames", "nan_frm_percentage", "largest_nan_gap", 
                "num_nan_gaps_above_THRESH", "mean_gaps", "95th quant", "99th quant"]

# gather the data for the dataframe
nan_frm_percentage_data = np.array(nan_frame_percentage_list)
largest_nan_gaps_data = np.array(largest_nan_gaps)
num_nan_gaps_above_THRESH_data = np.array(num_nan_gaps_above_THRESH_list)
gap_means_data = np.array(gap_means)
gap_quant_95s_data = np.array(gap_quant_95s)
gap_quant_99s_data = np.array(gap_quant_99s)
exp_numframes_data = np.array(numFrames_list)

tracking_data = np.vstack([exp_numframes_data, nan_frm_percentage_data,
                      largest_nan_gaps_data, num_nan_gaps_above_THRESH_data,
                      gap_means_data, gap_quant_95s_data, gap_quant_99s_data]).T

tracking_results_df = pd.DataFrame(tracking_data, columns=column_names, index=expNames)


    
    
tE = time.time()
print('---')
print(tE-t0)

---
77.9360671043396


In [5]:
tracking_results_df

Unnamed: 0,numFrames,nan_frm_percentage,largest_nan_gap,num_nan_gaps_above_THRESH,mean_gaps,95th quant,99th quant
FishTank20200127_143538,971756.0,5.560449,240.0,418.0,3.809504,12.0,45.0
FishTank20200129_140656,769848.0,6.157839,145.0,420.0,3.796428,15.0,40.0
FishTank20200130_153857,501943.0,3.056921,124.0,100.0,2.916556,9.0,30.0
FishTank20200130_181614,517000.0,2.624565,79.0,65.0,2.7138,9.0,25.01
FishTank20200207_161445,463997.0,6.562758,203.0,181.0,3.482901,10.0,35.0
FishTank20200213_154940,730119.0,4.727312,184.0,218.0,3.548006,10.0,40.73
FishTank20200214_153519,495610.0,1.109138,77.0,16.0,2.407797,6.0,16.0
FishTank20200217_160052,601226.0,1.890803,93.0,61.0,2.702806,8.0,26.0
FishTank20200218_153008,537880.0,4.053692,135.0,135.0,2.983171,9.0,33.0
FishTank20200316_163320,1032353.0,1.71482,107.0,62.0,2.528639,7.0,19.0


In [6]:
paper_data = np.vstack([exp_numframes_data, nan_frm_percentage_data, gap_means_data, gap_quant_99s_data]).T

paper_column_names = ["numFrames", "nan_frm_percentage", "mean_gaps", "99th quant"]

paper_data_df = pd.DataFrame(paper_data, columns=paper_column_names, index=expNames)
paper_data_df

Unnamed: 0,numFrames,nan_frm_percentage,mean_gaps,99th quant
FishTank20200127_143538,971756.0,5.560449,3.809504,45.0
FishTank20200129_140656,769848.0,6.157839,3.796428,40.0
FishTank20200130_153857,501943.0,3.056921,2.916556,30.0
FishTank20200130_181614,517000.0,2.624565,2.7138,25.01
FishTank20200207_161445,463997.0,6.562758,3.482901,35.0
FishTank20200213_154940,730119.0,4.727312,3.548006,40.73
FishTank20200214_153519,495610.0,1.109138,2.407797,16.0
FishTank20200217_160052,601226.0,1.890803,2.702806,26.0
FishTank20200218_153008,537880.0,4.053692,2.983171,33.0
FishTank20200316_163320,1032353.0,1.71482,2.528639,19.0


# ------------------------------------------------

# Generating tracking accuracy - pp

This section is not used in the paper, but illustrates the tracking accuracy after post-processing.

In [67]:
# load the raw trajectories of each experiment
smooth_trajectories = []
for path in loadpaths:
    with h5py.File(path, 'r') as hf:
        tracks_3D_smooth = hf['tracks_3D_smooth'][:]
    print(tracks_3D_smooth.shape)
    smooth_trajectories.append(tracks_3D_smooth)

(971756, 2, 3, 3)
(769848, 2, 3, 3)
(501943, 2, 3, 3)
(517000, 2, 3, 3)
(463997, 2, 3, 3)
(730119, 2, 3, 3)
(495610, 2, 3, 3)
(601226, 2, 3, 3)
(537880, 2, 3, 3)
(1032353, 2, 3, 3)
(561010, 2, 3, 3)
(556428, 2, 3, 3)
(746434, 2, 3, 3)
(1248577, 2, 3, 3)
(596607, 2, 3, 3)
(588962, 2, 3, 3)
(689100, 2, 3, 3)
(497635, 2, 3, 3)
(1695643, 2, 3, 3)
(798382, 2, 3, 3)
(717814, 2, 3, 3)
(726000, 2, 3, 3)


In [69]:
t0 = time.time()


# an arbitrary enough threshold to say if a gap is above or below
nan_gap_THRESH = 20 #frames


# ==============================================#

nan_frame_percentage_list = []
largest_nan_gaps = []
num_nan_gaps_above_THRESH_list = []
gap_means = []
gap_quant_99s = []
gap_quant_95s = []
numFrames_list = []

for expIdx, exp_name in enumerate(expNames):
    trks = np.copy(smooth_trajectories[expIdx])
    
    numFrames = trks.shape[0]
    no_data_at_all = np.zeros((numFrames,))

    for fIdx in range(numFrames):
        if np.all(np.isnan(trks[fIdx])):
            no_data_at_all[fIdx] = 1

    regions = scipy.ndimage.find_objects(scipy.ndimage.label(no_data_at_all)[0])
    nan_gap_sizes = np.array([np.sum(no_data_at_all[r]) for r in regions])
    num_nan_frames = np.sum(no_data_at_all)
    nan_frame_percentage = (num_nan_frames/numFrames)*100

    # find the largest gap size
    largest_nan_gap = int(np.max(nan_gap_sizes))

    # find the number of gaps above a threshold size
    nan_gap_THRESH = 20 #frames
    num_nan_gaps_above_THRESH = nan_gap_sizes[nan_gap_sizes>nan_gap_THRESH].shape[0]

    # other stats
    mew = np.mean(nan_gap_sizes)
    quant_99 = np.quantile(nan_gap_sizes, 0.99)
    quant_95 = np.quantile(nan_gap_sizes, 0.95)
    
    # record
    nan_frame_percentage_list.append(nan_frame_percentage)
    largest_nan_gaps.append(largest_nan_gap)
    num_nan_gaps_above_THRESH_list.append(num_nan_gaps_above_THRESH)
    gap_means.append(mew)
    gap_quant_95s.append(quant_95)
    gap_quant_99s.append(quant_99)
    numFrames_list.append(numFrames)


    
# ------------------------------------#

# build a pd dataframe to hold the information

column_names = ["numFrames", "nan_frm_percentage", "largest_nan_gap", 
                "num_nan_gaps_above_THRESH", "mean_gaps", "95th quant", "99th quant"]

# gather the data for the dataframe
nan_frm_percentage_data = np.array(nan_frame_percentage_list)
largest_nan_gaps_data = np.array(largest_nan_gaps)
num_nan_gaps_above_THRESH_data = np.array(num_nan_gaps_above_THRESH_list)
gap_means_data = np.array(gap_means)
gap_quant_95s_data = np.array(gap_quant_95s)
gap_quant_99s_data = np.array(gap_quant_99s)
exp_numframes_data = np.array(numFrames_list)

tracking_data = np.vstack([exp_numframes_data, nan_frm_percentage_data,
                      largest_nan_gaps_data, num_nan_gaps_above_THRESH_data,
                      gap_means_data, gap_quant_95s_data, gap_quant_99s_data]).T

tracking_results_df = pd.DataFrame(tracking_data, columns=column_names, index=expNames)


    
    
tE = time.time()
print('---')
print(tE-t0)

---
72.07411694526672


In [72]:
tracking_results_df

Unnamed: 0,numFrames,nan_frm_percentage,largest_nan_gap,num_nan_gaps_above_THRESH,mean_gaps,95th quant,99th quant
FishTank20200127_143538,971756.0,2.434768,238.0,365.0,26.58427,79.0,137.66
FishTank20200129_140656,769848.0,2.665461,143.0,370.0,23.13416,58.0,82.0
FishTank20200130_153857,501943.0,0.945127,122.0,84.0,23.029126,58.0,84.9
FishTank20200130_181614,517000.0,0.696132,77.0,61.0,18.647668,40.0,59.08
FishTank20200207_161445,463997.0,2.284066,201.0,154.0,22.40592,66.2,94.24
FishTank20200213_154940,730119.0,1.831345,182.0,200.0,26.529762,81.85,127.94
FishTank20200214_153519,495610.0,0.203991,75.0,14.0,19.442308,52.25,71.43
FishTank20200217_160052,601226.0,0.536071,91.0,53.0,20.398734,44.6,83.87
FishTank20200218_153008,537880.0,1.39771,133.0,119.0,23.641509,63.3,101.0
FishTank20200316_163320,1032353.0,0.361601,105.0,53.0,18.758794,46.0,81.22
