In [1]:
import os
import time
import tqdm
import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.patches import Patch, FancyArrowPatch
import logging
import yaml
import json
import joblib
from sklearn.impute import SimpleImputer
from scipy.stats.contingency import crosstab
import networkx as nx
from matplotlib.lines import Line2D
import umap
import itertools
from sklearn.preprocessing import power_transform

#home = os.path.expanduser("~")
sys.path.append(os.getcwd())
from functions.load_model import load_tolist
import functions.visualise as vis
import functions.process as proc
from functions.io import setup_logger, makedir
from functions import FeatureEngine
sys.path.append(os.path.expanduser('~'))
from PpaPy.processing.preprocess import addhistory, select_features
from functions.modelfunctions import add_power_transform, select_features, addhistory
import argparse

import pickle
from sklearn import set_config
 
from numba import jit
# set invalid (division by zero error) to ignore
np.seterr(invalid='ignore')


class NpIntEncoder(json.JSONEncoder):
    """JSON encoder that serialises NumPy integer scalars as built-in ints."""

    def default(self, obj):
        """Return ``int(obj)`` for NumPy integers; defer anything else to the base class.

        The base ``JSONEncoder.default`` raises ``TypeError`` for objects it
        cannot serialise, so unsupported types still fail loudly.
        """
        if not isinstance(obj, np.integer):
            return super().default(obj)
        return int(obj)

class NanConverter(json.JSONEncoder):
    """JSON encoder that writes float NaN values as ``null``.

    The standard encoder emits the non-standard token ``NaN``, which strict
    JSON parsers reject. This encoder replaces NaN floats (including
    ``np.float64``, which subclasses ``float``) with ``None`` before encoding.
    """

    def nan2None(self, obj):
        """Return a copy of ``obj`` with every float NaN replaced by ``None``.

        Recurses into dicts (values only), lists and tuples. Tuples are
        returned as lists, which is how JSON serialises them anyway. Any other
        object is returned unchanged.
        """
        if isinstance(obj, dict):
            return {k: self.nan2None(v) for k, v in obj.items()}
        if isinstance(obj, (list, tuple)):
            return [self.nan2None(v) for v in obj]
        if isinstance(obj, float) and np.isnan(obj):
            return None
        return obj

    def iterencode(self, obj, *args, **kwargs):
        """Encode ``obj`` after NaN replacement.

        ``json.dump`` calls ``iterencode`` directly and ``JSONEncoder.encode``
        (used by ``json.dumps``) delegates to it, so hooking in here covers
        both the string and the file-writing code paths with a single pass.
        """
        return super().iterencode(self.nan2None(obj), *args, **kwargs)
    
# %% [markdown]
# Please provide where your files are stored and where you would like your data to be saved in the following section.

  @numba.jit()
  @numba.jit()
  @numba.jit()
  @numba.jit()
2024-05-15 14:55:59.403541: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:

# %%
# User-editable locations for this run.
# NOTE(review): these are absolute, machine-specific paths; adjust them before
# running on another system.

# Input location. When `inpath_with_subfolders` is True this is a parent
# directory whose entries are filtered by `inpath_pattern`; otherwise it is
# used directly as the single input folder.
inpath = "/gpfs/soma_fs/scratch/src/boeger/data_gueniz/"
inpath_with_subfolders = True
# An entry of `inpath` is selected if its name contains any of these substrings.
inpath_pattern = ["Exp1_WT_OP50"]
# Base output location; passed to makedir() in the next cell.
args_out = "/gpfs/soma_fs/scratch/src/boeger/PpaPred_eren_35727184"

In [3]:
# makedir() is a project helper (functions.io); its return value is used as the
# output directory path below.
# NOTE(review): presumably it creates the directory if missing - confirm.
base_outpath = makedir(args_out)

# %%
date = time.strftime("%Y%m%d")
# The literal "H" and "M" follow the hour and minute, e.g. "20240515-14H55M".
datestr = time.strftime("%Y%m%d-%HH%MM")
home = os.path.expanduser("~")

# `inpath` is rebound below from a single path to a list of input folders.
# Only resolve it while it is still a single path, so that re-running this cell
# neither calls os.listdir() on a list (TypeError) nor wraps the list again.
if isinstance(inpath, (str, os.PathLike)):
    if inpath_with_subfolders:
        # os.listdir() returns entries in arbitrary order; sort them so the
        # processing order is reproducible between runs.
        new_inpath = [
            os.path.join(inpath, sub)
            for sub in sorted(os.listdir(inpath))
            if any(pat in sub for pat in inpath_pattern)
        ]
        inpath = new_inpath
    else:
        inpath = [inpath]

# One output folder per input folder, named after the input folder and placed
# under base_outpath. `outpath` is index-aligned with `inpath`.
outpath = []
for p in inpath:
    in_folder = os.path.basename(p)
    outpath.append(makedir(os.path.abspath(f"{base_outpath}/{in_folder}")))


# %%
# In the following section, standard model parameters are set. Change those only if necessary.
# Changes to the config file are preferable.
# Read the run configuration; the context manager makes sure the file handle is
# closed again instead of being left open for the lifetime of the kernel.
with open("config.yml", "r") as config_file:
    config = yaml.safe_load(config_file)

cluster_color = config['cluster_color']
cluster_group = config['cluster_group_man']
cluster_label = config['cluster_names']
# Map each cluster name to "<name>, <group>". The two mappings are paired by
# iteration order.
# NOTE(review): assumes 'cluster_names' and 'cluster_group_man' list the
# clusters in the same order - confirm against config.yml.
clu_group_label = {
    label: f'{label}, {group}'
    for label, group in zip(cluster_label.values(), cluster_group.values())
}
skip_already = config['settings']['skip_already']
overwrite = True

model_path = config['settings']['model']
# Version tag = second "_"-separated token of the model file name, without the
# extension (e.g. "name_v1.pkl" -> "v1").
version = os.path.basename(model_path).split("_")[1].split(".")[0]
ASpath = config['settings']['ASpath']
smooth = config['settings']['fbfill']
fps = config['settings']['fps']

# lists to store already processed files in
prediction_done = []

# %% [markdown]
# 1. Feature Engineering
# In the following section, additional features are calculated.
# The engineered data files are saved under the specified outpath/subfolder.
# (with subfolder being the inpath folder name postfixed by _engine)

In [4]:
# Show the input folders that will be processed.
inpath

['/gpfs/soma_fs/scratch/src/boeger/data_gueniz/Exp1_WT_OP50']

In [10]:
# Run feature engineering for every input folder, writing into the matching
# output folder.
# NOTE(review): skip_already is hard-coded to False here, so the `skip_already`
# value read from config.yml is not used by this call - confirm this is intended.
XYs, CLines = FeatureEngine.run(
    inpath,
    outpath,
    return_XYCLine=True,
    skip_engine=False,
    skip_already=False,
    out_fn_suffix='prediction',
)  # skip_engine skip_already

# %%

set_config(transform_output="pandas")
# NOTE: joblib.load unpickles arbitrary objects; only load model files from a
# trusted source.
# Open the model file in a context manager so the handle is closed after loading.
with open(model_path, 'rb') as model_file:
    model = joblib.load(model_file)
augsel = joblib.load(ASpath)
imp = SimpleImputer(missing_values=np.nan, strategy='mean')

# %%
# Collect every file below base_outpath whose containing folder name matches
# one of the input patterns.
all_engine = [
    os.path.join(root, name)
    for root, _dirs, files in os.walk(base_outpath)
    for name in files
    if any(pat in os.path.basename(root) for pat in inpath_pattern)
]

# %% [markdown]
# ## 3. Prediction

  0%|          | 0/92 [00:00<?, ?it/s]


feature calculation for GGE0241_c_results_204.json_labeldata.csv
Area larger than threshold, collision assumed in [range(2576, 2577), range(2579, 2580), range(2581, 3160)].
Calculation of features will be done in splits, ignoring those and adjacent* ranges. *That are less than 1 sec long.
split 0, range: (25, 2576)
Ffill-Interpolation of nan frames
Exempted from interpolation: range(2576, 3160) (over 1 sec long)


  1%|          | 1/92 [00:01<01:59,  1.31s/it]


feature calculation for GGE0241_c_results_3.json_labeldata.csv
Area larger than threshold, collision assumed in [].
Calculation of features will be done in splits, ignoring those and adjacent* ranges. *That are less than 1 sec long.
split 0, range: (0, 2075)


  2%|▏         | 2/92 [00:02<01:37,  1.08s/it]

Ffill-Interpolation of nan frames

feature calculation for GGE0241_c_results_34.json_labeldata.csv
Area larger than threshold, collision assumed in [].
Calculation of features will be done in splits, ignoring those and adjacent* ranges. *That are less than 1 sec long.
split 0, range: (51, 2015)
Worm seems to travel backwards the majority of time, flipped all centerlines to agree with forward travelling assumption


  3%|▎         | 3/92 [00:03<01:28,  1.01it/s]

Ffill-Interpolation of nan frames
Exempted from interpolation: range(0, 51) (over 1 sec long)

feature calculation for GGE0243_c_results_59.json_labeldata.csv
Area larger than threshold, collision assumed in [range(1, 11), range(22, 25)].
Calculation of features will be done in splits, ignoring those and adjacent* ranges. *That are less than 1 sec long.
split 0, range: (25, 1514)
split 1, range: (1523, 6353)
Ffill-Interpolation of nan frames


  4%|▍         | 4/92 [00:05<02:24,  1.64s/it]


feature calculation for GGE0132_c_results_4.json_labeldata.csv
Area larger than threshold, collision assumed in [range(3246, 3284), range(3285, 3286), range(3291, 3292)].
Calculation of features will be done in splits, ignoring those and adjacent* ranges. *That are less than 1 sec long.
split 0, range: (0, 3246)
Following frames seem to be tracked upside down
We are going to flip those back:
(array([25]),)
Ffill-Interpolation of nan frames
Exempted from interpolation: range(3246, 3292) (over 1 sec long)


  5%|▌         | 5/92 [00:07<02:16,  1.57s/it]


feature calculation for GGE0241_c_results_201.json_labeldata.csv
Area larger than threshold, collision assumed in [range(2823, 2824), range(2825, 2930)].
Calculation of features will be done in splits, ignoring those and adjacent* ranges. *That are less than 1 sec long.
split 0, range: (25, 2823)
Following frames seem to be tracked upside down
We are going to flip those back:
(array([1402, 1405, 1429]),)
Worm seems to travel backwards the majority of time, flipped all centerlines to agree with forward travelling assumption
Ffill-Interpolation of nan frames
Exempted from interpolation: range(2823, 2930) (over 1 sec long)


  7%|▋         | 6/92 [00:08<02:09,  1.51s/it]


feature calculation for GGE0243_c_results_19.json_labeldata.csv
Area larger than threshold, collision assumed in [].
Calculation of features will be done in splits, ignoring those and adjacent* ranges. *That are less than 1 sec long.
split 0, range: (0, 2668)


  8%|▊         | 7/92 [00:09<02:00,  1.42s/it]

Ffill-Interpolation of nan frames

feature calculation for GGE0241_c_results_16.json_labeldata.csv
Area larger than threshold, collision assumed in [].
Calculation of features will be done in splits, ignoring those and adjacent* ranges. *That are less than 1 sec long.
split 0, range: (0, 3782)
Ffill-Interpolation of nan frames


  9%|▊         | 8/92 [00:11<02:05,  1.49s/it]


feature calculation for GGE0133_c_results_53.json_labeldata.csv
Area larger than threshold, collision assumed in [].
Calculation of features will be done in splits, ignoring those and adjacent* ranges. *That are less than 1 sec long.
split 0, range: (0, 12191)
Ffill-Interpolation of nan frames


 10%|▉         | 9/92 [00:16<03:35,  2.59s/it]


feature calculation for GGE0131_c_results_59.json_labeldata.csv
Area larger than threshold, collision assumed in [range(356, 359), range(409, 410), range(414, 416), range(418, 419), range(502, 503), range(588, 595), range(602, 603), range(638, 639), range(725, 727), range(731, 734), range(800, 801)].
Calculation of features will be done in splits, ignoring those and adjacent* ranges. *That are less than 1 sec long.
split 0, range: (8, 356)
split 1, range: (359, 409)
split 2, range: (419, 502)


  thresh = thresh*np.nanmax(signal)
  thresh = thresh*np.nanmax(signal)
  thresh = thresh*np.nanmax(signal)
  thresh = thresh*np.nanmax(signal)


split 3, range: (503, 588)
split 4, range: (603, 638)
split 5, range: (639, 725)
split 6, range: (734, 800)


  thresh = thresh*np.nanmax(signal)
  thresh = thresh*np.nanmax(signal)
  thresh = thresh*np.nanmax(signal)
  thresh = thresh*np.nanmax(signal)
  thresh = thresh*np.nanmax(signal)
  thresh = thresh*np.nanmax(signal)


split 7, range: (801, 1156)
split 8, range: (1174, 1434)
Worm seems to travel backwards the majority of time, flipped all centerlines to agree with forward travelling assumption
split 9, range: (1440, 2580)
split 10, range: (2581, 2675)
split 11, range: (2751, 2784)
split 12, range: (2833, 2906)
split 13, range: (2919, 2955)


  thresh = thresh*np.nanmax(signal)
  thresh = thresh*np.nanmax(signal)
  thresh = thresh*np.nanmax(signal)
  thresh = thresh*np.nanmax(signal)
  thresh = thresh*np.nanmax(signal)
  thresh = thresh*np.nanmax(signal)
  thresh = thresh*np.nanmax(signal)


Ffill-Interpolation of nan frames
Exempted from interpolation: range(2675, 2751) (over 1 sec long)
Exempted from interpolation: range(2784, 2833) (over 1 sec long)


 11%|█         | 10/92 [00:18<03:19,  2.43s/it]


feature calculation for GGE0243_c_results_4.json_labeldata.csv
Area larger than threshold, collision assumed in [].
Calculation of features will be done in splits, ignoring those and adjacent* ranges. *That are less than 1 sec long.
split 0, range: (0, 3851)
Ffill-Interpolation of nan frames


 12%|█▏        | 11/92 [00:20<02:57,  2.19s/it]


feature calculation for GGE0132_c_results_16.json_labeldata.csv
Area larger than threshold, collision assumed in [].
Calculation of features will be done in splits, ignoring those and adjacent* ranges. *That are less than 1 sec long.
split 0, range: (0, 3769)
Following frames seem to be tracked upside down
We are going to flip those back:
(array([1605, 1606]),)
Ffill-Interpolation of nan frames


 13%|█▎        | 12/92 [00:21<02:41,  2.02s/it]


feature calculation for GGE0243_c_results_74.json_labeldata.csv
Area larger than threshold, collision assumed in [].
Calculation of features will be done in splits, ignoring those and adjacent* ranges. *That are less than 1 sec long.
split 0, range: (0, 252)
split 1, range: (253, 3286)
Ffill-Interpolation of nan frames


 14%|█▍        | 13/92 [00:23<02:28,  1.87s/it]


feature calculation for GGE0132_c_results_18.json_labeldata.csv
Area larger than threshold, collision assumed in [].
Calculation of features will be done in splits, ignoring those and adjacent* ranges. *That are less than 1 sec long.
split 0, range: (0, 6515)
Following frames seem to be tracked upside down
We are going to flip those back:
(array([  47,   49,   50,   51,   67,   68,   69, 2041, 2042, 4377, 4378,
       4380, 4382, 4383, 4384, 4385, 4386, 4387, 4388, 4389, 4390, 4391,
       4392, 4393, 4394, 4395, 4396, 4397, 4398, 4399, 4400, 4401, 4402,
       4403, 4404, 4405, 4406, 4407, 4408, 4409, 4410, 4411, 4412, 4413,
       4414, 4415, 4416, 4417, 4418, 4419, 4420]),)
Ffill-Interpolation of nan frames


 15%|█▌        | 14/92 [00:26<02:44,  2.11s/it]


feature calculation for GGE0132_c_results_5.json_labeldata.csv
Area larger than threshold, collision assumed in [].
Calculation of features will be done in splits, ignoring those and adjacent* ranges. *That are less than 1 sec long.
split 0, range: (0, 529)
Following frames seem to be tracked upside down
We are going to flip those back:
(array([467, 468]),)
split 1, range: (530, 583)
Following frames seem to be tracked upside down
We are going to flip those back:
(array([39, 40, 42, 43, 44, 45, 52]),)
Worm seems to travel backwards the majority of time, flipped all centerlines to agree with forward travelling assumption


  thresh = thresh*np.nanmax(signal)
  thresh = thresh*np.nanmax(signal)


split 2, range: (598, 8666)
Following frames seem to be tracked upside down
We are going to flip those back:
(array([ 475,  476,  477,  478,  479,  480,  481,  482,  483,  550,  551,
        553,  558,  559,  560,  561,  570,  571,  572, 5350, 6121, 6973,
       6974, 6975, 6976]),)
Ffill-Interpolation of nan frames


 16%|█▋        | 15/92 [00:40<07:19,  5.71s/it]


feature calculation for GGE0243_c_results_33.json_labeldata.csv
Area larger than threshold, collision assumed in [].
Calculation of features will be done in splits, ignoring those and adjacent* ranges. *That are less than 1 sec long.
split 0, range: (0, 837)
Following frames seem to be tracked upside down
We are going to flip those back:
(array([823, 830, 834]),)
split 1, range: (840, 3219)
Following frames seem to be tracked upside down
We are going to flip those back:
(array([   2,    3,    4,    5,    6,    7,    8,    9,   10,   11,   12,
         13,   14,  882, 1081, 1656, 1657, 1684, 1719, 2192, 2193, 2194,
       2195, 2196, 2197, 2198, 2199, 2230, 2231, 2232]),)
Ffill-Interpolation of nan frames


 17%|█▋        | 16/92 [00:41<05:36,  4.42s/it]


feature calculation for GGE0131_c_results_6.json_labeldata.csv
Area larger than threshold, collision assumed in [].
Calculation of features will be done in splits, ignoring those and adjacent* ranges. *That are less than 1 sec long.
split 0, range: (0, 2181)
split 1, range: (2281, 3363)
Following frames seem to be tracked upside down
We are going to flip those back:
(array([ 618,  619,  620,  621,  622,  623,  624,  625,  626,  627,  628,
        629,  630,  631,  632,  633,  634,  635,  636,  637,  638,  639,
        640,  641,  642,  643,  644,  645,  646,  647,  648,  649,  650,
        651,  652,  653,  654,  655,  656,  657,  658,  659,  660,  661,
        662,  663,  664,  665,  666,  667,  668,  669,  670,  671,  672,
        673,  674,  675,  676,  677,  678,  679,  680,  681,  682,  683,
        684,  685,  686,  687,  688,  689,  690,  691,  692,  693,  694,
        695,  696,  697,  698,  699,  700,  701,  702,  703,  704,  705,
        706,  707,  708,  709,  710,  711,  7

 18%|█▊        | 17/92 [00:43<04:31,  3.62s/it]


feature calculation for GGE0243_c_results_75.json_labeldata.csv
Area larger than threshold, collision assumed in [].
Calculation of features will be done in splits, ignoring those and adjacent* ranges. *That are less than 1 sec long.
split 0, range: (0, 2360)
Ffill-Interpolation of nan frames


 20%|█▉        | 18/92 [00:44<03:30,  2.85s/it]


feature calculation for GGE0133_c_results_29.json_labeldata.csv
Area larger than threshold, collision assumed in [].
Calculation of features will be done in splits, ignoring those and adjacent* ranges. *That are less than 1 sec long.
split 0, range: (3, 2310)
Following frames seem to be tracked upside down
We are going to flip those back:
(array([2251, 2253]),)
split 1, range: (2327, 2378)
Ffill-Interpolation of nan frames


  thresh = thresh*np.nanmax(signal)
  thresh = thresh*np.nanmax(signal)
 21%|██        | 19/92 [00:46<03:16,  2.69s/it]


feature calculation for GGE0241_c_results_20.json_labeldata.csv
Area larger than threshold, collision assumed in [range(1, 8), range(10, 11), range(15, 20), range(22, 24), range(25, 26), range(45, 46), range(193, 194), range(1008, 1010), range(1011, 1015), range(1020, 1022), range(1023, 1073), range(1074, 1076), range(1077, 1081), range(1082, 1094), range(1095, 1096), range(1098, 1099), range(1107, 1109), range(1110, 1112), range(1134, 1140), range(1158, 1166), range(1167, 1168), range(1172, 1173), range(1174, 1183), range(1185, 1187), range(1188, 1298), range(1299, 1305), range(1306, 1308), range(1309, 1316), range(1318, 1330), range(1331, 1332), range(1362, 1363), range(1365, 1369), range(1370, 1382), range(1387, 1388), range(1393, 1394), range(1398, 1399), range(1401, 1403), range(1404, 1406), range(1413, 1423), range(1424, 1431), range(1432, 1434), range(1435, 1436), range(1437, 1439), range(1440, 1441), range(1443, 1444), range(1445, 1452), range(1453, 1454), range(1458, 1461), r

  thresh = thresh*np.nanmax(signal)


split 2, range: (1990, 2198)
split 3, range: (2199, 2472)
Following frames seem to be tracked upside down
We are going to flip those back:
(array([12, 13, 15, 16]),)
split 4, range: (2488, 5958)
Following frames seem to be tracked upside down
We are going to flip those back:
(array([2577, 2607]),)
Ffill-Interpolation of nan frames
Exempted from interpolation: range(0, 46) (over 1 sec long)
Exempted from interpolation: range(1008, 1990) (over 1 sec long)


 22%|██▏       | 20/92 [00:49<03:08,  2.62s/it]


feature calculation for GGE0241_c_results_12.json_labeldata.csv
Area larger than threshold, collision assumed in [].
Calculation of features will be done in splits, ignoring those and adjacent* ranges. *That are less than 1 sec long.
split 0, range: (0, 2011)


 23%|██▎       | 21/92 [00:50<02:30,  2.12s/it]

Ffill-Interpolation of nan frames

feature calculation for GGE0133_c_results_54.json_labeldata.csv
Area larger than threshold, collision assumed in [].
Calculation of features will be done in splits, ignoring those and adjacent* ranges. *That are less than 1 sec long.
split 0, range: (0, 2956)
Ffill-Interpolation of nan frames


 24%|██▍       | 22/92 [00:51<02:12,  1.89s/it]


feature calculation for GGE0243_c_results_15.json_labeldata.csv
Area larger than threshold, collision assumed in [].
Calculation of features will be done in splits, ignoring those and adjacent* ranges. *That are less than 1 sec long.
split 0, range: (0, 1875)
Following frames seem to be tracked upside down
We are going to flip those back:
(array([   2,    3,    4, ..., 1872, 1873, 1874]),)
Worm seems to travel backwards the majority of time, flipped all centerlines to agree with forward travelling assumption


 25%|██▌       | 23/92 [00:52<01:48,  1.58s/it]

Ffill-Interpolation of nan frames

feature calculation for GGE0241_c_results_19.json_labeldata.csv
Area larger than threshold, collision assumed in [].
Calculation of features will be done in splits, ignoring those and adjacent* ranges. *That are less than 1 sec long.
split 0, range: (0, 3158)
split 1, range: (3162, 3211)
Following frames seem to be tracked upside down
We are going to flip those back:
(array([1]),)
split 2, range: (3234, 3459)
split 3, range: (3499, 3669)


  thresh = thresh*np.nanmax(signal)
  thresh = thresh*np.nanmax(signal)


split 4, range: (3672, 3867)
split 5, range: (3887, 3955)
split 6, range: (3968, 4111)


  thresh = thresh*np.nanmax(signal)
  thresh = thresh*np.nanmax(signal)


Ffill-Interpolation of nan frames
Exempted from interpolation: range(3459, 3499) (over 1 sec long)


 26%|██▌       | 24/92 [00:54<01:57,  1.73s/it]


feature calculation for GGE0243_c_results_65.json_labeldata.csv
Area larger than threshold, collision assumed in [].
Calculation of features will be done in splits, ignoring those and adjacent* ranges. *That are less than 1 sec long.
split 0, range: (0, 2492)


 27%|██▋       | 25/92 [00:55<01:42,  1.53s/it]

Ffill-Interpolation of nan frames
Exempted from interpolation: range(2492, 2527) (over 1 sec long)

feature calculation for GGE0131_c_results_7.json_labeldata.csv
Area larger than threshold, collision assumed in [].
Calculation of features will be done in splits, ignoring those and adjacent* ranges. *That are less than 1 sec long.
split 0, range: (0, 2094)
Following frames seem to be tracked upside down
We are going to flip those back:
(array([2088]),)
split 1, range: (2133, 2173)
Worm seems to travel backwards the majority of time, flipped all centerlines to agree with forward travelling assumption
Ffill-Interpolation of nan frames
Exempted from interpolation: range(2094, 2133) (over 1 sec long)
Exempted from interpolation: range(2173, 2205) (over 1 sec long)


  thresh = thresh*np.nanmax(signal)
  thresh = thresh*np.nanmax(signal)
 28%|██▊       | 26/92 [00:56<01:30,  1.37s/it]


feature calculation for GGE0243_c_results_8.json_labeldata.csv
Area larger than threshold, collision assumed in [].
Calculation of features will be done in splits, ignoring those and adjacent* ranges. *That are less than 1 sec long.
split 0, range: (0, 7624)
Following frames seem to be tracked upside down
We are going to flip those back:
(array([1137, 1653, 1964, 1965, 1975, 1976, 1977, 1978, 1980, 1981, 1982,
       1983, 1984, 2195, 2323, 2324, 2325, 2326, 2327, 2328, 2330, 2331,
       2332, 2333, 2337, 2338, 2339, 2340, 2348, 2349, 2351, 2352, 4662,
       4663, 4664, 4665, 4666, 4667, 4668, 4721, 4722, 4723, 4724, 4725,
       4726, 4727, 4728, 4729, 4730, 4731, 4732, 4733, 4734, 4735, 4736,
       4745, 4746, 4747, 6082, 6083, 6084, 7623]),)
Ffill-Interpolation of nan frames


 29%|██▉       | 27/92 [00:59<02:01,  1.88s/it]


feature calculation for GGE0132_c_results_7.json_labeldata.csv
Area larger than threshold, collision assumed in [range(1733, 1752)].
Calculation of features will be done in splits, ignoring those and adjacent* ranges. *That are less than 1 sec long.
split 0, range: (0, 453)
split 1, range: (454, 1733)
Following frames seem to be tracked upside down
We are going to flip those back:
(array([  39,  111,  113,  115,  116,  117,  118,  119,  120,  121,  131,
        132,  133,  851,  852,  853,  854,  855,  856,  857,  858,  859,
        860,  861,  862,  863,  864,  865,  866,  867,  868,  869,  870,
        871,  872,  873,  874,  875,  876,  877,  878,  879,  880,  881,
        882,  883,  884,  885,  886,  887,  888,  889,  890,  891,  892,
        893,  894,  895,  896,  897,  898,  899,  900,  901,  902,  903,
        904,  905,  906,  907,  908,  909,  910,  911,  912,  913,  914,
        915,  916,  917,  918,  919,  920,  921,  922,  923,  924,  925,
        926,  927,  928,  929,

 30%|███       | 28/92 [01:07<03:50,  3.60s/it]


feature calculation for GGE0131_c_results_4.json_labeldata.csv
Area larger than threshold, collision assumed in [].
Calculation of features will be done in splits, ignoring those and adjacent* ranges. *That are less than 1 sec long.
split 0, range: (0, 2182)
split 1, range: (2303, 3363)
Ffill-Interpolation of nan frames
Exempted from interpolation: range(2182, 2303) (over 1 sec long)


 32%|███▏      | 29/92 [01:08<03:09,  3.00s/it]


feature calculation for GGE0243_c_results_35.json_labeldata.csv
Area larger than threshold, collision assumed in [].
Calculation of features will be done in splits, ignoring those and adjacent* ranges. *That are less than 1 sec long.
split 0, range: (0, 3473)
Ffill-Interpolation of nan frames


 33%|███▎      | 30/92 [01:10<02:36,  2.53s/it]


feature calculation for GGE0243_c_results_60.json_labeldata.csv
Area larger than threshold, collision assumed in [].
Calculation of features will be done in splits, ignoring those and adjacent* ranges. *That are less than 1 sec long.
split 0, range: (0, 2900)
Following frames seem to be tracked upside down
We are going to flip those back:
(array([   2,    3,    4, ..., 2897, 2898, 2899]),)
Worm seems to travel backwards the majority of time, flipped all centerlines to agree with forward travelling assumption
Ffill-Interpolation of nan frames


 34%|███▎      | 31/92 [01:11<02:12,  2.17s/it]


feature calculation for GGE0243_c_results_5.json_labeldata.csv
Area larger than threshold, collision assumed in [].
Calculation of features will be done in splits, ignoring those and adjacent* ranges. *That are less than 1 sec long.
split 0, range: (0, 2938)
split 1, range: (2939, 3043)
Following frames seem to be tracked upside down
We are going to flip those back:
(array([11, 12, 13, 14, 15, 16, 37]),)
Ffill-Interpolation of nan frames
Exempted from interpolation: range(3043, 3079) (over 1 sec long)


  thresh = thresh*np.nanmax(signal)
 35%|███▍      | 32/92 [01:13<02:01,  2.02s/it]


feature calculation for GGE0243_c_results_7.json_labeldata.csv
Area larger than threshold, collision assumed in [range(4439, 4446)].
Calculation of features will be done in splits, ignoring those and adjacent* ranges. *That are less than 1 sec long.
split 0, range: (0, 4439)
split 1, range: (4449, 8095)
Ffill-Interpolation of nan frames


 36%|███▌      | 33/92 [01:16<02:22,  2.42s/it]


feature calculation for GGE0241_c_results_17.json_labeldata.csv
Area larger than threshold, collision assumed in [range(1529, 1543), range(1756, 1763)].
Calculation of features will be done in splits, ignoring those and adjacent* ranges. *That are less than 1 sec long.
split 0, range: (0, 1529)
Following frames seem to be tracked upside down
We are going to flip those back:
(array([1075, 1105]),)
split 1, range: (1543, 1756)
split 2, range: (1763, 4586)
Ffill-Interpolation of nan frames


 37%|███▋      | 34/92 [01:18<02:14,  2.32s/it]


feature calculation for GGE0243_c_results_39.json_labeldata.csv
Area larger than threshold, collision assumed in [].
Calculation of features will be done in splits, ignoring those and adjacent* ranges. *That are less than 1 sec long.
split 0, range: (11, 316)
split 1, range: (354, 3119)
Following frames seem to be tracked upside down
We are going to flip those back:
(array([2718, 2719, 2720, 2721, 2722, 2723, 2724, 2725, 2726, 2727, 2728,
       2729, 2730, 2731, 2732, 2733, 2734, 2735, 2736, 2737, 2738, 2739,
       2740, 2741, 2742, 2743, 2744, 2745, 2746, 2747, 2748, 2749, 2750,
       2751, 2752, 2753, 2754, 2755, 2756, 2757, 2758, 2759, 2760, 2763,
       2764]),)
Ffill-Interpolation of nan frames
Exempted from interpolation: range(316, 354) (over 1 sec long)


 38%|███▊      | 35/92 [01:19<01:56,  2.04s/it]


feature calculation for GGE0241_c_results_190.json_labeldata.csv
Area larger than threshold, collision assumed in [range(3894, 4497)].
Calculation of features will be done in splits, ignoring those and adjacent* ranges. *That are less than 1 sec long.
split 0, range: (27, 663)
split 1, range: (682, 3894)
Ffill-Interpolation of nan frames
Exempted from interpolation: range(3894, 4497) (over 1 sec long)


 39%|███▉      | 36/92 [01:21<01:54,  2.05s/it]


feature calculation for GGE0241_c_results_186.json_labeldata.csv
Area larger than threshold, collision assumed in [range(3924, 3947), range(3981, 4578)].
Calculation of features will be done in splits, ignoring those and adjacent* ranges. *That are less than 1 sec long.
split 0, range: (2, 56)
split 1, range: (117, 219)
split 2, range: (220, 753)


  thresh = thresh*np.nanmax(signal)
  thresh = thresh*np.nanmax(signal)
  thresh = thresh*np.nanmax(signal)


split 3, range: (755, 3924)
split 4, range: (3947, 3981)
Ffill-Interpolation of nan frames
Exempted from interpolation: range(56, 117) (over 1 sec long)
Exempted from interpolation: range(3981, 4578) (over 1 sec long)


  thresh = thresh*np.nanmax(signal)
  thresh = thresh*np.nanmax(signal)
 40%|████      | 37/92 [01:24<01:52,  2.04s/it]


feature calculation for GGE0132_c_results_14.json_labeldata.csv
Area larger than threshold, collision assumed in [].
Calculation of features will be done in splits, ignoring those and adjacent* ranges. *That are less than 1 sec long.
split 0, range: (0, 1216)
Following frames seem to be tracked upside down
We are going to flip those back:
(array([1210, 1211, 1212, 1214]),)
split 1, range: (1219, 4121)
Ffill-Interpolation of nan frames


 41%|████▏     | 38/92 [01:25<01:46,  1.98s/it]


feature calculation for GGE0241_c_results_10.json_labeldata.csv
Area larger than threshold, collision assumed in [].
Calculation of features will be done in splits, ignoring those and adjacent* ranges. *That are less than 1 sec long.
split 0, range: (0, 3229)
split 1, range: (3230, 3484)
split 2, range: (3500, 3888)
Following frames seem to be tracked upside down
We are going to flip those back:
(array([236]),)
Ffill-Interpolation of nan frames


 42%|████▏     | 39/92 [01:27<01:42,  1.94s/it]


feature calculation for GGE0133_c_results_50.json_labeldata.csv
Area larger than threshold, collision assumed in [].
Calculation of features will be done in splits, ignoring those and adjacent* ranges. *That are less than 1 sec long.
split 0, range: (0, 2431)
Ffill-Interpolation of nan frames


 43%|████▎     | 40/92 [01:28<01:28,  1.70s/it]


feature calculation for GGE0243_c_results_0.json_labeldata.csv
Area larger than threshold, collision assumed in [].
Calculation of features will be done in splits, ignoring those and adjacent* ranges. *That are less than 1 sec long.
split 0, range: (0, 14318)
Ffill-Interpolation of nan frames


 45%|████▍     | 41/92 [01:34<02:27,  2.89s/it]


feature calculation for GGE0241_c_results_5.json_labeldata.csv
Area larger than threshold, collision assumed in [range(96, 97), range(98, 99), range(101, 102), range(103, 105), range(106, 107), range(109, 110), range(111, 118)].
Calculation of features will be done in splits, ignoring those and adjacent* ranges. *That are less than 1 sec long.
split 0, range: (0, 96)
Following frames seem to be tracked upside down
We are going to flip those back:
(array([55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 68, 83, 88, 90, 92,
       94, 95]),)
split 1, range: (118, 3157)


  thresh = thresh*np.nanmax(signal)


Ffill-Interpolation of nan frames


 46%|████▌     | 42/92 [01:36<02:03,  2.48s/it]


feature calculation for GGE0243_c_results_9.json_labeldata.csv
Area larger than threshold, collision assumed in [range(7293, 7306)].
Calculation of features will be done in splits, ignoring those and adjacent* ranges. *That are less than 1 sec long.
split 0, range: (0, 7293)
Following frames seem to be tracked upside down
We are going to flip those back:
(array([4569, 4571, 4574, ..., 7290, 7291, 7292]),)
split 1, range: (7306, 8530)
Following frames seem to be tracked upside down
We are going to flip those back:
(array([  4,  10,  16, 241, 244, 245, 257]),)
Ffill-Interpolation of nan frames


 47%|████▋     | 43/92 [01:42<02:54,  3.57s/it]


feature calculation for GGE0241_c_results_14.json_labeldata.csv
Area larger than threshold, collision assumed in [].
Calculation of features will be done in splits, ignoring those and adjacent* ranges. *That are less than 1 sec long.
split 0, range: (0, 2533)
Following frames seem to be tracked upside down
We are going to flip those back:
(array([324, 325]),)
Ffill-Interpolation of nan frames


 48%|████▊     | 44/92 [01:46<03:06,  3.88s/it]


feature calculation for GGE0243_c_results_56.json_labeldata.csv
Area larger than threshold, collision assumed in [].
Calculation of features will be done in splits, ignoring those and adjacent* ranges. *That are less than 1 sec long.
split 0, range: (0, 3594)
Ffill-Interpolation of nan frames


 49%|████▉     | 45/92 [01:51<03:21,  4.28s/it]


feature calculation for GGE0241_c_results_175.json_labeldata.csv
Area larger than threshold, collision assumed in [range(4176, 4177), range(4178, 4363), range(4364, 4366), range(4367, 4369), range(4371, 4372), range(4374, 4375), range(4381, 4382), range(4383, 4384), range(4387, 4390), range(4391, 4478), range(4480, 4481), range(4483, 4493), range(4505, 4506), range(4507, 4784)].
Calculation of features will be done in splits, ignoring those and adjacent* ranges. *That are less than 1 sec long.
split 0, range: (0, 62)
Following frames seem to be tracked upside down
We are going to flip those back:
(array([51]),)
split 1, range: (63, 4176)
Following frames seem to be tracked upside down
We are going to flip those back:
(array([31]),)


  thresh = thresh*np.nanmax(signal)


Ffill-Interpolation of nan frames
Exempted from interpolation: range(4176, 4784) (over 1 sec long)


 50%|█████     | 46/92 [01:54<02:47,  3.65s/it]


feature calculation for GGE0133_c_results_2.json_labeldata.csv
Area larger than threshold, collision assumed in [].
Calculation of features will be done in splits, ignoring those and adjacent* ranges. *That are less than 1 sec long.
split 0, range: (0, 2101)
Following frames seem to be tracked upside down
We are going to flip those back:
(array([384]),)
Ffill-Interpolation of nan frames


 51%|█████     | 47/92 [01:55<02:11,  2.92s/it]


feature calculation for GGE0132_c_results_15.json_labeldata.csv
Area larger than threshold, collision assumed in [].
Calculation of features will be done in splits, ignoring those and adjacent* ranges. *That are less than 1 sec long.
split 0, range: (0, 12185)
Following frames seem to be tracked upside down
We are going to flip those back:
(array([   2,    3,    4, ..., 6775, 6776, 7303]),)
Worm seems to travel backwards the majority of time, flipped all centerlines to agree with forward travelling assumption
Ffill-Interpolation of nan frames


 52%|█████▏    | 48/92 [02:00<02:31,  3.45s/it]


feature calculation for GGE0241_c_results_8.json_labeldata.csv
Area larger than threshold, collision assumed in [].
Calculation of features will be done in splits, ignoring those and adjacent* ranges. *That are less than 1 sec long.
split 0, range: (0, 3156)
split 1, range: (3162, 3206)
Worm seems to travel backwards the majority of time, flipped all centerlines to agree with forward travelling assumption
Ffill-Interpolation of nan frames


  thresh = thresh*np.nanmax(signal)
  thresh = thresh*np.nanmax(signal)
 53%|█████▎    | 49/92 [02:01<02:02,  2.84s/it]


feature calculation for GGE0243_c_results_12.json_labeldata.csv
Area larger than threshold, collision assumed in [].
Calculation of features will be done in splits, ignoring those and adjacent* ranges. *That are less than 1 sec long.
split 0, range: (0, 5827)
Following frames seem to be tracked upside down
We are going to flip those back:
(array([1125, 1126, 1127, 1128, 1129, 1306, 1308, 1310, 1314, 4210]),)
Ffill-Interpolation of nan frames


 54%|█████▍    | 50/92 [02:03<01:53,  2.71s/it]


feature calculation for GGE0241_c_results_37.json_labeldata.csv
Area larger than threshold, collision assumed in [range(0, 7), range(31, 33), range(50, 54), range(55, 59), range(60, 61), range(62, 63), range(65, 75), range(76, 81), range(82, 83), range(97, 105), range(116, 117), range(119, 120), range(124, 125), range(126, 128), range(129, 135), range(136, 202), range(210, 214), range(222, 224), range(225, 227), range(228, 248), range(249, 250), range(256, 257), range(259, 261), range(262, 289), range(291, 314), range(315, 316), range(322, 323), range(332, 354), range(355, 356), range(358, 927), range(928, 929), range(1191, 1192), range(1248, 1249), range(1251, 1255), range(1256, 1257), range(1293, 1294), range(1296, 1304), range(1316, 1317), range(1322, 1325), range(1331, 1347), range(1348, 1352), range(1354, 1355), range(1356, 1360), range(1361, 1362), range(1397, 1398), range(1407, 1408), range(1424, 1425), range(1459, 1460), range(1462, 1472), range(1473, 1476), range(1479, 1482),

  thresh = thresh*np.nanmax(signal)
  thresh = thresh*np.nanmax(signal)
  thresh = thresh*np.nanmax(signal)
  thresh = thresh*np.nanmax(signal)
  thresh = thresh*np.nanmax(signal)
  thresh = thresh*np.nanmax(signal)
  thresh = thresh*np.nanmax(signal)
  thresh = thresh*np.nanmax(signal)
  thresh = thresh*np.nanmax(signal)


split 4, range: (1425, 1459)
split 5, range: (1695, 2119)
split 6, range: (2122, 2403)
Following frames seem to be tracked upside down
We are going to flip those back:
(array([3, 5]),)
split 7, range: (2451, 2534)
Worm seems to travel backwards the majority of time, flipped all centerlines to agree with forward travelling assumption
split 8, range: (2546, 2992)
split 9, range: (3000, 3166)
Following frames seem to be tracked upside down
We are going to flip those back:
(array([ 57, 107]),)
Worm seems to travel backwards the majority of time, flipped all centerlines to agree with forward travelling assumption


  thresh = thresh*np.nanmax(signal)


split 10, range: (3175, 3269)
split 11, range: (3316, 3649)
Following frames seem to be tracked upside down
We are going to flip those back:
(array([13, 16, 20, 82]),)
Ffill-Interpolation of nan frames
Exempted from interpolation: range(0, 929) (over 1 sec long)
Exempted from interpolation: range(1293, 1362) (over 1 sec long)
Exempted from interpolation: range(1459, 1695) (over 1 sec long)
Exempted from interpolation: range(2403, 2451) (over 1 sec long)
Exempted from interpolation: range(3269, 3316) (over 1 sec long)


  thresh = thresh*np.nanmax(signal)
 55%|█████▌    | 51/92 [02:05<01:41,  2.48s/it]


feature calculation for GGE0241_c_results_205.json_labeldata.csv
Area larger than threshold, collision assumed in [range(2081, 2082), range(2085, 2086), range(2087, 2088), range(2089, 2646)].
Calculation of features will be done in splits, ignoring those and adjacent* ranges. *That are less than 1 sec long.
split 0, range: (0, 2081)


 57%|█████▋    | 52/92 [02:06<01:22,  2.06s/it]

Ffill-Interpolation of nan frames
Exempted from interpolation: range(2081, 2646) (over 1 sec long)

feature calculation for GGE0132_c_results_19.json_labeldata.csv
Area larger than threshold, collision assumed in [].
Calculation of features will be done in splits, ignoring those and adjacent* ranges. *That are less than 1 sec long.
split 0, range: (0, 2701)
Following frames seem to be tracked upside down
We are going to flip those back:
(array([  70,   71,   72,   73,   74,   75,   76,   77,   78,   79,   80,
         81,   82,   83,   84,   85,   86,   87,   88,   89,   90,   91,
         92,   93,   94,   95,   96,   97,   98,  102,  105,  106,  107,
        108,  109,  110,  152,  153,  154,  155,  156,  157,  158,  159,
       2219]),)


 58%|█████▊    | 53/92 [02:08<01:09,  1.78s/it]

Ffill-Interpolation of nan frames

feature calculation for GGE0241_c_results_11.json_labeldata.csv
Area larger than threshold, collision assumed in [].
Calculation of features will be done in splits, ignoring those and adjacent* ranges. *That are less than 1 sec long.
split 0, range: (0, 522)
Following frames seem to be tracked upside down
We are going to flip those back:
(array([237, 238, 239]),)
split 1, range: (550, 3102)
Ffill-Interpolation of nan frames


 59%|█████▊    | 54/92 [02:09<01:02,  1.64s/it]


feature calculation for GGE0243_c_results_69.json_labeldata.csv
Area larger than threshold, collision assumed in [].
Calculation of features will be done in splits, ignoring those and adjacent* ranges. *That are less than 1 sec long.
split 0, range: (0, 3709)
Following frames seem to be tracked upside down
We are going to flip those back:
(array([165]),)
Ffill-Interpolation of nan frames


 60%|█████▉    | 55/92 [02:10<01:00,  1.64s/it]


feature calculation for GGE0133_c_results_0.json_labeldata.csv
Area larger than threshold, collision assumed in [].
Calculation of features will be done in splits, ignoring those and adjacent* ranges. *That are less than 1 sec long.
split 0, range: (0, 1848)


 61%|██████    | 56/92 [02:11<00:50,  1.40s/it]

Ffill-Interpolation of nan frames

feature calculation for GGE0133_c_results_30.json_labeldata.csv
Area larger than threshold, collision assumed in [].
Calculation of features will be done in splits, ignoring those and adjacent* ranges. *That are less than 1 sec long.
split 0, range: (3, 2264)


 62%|██████▏   | 57/92 [02:12<00:46,  1.33s/it]

Ffill-Interpolation of nan frames

feature calculation for GGE0133_c_results_11.json_labeldata.csv
Area larger than threshold, collision assumed in [].
Calculation of features will be done in splits, ignoring those and adjacent* ranges. *That are less than 1 sec long.
split 0, range: (0, 1848)
Following frames seem to be tracked upside down
We are going to flip those back:
(array([337, 349, 350, 354, 355, 356, 357, 358]),)


 63%|██████▎   | 58/92 [02:13<00:40,  1.18s/it]

Ffill-Interpolation of nan frames

feature calculation for GGE0133_c_results_32.json_labeldata.csv
Area larger than threshold, collision assumed in [].
Calculation of features will be done in splits, ignoring those and adjacent* ranges. *That are less than 1 sec long.
split 0, range: (0, 138)
split 1, range: (144, 2199)


  thresh = thresh*np.nanmax(signal)


split 2, range: (2216, 2267)
Ffill-Interpolation of nan frames


  thresh = thresh*np.nanmax(signal)
  thresh = thresh*np.nanmax(signal)
 64%|██████▍   | 59/92 [02:14<00:38,  1.18s/it]


feature calculation for GGE0243_c_results_34.json_labeldata.csv
Area larger than threshold, collision assumed in [range(3900, 3915)].
Calculation of features will be done in splits, ignoring those and adjacent* ranges. *That are less than 1 sec long.
split 0, range: (0, 3900)
split 1, range: (3923, 5139)
Ffill-Interpolation of nan frames


 65%|██████▌   | 60/92 [02:17<00:46,  1.46s/it]


feature calculation for GGE0131_c_results_14.json_labeldata.csv
Area larger than threshold, collision assumed in [].
Calculation of features will be done in splits, ignoring those and adjacent* ranges. *That are less than 1 sec long.
split 0, range: (0, 2143)
Following frames seem to be tracked upside down
We are going to flip those back:
(array([315]),)


 66%|██████▋   | 61/92 [02:18<00:40,  1.30s/it]

Ffill-Interpolation of nan frames

feature calculation for GGE0133_c_results_10.json_labeldata.csv
Area larger than threshold, collision assumed in [].
Calculation of features will be done in splits, ignoring those and adjacent* ranges. *That are less than 1 sec long.
split 0, range: (0, 2840)
Following frames seem to be tracked upside down
We are going to flip those back:
(array([2542, 2543, 2544, 2545, 2546, 2547, 2548, 2549, 2550, 2551, 2552,
       2553, 2554, 2555, 2556, 2557, 2558, 2559, 2560, 2561, 2562, 2563,
       2564, 2565, 2566, 2567, 2568, 2577, 2578, 2579, 2580, 2581, 2582]),)
Ffill-Interpolation of nan frames


 67%|██████▋   | 62/92 [02:19<00:38,  1.28s/it]


feature calculation for GGE0131_c_results_44.json_labeldata.csv
Area larger than threshold, collision assumed in [range(2066, 2073), range(2078, 2092)].
Calculation of features will be done in splits, ignoring those and adjacent* ranges. *That are less than 1 sec long.
split 0, range: (113, 2066)
split 1, range: (2092, 2636)
split 2, range: (2646, 3532)
split 3, range: (3550, 3590)
split 4, range: (3598, 3714)
Ffill-Interpolation of nan frames
Exempted from interpolation: range(0, 113) (over 1 sec long)
Exempted from interpolation: range(3714, 3750) (over 1 sec long)


  thresh = thresh*np.nanmax(signal)
  thresh = thresh*np.nanmax(signal)
  thresh = thresh*np.nanmax(signal)
 68%|██████▊   | 63/92 [02:21<00:42,  1.45s/it]


feature calculation for GGE0243_c_results_21.json_labeldata.csv
Area larger than threshold, collision assumed in [].
Calculation of features will be done in splits, ignoring those and adjacent* ranges. *That are less than 1 sec long.
split 0, range: (0, 6813)
Following frames seem to be tracked upside down
We are going to flip those back:
(array([6764, 6765, 6766, 6767, 6768, 6769, 6770, 6771, 6772, 6773, 6774,
       6775, 6776, 6777, 6778, 6779, 6780, 6781, 6782, 6783, 6784, 6785,
       6786, 6787, 6788, 6789, 6790, 6791, 6792, 6793, 6794, 6795, 6796,
       6797, 6798, 6799, 6800, 6801, 6802, 6803, 6804, 6805, 6806, 6807,
       6808, 6809, 6810, 6811, 6812]),)
split 1, range: (6853, 6920)
Ffill-Interpolation of nan frames
Exempted from interpolation: range(6813, 6853) (over 1 sec long)


  thresh = thresh*np.nanmax(signal)
 70%|██████▉   | 64/92 [02:23<00:52,  1.86s/it]


feature calculation for GGE0243_c_results_16.json_labeldata.csv
Area larger than threshold, collision assumed in [].
Calculation of features will be done in splits, ignoring those and adjacent* ranges. *That are less than 1 sec long.
split 0, range: (0, 4043)
Following frames seem to be tracked upside down
We are going to flip those back:
(array([ 155,  156,  157, ..., 4040, 4041, 4042]),)
Worm seems to travel backwards the majority of time, flipped all centerlines to agree with forward travelling assumption
Ffill-Interpolation of nan frames


 71%|███████   | 65/92 [02:25<00:49,  1.82s/it]


feature calculation for GGE0243_c_results_67.json_labeldata.csv
Area larger than threshold, collision assumed in [].
Calculation of features will be done in splits, ignoring those and adjacent* ranges. *That are less than 1 sec long.
split 0, range: (6, 8262)
Ffill-Interpolation of nan frames


 72%|███████▏  | 66/92 [02:28<00:58,  2.24s/it]


feature calculation for GGE0241_c_results_194.json_labeldata.csv
Area larger than threshold, collision assumed in [].
Calculation of features will be done in splits, ignoring those and adjacent* ranges. *That are less than 1 sec long.
split 0, range: (24, 588)
split 1, range: (596, 3586)
Ffill-Interpolation of nan frames


 73%|███████▎  | 67/92 [02:30<00:51,  2.06s/it]


feature calculation for GGE0243_c_results_47.json_labeldata.csv
Area larger than threshold, collision assumed in [].
Calculation of features will be done in splits, ignoring those and adjacent* ranges. *That are less than 1 sec long.
split 0, range: (9, 2658)


 74%|███████▍  | 68/92 [02:31<00:43,  1.83s/it]

Ffill-Interpolation of nan frames
Exempted from interpolation: range(2658, 2711) (over 1 sec long)

feature calculation for GGE0241_c_results_199.json_labeldata.csv
Area larger than threshold, collision assumed in [range(3202, 3810)].
Calculation of features will be done in splits, ignoring those and adjacent* ranges. *That are less than 1 sec long.
split 0, range: (2, 3202)
Following frames seem to be tracked upside down
We are going to flip those back:
(array([818, 819]),)
Ffill-Interpolation of nan frames
Exempted from interpolation: range(3202, 3810) (over 1 sec long)


 75%|███████▌  | 69/92 [02:33<00:39,  1.72s/it]


feature calculation for GGE0132_c_results_8.json_labeldata.csv
Area larger than threshold, collision assumed in [].
Calculation of features will be done in splits, ignoring those and adjacent* ranges. *That are less than 1 sec long.
split 0, range: (0, 15937)
Following frames seem to be tracked upside down
We are going to flip those back:
(array([  95, 9720, 9721]),)
Ffill-Interpolation of nan frames


 76%|███████▌  | 70/92 [02:39<01:07,  3.05s/it]


feature calculation for GGE0243_c_results_73.json_labeldata.csv
Area larger than threshold, collision assumed in [].
Calculation of features will be done in splits, ignoring those and adjacent* ranges. *That are less than 1 sec long.
split 0, range: (0, 2400)


 77%|███████▋  | 71/92 [02:40<00:52,  2.49s/it]

Ffill-Interpolation of nan frames

feature calculation for GGE0133_c_results_47.json_labeldata.csv
Area larger than threshold, collision assumed in [].
Calculation of features will be done in splits, ignoring those and adjacent* ranges. *That are less than 1 sec long.
split 0, range: (0, 4531)
Ffill-Interpolation of nan frames


 78%|███████▊  | 72/92 [02:42<00:47,  2.38s/it]


feature calculation for GGE0133_c_results_13.json_labeldata.csv
Area larger than threshold, collision assumed in [].
Calculation of features will be done in splits, ignoring those and adjacent* ranges. *That are less than 1 sec long.
split 0, range: (0, 2842)
Ffill-Interpolation of nan frames


 79%|███████▉  | 73/92 [02:43<00:38,  2.04s/it]


feature calculation for GGE0132_c_results_17.json_labeldata.csv
Area larger than threshold, collision assumed in [].
Calculation of features will be done in splits, ignoring those and adjacent* ranges. *That are less than 1 sec long.
split 0, range: (0, 3780)
Following frames seem to be tracked upside down
We are going to flip those back:
(array([1337, 1338, 1339, 1340, 1341, 1342, 1343, 1344, 1345, 1346, 1347,
       1348, 1349, 1350, 1351, 1352, 1353, 1354, 1355, 1356, 1357, 1358,
       1359, 1360, 1361, 1362, 1363, 1364, 1365, 1366, 1367, 1368, 1369,
       1370, 1371, 1372, 1373, 1374, 1375, 1376, 1377, 1378, 1379, 1380,
       1381, 1382, 1383, 1384, 1385, 1386, 1387, 1388, 1389, 1390, 1391,
       1392, 1393, 1394, 1395, 1396, 1397, 1398, 1399, 1400, 1401, 1402,
       1403, 1404, 1405, 1406, 1407, 1408, 1409, 1410, 1411, 1412, 1413,
       1414, 1415, 1416, 1417, 1418, 1419, 1420, 1421, 1422, 1423, 1424,
       1425, 1426, 1427, 1428, 1429, 1430, 1431, 1432, 1433, 1434, 1435,


 80%|████████  | 74/92 [02:48<00:48,  2.70s/it]


feature calculation for GGE0243_c_results_1.json_labeldata.csv
Area larger than threshold, collision assumed in [range(518, 531)].
Calculation of features will be done in splits, ignoring those and adjacent* ranges. *That are less than 1 sec long.
split 0, range: (0, 518)
Following frames seem to be tracked upside down
We are going to flip those back:
(array([482, 483, 484, 485, 486, 487, 488, 489, 490, 491, 492, 493, 494,
       495, 496, 497, 502, 503, 507, 508, 509, 510, 511, 513, 514, 515,
       516, 517]),)
Worm seems to travel backwards the majority of time, flipped all centerlines to agree with forward travelling assumption
split 1, range: (531, 8568)
Following frames seem to be tracked upside down
We are going to flip those back:
(array([ 1,  4,  6,  8,  9, 10, 13, 14, 15]),)
Ffill-Interpolation of nan frames


 82%|████████▏ | 75/92 [02:52<00:51,  3.05s/it]


feature calculation for GGE0243_c_results_3.json_labeldata.csv
Area larger than threshold, collision assumed in [range(10, 12), range(14, 16)].
Calculation of features will be done in splits, ignoring those and adjacent* ranges. *That are less than 1 sec long.
split 0, range: (16, 11915)
Following frames seem to be tracked upside down
We are going to flip those back:
(array([    2,     7,     8, ..., 11896, 11897, 11898]),)
Worm seems to travel backwards the majority of time, flipped all centerlines to agree with forward travelling assumption
Ffill-Interpolation of nan frames


 83%|████████▎ | 76/92 [02:56<00:56,  3.51s/it]


feature calculation for GGE0133_c_results_46.json_labeldata.csv
Area larger than threshold, collision assumed in [].
Calculation of features will be done in splits, ignoring those and adjacent* ranges. *That are less than 1 sec long.
split 0, range: (0, 3547)
Following frames seem to be tracked upside down
We are going to flip those back:
(array([2177, 2178, 2179, 2180, 2181, 2189, 2190, 2191, 2192, 2193, 2194,
       2195, 2196, 2197, 2198, 2199, 2200, 2201, 2202, 2203, 2204, 2205,
       2206, 2207, 2208, 2209, 2210]),)
Ffill-Interpolation of nan frames


 84%|████████▎ | 77/92 [02:58<00:43,  2.90s/it]


feature calculation for GGE0241_c_results_7.json_labeldata.csv
Area larger than threshold, collision assumed in [].
Calculation of features will be done in splits, ignoring those and adjacent* ranges. *That are less than 1 sec long.
split 0, range: (0, 3182)
Ffill-Interpolation of nan frames
Exempted from interpolation: range(3182, 3221) (over 1 sec long)


 85%|████████▍ | 78/92 [02:59<00:34,  2.45s/it]


feature calculation for GGE0132_c_results_6.json_labeldata.csv
Area larger than threshold, collision assumed in [].
Calculation of features will be done in splits, ignoring those and adjacent* ranges. *That are less than 1 sec long.
split 0, range: (0, 3154)
Following frames seem to be tracked upside down
We are going to flip those back:
(array([2062, 3149, 3152]),)
split 1, range: (3183, 3229)
split 2, range: (3303, 4515)
Following frames seem to be tracked upside down
We are going to flip those back:
(array([  11,   12,   13, ..., 1209, 1210, 1211]),)
Worm seems to travel backwards the majority of time, flipped all centerlines to agree with forward travelling assumption


  thresh = thresh*np.nanmax(signal)
  thresh = thresh*np.nanmax(signal)


Ffill-Interpolation of nan frames
Exempted from interpolation: range(3229, 3303) (over 1 sec long)


 86%|████████▌ | 79/92 [03:01<00:29,  2.30s/it]


feature calculation for GGE0241_c_results_18.json_labeldata.csv
Area larger than threshold, collision assumed in [range(0, 35), range(37, 38), range(39, 54), range(65, 70), range(93, 94), range(96, 98), range(99, 100), range(170, 171), range(994, 998), range(999, 1000), range(1005, 1010), range(1011, 1012), range(1014, 1015), range(1024, 1064), range(1066, 1068), range(1069, 1073), range(1074, 1076), range(1077, 1082), range(1085, 1086), range(1092, 1094), range(1098, 1099), range(1100, 1130), range(1131, 1132), range(1133, 1138), range(1142, 1144), range(1145, 1160), range(1176, 1181), range(1182, 1184), range(1185, 1187), range(1188, 1190), range(1191, 1202), range(1203, 1205), range(1206, 1207), range(1209, 1211), range(1212, 1221), range(1222, 1223), range(1224, 1237), range(1239, 1240), range(1241, 1282), range(1283, 1288), range(1290, 1293), range(1294, 1298), range(1299, 1344), range(1345, 1361), range(1362, 1364), range(1366, 1369), range(1370, 1371), range(1372, 1374), range(

  thresh = thresh*np.nanmax(signal)


split 2, range: (1943, 2362)
Following frames seem to be tracked upside down
We are going to flip those back:
(array([314]),)
split 3, range: (2363, 2407)
Worm seems to travel backwards the majority of time, flipped all centerlines to agree with forward travelling assumption
split 4, range: (2408, 2711)
Following frames seem to be tracked upside down
We are going to flip those back:
(array([259, 260, 261, 273, 276, 277, 278, 279, 280, 282, 283, 286, 288,
       289, 290, 291, 296, 298, 300, 301, 302]),)


  thresh = thresh*np.nanmax(signal)
  thresh = thresh*np.nanmax(signal)


split 5, range: (2712, 3782)
Following frames seem to be tracked upside down
We are going to flip those back:
(array([   1,    2,    3,    4,    5,    6,    7,   11,   14,   29,   30,
         31,   32,   33,   34,   35,   36,   37,   39,   87,   88,   89,
         90,   91,   92,   93,   94,   95,   96,   97,   98,   99,  100,
        101,  102,  103,  104,  105,  106,  107,  108,  109,  110,  111,
        112,  113,  114,  115,  116,  117,  118,  119,  120,  121,  122,
        123,  124,  125,  126,  127,  128,  129,  130,  131,  132,  133,
        134,  135,  136,  137,  138,  139,  140,  141,  142,  143,  144,
        145,  146,  147,  148,  149,  150,  151,  152,  153,  154,  155,
        156,  157,  158,  159,  160,  161,  162,  163,  164,  165,  166,
        167,  168,  169,  170,  171,  172,  173,  174,  175,  176,  177,
        178,  179,  180,  181,  182,  183,  184,  185,  186,  187,  188,
        189,  190,  191,  192,  193,  194,  195,  196,  197,  198,  199,
        200, 

 87%|████████▋ | 80/92 [03:04<00:30,  2.51s/it]


feature calculation for GGE0131_c_results_8.json_labeldata.csv
Area larger than threshold, collision assumed in [range(2094, 2110), range(2127, 2128), range(2129, 2133)].
Calculation of features will be done in splits, ignoring those and adjacent* ranges. *That are less than 1 sec long.
split 0, range: (0, 2094)
split 1, range: (2133, 5051)
Ffill-Interpolation of nan frames
Exempted from interpolation: range(2094, 2133) (over 1 sec long)


 88%|████████▊ | 81/92 [03:06<00:26,  2.40s/it]


feature calculation for GGE0131_c_results_13.json_labeldata.csv
Area larger than threshold, collision assumed in [].
Calculation of features will be done in splits, ignoring those and adjacent* ranges. *That are less than 1 sec long.
split 0, range: (0, 1479)
split 1, range: (1485, 3933)
Following frames seem to be tracked upside down
We are going to flip those back:
(array([ 2,  3,  7,  9, 10, 12, 13, 14]),)
Ffill-Interpolation of nan frames


 89%|████████▉ | 82/92 [03:08<00:21,  2.18s/it]


feature calculation for GGE0132_c_results_12.json_labeldata.csv
Area larger than threshold, collision assumed in [].
Calculation of features will be done in splits, ignoring those and adjacent* ranges. *That are less than 1 sec long.
split 0, range: (8, 3097)
Ffill-Interpolation of nan frames


 90%|█████████ | 83/92 [03:09<00:17,  1.93s/it]


feature calculation for GGE0132_c_results_0.json_labeldata.csv
Area larger than threshold, collision assumed in [].
Calculation of features will be done in splits, ignoring those and adjacent* ranges. *That are less than 1 sec long.
split 0, range: (0, 14207)
Following frames seem to be tracked upside down
We are going to flip those back:
(array([ 4274,  4275,  4276,  4277,  4278,  4282, 12407, 12408, 12409,
       12410, 12411, 12416, 12417, 12418, 12419, 12420, 12421, 12422,
       12423, 12424, 12425, 12426, 12427]),)
Ffill-Interpolation of nan frames


 91%|█████████▏| 84/92 [03:15<00:24,  3.08s/it]


feature calculation for GGE0132_c_results_9.json_labeldata.csv
Area larger than threshold, collision assumed in [].
Calculation of features will be done in splits, ignoring those and adjacent* ranges. *That are less than 1 sec long.
split 0, range: (0, 3614)
Ffill-Interpolation of nan frames


 92%|█████████▏| 85/92 [03:16<00:18,  2.62s/it]


feature calculation for GGE0243_c_results_10.json_labeldata.csv
Area larger than threshold, collision assumed in [].
Calculation of features will be done in splits, ignoring those and adjacent* ranges. *That are less than 1 sec long.
split 0, range: (0, 7612)
Following frames seem to be tracked upside down
We are going to flip those back:
(array([1185, 1196, 5920]),)
Ffill-Interpolation of nan frames


 93%|█████████▎| 86/92 [03:19<00:16,  2.75s/it]


feature calculation for GGE0132_c_results_10.json_labeldata.csv
Area larger than threshold, collision assumed in [].
Calculation of features will be done in splits, ignoring those and adjacent* ranges. *That are less than 1 sec long.
split 0, range: (0, 3246)
Following frames seem to be tracked upside down
We are going to flip those back:
(array([524]),)
Ffill-Interpolation of nan frames


 95%|█████████▍| 87/92 [03:21<00:11,  2.34s/it]


feature calculation for GGE0131_c_results_3.json_labeldata.csv
Area larger than threshold, collision assumed in [].
Calculation of features will be done in splits, ignoring those and adjacent* ranges. *That are less than 1 sec long.
split 0, range: (0, 2181)
Following frames seem to be tracked upside down
We are going to flip those back:
(array([1730]),)
split 1, range: (2311, 3374)
Ffill-Interpolation of nan frames
Exempted from interpolation: range(2181, 2311) (over 1 sec long)


 96%|█████████▌| 88/92 [03:22<00:08,  2.09s/it]


feature calculation for GGE0131_c_results_18.json_labeldata.csv
Area larger than threshold, collision assumed in [].
Calculation of features will be done in splits, ignoring those and adjacent* ranges. *That are less than 1 sec long.
split 0, range: (0, 1738)
split 1, range: (1740, 1829)
split 2, range: (1837, 1907)
Ffill-Interpolation of nan frames


  thresh = thresh*np.nanmax(signal)
  thresh = thresh*np.nanmax(signal)
 97%|█████████▋| 89/92 [03:23<00:05,  1.76s/it]


feature calculation for GGE0131_c_results_15.json_labeldata.csv
Area larger than threshold, collision assumed in [].
Calculation of features will be done in splits, ignoring those and adjacent* ranges. *That are less than 1 sec long.
split 0, range: (0, 2177)
split 1, range: (2297, 3363)
Ffill-Interpolation of nan frames
Exempted from interpolation: range(2177, 2297) (over 1 sec long)


 98%|█████████▊| 90/92 [03:25<00:03,  1.67s/it]


feature calculation for GGE0241_c_results_15.json_labeldata.csv
Area larger than threshold, collision assumed in [].
Calculation of features will be done in splits, ignoring those and adjacent* ranges. *That are less than 1 sec long.
split 0, range: (0, 3213)
Following frames seem to be tracked upside down
We are going to flip those back:
(array([2204, 3072, 3092, 3151, 3152, 3153, 3154, 3155, 3175]),)
Ffill-Interpolation of nan frames


 99%|█████████▉| 91/92 [03:26<00:01,  1.61s/it]


feature calculation for GGE0131_c_results_2.json_labeldata.csv
Area larger than threshold, collision assumed in [].
Calculation of features will be done in splits, ignoring those and adjacent* ranges. *That are less than 1 sec long.
split 0, range: (0, 2177)
Following frames seem to be tracked upside down
We are going to flip those back:
(array([2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
       2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021,
       2022, 2023, 2024, 2025, 2026, 2027, 2028, 2029, 2030, 2031, 2032,
       2033, 2034, 2035, 2036, 2037, 2038, 2039, 2040, 2041, 2042, 2043,
       2044, 2045, 2046, 2047, 2048, 2049, 2050, 2051, 2052, 2053, 2054,
       2055, 2056, 2057, 2058, 2059, 2060, 2061, 2062, 2063, 2064, 2065,
       2066, 2067, 2068, 2069, 2070, 2071, 2072, 2073, 2074, 2075, 2076,
       2077, 2078, 2079, 2080, 2081, 2082, 2083, 2084, 2085, 2086, 2087,
       2088, 2089, 2090, 2091, 2092, 2093, 2094, 2095, 2096, 2097, 2098,
 

  thresh = thresh*np.nanmax(signal)
  thresh = thresh*np.nanmax(signal)
  thresh = thresh*np.nanmax(signal)
100%|██████████| 92/92 [03:28<00:00,  2.26s/it]


In [11]:
# %%
# Walk through the engine files and stop at the first one that still needs a
# prediction; `fpath` keeps pointing at that file and is reused by the cells below.
# The output directory is listed once (and only when skipping is enabled)
# instead of being re-read from disk on every iteration.
already_written = set(os.listdir(outpath)) if skip_already else set()
for fpath in tqdm.tqdm(all_engine):
    fn = os.path.basename(fpath)
    dir_engine = os.path.dirname(fpath)
    # Skip files whose name already exists in the output directory.
    if fn in already_written:
        continue
    # startswith() also copes with an empty basename (a path ending in a
    # separator), where fn[0] would raise IndexError.
    if not fn.startswith('.') and fn not in prediction_done and os.path.isfile(fpath):
        print(fn)
        break

  0%|          | 0/92 [00:00<?, ?it/s]

GGE0133_c_results_13_prediction.json





In [12]:
# Load the file selected by the search loop above; the result of load_tolist is
# indexed with [0], so only its first element is kept.
# NOTE(review): `d` is presumably a pandas DataFrame (later cells use d.index and
# pd.concat on it), and droplabelcol=False presumably keeps the label column —
# confirm against functions.load_model.load_tolist.
d = load_tolist(fpath, droplabelcol=False)[0]

In [23]:
# Frame-wise prediction for the recording held in `d`, followed by a confidence
# filter and re-alignment of the results to the original index of `d`.

# Post-processing settings (previously inline literals).
SMOOTHING_WINDOW = 30   # rows in the rolling mean over the top class probability
MIN_CONFIDENCE = .5     # smoothed top probability below this -> row is marked unclassified
BFILL_LIMIT = 29        # max consecutive missing rows back-filled when re-aligning to d.index
                        # NOTE(review): looks like SMOOTHING_WINDOW - 1 — confirm the two are coupled
UNCLASSIFIED = -1       # label used for low-confidence rows and rows that could not be filled

X = augsel.fit_transform(d)
# NOTE(review): fit_transform fits `imp` on this recording before transforming it;
# confirm that re-fitting at prediction time is intended. X.index is used below, so
# `imp` is assumed to return a DataFrame here — confirm.
X = imp.fit_transform(X)  # model seems to run well without
#X = X.add_suffix('_tr') # not longer needed once new model has been trained

pred = model.predict(X)
proba = model.predict_proba(X)

# Highest class probability per row, smoothed with a rolling mean.
proba_max = np.amax(proba, axis=1)
proba_max_mean = (
    pd.DataFrame(proba_max)
    .rolling(SMOOTHING_WINDOW, min_periods=1)
    .mean()
    .values
)
# `.values` of the one-column frame is 2-D, so np.all over axis=1 collapses it
# back to a single boolean per row.
proba_low50 = np.all(proba_max_mean < MIN_CONFIDENCE, axis=1)
# Overwrites in place; assumes `pred` is an array whose dtype can hold -1 — TODO confirm.
pred[proba_low50] = UNCLASSIFIED

# Re-align both results to d.index: rows absent from X.index are back-filled from
# the next available row (up to BFILL_LIMIT in a row); what is still missing gets
# the unclassified label / zero probability.
pred = (
    pd.Series(pred, index=X.index, name='prediction')
    .reindex(d.index, method='bfill', limit=BFILL_LIMIT)
    .fillna(UNCLASSIFIED)
)
proba = (
    pd.DataFrame(
        proba,
        index=X.index,
        columns=[f'proba_{i}' for i in range(proba.shape[1])],
    )
    .reindex(d.index, method='bfill', limit=BFILL_LIMIT)
    .fillna(0)
)

# Input columns, predicted label and per-class probabilities side by side.
p_out = pd.concat([d, pred, proba], axis=1)

In [25]:
# Inspect the column labels of the combined output: the columns of `d`
# followed by 'prediction' and the 'proba_<i>' columns.
p_out.columns

Index(['area', 'rate', 'negskew_clean', 'reversal_rate', 'velocity',
       'velocity_mean', 'velocity_dt60', 'velocity_dt150', 'area_maxfreq',
       'rate_maxfreq', 'negskew_clean_cwt0.30', 'negskew_clean_cwt0.82',
       'negskew_clean_cwt1.34', 'negskew_clean_cwt1.87',
       'negskew_clean_cwt2.39', 'negskew_clean_cwt2.91',
       'negskew_clean_cwt3.43', 'negskew_clean_cwt3.96',
       'negskew_clean_cwt4.48', 'negskew_clean_cwt5.00',
       'negskew_clean_maxfreq', 'tip2cm_arccos_maxfreq',
       'reversal_rate_maxfreq', 'velocity_maxfreq', 'velocity_mean_maxfreq',
       'velocity_dt60_cwt5.00', 'velocity_dt60_cwt1.82',
       'velocity_dt60_cwt1.12', 'velocity_dt60_cwt0.80',
       'velocity_dt60_cwt0.63', 'velocity_dt60_cwt0.52',
       'velocity_dt60_cwt0.44', 'velocity_dt60_cwt0.38',
       'velocity_dt60_cwt0.33', 'velocity_dt60_cwt0.30',
       'velocity_dt60_maxfreq', 'velocity_dt150_maxfreq', 'prediction',
       'proba_0', 'proba_1', 'proba_2', 'proba_3', 'proba_4', 'p

In [18]:
# Read the raw JSON content of the selected file so the prediction columns can
# be merged into it. Fail loudly when the file is missing: silently skipping
# the load would leave `recording` undefined or, worse, still holding a
# previously loaded file that the following cells would then write to `fpath`.
if not os.path.isfile(fpath):
    raise FileNotFoundError(f"Recording file not found: {fpath}")
with open(fpath, "r") as jsonfile:
    recording = json.load(jsonfile)

In [19]:
# Display the loaded JSON content for inspection.
# NOTE(review): this renders the whole dict; showing only recording.keys()
# would keep the notebook output short.
recording

{'columns': ['area',
  'rate',
  'negskew_clean',
  'reversal_rate',
  'velocity',
  'velocity_mean',
  'velocity_dt60',
  'velocity_dt150',
  'area_maxfreq',
  'rate_maxfreq',
  'negskew_clean_cwt0.30',
  'negskew_clean_cwt0.82',
  'negskew_clean_cwt1.34',
  'negskew_clean_cwt1.87',
  'negskew_clean_cwt2.39',
  'negskew_clean_cwt2.91',
  'negskew_clean_cwt3.43',
  'negskew_clean_cwt3.96',
  'negskew_clean_cwt4.48',
  'negskew_clean_cwt5.00',
  'negskew_clean_maxfreq',
  'tip2cm_arccos_maxfreq',
  'reversal_rate_maxfreq',
  'velocity_maxfreq',
  'velocity_mean_maxfreq',
  'velocity_dt60_cwt5.00',
  'velocity_dt60_cwt1.82',
  'velocity_dt60_cwt1.12',
  'velocity_dt60_cwt0.80',
  'velocity_dt60_cwt0.63',
  'velocity_dt60_cwt0.52',
  'velocity_dt60_cwt0.44',
  'velocity_dt60_cwt0.38',
  'velocity_dt60_cwt0.33',
  'velocity_dt60_cwt0.30',
  'velocity_dt60_maxfreq',
  'velocity_dt150_maxfreq'],
 'index': [0,
  1,
  2,
  3,
  4,
  5,
  6,
  7,
  8,
  9,
  10,
  11,
  12,
  13,
  14,
  15,
  

In [None]:
# Merge the output columns into the loaded JSON dict (in place); to_dict()
# yields one {row index: value} mapping per column.
prediction_columns = p_out.to_dict()
recording.update(prediction_columns)
# Serialise with NanConverter so float NaN values are emitted as null.
jsnF = json.dumps(recording, cls=NanConverter, indent=4)

In [None]:
# Write the merged JSON back to disk. This overwrites the file at `fpath`,
# i.e. the same file the recording was loaded from.
with open(fpath, mode="w") as outfile:
    outfile.write(jsnF)

In [20]:
# Show the nested {column: {row index: value}} mapping produced by to_dict(),
# i.e. the structure that is merged into `recording`.
# NOTE(review): this prints every row of every column; consider inspecting a
# small slice (e.g. p_out.head().to_dict()) instead.
p_out.to_dict()

{'prediction': {0: -1.0,
  1: -1.0,
  2: -1.0,
  3: -1.0,
  4: -1.0,
  5: -1.0,
  6: -1.0,
  7: -1.0,
  8: -1.0,
  9: -1.0,
  10: -1.0,
  11: -1.0,
  12: -1.0,
  13: -1.0,
  14: -1.0,
  15: -1.0,
  16: -1.0,
  17: -1.0,
  18: -1.0,
  19: -1.0,
  20: -1.0,
  21: -1.0,
  22: -1.0,
  23: -1.0,
  24: -1.0,
  25: -1.0,
  26: -1.0,
  27: -1.0,
  28: -1.0,
  29: -1.0,
  30: 5.0,
  31: 5.0,
  32: 5.0,
  33: 5.0,
  34: 5.0,
  35: 5.0,
  36: 5.0,
  37: 5.0,
  38: 5.0,
  39: 5.0,
  40: 5.0,
  41: 5.0,
  42: 5.0,
  43: 5.0,
  44: 5.0,
  45: 5.0,
  46: 5.0,
  47: 5.0,
  48: 5.0,
  49: 5.0,
  50: 5.0,
  51: 5.0,
  52: 5.0,
  53: 5.0,
  54: 5.0,
  55: 5.0,
  56: 5.0,
  57: 5.0,
  58: 5.0,
  59: 5.0,
  60: 5.0,
  61: 5.0,
  62: 5.0,
  63: 5.0,
  64: 5.0,
  65: 5.0,
  66: 5.0,
  67: 5.0,
  68: 5.0,
  69: 5.0,
  70: 5.0,
  71: 5.0,
  72: 5.0,
  73: 5.0,
  74: 5.0,
  75: 5.0,
  76: 5.0,
  77: 5.0,
  78: 5.0,
  79: 5.0,
  80: 5.0,
  81: 5.0,
  82: 5.0,
  83: 5.0,
  84: 5.0,
  85: 5.0,
  86: 5.0,
  87: 5.0