## This notebooks concatenates multiple Jarvis trainingsets into a single COCO formatted trainingset
- **NOTE:** I reccomend using a different trainingset for detection vs pose prediction
- **detection**: use a smaller dataset where data is closer to your specific rig / context
- **pose recognition**: use a large and more diverse dataset, which can contain different species/strains in diverse conditions

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from datetime import datetime
from pathlib import Path
from pprint import pprint
import matplotlib.pyplot as plt

#### Parameters

In [3]:
# where you want to save your training set
output_directory = Path("/n/groups/datta/tim_sainburg/datasets/scratch/test-training-sets")

# what to name the dataset
trainingset_name = "test-trainingset"

In [4]:
# this needs to match with the order used in mmpose
keypoints_order = [
    "nose_tip",
    "left_ear",
    "right_ear",
    "left_eye",
    "right_eye",
    "throat",
    "forehead",
    "left_shoulder",
    "right_shoulder",
    "left_elbow",
    "right_elbow",
    "left_wrist",
    "right_wrist",
    "left_hind_paw_front",
    "right_hind_paw_front",
    "left_hind_paw_back",
    "right_hind_paw_back",
    "left_knee",
    "right_knee",
    "tail_base",
    "spine_low",
    "spine_mid",
    "spine_high",
    "left_fore_paw",
    "right_fore_paw",
]

### Select which labelling sets to use
- **Note**: For the benefit of the rest of the lab, share the datasets you annotate

In [5]:
dataset_locs = [
    
    # 240502
    {
        "location": "/n/groups/datta/tim_sainburg/projects/23-10-29-jarvis-datasets/sainburg-25pt/1920x1200/240502-npx_M04002-20-frames-dull/",
        "use_in_validation": True,
    },
    {
        "location": "/n/groups/datta/tim_sainburg/projects/23-10-29-jarvis-datasets/sainburg-25pt/1920x1200/240502-npx_M04003-20-frames-v2/",
        "use_in_validation": True,
    },
    {
        "location": "/n/groups/datta/tim_sainburg/projects/23-10-29-jarvis-datasets/sainburg-25pt/1920x1200/240502-npx_M04003-24-frames-v3/",
        "use_in_validation": True,
    },
    {
        "location": "/n/groups/datta/tim_sainburg/projects/23-10-29-jarvis-datasets/sainburg-25pt/1920x1200/240502-npx_M04002-20-frames-shiny/",
        "use_in_validation": True,
    },
    
    {
        "location": "/n/groups/datta/tim_sainburg/projects/23-10-29-jarvis-datasets/sainburg-25pt/1920x1200/23-10-03-NUB-37954/",
        "use_in_validation": True,
    },
    {
        "location": "/n/groups/datta/tim_sainburg/projects/23-10-29-jarvis-datasets/sainburg-25pt/1920x1200/23-10-25-SM2-37837/",
        "use_in_validation": True,
    },
    {
        "location": "/n/groups/datta/tim_sainburg/projects/23-10-29-jarvis-datasets/sainburg-25pt/1920x1200/23-10-25-SM2-37838/",
        "use_in_validation": True,
    },
    {
        "location": "/n/groups/datta/tim_sainburg/projects/23-10-29-jarvis-datasets/sainburg-25pt/1920x1200/23-10-25-NUB-38027/",
        "use_in_validation": True,
    },
    {
        "location": "/n/groups/datta/tim_sainburg/projects/23-10-29-jarvis-datasets/sainburg-25pt/1920x1200/23-10-25-bk-38125/",
        "use_in_validation": True,
    },
    {
        "location": "/n/groups/datta/tim_sainburg/projects/23-10-29-jarvis-datasets/sainburg-25pt/1920x1200/23-10-25-SW-38157/",
        "use_in_validation": True,
    },
    {
        "location": "/n/groups/datta/tim_sainburg/projects/23-10-29-jarvis-datasets/sainburg-25pt/1920x1200/23-10-25-NUB-37956/",
        "use_in_validation": True,
    },
    {
        "location": "/n/groups/datta/tim_sainburg/projects/23-10-29-jarvis-datasets/sainburg-25pt/1920x1200/23-10-25-bk-38129/",
        "use_in_validation": True,
    },
    {
        "location": "/n/groups/datta/tim_sainburg/projects/23-10-29-jarvis-datasets/sainburg-25pt/1920x1200/23-10-25-BW-38102/",
        "use_in_validation": True,
    },
    {
        "location": "/n/groups/datta/tim_sainburg/projects/23-10-29-jarvis-datasets/sainburg-25pt/1920x1200/23-10-25-NUB-37952/",
        "use_in_validation": True,
    },
    {
        "location": "/n/groups/datta/tim_sainburg/projects/23-10-29-jarvis-datasets/sainburg-25pt/1920x1200/23-10-25-SW-38158/",
        "use_in_validation": True,
    },
    {
        "location": "/n/groups/datta/tim_sainburg/projects/23-10-29-jarvis-datasets/sainburg-25pt/1920x1200/23-10-25-bk-38127/",
        "use_in_validation": True,
    },
    {
        "location": "/n/groups/datta/tim_sainburg/projects/23-10-29-jarvis-datasets/sainburg-25pt/1920x1200/23-10-25-SW-28247/",
        "use_in_validation": True,
    },
    {
        "location": "/n/groups/datta/tim_sainburg/projects/23-10-29-jarvis-datasets/sainburg-25pt/1920x1200/23-10-25-SM2-37837/",
        "use_in_validation": True,
    },
    
    {
        "location": "/n/groups/datta/tim_sainburg/projects/23-09-29-peromoseq/data/jarvis/labeling_datasets/resized/20230904_CALEB_JONAH/",
        "use_in_validation": False,
    },
    
    {
        "location": "/n/groups/datta/tim_sainburg/projects/23-10-29-jarvis-datasets/sainburg-25pt/1920x1200/240423-npx_apollo-30-frames/",
        "use_in_validation": True,
    },
    {
        "location": "/n/groups/datta/tim_sainburg/projects/23-10-29-jarvis-datasets/sainburg-25pt/1920x1200/240423-npx_apollo-20-frames/",
        "use_in_validation": True,
    },
    {
        "location": "/n/groups/datta/tim_sainburg/projects/23-09-29-peromoseq/data/jarvis/labeling_datasets/resized/23-04-27-neural-recording/",
        "use_in_validation": False,
    },
    {
        "location": "/n/groups/datta/tim_sainburg/projects/23-09-29-peromoseq/data/jarvis/labeling_datasets/resized/23-05-01-14-37-54__23-04-20-headcap/",
        "use_in_validation": False,
    },
    {
        "location": "/n/groups/datta/tim_sainburg/projects/23-09-29-peromoseq/data/jarvis/labeling_datasets/resized/23-05-15-tethered-neural/",
        "use_in_validation": False,
    },
    {
        "location": "/n/groups/datta/tim_sainburg/projects/23-09-29-peromoseq/data/jarvis/labeling_datasets/resized/23-08-16-chronic_recordings_23-02-16-17-39-39-427329_larger/",
        "use_in_validation": False,
    },
    {
        "location": "/n/groups/datta/tim_sainburg/projects/23-09-29-peromoseq/data/jarvis/labeling_datasets/resized/400_frames_midspine/",
        "use_in_validation": False,
    },
    # multi-mouse
    {
        "location": "/n/groups/datta/tim_sainburg/projects/23-10-29-jarvis-datasets/sainburg-25pt/1920x1200/20240314-sample-neuropixels-clear-100/",
        "use_in_validation": False,
    },
    {
        "location": "/n/groups/datta/tim_sainburg/projects/23-10-29-jarvis-datasets/sainburg-25pt/1920x1200/20240110-nacho-LOLO/",
        "use_in_validation": False,
    },
    
    # downloaded datasets
    {
        "location": "/n/groups/datta/tim_sainburg/projects/23-09-29-peromoseq/data/jarvis/labeling_datasets/resized/Gerbil2_bw_midspine/",
        "use_in_validation": False,
    },
        {
        "location": "/n/groups/datta/tim_sainburg/projects/23-09-29-peromoseq/data/jarvis/labeling_datasets/resized/Mouse_bw_midspine/",
        "use_in_validation": False,
    },
        {
        "location": "/n/groups/datta/tim_sainburg/projects/23-09-29-peromoseq/data/jarvis/labeling_datasets/resized/Rat_Dataset_bw_midspine/",
        "use_in_validation": False,
    },

]

### Create trainingset

In [6]:
from multicamera_labelling_and_training.labelling_set_to_training_set import TrainingSetCreator

In [7]:
tsc = TrainingSetCreator(
    output_directory=output_directory,
    trainingset_name=trainingset_name,
    percent_validation = 0.1,
    keypoints_order = keypoints_order,
    padding = 60 # how much padding to place around the 
)

In [8]:
tsc.run(labelling_sets=labelling_sets)

copying images from 23-10-03-NUB-37954:   0%|          | 0/6 [00:00<?, ?it/s]

BackLeft:   0%|          | 0/50 [00:00<?, ?it/s]

BackRight:   0%|          | 0/50 [00:00<?, ?it/s]

Bottom:   0%|          | 0/50 [00:00<?, ?it/s]

FrontLeft:   0%|          | 0/50 [00:00<?, ?it/s]

FrontRight:   0%|          | 0/50 [00:00<?, ?it/s]

Top:   0%|          | 0/50 [00:00<?, ?it/s]

copying images from 23-10-25-SM2-37837:   0%|          | 0/6 [00:00<?, ?it/s]

BackLeft:   0%|          | 0/50 [00:00<?, ?it/s]

BackRight:   0%|          | 0/50 [00:00<?, ?it/s]

Bottom:   0%|          | 0/50 [00:00<?, ?it/s]

FrontLeft:   0%|          | 0/50 [00:00<?, ?it/s]

FrontRight:   0%|          | 0/50 [00:00<?, ?it/s]

Top:   0%|          | 0/50 [00:00<?, ?it/s]

framesets:   0%|          | 0/100 [00:00<?, ?it/s]