# add frames to training set
After training an initial network, watch the videos with overlaid tracking created by `label\make_training_vid.py` and identify frames that should be included in the training set. Use this notebook to merge these frames, annotated with the current network's output, into an existing dataset.

In [7]:
from deepposekit.io.utils import merge_new_images
from deepposekit.io import VideoReader
import matplotlib.pyplot as plt
from tqdm import tqdm
import pandas as pd
import numpy as np
import sys
import cv2
import os
sys.path.append(os.path.abspath(os.path.join('..')))  # add partent directory
from predict.utils import make_tracking_video
%load_ext autoreload
%autoreload 2

# settings
old_dataset = r'D:\github\fish-tracking\label\datasets\tank_dataset_9.h5'  # dataset to extend
merged_dataset = r'D:\github\fish-tracking\label\datasets\tank_dataset_temp.h5'  # where the merged dataset goes
video = r'Z:\C1\free\vids\20201001_Fred\concatenated.avi'  # video the new frames are taken from

# frames to add; kept in ascending order so that the last entry is the highest frame number
frame_nums = sorted([0, 1000, 2000])

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [8]:
# load predictions
# (assumes <video path without extension>_tracking.csv already exists)
tracking_csv = os.path.splitext(video)[0] + '_tracking.csv'
raw_predictions = pd.read_csv(tracking_csv).to_numpy()[:, 1:]  # first column is dropped (presumably the frame index -- confirm)
num_features = int(raw_predictions.shape[1] / 3)  # three values are stored per feature
predictions = raw_predictions.reshape((-1, num_features, 3))  # (frame_num X feature_num X (x,y,confidence))
keypoints = predictions[frame_nums]  # keep only the rows of the selected frames

The images are loaded below by reading through the video frame by frame. Directly indexing into the video returns incorrect frames from the original video, probably because the original (but not the tracking) video reports an incorrect frame count. I couldn't figure out how to fix this in OpenCV...

In [9]:
# load images
# The video is read sequentially from its first frame instead of indexing the reader directly
# (e.g. reader[idx] for idx in frame_nums), because direct indexing draws incorrect frames
# for mysterious reasons...
reader = VideoReader(video, batch_size=1, fast_frame_count=True)
wanted_frames = set(frame_nums)
last_frame = frame_nums[-1]  # frame_nums is sorted, so this is the highest frame needed

images = []
for frame_idx in tqdm(range(last_frame + 1)):
    frame = reader.read()  # called once per index so the reader stays in step with frame_idx
    if frame_idx in wanted_frames:
        images.append(frame)

# replace color with singleton dimension and stack the frames along a new first axis
images = np.stack([frame[..., 0][..., np.newaxis] for frame in images])

100%|█████████████████████████████████████████████████████████████████████████| 2001/2001 [00:00<00:00, 3598.10it/s]


In [12]:
# make merged dataset
# Each image must be paired with exactly one set of keypoints. The counts can differ when a
# frame number is listed twice (keypoints are repeated by the indexing, but the frame is only
# read once), so check before handing the data to merge_new_images.
if len(images) != len(keypoints):
    raise ValueError(
        'images ({}) and keypoints ({}) must contain the same number of frames'.format(
            len(images), len(keypoints)))

merge_new_images(
    datapath=old_dataset,            # existing dataset
    merged_datapath=merged_dataset,  # destination of the merged dataset
    images=images,
    keypoints=keypoints,
    overwrite=False                  # presumably keeps an existing merged file -- confirm in the DeepPoseKit docs
)


# add frames from spreadsheet
Download [this](https://docs.google.com/spreadsheets/d/1_kMZSCiMDglHKb363tWUf2j9IzwLFfPrArS-zXin_94/edit?usp=sharing) spreadsheet as a .csv, then add the frames from the videos listed in the spreadsheet to the training set. *Only works for head-free videos.*

In [1]:
from deepposekit.io.utils import merge_new_images
import matplotlib.pyplot as plt
from tqdm import tqdm
import pandas as pd
import numpy as np
import yaml
import sys
import cv2
import os
sys.path.append(os.path.abspath(os.path.join('..')))  # add parent directory
root_dir = r'D:\github\fish-tracking'  # github repo


# settings
old_dataset = r'D:\github\fish-tracking\label\datasets\tank_dataset_9.h5'  # dataset to extend
merged_dataset = r'D:\github\fish-tracking\label\datasets\tank_dataset_10.h5'  # where the merged dataset goes
spreadsheet = r'C:\Users\rick\Desktop\c1_tracking_errors.csv'  # downloaded tracking-error spreadsheet


# inits
# only the 'free' section of the repo's config.yaml is used
config_path = os.path.join(root_dir, 'config.yaml')
with open(config_path, 'r') as config_file:
    cfg = yaml.safe_load(config_file)['free']

# one spreadsheet row per tracking error; the video of a row is
# <data_dir>/<Date>_<Fish>/concatenated.avi
tracking_errors = pd.read_csv(spreadsheet)
vidnames = []
for date, fish in zip(tracking_errors['Date'], tracking_errors['Fish']):
    session = str(date) + '_' + fish
    vidnames.append(os.path.join(cfg['data_dir'], session, 'concatenated.avi'))
tracking_errors['FileName'] = vidnames  # remember which video each row belongs to
vidnames = np.unique(vidnames)  # every video once, in sorted order
    

In [14]:
# get frames
# For every video referenced in the spreadsheet, collect the flagged frames and the network's
# predictions for them. images[k] and keypoints[k] must always describe the same frame.
images, keypoints = [], []
for idx, vidname in enumerate(vidnames):
    predictionsname = os.path.splitext(vidname)[0] + '_tracking.csv'

    # skip this video unless both the video and its tracking file are available
    missing = [path for path in (vidname, predictionsname) if not os.path.exists(path)]
    if missing:
        print('{}: skipped, file(s) not found: {}'.format(vidname, ', '.join(missing)))
        continue

    vid_bins = tracking_errors['FileName']==vidname
    # np.unique sorts AND removes duplicates. A frame listed twice in the spreadsheet would
    # otherwise produce two keypoint rows but only one image, shifting every later
    # image/keypoint pair out of alignment.
    frames = np.unique(tracking_errors.loc[vid_bins, 'Frame'].to_numpy())
    last_frame = int(frames[-1])

    print('\n({:3}/{}) {} frames:\n{}'.format(
        idx+1, len(vidnames), vidname, frames))

    # load predictions (only the rows up to the last requested frame)
    predictions = pd.read_csv(predictionsname, nrows=last_frame + 1).to_numpy()[:, 1:]
    predictions = predictions.reshape((-1, predictions.shape[1] // 3, 3))  # (frame_num X feature_num X (x,y,confidence))
    if last_frame >= len(predictions):
        raise IndexError('{}: frame {} requested but only {} frames are tracked'.format(
            predictionsname, last_frame, len(predictions)))
    keypoints.append(predictions[frames])
    del predictions

    # load frames by reading sequentially from the start of the video
    wanted_frames = set(frames.tolist())  # constant-time membership test inside the loop
    reader = cv2.VideoCapture(vidname)
    try:
        for i in range(last_frame + 1):
            if i in wanted_frames:
                success, frame = reader.read()
                if not success:
                    raise RuntimeError('{}: could not read frame {}'.format(vidname, i))
                images.append(frame)
            else:
                reader.grab()  # advance one frame without retrieving the image
    finally:
        reader.release()  # release the capture even if reading failed

if not images:
    raise ValueError('no frames were loaded; check that the videos and tracking files exist')

images = np.stack([img[..., 0][..., np.newaxis] for img in images])  # replace color with singleton dimension and concatenate
keypoints = np.concatenate(keypoints, axis=0)


(  1/16) Z:\C1\free\vids\20201122_Igor\concatenated.avi frames:
[  766  1491  1934  4355  5300  6948  8569  8744  9822 11698 13492 16578]


100%|███████████████████████████████████████████████████████████████████████| 16579/16579 [00:05<00:00, 3151.34it/s]



(  2/16) Z:\C1\free\vids\20201122_Joao\concatenated.avi frames:
[   563   1418   1877   4397  10953  12413  14798  19410  23064  23662
  26437  28699  29410  29813  30121  31435 118027 120702 120980 127964
 128192 128257 128416 128459 128699 230831 232775 310329 311377 311663
 311905 314615]


100%|█████████████████████████████████████████████████████████████████████| 314616/314616 [02:08<00:00, 2450.87it/s]



(  3/16) Z:\C1\free\vids\20201128_Mark\concatenated.avi frames:
[ 1625  7228 11116 12145 12408 14579 22043]


100%|███████████████████████████████████████████████████████████████████████| 22044/22044 [00:06<00:00, 3299.43it/s]



(  4/16) Z:\C1\free\vids\20201201_Kyle\concatenated.avi frames:
[42795 43725 47232 56964]


100%|███████████████████████████████████████████████████████████████████████| 56965/56965 [00:16<00:00, 3403.78it/s]



(  5/16) Z:\C1\free\vids\20201202_Neil\concatenated.avi frames:
[ 10923  12356  48726  76470 162207]


100%|█████████████████████████████████████████████████████████████████████| 162208/162208 [00:48<00:00, 3358.74it/s]



(  6/16) Z:\C1\free\vids\20201202_Omar\concatenated.avi frames:
[ 5088 15840 24372 41676]


100%|███████████████████████████████████████████████████████████████████████| 41677/41677 [00:12<00:00, 3362.63it/s]



(  7/16) Z:\C1\free\vids\20201204_Greg\concatenated.avi frames:
[ 18290  92008 109420 117460 131872 143464 147038 163275]


100%|█████████████████████████████████████████████████████████████████████| 163276/163276 [00:49<00:00, 3276.92it/s]



(  8/16) Z:\C1\free\vids\20201210_Greg\concatenated.avi frames:
[1224]


100%|█████████████████████████████████████████████████████████████████████████| 1225/1225 [00:00<00:00, 3084.47it/s]



(  9/16) Z:\C1\free\vids\20201210_Neil\concatenated.avi frames:
[12756 25207 27834 81059]


100%|███████████████████████████████████████████████████████████████████████| 81060/81060 [00:23<00:00, 3384.01it/s]



( 10/16) Z:\C1\free\vids\20201210_Omar\concatenated.avi frames:
[ 62276  70915  78640  82882 105588]


100%|█████████████████████████████████████████████████████████████████████| 105589/105589 [00:31<00:00, 3395.53it/s]



( 11/16) Z:\C1\free\vids\20201211_Kyle\concatenated.avi frames:
[ 1492  5681  6272  9046  9350 14952 16303]


100%|███████████████████████████████████████████████████████████████████████| 16304/16304 [00:04<00:00, 3354.46it/s]



( 12/16) Z:\C1\free\vids\20201211_Lazy\concatenated.avi frames:
[ 1593  2048  3008  7682  9393 13581]


100%|███████████████████████████████████████████████████████████████████████| 13582/13582 [00:04<00:00, 3362.97it/s]



( 13/16) Z:\C1\free\vids\20201212_Neil\concatenated.avi frames:
[28440 44906 48636]


100%|███████████████████████████████████████████████████████████████████████| 48637/48637 [00:14<00:00, 3421.77it/s]



( 14/16) Z:\C1\free\vids\20201213_Kyle\concatenated.avi frames:
[ 1897  2787  4759  4815  5095  6181 13368 16829 17017 23016 26438 26703]


100%|███████████████████████████████████████████████████████████████████████| 26704/26704 [00:08<00:00, 3097.52it/s]



( 15/16) Z:\C1\free\vids\20201213_Lazy\concatenated.avi frames:
[ 3130  4524  6918 15167 23416]


100%|███████████████████████████████████████████████████████████████████████| 23417/23417 [00:07<00:00, 3334.03it/s]



( 16/16) Z:\C1\free\vids\20201219_Mark\concatenated.avi frames:
[ 1160  7832 21012 23200]


100%|███████████████████████████████████████████████████████████████████████| 23201/23201 [00:10<00:00, 2222.65it/s]


In [17]:
# make merged dataset
# Each image must be paired with exactly one set of keypoints. The counts can differ when the
# spreadsheet lists a frame twice (keypoints are repeated by the indexing, but the frame is
# only read once), so check before handing the data to merge_new_images.
if len(images) != len(keypoints):
    raise ValueError(
        'images ({}) and keypoints ({}) must contain the same number of frames'.format(
            len(images), len(keypoints)))

merge_new_images(
    datapath=old_dataset,            # existing dataset
    merged_datapath=merged_dataset,  # destination of the merged dataset
    images=images,
    keypoints=keypoints,
    overwrite=False                  # presumably keeps an existing merged file -- confirm in the DeepPoseKit docs
)

(118,
 Date           20201219
 Fish               Mark
 Frame             23200
 View(s)           FRONT
 Error      worm missing
 User                 DN
 Notes               NaN
 Name: 118, dtype: object)