In [1]:
import torch
from flytracker.io.dataset import DataLoader

from flytracker.tracker import _initialize, _localize
from flytracker.preprocessing.preprocessing import (
    preprocessing_kmeans,
    preprocessing_blob,
)
from flytracker.localization.blob import localize_blob, default_blob_detector_params
from flytracker.localization.kmeans import (
    localize_kmeans_torch,
    localize_kmeans_sklearn,
)
from time import time
import numpy as np
%load_ext autoreload
%autoreload 2

In [2]:
# Show the kernel's current working directory before changing it.
%pwd

'/home/gert-jan/Documents/flyTracker/tests'

In [3]:
# Change the working directory so that the relative `movie_path` used later resolves.
# NOTE(review): this is an absolute, user-specific path; the notebook will not run
# unchanged on another machine — consider deriving it from the notebook location.
%cd "/home/gert-jan/Documents/flyTracker/"

/home/gert-jan/Documents/flyTracker


In [4]:
# Confirm the working directory after the `%cd` above.
%pwd

'/home/gert-jan/Documents/flyTracker'

In [5]:
# Input movie (relative to the current working directory) and the frame count
# handed to `_localize` in the tracking runs.
movie_path = "data/experiments/bruno/videos/seq_1.mp4"
n_frames = 5000

# Boolean mask on a 1080 x 1280 grid (presumably frame height x width — confirm).
# Start with everything excluded, then enable the interior rectangle that remains
# after dropping 130 rows at the top, 160 at the bottom, 270 columns on the left
# and 205 on the right.
mask = torch.zeros((1080, 1280), dtype=torch.bool)
mask[130:-160, 270:-205] = True

# Clear an additional rectangle in each corner of the interior.
mask[:190, :350] = False    # top-left
mask[:195, -270:] = False   # top-right
mask[-220:, :340] = False   # bottom-left
mask[870:, 1010:] = False   # bottom-right

In [9]:
# Running parallel gpu
# Times the full pipeline: build the loader, find initial positions with the blob
# localizer, then track `n_frames` frames with the torch k-means localizer on CUDA.
start = time()
loader = DataLoader(movie_path, parallel=True)

# The reader is stopped in `finally` so that an exception or a kernel interrupt
# during the run does not leave it running and require a manual stop.
try:
    preprocessor_ini = preprocessing_blob(mask)
    localize_ini = localize_blob(default_blob_detector_params())
    # The trailing 100 is passed through to `_initialize` unchanged
    # (presumably the number of frames it may inspect — confirm).
    initial_position, initial_frame = _initialize(
        loader, preprocessor_ini, localize_ini, 100
    )

    preprocessor_main = preprocessing_kmeans(mask, device="cuda")
    # 120 and 1e-4 are passed to the k-means localizer
    # (presumably iteration cap and tolerance — confirm).
    localize_main = localize_kmeans_torch(120, 1e-4, "cuda")
    locs_gpu = _localize(
        loader, preprocessor_main, localize_main, initial_position, n_frames, "cuda"
    )
finally:
    loader.dataset.reader.stop()
stop = time()
print(f"Time for GPU run: {stop - start}s")

Done with frame 0
Done with frame 1000
Done with frame 2000
Done with frame 3000
Done with frame 4000
Done with frame 5000
Time for GPU run: 15.269138813018799s


In [8]:
# Stops the reader behind the current `loader`.
# NOTE(review): this cell has execution count 8 but sits after the GPU cell (count 9),
# so it was run out of order — presumably manual cleanup after an aborted run; confirm
# whether it is still needed.
loader.dataset.reader.stop()

Now the same pipeline on the CPU (scikit-learn k-means), for comparison:

In [7]:
# Running the CPU pipeline (sklearn k-means) with non-parallel loading.
# The CPU version doesn't work with parallel loading for some reason.
# NOTE(review): `VideoDataset` is not imported in the visible import cell, and the
# `DataLoader(dataset, batch_size=None, pin_memory=True)` call below has a different
# signature from the `DataLoader(movie_path, parallel=True)` call in the GPU cell.
# This cell appears to rely on earlier kernel state — confirm it runs on a fresh kernel.
start = time()
dataset = VideoDataset(movie_path, parallel=False)
loader = DataLoader(dataset, batch_size=None, pin_memory=True)

# Initial positions from the blob localizer, as in the GPU run.
preprocessor_ini = preprocessing_blob(mask)
localize_ini = localize_blob(default_blob_detector_params())
initial_position, initial_frame = _initialize(
    loader, preprocessor_ini, localize_ini, 100
)

# Main tracking pass on the CPU, with the same 120 / 1e-4 arguments as the GPU localizer.
preprocessor_main = preprocessing_kmeans(mask, device="cpu")
localize_main = localize_kmeans_sklearn(120, 1e-4)
locs_cpu = _localize(
    loader, preprocessor_main, localize_main, initial_position, n_frames, "cpu"
)
stop = time()
print(f"Time for CPU run: {stop - start}s")


Done with frame 0
Done with frame 1000
Done with frame 2000
Done with frame 3000
Done with frame 4000
Done with frame 5000
Time for CPU run: 43.508383989334106s


In [9]:
# Largest element-wise deviation between the CPU and GPU localizations.
# The absolute value is taken before the max: the max of the signed difference
# would ignore every element where the GPU value exceeds the CPU value.
np.max(np.abs(np.stack(locs_cpu) - torch.stack(locs_gpu).cpu().numpy()))

1.607823689778911