In [1]:
%%writefile benchmark.py
"""This script is written from benchmark.ipynb. Do not edit directly."""

# Disable tensorflow spam (needs to happen before tensorflow gets imported)
import os
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"  # set to "2" to see TensorRT errors
import logging
logging.getLogger("tensorflow").setLevel(logging.ERROR)
logging.getLogger("tensorflow").addHandler(logging.NullHandler(logging.ERROR))

import argparse
import numpy as np
import pandas as pd
import tensorflow as tf
import system
from video import read_frames
from trtutils import OptimizedModel
from time import perf_counter


# NOTE(review): `system` is a project-local module that is not shown here; presumably this
# keeps TensorFlow from reserving all GPU memory at start-up — confirm against system.py.
system.disable_preallocation()


def log_result(data, filename="benchmark.csv"):
    """Append benchmark results to a CSV file, creating the file if needed.

    Args:
        data: Either a `pandas.DataFrame`, whose rows are appended as they are,
            or a dictionary mapping column names to scalar values, which is
            appended as a single row.
        filename: Path of the CSV file to append to.

    Notes:
        The existing file is read back, concatenated with `data` and rewritten
        in full. Columns that exist in only one of the two are kept, with
        missing values for the rows that lack them.
    """
    if not isinstance(data, pd.DataFrame):
        # Scalars have to be wrapped in lists to form a one-row frame.
        data = pd.DataFrame({k: [v] for k, v in data.items()})

    # A zero-byte file cannot be parsed by `read_csv`; treat it like a missing file.
    if os.path.exists(filename) and os.path.getsize(filename) > 0:
        data = pd.concat([
            pd.read_csv(filename),
            data,
        ])
    data.to_csv(filename, index=False)


def benchmark(model, precision, test_data_path, n_frames, batch_size, reps, grayscale):
    """Time batched inference of an optimized model on frames of a video clip.

    The frames are processed in batches for `reps + 1` passes. The first pass
    is a warm-up and is not recorded; every batch of the remaining passes
    contributes one timing row. The rows are appended to "trt_benchmarks.csv"
    and a mean/std summary of FPS and batch time is printed.

    Args:
        model: Directory containing the optimized model; the model is loaded
            from "<model>/trtmodel_FP<precision>".
        precision: Precision tag used in the model directory name (for example
            16 or "16").
        test_data_path: Path of the clip passed to `read_frames`.
        n_frames: Number of frames requested from the clip (indices
            0..n_frames-1).
        batch_size: Number of frames passed to each `predict` call.
        reps: Number of timed passes over the frames.
        grayscale: Forwarded to `read_frames`.
    """
    trt_model_path = f"{model}/trtmodel_FP{precision}"

    imgs = read_frames(test_data_path, np.arange(n_frames), grayscale=grayscale)
    trt_model = OptimizedModel(saved_model_dir=trt_model_path)

    # Never step past the frames that were actually loaded, so no empty batch is timed.
    n_available = min(n_frames, len(imgs))

    dts = []
    frames_per_call = []
    for rep in range(reps + 1):
        for i in range(0, n_available, batch_size):
            # Slice outside the timed region so only the predict call is measured.
            batch = imgs[i:(i + batch_size)]
            t0 = perf_counter()
            # NOTE(review): `numpy=True` presumably makes predict return host arrays,
            # forcing the computation to finish before the timer stops — confirm.
            trt_model.predict(batch, numpy=True)
            dt = perf_counter() - t0
            if rep > 0:  # rep 0 is the untimed warm-up pass
                dts.append(dt)
                frames_per_call.append(len(batch))
    dts = np.array(dts)
    frames_per_call = np.array(frames_per_call)

    res = pd.DataFrame({"batch_size": np.full(dts.shape, batch_size), "dts": dts})

    # Use the number of frames really processed by each call: the last batch of a pass is
    # shorter when the frame count is not a multiple of `batch_size`, and dividing the
    # nominal batch size by its time would overstate the throughput.
    res["fps"] = frames_per_call / dts
    res["model"] = model
    res["precision"] = precision
    res["trt_model_path"] = trt_model_path
    res["test_data_path"] = test_data_path
    # Unpacking into three columns requires `imgs` to be 4-dimensional; the last axis
    # (presumably channels) is dropped.
    res["n_frames"], res["img_height"], res["img_width"] = imgs.shape[:-1]

    log_result(res, filename="trt_benchmarks.csv")

    print(res.groupby("batch_size")[["fps", "dts"]].agg(["mean", "std"]).to_string())
    

if __name__ == "__main__":
    # Command-line entry point: three required positionals followed by optional tuning flags.
    cli = argparse.ArgumentParser()

    for positional in ("model", "precision", "test_data"):
        cli.add_argument(positional, type=str)

    for flag, default in (("--n_frames", 1280), ("--batch_size", 32), ("--reps", 5)):
        cli.add_argument(flag, type=int, default=default)

    # Paired switches that write to the same destination; grayscale stays on unless
    # --no-grayscale is given.
    cli.add_argument("--grayscale", dest="grayscale", action="store_true")
    cli.add_argument("--no-grayscale", dest="grayscale", action="store_false")
    cli.set_defaults(grayscale=True)

    opts = cli.parse_args()

    benchmark(
        model=opts.model,
        precision=opts.precision,
        test_data_path=opts.test_data,
        n_frames=opts.n_frames,
        batch_size=opts.batch_size,
        reps=opts.reps,
        grayscale=opts.grayscale,
    )

Overwriting benchmark.py


In [2]:
import pandas as pd

# Model directories and test clips, grouped by dataset. Each `*_clips` value is a list of
# video paths; the other names are model directories.

# --- mice_of ---
mice_of_centroid = "sleap-data/datasets/wang_4mice_john/labels.full/models.random_split1/centroid.210507_132508.centroid.n=900"
mice_of_td = "sleap-data/experiments/best_model_replicates/mice_of_td.210509_231024.centered_instance.n=900"
mice_of_clips = [
    "sleap-data/datasets/wang_4mice_john/clips/OFTephys-0055-08@14616-18736.mp4",
    "sleap-data/datasets/wang_4mice_john/clips/OFTsocialgroup-0000-00@117000-121700.mp4",
    "sleap-data/datasets/wang_4mice_john/clips/OFTsocial5mice-0000-00.mp4.old.predictions@3700-6260.mp4",
]


# --- flies13 ---
flies13_centroid = "sleap-data/datasets/wt_gold.13pt/models.tracking_split2/centroid.fast.210504_182918.centroid.n=1800"
flies13_td = "sleap-data/datasets/wt_gold.13pt/models.tracking_split2/td_fast.210505_012601.centered_instance.n=1800"
flies13_td_id = "sleap-data/datasets/wt_gold.13pt/models.tracking_split2/td_id.fast.v2.210519_111253.multi_class_topdown.n=1800"
flies13_clips = [
    "sleap-data/datasets/wt_gold.13pt/clips/single_fly@100000-110000.mp4",
    "sleap-data/datasets/wt_gold.13pt/clips/190719_090330_wt_18159206_rig1.2@15000-17560.mp4",
    "sleap-data/datasets/wt_gold.13pt/clips/three_flies@8800-16800.mp4",
    "sleap-data/datasets/wt_gold.13pt/clips/four_flies@1000-11000.mp4",
    "sleap-data/datasets/wt_gold.13pt/clips/eight_flies@180000-200000.mp4",
]


# --- fly32 ---
fly32_single = "sleap-data/datasets/BermanFlies/models.random_split1/single.fast_unet32.210524_171130.single_instance.n=1350"
fly32_clips = [
    "sleap-data/datasets/BermanFlies/clips/072212_163153@10000-13200.mp4",
]


# --- gerbils ---
gerbils_centroid = "sleap-data/datasets/nyu-gerbils/cohort1_compressedTalmo_23vids_march_7_to_march_17/models.random_split1.day001/centroid.210504_225945.centroid.n=383"
gerbils_td_id = "sleap-data/datasets/nyu-gerbils/cohort1_compressedTalmo_23vids_march_7_to_march_17/models.random_split1.day001/td_id.210505_002058.multi_class_topdown.n=383"
gerbils_clips = [
    "sleap-data/datasets/nyu-gerbils/clips/2020-3-10_daytime_5mins_compressedTalmo@3200-5760.mp4",
]


# Per-clip metadata table with one row per test clip, keyed by `test_data_path`.
_clip_records = [
    # (test_data_path, dataset, n_instances)
    ("sleap-data/datasets/wang_4mice_john/clips/OFTephys-0055-08@14616-18736.mp4", "mice_of", 2),
    ("sleap-data/datasets/wang_4mice_john/clips/OFTsocialgroup-0000-00@117000-121700.mp4", "mice_of", 4),
    ("sleap-data/datasets/wang_4mice_john/clips/OFTsocial5mice-0000-00.mp4.old.predictions@3700-6260.mp4", "mice_of", 5),
    ("sleap-data/datasets/BermanFlies/clips/072212_163153@10000-13200.mp4", "fly32", 1),
    ("sleap-data/datasets/wt_gold.13pt/clips/single_fly@100000-110000.mp4", "flies13", 1),
    ("sleap-data/datasets/wt_gold.13pt/clips/190719_090330_wt_18159206_rig1.2@15000-17560.mp4", "flies13", 2),
    ("sleap-data/datasets/wt_gold.13pt/clips/three_flies@8800-16800.mp4", "flies13", 3),
    ("sleap-data/datasets/wt_gold.13pt/clips/four_flies@1000-11000.mp4", "flies13", 4),
    ("sleap-data/datasets/wt_gold.13pt/clips/eight_flies@180000-200000.mp4", "flies13", 8),
    ("sleap-data/datasets/nyu-gerbils/clips/2020-3-10_daytime_5mins_compressedTalmo@3200-5760.mp4", "gerbils", 4),
]
test_data_info = pd.DataFrame(
    _clip_records,
    columns=["test_data_path", "dataset", "n_instances"],
)

In [3]:
# Sweep settings: every (clip, precision, batch size) combination is benchmarked once.
# `precision` selects the "trtmodel_FP{precision}" directory inside the model directory;
# `reps` is the number of timed passes (benchmark.py adds one untimed warm-up pass).
precisions = [16, 32]
batch_sizes = [1, 4, 8, 16, 32]
n_frames = 1280
reps = 3

# Model/clip selection for this cell: the mice_of top-down model. The commented-out
# alternatives are run by other cells.
model, clips = mice_of_td, mice_of_clips
# model, clips = flies13_td, flies13_clips
# model, clips = flies13_td_id, flies13_clips
# model, clips = fly32_single, fly32_clips
# model, clips = gerbils_td_id, gerbils_clips

for test_data in clips:
    for precision in precisions:
        for batch_size in batch_sizes:
            # Each configuration runs in its own Python process through the IPython shell
            # escape; the {name} placeholders are filled in from the notebook variables.
            # benchmark.py appends its timings to trt_benchmarks.csv and prints a summary.
            !python benchmark.py "{model}" {precision} "{test_data}" --n_frames {n_frames} --batch_size {batch_size} --reps {reps}

                   fps                dts          
                  mean      std      mean       std
batch_size                                         
1           169.278934  6.64302  0.005917  0.000249
                   fps                dts          
                  mean      std      mean       std
batch_size                                         
4           278.845459  7.04976  0.014354  0.000379
                  fps                 dts          
                 mean       std      mean       std
batch_size                                         
8           331.66588  8.140703  0.024136  0.000625
                   fps                  dts          
                  mean        std      mean       std
batch_size                                           
16          357.387276  12.877239  0.044831  0.001713
                   fps                 dts          
                  mean       std      mean       std
batch_size                                          
3

In [4]:
# Sweep settings: every (clip, precision, batch size) combination is benchmarked once.
# `precision` selects the "trtmodel_FP{precision}" directory inside the model directory;
# `reps` is the number of timed passes (benchmark.py adds one untimed warm-up pass).
precisions = [16, 32]
batch_sizes = [1, 4, 8, 16, 32]
n_frames = 1280
reps = 3


# Model/clip selection for this cell: the flies13 top-down model on all flies13 clips.
# model, clips = mice_of_td, mice_of_clips
model, clips = flies13_td, flies13_clips
# model, clips = flies13_td_id, flies13_clips
# model, clips = fly32_single, fly32_clips
# model, clips = gerbils_td_id, gerbils_clips

for test_data in clips:
    for precision in precisions:
        for batch_size in batch_sizes:
            # Each configuration runs in its own Python process through the IPython shell
            # escape; the {name} placeholders are filled in from the notebook variables.
            # benchmark.py appends its timings to trt_benchmarks.csv and prints a summary.
            !python benchmark.py "{model}" {precision} "{test_data}" --n_frames {n_frames} --batch_size {batch_size} --reps {reps}

                   fps                  dts          
                  mean        std      mean       std
batch_size                                           
1           295.917852  11.397061  0.003385  0.000155
                   fps                  dts          
                  mean        std      mean       std
batch_size                                           
4           649.787588  18.041041  0.006161  0.000192
                   fps                  dts          
                  mean        std      mean       std
batch_size                                           
8           766.379651  25.880774  0.010452  0.000388
                   fps                  dts          
                  mean        std      mean       std
batch_size                                           
16          855.653826  15.855068  0.018706  0.000357
                   fps                  dts          
                  mean        std      mean       std
batch_size                  

In [5]:
# Sweep settings: every (clip, precision, batch size) combination is benchmarked once.
# `precision` selects the "trtmodel_FP{precision}" directory inside the model directory;
# `reps` is the number of timed passes (benchmark.py adds one untimed warm-up pass).
precisions = [16, 32]
batch_sizes = [1, 4, 8, 16, 32]
n_frames = 1280
reps = 3


# Model/clip selection for this cell: the flies13 td_id model, restricted to the first
# two entries of `flies13_clips`.
# model, clips = mice_of_td, mice_of_clips
# model, clips = flies13_td, flies13_clips
model, clips = flies13_td_id, flies13_clips[:2]
# model, clips = fly32_single, fly32_clips
# model, clips = gerbils_td_id, gerbils_clips

for test_data in clips:
    for precision in precisions:
        for batch_size in batch_sizes:
            # Each configuration runs in its own Python process through the IPython shell
            # escape; the {name} placeholders are filled in from the notebook variables.
            # benchmark.py appends its timings to trt_benchmarks.csv and prints a summary.
            !python benchmark.py "{model}" {precision} "{test_data}" --n_frames {n_frames} --batch_size {batch_size} --reps {reps}

                   fps                  dts          
                  mean        std      mean       std
batch_size                                           
1           297.250088  10.009731  0.003368  0.000127
                   fps                  dts         
                  mean        std      mean      std
batch_size                                          
4           662.832568  22.039478  0.006043  0.00024
                   fps                 dts          
                  mean        std     mean       std
batch_size                                          
8           787.268211  21.909089  0.01017  0.000302
                   fps                  dts          
                  mean        std      mean       std
batch_size                                           
16          859.997533  53.326481  0.018685  0.001307
                   fps                  dts          
                  mean        std      mean       std
batch_size                          

In [6]:
# Sweep settings: every (clip, precision, batch size) combination is benchmarked once.
# This cell extends the batch sizes up to 256; 1280 frames divide evenly by each of them.
# `precision` selects the "trtmodel_FP{precision}" directory inside the model directory;
# `reps` is the number of timed passes (benchmark.py adds one untimed warm-up pass).
precisions = [16, 32]
batch_sizes = [1, 4, 8, 16, 32, 64, 128, 256]
n_frames = 1280
reps = 3


# Model/clip selection for this cell: the fly32 single-instance model.
# model, clips = mice_of_td, mice_of_clips
# model, clips = flies13_td, flies13_clips
# model, clips = flies13_td_id, flies13_clips
model, clips = fly32_single, fly32_clips
# model, clips = gerbils_td_id, gerbils_clips

for test_data in clips:
    for precision in precisions:
        for batch_size in batch_sizes:
            # Each configuration runs in its own Python process through the IPython shell
            # escape; the {name} placeholders are filled in from the notebook variables.
            # benchmark.py appends its timings to trt_benchmarks.csv and prints a summary.
            !python benchmark.py "{model}" {precision} "{test_data}" --n_frames {n_frames} --batch_size {batch_size} --reps {reps}

                   fps                  dts          
                  mean        std      mean       std
batch_size                                           
1           632.322348  24.383844  0.001584  0.000079
                    fps                 dts          
                   mean       std      mean       std
batch_size                                           
4           1474.737768  40.24747  0.002715  0.000091
                    fps                  dts          
                   mean        std      mean       std
batch_size                                            
8           1970.495766  51.491371  0.004063  0.000119
                    fps                  dts          
                   mean        std      mean       std
batch_size                                            
16          2200.596268  61.808939  0.007277  0.000216
                    fps                  dts          
                   mean        std      mean       std
batch_size        

In [9]:
# Sweep settings: every (clip, precision, batch size) combination is benchmarked once.
# This cell stops at batch size 16.
# `precision` selects the "trtmodel_FP{precision}" directory inside the model directory;
# `reps` is the number of timed passes (benchmark.py adds one untimed warm-up pass).
precisions = [16, 32]
batch_sizes = [1, 4, 8, 16]
n_frames = 1280
reps = 3


# Model/clip selection for this cell: the gerbils td_id model.
# model, clips = mice_of_td, mice_of_clips
# model, clips = flies13_td, flies13_clips
# model, clips = flies13_td_id, flies13_clips
# model, clips = fly32_single, fly32_clips
model, clips = gerbils_td_id, gerbils_clips

for test_data in clips:
    for precision in precisions:
        for batch_size in batch_sizes:
            # Each configuration runs in its own Python process through the IPython shell
            # escape; the {name} placeholders are filled in from the notebook variables.
            # Unlike the other sweeps this one passes --no-grayscale, overriding
            # benchmark.py's grayscale=True default.
            # benchmark.py appends its timings to trt_benchmarks.csv and prints a summary.
            !python benchmark.py "{model}" {precision} "{test_data}" --n_frames {n_frames} --batch_size {batch_size} --reps {reps} --no-grayscale

                  fps                 dts          
                 mean       std      mean       std
batch_size                                         
1           87.838946  7.516676  0.011466  0.000967
                   fps                  dts          
                  mean        std      mean       std
batch_size                                           
4           140.457951  17.214821  0.028924  0.003682
                   fps                  dts          
                  mean        std      mean       std
batch_size                                           
8           148.758871  16.660236  0.054456  0.006131
                   fps                  dts          
                  mean        std      mean       std
batch_size                                           
16          136.998672  14.553428  0.118088  0.012382
                  fps                 dts          
                 mean       std      mean       std
batch_size                              

In [11]:
# Attach the per-clip metadata from `test_data_info` to the raw timings and save the
# combined table.
benchmarks_raw = pd.read_csv("trt_benchmarks.csv")

# The merge below is an inner join, so timings for clips that are not listed in
# `test_data_info` are left out of the result. Report them instead of dropping them silently.
unlisted_clips = (
    benchmarks_raw.loc[
        ~benchmarks_raw["test_data_path"].isin(test_data_info["test_data_path"]),
        "test_data_path",
    ]
    .unique()
    .tolist()
)
if unlisted_clips:
    print(f"Warning: no metadata for these clips; their benchmark rows are dropped: {unlisted_clips}")

# validate="many_to_one" raises a MergeError if a clip is listed more than once in
# `test_data_info`, which would otherwise duplicate every timing row of that clip.
all_benchmarks = benchmarks_raw.merge(
    test_data_info,
    on="test_data_path",
    validate="many_to_one",
)
all_benchmarks.to_csv("all_trt_benchmarks.csv", index=False)

In [12]:
# FPS mean and standard deviation for the precision-16 runs, per dataset, instance count
# and batch size.
(
    all_benchmarks
    .loc[lambda df: df["precision"] == 16]
    .groupby(["dataset", "n_instances", "batch_size"])[["fps"]]
    .agg(["mean", "std"])
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,fps,fps
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,std
dataset,n_instances,batch_size,Unnamed: 3_level_2,Unnamed: 4_level_2
flies13,1,1,299.104987,11.833367
flies13,1,4,658.630498,23.976645
flies13,1,8,767.357013,28.194388
flies13,1,16,856.987101,41.06758
flies13,1,32,752.261939,20.246201
flies13,2,1,290.552086,11.926836
flies13,2,4,596.115711,27.436143
flies13,2,8,686.648726,32.995086
flies13,2,16,783.133509,30.55122
flies13,2,32,689.98888,22.070296


In [13]:
# FPS mean and standard deviation for the precision-32 runs, per dataset, instance count
# and batch size.
(
    all_benchmarks
    .loc[lambda df: df["precision"] == 32]
    .groupby(["dataset", "n_instances", "batch_size"])[["fps"]]
    .agg(["mean", "std"])
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,fps,fps
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,std
dataset,n_instances,batch_size,Unnamed: 3_level_2,Unnamed: 4_level_2
flies13,1,1,260.763776,8.125511
flies13,1,4,467.721571,20.858988
flies13,1,8,531.06372,21.079892
flies13,1,16,581.277977,21.588219
flies13,1,32,531.937695,14.929899
flies13,2,1,244.734672,9.55621
flies13,2,4,426.792082,17.355492
flies13,2,8,483.555779,24.116494
flies13,2,16,526.598531,26.866633
flies13,2,32,486.490824,23.037296
