In [1]:
# Disable tensorflow spam (needs to happen before tensorflow gets imported)
import os
# The environment variable is set before the `import tensorflow` statement further down.
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"  # set to "2" to see TensorRT errors
import logging
# Raise the threshold of the Python-side "tensorflow" logger to ERROR and attach a
# NullHandler (itself created with level ERROR) to that logger.
logging.getLogger("tensorflow").setLevel(logging.ERROR)
logging.getLogger("tensorflow").addHandler(logging.NullHandler(logging.ERROR))

import numpy as np
import tensorflow as tf
import cv2
from time import perf_counter
from trtutils import OptimizedModel
import system
import argparse
import pandas as pd

# NOTE(review): `system` is not a standard-library module, so it is presumably a
# project-local helper; disable_preallocation() presumably stops TensorFlow from
# reserving all GPU memory up front -- confirm against that module.
system.disable_preallocation()
# system.summary()


def log_result(data, filename="benchmark.csv"):
    """Append one row of results to a CSV file, creating the file if needed.

    Args:
        data: Dictionary mapping column names to scalar values; it becomes a
            single row.
        filename: Path of the CSV file to create or extend
            (default: "benchmark.csv").
    """
    new_row = pd.DataFrame({column: [data[column]] for column in data})

    # First write: nothing to merge with, so the row alone becomes the file.
    if not os.path.exists(filename):
        new_row.to_csv(filename, index=False)
        return

    # Otherwise re-read the existing rows, put the new row after them and rewrite.
    previous_rows = pd.read_csv(filename)
    combined = pd.concat([previous_rows, new_row])
    combined.to_csv(filename, index=False)


def read_frames(video_path, fidxs=None, grayscale=True):
    """Read frames from a video file.

    Args:
        video_path: Path to MP4
        fidxs: Iterable of frame indices or None to read all frames (default: None)
        grayscale: Keep only the first channel of the images (default: True)

    Returns:
        Loaded images in array of shape (n_frames, height, width, channels).
        The dtype is whatever OpenCV decodes (uint8 for typical 8-bit video).

    Raises:
        IOError: If the video cannot be opened or a requested frame cannot be read.
        ValueError: Raised by np.stack when no frames were requested.
    """
    vr = cv2.VideoCapture(video_path)
    try:
        if not vr.isOpened():
            raise IOError(f"Could not open video: {video_path}")

        if fidxs is None:
            # The frame count comes back as a float; cast it so indices are integers.
            fidxs = range(int(vr.get(cv2.CAP_PROP_FRAME_COUNT)))

        frames = []
        for fidx in fidxs:
            fidx = int(fidx)
            # Seek explicitly so arbitrary (non-consecutive) indices are supported.
            vr.set(cv2.CAP_PROP_POS_FRAMES, fidx)
            ok, img = vr.read()
            if not ok or img is None:
                # Fail with a clear message instead of a TypeError on None below.
                raise IOError(f"Could not read frame {fidx} from video: {video_path}")
            if grayscale:
                # Keep channel 0 only, preserving the trailing channel axis.
                img = img[:, :, :1]
            frames.append(img)
    finally:
        # Always free the capture handle, including on the error paths.
        vr.release()

    return np.stack(frames, axis=0)

In [2]:
%%time
# Benchmark configuration: which optimized model to load and where to save timings.
opt_model_path, save_path = "data/inference_td_trt_FP16", "latency.inference_td_trt_FP16.csv"
# opt_model_path, save_path = "data/inference_td_trt_FP32", "latency.inference_td_trt_FP32.csv"
test_data = "data/190719_090330_wt_18159206_rig1.2@15000-17560.mp4"
batch_sizes = [1, 16]

reps = 5  # timed passes over the frames per batch size (one extra warm-up pass is discarded)
N = 1280  # number of frames loaded from the test video

imgs = read_frames(test_data, np.arange(N))
opt_model = OptimizedModel(saved_model_dir=opt_model_path)

# One DataFrame per batch size; concatenated once after the loop instead of
# re-concatenating the growing result on every iteration.
per_batch_size = []
for batch_size in batch_sizes:
    dts = []
    n_frames = []
    for rep in range(reps + 1):
        for i in range(0, N, batch_size):
            # Slice outside the timed region so only predict() is measured.
            batch = imgs[i:(i + batch_size)]
            t0 = perf_counter()
            opt_model.predict(batch)
            dt = perf_counter() - t0
            if rep > 0:  # rep 0 is the warm-up pass and is not recorded
                dts.append(dt)
                # The last batch is shorter when N is not a multiple of batch_size.
                n_frames.append(len(batch))
    dts = np.array(dts)
    n_frames = np.array(n_frames)

    per_batch_size.append(
        pd.DataFrame(
            {
                "batch_size": np.full(dts.shape, batch_size),
                "dts": dts,  # seconds per predict() call
                # Throughput from the frames actually processed in each call.
                "fps": n_frames / dts,
            }
        )
    )

res = pd.concat(per_batch_size, ignore_index=True)
res.to_csv(save_path, index=False)

CPU times: user 14min 14s, sys: 36.7 s, total: 14min 51s
Wall time: 4min 21s


In [3]:
# Per-batch-size averages of the recorded columns: dts (perf_counter seconds per
# predict call) and fps.
res.groupby(by="batch_size").mean()

Unnamed: 0_level_0,dts,fps
batch_size,Unnamed: 1_level_1,Unnamed: 2_level_1
1,0.003359,298.019274
16,0.020879,766.506251
