In [1]:
import argparse

import cv2
import numpy as np
import torch

from vedacore.image import imread, imwrite
from vedacore.misc import Config, color_val, load_weights
from vedacore.parallel import collate, scatter
from vedadet.datasets.pipelines import Compose
from vedadet.engines import build_engine

import time

In [2]:
def setup(config_path='configs/infer/tinaface/tinaface_r50_fpn_gn_dcn.py'):
    """Build the inference engine and preprocessing pipeline from a config.

    Args:
        config_path: Path of the config file to load. Defaults to the
            TinaFace R50-FPN-GN-DCN inference config, so a bare ``setup()``
            behaves as before.

    Returns:
        tuple: ``(engine, data_pipeline, device)`` where ``engine`` comes
        from ``build_engine(cfg.infer_engine)`` with its model moved to
        ``device`` and weights loaded from ``cfg.weights.filepath``,
        ``data_pipeline`` is ``Compose(cfg.data_pipeline)`` and ``device``
        is the value of ``torch.cuda.current_device()``.
    """
    cfg = Config.fromfile(config_path)

    # NOTE(review): this asks torch for the current CUDA device, so it
    # presumably fails on a machine without CUDA -- confirm if CPU runs
    # are ever needed.
    device = torch.cuda.current_device()

    engine = build_engine(cfg.infer_engine)
    engine.model.to(device)
    load_weights(engine.model, cfg.weights.filepath)

    data_pipeline = Compose(cfg.data_pipeline)
    return engine, data_pipeline, device

In [3]:
def detect(engine, data_pipeline, device, imgname):
    """Run the detector on one image file and return its result.

    Args:
        engine: Inference engine; its ``infer(img, img_metas)`` is called.
        data_pipeline: Callable applied to the raw sample dict to load and
            preprocess the image.
        device: Device handed to ``scatter`` as the single target.
        imgname: Path of the image file to process.

    Returns:
        The first element of what ``engine.infer`` returns for the
        one-image batch.
    """
    sample = dict(img_info=dict(filename=imgname), img_prefix=None)
    sample = data_pipeline(sample)

    # Wrap the single sample into a batch of one and place it on `device`.
    batch = collate([sample], samples_per_gpu=1)
    batch = scatter(batch, [device])[0]

    return engine.infer(batch['img'], batch['img_metas'])[0]

In [14]:
def measure_detect(engine, data_pipeline, device, imgname, n=25, warmup=0,
                   detect_fn=None):
    """Return the mean wall-clock time of one detection call, in milliseconds.

    Args:
        engine: Forwarded unchanged to the detection callable.
        data_pipeline: Forwarded unchanged to the detection callable.
        device: Forwarded unchanged to the detection callable.
        imgname: Path of the image to run detection on.
        n: Number of timed repetitions to average over (default 25).
        warmup: Number of untimed calls made before measuring (default 0).
            Useful for excluding one-off start-up cost from the average.
        detect_fn: Callable taking ``(engine, data_pipeline, device,
            imgname)``. Defaults to the module-level ``detect``.

    Returns:
        float: Average duration of a single call in milliseconds.

    Raises:
        ValueError: If ``n`` is smaller than 1.
    """
    if n < 1:
        raise ValueError('n must be at least 1, got %r' % (n,))
    if detect_fn is None:
        detect_fn = detect

    for _ in range(warmup):
        detect_fn(engine, data_pipeline, device, imgname)

    # NOTE(review): if the detection call returns before queued GPU work has
    # finished, these timings would under-report -- confirm that the engine
    # blocks until results are ready.
    total_ms = 0.0
    for _ in range(n):
        # perf_counter is a monotonic, high-resolution clock meant for
        # measuring intervals; time.time() can jump with clock adjustments.
        started = time.perf_counter()
        detect_fn(engine, data_pipeline, device, imgname)
        total_ms += (time.perf_counter() - started) * 1000
    return total_ms / n
    

In [15]:
# Build the engine, preprocessing pipeline and device once; the benchmark
# cells below all reuse these three objects.
engine, data_pipeline, device = setup()

In [20]:
# "360" image pair: time the single-face image first, then the multi-face one.
single_face_execution_time, multiple_faces_execution_time = (
    measure_detect(engine, data_pipeline, device, image_path)
    for image_path in ('images/1.360.jpeg', 'images/2.360.jpeg')
)
print(
    "360\nSingle Face: %s ms\nMultiple Faces: %s ms"
    % (single_face_execution_time, multiple_faces_execution_time)
)

360
Single Face: 478.89771461486816 ms
Multiple Faces: 487.8075408935547 ms


In [21]:
# "SD" image pair: time the single-face image first, then the multi-face one.
single_face_execution_time, multiple_faces_execution_time = (
    measure_detect(engine, data_pipeline, device, image_path)
    for image_path in ('images/1.sd.jpeg', 'images/2.sd.jpeg')
)
print(
    "SD\nSingle Face: %s ms\nMultiple Faces: %s ms"
    % (single_face_execution_time, multiple_faces_execution_time)
)

SD
Single Face: 488.17490577697754 ms
Multiple Faces: 501.1666774749756 ms


In [22]:
# "HD" image pair: time the single-face image first, then the multi-face one.
single_face_execution_time, multiple_faces_execution_time = (
    measure_detect(engine, data_pipeline, device, image_path)
    for image_path in ('images/1.hd.jpeg', 'images/2.hd.jpeg')
)
print(
    "HD\nSingle Face: %s ms\nMultiple Faces: %s ms"
    % (single_face_execution_time, multiple_faces_execution_time)
)

HD
Single Face: 495.3373336791992 ms
Multiple Faces: 505.45223236083984 ms


In [23]:
# "Full HD" image pair: time the single-face image first, then the multi-face one.
single_face_execution_time, multiple_faces_execution_time = (
    measure_detect(engine, data_pipeline, device, image_path)
    for image_path in ('images/1.fullhd.jpeg', 'images/2.fullhd.jpeg')
)
print(
    "Full HD\nSingle Face: %s ms\nMultiple Faces: %s ms"
    % (single_face_execution_time, multiple_faces_execution_time)
)

Full HD
Single Face: 504.89065170288086 ms
Multiple Faces: 516.646900177002 ms


In [24]:
# "4K" image pair: time the single-face image first, then the multi-face one.
single_face_execution_time, multiple_faces_execution_time = (
    measure_detect(engine, data_pipeline, device, image_path)
    for image_path in ('images/1.4k.jpeg', 'images/2.4k.jpeg')
)
print(
    "4K\nSingle Face: %s ms\nMultiple Faces: %s ms"
    % (single_face_execution_time, multiple_faces_execution_time)
)

4K
Single Face: 585.107307434082 ms
Multiple Faces: 607.2097778320312 ms
