In [2]:
import cv2
import time
import numpy as np
import io
import torch
import torch.jit
import torch.nn as nn
from torch2trt import torch2trt, ConversionContext, TRTModule

from mmdet.apis import init_detector, inference_detector, inference_batch_detector
import tensorrt as trt
from mmdet.datasets import replace_ImageToTensor
from mmdet.datasets.pipelines import Compose
from mmcv.parallel import collate, scatter
from mmdet2trt import mmdet2trt
# from torch2trt_dynamic import torch2trt_dynamic



OSError: /home/ubuntu/oljike/progs/mmdetection-to-tensorrt/mmdet2trt/converters/plugins/libamirstan_plugin.so: cannot open shared object file: No such file or directory

In [7]:
class JerseyModel(torch.nn.Module):
    CHECKPOINT_FILENAME_PATTERN = 'model-{}.pth'

    # __constants__ = ['_hidden1', '_hidden2', '_hidden3', '_hidden4', '_hidden5',
    #                  '_hidden6', '_hidden7', '_hidden8', '_hidden9', '_hidden10',
    #                  '_features', '_classifier',
    #                  '_digit_length', '_digit1', '_digit2', '_digit3', '_digit4', '_digit5']

    def __init__(self, inter_size=7):
        super(JerseyModel, self).__init__()

        self.inter_size = inter_size
        self._hidden1 = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=48, kernel_size=5, padding=2),
            nn.BatchNorm2d(num_features=48),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2, padding=1),
            nn.Dropout(0.2)
        )
        self._hidden2 = nn.Sequential(
            nn.Conv2d(in_channels=48, out_channels=64, kernel_size=5, padding=2),
            nn.BatchNorm2d(num_features=64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=1, padding=1),
            nn.Dropout(0.2)
        )
        self._hidden3 = nn.Sequential(
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=5, padding=2),
            nn.BatchNorm2d(num_features=128),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2, padding=1),
            nn.Dropout(0.2)
        )
        self._hidden4 = nn.Sequential(
            nn.Conv2d(in_channels=128, out_channels=160, kernel_size=5, padding=2),
            nn.BatchNorm2d(num_features=160),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=1, padding=1),
            nn.Dropout(0.2)
        )
        self._hidden5 = nn.Sequential(
            nn.Conv2d(in_channels=160, out_channels=192, kernel_size=5, padding=2),
            nn.BatchNorm2d(num_features=192),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2, padding=1),
            nn.Dropout(0.2)
        )
        self._hidden6 = nn.Sequential(
            nn.Conv2d(in_channels=192, out_channels=192, kernel_size=5, padding=2),
            nn.BatchNorm2d(num_features=192),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=1, padding=1),
            nn.Dropout(0.2)
        )
        self._hidden7 = nn.Sequential(
            nn.Conv2d(in_channels=192, out_channels=192, kernel_size=5, padding=2),
            nn.BatchNorm2d(num_features=192),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2, padding=1),
            nn.Dropout(0.2)
        )
        self._hidden8 = nn.Sequential(
            nn.Conv2d(in_channels=192, out_channels=192, kernel_size=5, padding=2),
            nn.BatchNorm2d(num_features=192),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=1, padding=1),
            nn.Dropout(0.2)
        )
        self._hidden9 = nn.Sequential(
            nn.Linear(192 * self.inter_size * self.inter_size, 3072),
            nn.ReLU()
        )
        self._hidden10 = nn.Sequential(
            nn.Linear(3072, 3072),
            nn.ReLU()
        )

        self._digit_length = nn.Sequential(nn.Linear(3072, 4))
        self._digit1 = nn.Sequential(nn.Linear(3072, 11))
        self._digit2 = nn.Sequential(nn.Linear(3072, 11))


    # @torch.jit.script_method
    def forward(self, x):
        # print(x.size())
        x = self._hidden1(x)
        x = self._hidden2(x)
        x = self._hidden3(x)
        x = self._hidden4(x)
        x = self._hidden5(x)
        x = self._hidden6(x)
        x = self._hidden7(x)
        x = self._hidden8(x)
        # print(x.size())
        x = x.view(x.size(0), 192 * self.inter_size * self.inter_size)
        x = self._hidden9(x)
        x = self._hidden10(x)

        length_logits = self._digit_length(x)
        digit1_logits = self._digit1(x)
        digit2_logits = self._digit2(x)


        return length_logits, digit1_logits, digit2_logits

    def store(self, path_to_dir, step, maximum=5):
        path_to_models = glob.glob(os.path.join(path_to_dir, Model.CHECKPOINT_FILENAME_PATTERN.format('*')))
        if len(path_to_models) == maximum:
            min_step = min([int(path_to_model.split('/')[-1][6:-4]) for path_to_model in path_to_models])
            path_to_min_step_model = os.path.join(path_to_dir, Model.CHECKPOINT_FILENAME_PATTERN.format(min_step))
            os.remove(path_to_min_step_model)

        path_to_checkpoint_file = os.path.join(path_to_dir, Model.CHECKPOINT_FILENAME_PATTERN.format(step))
        torch.save(self.state_dict(), path_to_checkpoint_file)
        return path_to_checkpoint_file

    def restore(self, path_to_checkpoint_file):
        self.load_state_dict(torch.load(path_to_checkpoint_file))
        step = int(path_to_checkpoint_file.split('/')[-1][6:-4])
        return step
    
def _infer_jersey(model, numpy_image):


    with torch.no_grad():
        transform = transforms.Compose([
            transforms.CenterCrop([54, 54]),
            transforms.ToTensor(),
            transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
        ])
        
        numpy_image = cv2.resize(numpy_image, (64, 64))

        
        image = Image.fromarray(numpy_image)
        image = transform(image)
        images = image.unsqueeze(dim=0).cuda()

        length_logits, digit1_logits, digit2_logits= model.eval()(images)

        length_prediction = length_logits.max(1)[1]
        digit1_prediction = digit1_logits.max(1)[1]
        digit2_prediction = digit2_logits.max(1)[1]
  

    return [digit1_prediction.item(), digit2_prediction.item()]

In [8]:
# class_model = Model()
class_model = JerseyModel(7)
class_model.restore('../SVHNClassifier-PyTorch/work_dirs/basic_randaug/model-14000.pth')
class_model = class_model.eval().cuda()

In [21]:
# create example data
x = torch.rand((1, 3, 54, 54)).cuda()

# convert to TensorRT feeding sample data as input
model_trt = torch2trt(class_model, [x], use_onnx=True)

In [22]:
x_trt = torch.rand((2, 3, 54, 54)).cuda()

st = time.time()
for i in range(1000):
    r = model_trt(x_trt)
print(time.time() - st)

0.1912069320678711


In [23]:
x_orig = torch.rand((2, 3, 54, 54)).cuda()


st = time.time()
for i in range(1000):
    r = class_model(x_orig)
print(time.time() - st)

2.9165523052215576


In [16]:
### Config and model weights path
config_file = '/home/ubuntu/oljike/BallTracking/mmdetection/configs/yolo_jersey/yolov3_d53_320_273e_jersey.py'
checkpoint_file = '/home/ubuntu/oljike/BallTracking/mmdetection/work_dirs/jersey_region_yolov3-320/epoch_80.pth'

# build the model from a config file and a checkpoint file
det_model = init_detector(config_file, checkpoint_file, device='cuda:0')

In [17]:
def infer_det(model, imglist):
    

    is_batch = False
    if isinstance(imglist, list):
        is_batch = True
    else:
        imglist = [imglist]

    cfg = model.cfg
    device = next(model.parameters()).device  # model device
    results = []

    if isinstance(imglist[0], np.ndarray):
        cfg = cfg.copy()
        # set loading pipeline type
        cfg.data.test.pipeline[0].type = 'LoadImageFromWebcam'

    cfg.data.test.samples_per_gpu = len(imglist)
    cfg.data.test.pipeline = replace_ImageToTensor(cfg.data.test.pipeline)
    test_pipeline = Compose(cfg.data.test.pipeline)

    datalist = []
    for img in imglist:
        # prepare data
        if isinstance(img, np.ndarray):
            # directly add img
            data = dict(img=img)
        else:
            # add information into dict
            data = dict(img_info=dict(filename=img), img_prefix=None)
        # build the data pipeline
        data = test_pipeline(data)
        datalist.append(data)

    data = collate(datalist, samples_per_gpu=len(imglist))
    # just get the actual data from DataContainer
    data['img_metas'] = [img_metas.data[0] for img_metas in data['img_metas']]
    data['img'] = [img.data[0] for img in data['img']]
    if next(model.parameters()).is_cuda:
        # scatter to specified GPU
        data = scatter(data, [device])[0]
    else:
        for m in model.modules():
            assert not isinstance(
                m, RoIPool
            ), 'CPU inference with RoIPool is not supported currently.'
    
    return data
        
imglist = [np.ones((224,224,3)), np.ones((224,224,3))]
data = infer_det(det_model, imglist)

    # forward the model
with torch.no_grad():
    results = det_model(return_loss=False, rescale=True, **data)

In [61]:
def default_input_names(num_inputs):
    return ["input_%d" % i for i in range(num_inputs)]

def default_output_names(num_outputs):
    return ["output_%d" % i for i in range(num_outputs)]

def convert(module, 
              inputs, 
              input_names=None, 
              output_names=None, 
              log_level=trt.Logger.ERROR, 
              max_batch_size=1,
              fp16_mode=False, 
              max_workspace_size=1<<25, 
              strict_type_constraints=False, 
              keep_network=True, 
              int8_mode=False, 
              int8_calib_dataset=None,
              int8_calib_algorithm=trt.CalibrationAlgoType.ENTROPY_CALIBRATION_2,
              int8_calib_batch_size=1,
              use_onnx=True):

#     inputs_in = inputs

#     # copy inputs to avoid modifications to source data
#     inputs = [tensor.clone()[0:1] for tensor in inputs]  # only run single entry

    logger = trt.Logger(log_level)
    builder = trt.Builder(logger)
    
#     if isinstance(inputs, list):
#         inputs = tuple(inputs)
#     if not isinstance(inputs, tuple):
#         inputs = (inputs,)
        
    # run once to get num outputs
    outputs = module(return_loss=False, rescale=True, **inputs)
    if not isinstance(outputs, tuple) and not isinstance(outputs, list):
        outputs = (outputs,)
        
    if input_names is None:
        input_names = default_input_names(len(inputs))
    if output_names is None:
        output_names = default_output_names(len(outputs))
        
    if use_onnx:
        f = io.BytesIO()
        torch.onnx.export(module, **inputs, f, input_names=input_names, output_names=output_names)
        f.seek(0)
        onnx_bytes = f.read()
        network = builder.create_network(1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
        parser = trt.OnnxParser(network, logger)
        parser.parse(onnx_bytes)


    builder.max_workspace_size = max_workspace_size
    builder.fp16_mode = fp16_mode
    builder.max_batch_size = max_batch_size
    builder.strict_type_constraints = strict_type_constraints


    engine = builder.build_cuda_engine(network)
    module_trt = TRTModule(engine, input_names, output_names)
    
    
convert(det_model, data)

SyntaxError: positional argument follows keyword argument unpacking (<ipython-input-61-2b30b48daf71>, line 49)

In [56]:
data['img'][0]

tensor([[[[0.0039, 0.0039, 0.0039,  ..., 0.0039, 0.0039, 0.0039],
          [0.0039, 0.0039, 0.0039,  ..., 0.0039, 0.0039, 0.0039],
          [0.0039, 0.0039, 0.0039,  ..., 0.0039, 0.0039, 0.0039],
          ...,
          [0.0039, 0.0039, 0.0039,  ..., 0.0039, 0.0039, 0.0039],
          [0.0039, 0.0039, 0.0039,  ..., 0.0039, 0.0039, 0.0039],
          [0.0039, 0.0039, 0.0039,  ..., 0.0039, 0.0039, 0.0039]],

         [[0.0039, 0.0039, 0.0039,  ..., 0.0039, 0.0039, 0.0039],
          [0.0039, 0.0039, 0.0039,  ..., 0.0039, 0.0039, 0.0039],
          [0.0039, 0.0039, 0.0039,  ..., 0.0039, 0.0039, 0.0039],
          ...,
          [0.0039, 0.0039, 0.0039,  ..., 0.0039, 0.0039, 0.0039],
          [0.0039, 0.0039, 0.0039,  ..., 0.0039, 0.0039, 0.0039],
          [0.0039, 0.0039, 0.0039,  ..., 0.0039, 0.0039, 0.0039]],

         [[0.0039, 0.0039, 0.0039,  ..., 0.0039, 0.0039, 0.0039],
          [0.0039, 0.0039, 0.0039,  ..., 0.0039, 0.0039, 0.0039],
          [0.0039, 0.0039, 0.0039,  ..., 0