In [1]:
import numpy as np
import torch
import time
from torchvision.models import *
import pandas as pd
import os
import torchvision
from torch2trt import torch2trt

In [2]:
from torchvision.models.segmentation import *

In [3]:
FP32 = True
FP16 = True
INT8 = True

In [4]:
# make results
os.makedirs("results", exist_ok=True)

In [5]:
class ModelWrapper(torch.nn.Module):
    def __init__(self, model):
        super(ModelWrapper, self).__init__()
        self.model = model
    def forward(self, x):
        return self.model(x)['out']

In [6]:
def computeTime(model, input_size=[1, 3, 224, 224], device='cuda', FP16=False):
    inputs = torch.randn(input_size)
    if device == 'cuda':
        model = model.cuda()
        inputs = inputs.cuda()
    if FP16:
        model = model.half()
        inputs = inputs.half()

    model.eval()

    i = 0
    time_spent = []
    while i < 200:
        start_time = time.time()
        with torch.no_grad():
            _ = model(inputs)

        if device == 'cuda':
            torch.cuda.synchronize() # wait for cuda to finish (cuda is asynchronous!)
        if i != 0:
            time_spent.append(time.time() - start_time)
        i += 1
    print('Avg execution time (ms): {:.3f}'.format(np.mean(time_spent)))
    return np.mean(time_spent)

In [7]:
# resnet is enought for now
modellist = ["fcn_resnet50", "fcn_resnet101", "deeplabv3_resnet50", "deeplabv3_resnet101"]

In [9]:
results = []
for i, model_name in enumerate(modellist):
    runtimes = []

    # define model
    print("model: {}".format(model_name))
    input_size = [1, 3, 512, 512]
    mdl = globals()[model_name]
    model = mdl().cuda().eval()
    # Run raw models
    runtimes.append(computeTime(model, input_size=input_size, device="cuda", FP16=False))

    if FP32:    
        mdl = globals()[model_name]
        model = mdl().cuda().eval()
        model_w = ModelWrapper(model)
        x = torch.zeros(input_size).cuda()

        # convert to tensorrt models
        model_trt = torch2trt(model_w, [x])

        # Run TensorRT models
        runtimes.append(computeTime(model_trt, input_size=input_size, device="cuda", FP16=False))
    if FP16:
        print("running fp16 models..")
        # Make FP16 tensorRT models
        mdl = globals()[model_name]
        model = mdl().eval().half().cuda()
        model_w = ModelWrapper(model).half()
        x = torch.zeros(input_size).half().cuda()
        # convert to tensorrt models
        model_trt = torch2trt(model_w, [x], fp16_mode=True)
        # Run TensorRT models
        runtimes.append(computeTime(model_trt, input_size=input_size, device="cuda", FP16=True))

    if INT8:
        print("running int8 models..")
        # Make INT8 tensorRT models
        mdl = globals()[model_name]
        model = mdl().eval().half().cuda()
        model_w = ModelWrapper(model).half()
        x = torch.randn(input_size).half().cuda()
        # convert to tensorrt models
        model_trt = torch2trt(model_w, [x], fp16_mode=True, int8_mode=True, max_batch_size=1)

        runtimes.append(computeTime(model_trt, input_size=input_size, device="cuda", FP16=True))

    if i == 0:
        df = pd.DataFrame({model_name: runtimes},
                         index = ["Raw", "FP32", "FP16", "INT8"])
    else:
        df[model_name] = runtimes

model: fcn_resnet50
Avg execution time (ms): 0.205
Avg execution time (ms): 0.174
running fp16 models..
Avg execution time (ms): 0.037
running int8 models..
Avg execution time (ms): 0.022
model: fcn_resnet101


Downloading: "https://download.pytorch.org/models/resnet101-5d3b4d8f.pth" to /home/ken/.cache/torch/checkpoints/resnet101-5d3b4d8f.pth
100.0%


Avg execution time (ms): 0.344
Avg execution time (ms): 0.290
running fp16 models..
Avg execution time (ms): 0.057
running int8 models..
Avg execution time (ms): 0.032
model: deeplabv3_resnet50
Avg execution time (ms): 0.281
Avg execution time (ms): 0.252
running fp16 models..
Avg execution time (ms): 0.130
running int8 models..
Avg execution time (ms): 0.097
model: deeplabv3_resnet101
Avg execution time (ms): 0.426
Avg execution time (ms): 0.367
running fp16 models..
Avg execution time (ms): 0.151
running int8 models..
Avg execution time (ms): 0.108


In [10]:
df.to_csv("results/xavier_segmentation.csv")
df

Unnamed: 0,fcn_resnet50,fcn_resnet101,deeplabv3_resnet50,deeplabv3_resnet101
Raw,0.205359,0.344307,0.281023,0.42596
FP32,0.173818,0.29018,0.252314,0.366532
FP16,0.036635,0.056922,0.129868,0.151195
INT8,0.021869,0.032292,0.097351,0.108282
