This notebook times inference with the TensorRT (TRT) model against inference with the original, non-TRT model.

In [3]:
%%bash

# List every regular file under the frozen-graph output directory, sorted by path.
echo 'Convert (Frozen Graph):'
CONVERT_TF_FROZEN_GRAPH_DIR=../../../tmp/trt_end_to_end/convert/tf/frozen
# Quote the expansion so a directory containing spaces or glob characters
# reaches find as a single argument.
find "${CONVERT_TF_FROZEN_GRAPH_DIR}" -type f | sort

Convert (Frozen Graph):
../../../tmp/trt_end_to_end/convert/tf/frozen/basic/001/basic_epoch001_2019-09-03T19:15_frozen.pb
../../../tmp/trt_end_to_end/convert/tf/frozen/batchn/001/batchn_epoch001_2019-09-03T19:28_frozen.pb
../../../tmp/trt_end_to_end/convert/tf/frozen/conv/001/conv_epoch001_2019-09-03T19:30_frozen.pb
../../../tmp/trt_end_to_end/convert/tf/frozen/resnet50/001/resnet50_epoch001_2019-09-03T19:31_frozen.pb


## Setup

In [4]:
import os
import sys
# Add the directory three levels up to the import path so that the `src.*`
# imports in later cells resolve. The membership check keeps the cell
# idempotent: re-running it does not pile up duplicate sys.path entries.
if '../../..' not in sys.path:
    sys.path.append('../../..')

## Parameters

In [5]:
# Model to benchmark; passed to get_params and to the directory / file-name helpers below.
_NAME = 'resnet50'
# Training epoch of the model to benchmark (shows up as "001" in the paths printed in this notebook).
_EPOCH = 1

In [6]:
# NOTE(review): this wildcard import appears to supply the helpers that later
# cells use without importing them explicitly (get_params, get_train_dir,
# get_frozen_graph_dir, FROZEN_GRAPH_FILE_FORMAT, and possibly other names) —
# confirm and prefer explicit imports.
from src.utils.trt_end_to_end_constants import *
# Rebinds _NAME and _EPOCH to the values returned by get_params and adds _TIME,
# which is interpolated into the metadata and frozen-graph file names below
# (presumably the run timestamp seen in those file names — TODO confirm).
_NAME, _EPOCH, _TIME = get_params(_NAME, _EPOCH)

## Metadata

In [7]:
from pprint import pprint

from src.meta.metadata import Metadata
from src.utils.trt_end_to_end_constants import MD_FILE_FORMAT

# The metadata file lives in the training directory of the selected model/epoch.
md_filename = MD_FILE_FORMAT % (_NAME, _EPOCH, _TIME)
_train_dir = get_train_dir(_NAME, _EPOCH)
md_filepath = os.path.join(_train_dir, md_filename)

# from_md returns a (status, metadata) pair; this notebook treats status 0 as
# success. Report the path and status on failure so the cause is visible.
ret, metadata = Metadata.from_md(md_filepath)
assert ret == 0, 'Metadata.from_md(%r) failed with status %r' % (md_filepath, ret)

# Show the loaded fields (the timing cell reads input_names / output_names).
pprint(vars(metadata))

{'epoch': 1,
 'input_names': ['resnet50_input:0'],
 'name': 'resnet50',
 'output_names': ['fc100/Softmax:0']}


## Data

In [8]:
import numpy as np
from src.data.cifar100 import CLASSES, INPUT_SHAPE, load_data

# load_data yields two (images, labels) pairs: training first, then test.
(train_images, train_labels), (test_images, test_labels) = load_data()

# Benchmark input: a batch holding only the first training image.
first_train_image = train_images[0]
input_img = np.array([first_train_image])

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [9]:
# Sanity check: the benchmark input should be a batch containing a single image.
input_img.shape

(1, 32, 32, 3)

## Helper functions

In [10]:
import tensorflow as tf
from tensorflow.python.platform import gfile
import time

def read_pb_graph(model):
    """Load a serialized GraphDef from a ".pb" file.

    Can be used to read either a frozen model or a TensorRT model.

    Args:
        model: Path of the ".pb" file to read.

    Returns:
        The ``tf.GraphDef`` parsed from the file contents.
    """
    # gfile.GFile is the supported replacement for the deprecated FastGFile.
    with gfile.GFile(model, 'rb') as f:
        serialized_graph = f.read()
    graph_def = tf.GraphDef()
    graph_def.ParseFromString(serialized_graph)
    return graph_def

## Test Params

In [32]:
# Number of inference runs executed before measuring; their timings are not reported.
_WARM_UP_TRIALS = 1000
# Number of measured inference runs that the reported average is taken over.
_TRIALS = 1000

## Frozen Graph

In [33]:
# Directory holding the frozen graph for the selected model/epoch.
frozen_graph_dir = get_frozen_graph_dir(_NAME, _EPOCH)

# File name is built from the same (name, epoch, time) triple as the metadata file name.
frozen_graph_filename = FROZEN_GRAPH_FILE_FORMAT % (_NAME, _EPOCH, _TIME)
frozen_graph_filepath = os.path.join(frozen_graph_dir, frozen_graph_filename)
# Print the resolved path so it can be compared with the listing at the top of the notebook.
print(frozen_graph_filepath)

../../../tmp/trt_end_to_end/convert/tf/frozen/resnet50/001/resnet50_epoch001_2019-09-03T19:31_frozen.pb


In [35]:
def _run_timed_trials(sess, fetch, feed_dict, trials):
    """Run ``sess.run(fetch, feed_dict=feed_dict)`` ``trials`` times with a progress bar.

    Only the ``sess.run`` call is timed; building and printing the progress
    line is excluded from the measurement.

    Args:
        sess: Session used to execute the graph.
        fetch: Tensor to evaluate on every trial.
        feed_dict: Feed dictionary passed unchanged to every ``sess.run`` call.
        trials: Number of runs to execute.

    Returns:
        A ``(total_seconds, last_result)`` tuple: the summed duration of all
        ``sess.run`` calls and the value returned by the final call
        (``None`` when ``trials`` is 0).
    """
    total_time = 0.0
    last_result = None
    last_pctg = None
    longest_line = 0
    for i in range(trials):
        # perf_counter is monotonic and high-resolution, unlike time.time().
        t1 = time.perf_counter()
        last_result = sess.run(fetch, feed_dict=feed_dict)
        total_time += time.perf_counter() - t1

        # Redraw the bar only when the integer percentage advances.
        pctg = int(100 * i / trials)
        if last_pctg is None or pctg > last_pctg:
            running_avg = total_time / (i + 1)
            remaining_time = running_avg * (trials - i - 1)
            log = '[%s>%s] ETA: %.3fs' % (
                '=' * pctg, '.' * (100 - pctg - 1), remaining_time)
            # Pad to the longest line printed so far so that a shorter line
            # fully overwrites the previous one after the carriage return.
            longest_line = max(len(log), longest_line)
            print('\r', log.ljust(longest_line), sep='', end='\r')
            last_pctg = pctg
    print()
    return total_time, last_result


graph = tf.Graph()
with graph.as_default():
    with tf.Session() as sess:
        # read the frozen (non-TensorRT) model
        frozen_graph = read_pb_graph(frozen_graph_filepath)

        # obtain the corresponding input-output tensors
        tf.import_graph_def(frozen_graph, name='')
        input_tensor = sess.graph.get_tensor_by_name(metadata.input_names[0])
        output_tensor = sess.graph.get_tensor_by_name(metadata.output_names[0])
        feed = {input_tensor: input_img}

        # warmup: same call as the measured loop, timings discarded
        print("Warming up for %d trials..." % _WARM_UP_TRIALS)
        _run_timed_trials(sess, output_tensor, feed, _WARM_UP_TRIALS)

        # test
        print("Testing for %d trials..." % _TRIALS)
        total_time, out_pred = _run_timed_trials(sess, output_tensor, feed, _TRIALS)

        # Average seconds per sess.run call; NaN instead of ZeroDivisionError
        # when no measured trials were requested.
        avg_time_original_model = total_time / _TRIALS if _TRIALS else float('nan')
        print("Old avg time: %s" % avg_time_original_model)

Warming up for 1000 trials...
Testing for 1000 trials...
Old avg time: 0.005598349332809448
