In [22]:
import tensorrt as trt
TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
#!{sys.executable} -m pip install pycuda
import pycuda.driver as cuda
import numpy as np
import pycuda.autoinit
import cv2

In [23]:
with open("model/blueA_VGG16.engine", "rb") as f, trt.Runtime(TRT_LOGGER) as runtime:
    engine = runtime.deserialize_cuda_engine(f.read())

In [24]:
im = cv2.imread("blueAPCENBLEDval2048spec/bat/20150808T085348.13641700.13691700.sel.57.ch01.spectrogram.jpg")

In [32]:
width, height = 224, 224
binding_shape = (width, height)
print(trt.volume(engine.get_binding_shape(0)))

150528


In [26]:
'''
set host input by transorming spectrogram to fit for cuda interfacing:

init. dim   |    reshape    |   transpose   |   flatten
____________|_______________|_______________|______________
(299,299,3) |-> (224,224,3) |-> (3,224,224) | -> (150528,)

NOTE: pixel weights must be float32

'''
h_input = np.reshape(np.transpose(cv2.resize(im,binding_shape)),
                     trt.volume(engine.get_binding_shape(0))).astype('float32')

In [27]:
'''
Determine dimensions and create page-locked memory buffers (i.e. won't be swapped to disk) 
to hold host inputs/outputs.
'''
#h_input = cuda.pagelocked_empty(trt.volume(engine.get_binding_shape(0)), dtype=np.float32)
h_output = cuda.pagelocked_empty(trt.volume(engine.get_binding_shape(1)), dtype=np.float32)
# Allocate device memory for inputs and outputs.
d_input = cuda.mem_alloc(h_input.nbytes)
d_output = cuda.mem_alloc(h_output.nbytes)
# Create a stream in which to copy inputs/outputs and run inference.
stream = cuda.Stream()

In [29]:
with engine.create_execution_context() as context:
    # Transfer input data to the GPU.
    cuda.memcpy_htod_async(d_input, h_input, stream)
    # Run inference.
    context.execute_async(bindings=[int(d_input), int(d_output)], stream_handle=stream.handle)
    # Transfer predictions back from the GPU.
    cuda.memcpy_dtoh_async(h_output, d_output, stream)
    # Synchronize the stream
    stream.synchronize()
    # Return the host output. 
print(h_output)


[0.7762563  0.22374369]
