In [15]:
import onnx
import tensorrt as trt
import numpy as np

In [2]:

from onnx import helper, shape_inference
from onnx import TensorProto

In [37]:
import onnxruntime as ort
import pycuda.driver as cuda

In [4]:
MODEL_PATHS = {
    "128": "/home/beams/ABABU/ptychoNN-test/new_models/training4_1.8khz/ptychoNN_8.onnx",
    "512": "/home/beams/SKANDEL/code/anakha_ptychoNN-test/models_02_10_23/ptychoNN_8.onnx",
}

CURRENT_MODEL = "512"

In [5]:
def basic_check(model, print_graph: bool = False):

    # Check that the model is well formed
    onnx.checker.check_model(model)

    if print_graph:
        # Print a human readable representation of the graph
        print(onnx.helper.printable_graph(model.graph))

In [6]:
def shape_check(model, print_graph: bool = False):
    inferred_model = shape_inference.infer_shapes(model)
    onnx.checker.check_model(inferred_model)
    if print_graph:
        # Print a human readable representation of the graph
        print(onnx.helper.printable_graph(inferred_model.graph))
        #print(f"After shape inference, the shape info of Y is:\n{inferred_model.graph.value_info}")

In [7]:
model = onnx.load(MODEL_PATHS[CURRENT_MODEL])

In [8]:
basic_check(model)

In [10]:
sess = ort.InferenceSession(MODEL_PATHS[CURRENT_MODEL])

In [12]:
# get the name of the first input of the model
input_name = sess.get_inputs()[0].name  

print('Input Name:', input_name)

Input Name: input.1


In [20]:
inp = np.random.random((8, 1, 512, 512)).astype('float32')

In [22]:
outp = sess.run(None, {'input.1':inp})

In [24]:
np.shape(outp)

(1, 8, 1, 128, 128)

In [25]:
trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH

<NetworkDefinitionCreationFlag.EXPLICIT_BATCH: 0>

In [26]:
1 * (1 << 30) 

1073741824

In [31]:
def engine_build_from_onnx(onnx_mdl):
    EXPLICIT_BATCH = 1 << (int)(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
    TRT_LOGGER = trt.Logger(trt.Logger.ERROR)
    builder = trt.Builder(TRT_LOGGER)
    config = builder.create_builder_config()
    # config.set_flag(trt.BuilderFlag.FP16)
    config.set_flag(trt.BuilderFlag.TF32)
    config.max_workspace_size = 1 * (1 << 50)  # the maximum size that any layer in the network can use

    network = builder.create_network(EXPLICIT_BATCH)
    parser = trt.OnnxParser(network, TRT_LOGGER)
    # Load the Onnx model and parse it in order to populate the TensorRT network.
    success = parser.parse_from_file(onnx_mdl)

    for idx in range(parser.num_errors):
        print(parser.get_error(idx))

    if not success:
        return None

    return builder.build_engine(network, config)


def mem_allocation(engine):
    # Determine dimensions and create page-locked memory buffers (i.e. won't be swapped to disk) to hold host inputs/outputs.

    in_sz = trt.volume(engine.get_binding_shape(0)) * engine.max_batch_size
    h_input = cuda.pagelocked_empty(in_sz, dtype="float32")

    out_sz = trt.volume(engine.get_binding_shape(1)) * engine.max_batch_size
    h_output = cuda.pagelocked_empty(out_sz, dtype="float32")

    # Allocate device memory for inputs and outputs.
    d_input = cuda.mem_alloc(h_input.nbytes)
    d_output = cuda.mem_alloc(h_output.nbytes)

    # Create a stream in which to copy inputs/outputs and run inference.
    stream = cuda.Stream()

    return h_input, h_output, d_input, d_output, stream

In [34]:
engine = engine_build_from_onnx(MODEL_PATHS["128"])

  config.max_workspace_size = 1 * (1 << 50)  # the maximum size that any layer in the network can use
  return builder.build_engine(network, config)


In [35]:
engine.max_batch_size

  engine.max_batch_size


1

In [38]:
mem_allocation(engine)

  in_sz = trt.volume(engine.get_binding_shape(0)) * engine.max_batch_size
  in_sz = trt.volume(engine.get_binding_shape(0)) * engine.max_batch_size


LogicError: explicit_context_dependent failed: invalid device context - no currently active context?