In [6]:
import onnxruntime
"""ONNX Runtime is a performance-focused engine for ONNX models, which 
inferences efficiently across multiple platforms and hardware (Windows, 
Linux, and Mac and on both CPUs and GPUs)."""
import numpy as np

In [7]:
# Load the ONNX model
model_wo_params = onnxruntime.InferenceSession("/home/nvidia/dev/img_new/resnet_v1.onnx", 
                                               providers=['CPUExecutionProvider', 'AzureExecutionProvider'])
model_with_params = onnxruntime.InferenceSession("/home/nvidia/dev/img_new/resnet_v1_with_params.onnx",
                                                 providers=['CPUExecutionProvider', 'AzureExecutionProvider'])

In [8]:
img = np.ones((1,3,160,160),dtype=np.float32)


In [9]:
# Run inference
output_wo_params = np.array(model_wo_params.run(None, {"input": img})[0][0])
output_with_params = np.array(model_with_params.run(None, {"input": img})[0][0])

In [21]:
np.linalg.norm(output_wo_params)

1.0

In [10]:
out_orig_pt = np.load("/home/nvidia/dev/img_new/scripts/ones_orig.npy")
ones_triton = np.load('ones_new_triton.npy')
out_orig_pt.shape

(1, 512)

In [13]:
out_pt = out_orig_pt[0]
np.linalg.norm(ones_triton - output_wo_params)

0.019887049

In [5]:
import onnx
import onnx.checker


# Load the ONNX model
model = onnx.load("/home/nvidia/dev/img_new/resnet_v1_with_params.onnx")

# Check the model for errors
onnx.checker.check_model(model)

print(type(model))

# # Print the model's input and output nodes
# print(checker.get_input_names())
# print(checker.get_output_names())

<class 'onnx.onnx_ml_pb2.ModelProto'>


In [32]:
import onnx
import onnx.helper

# Load the ONNX model
model = onnx.load("/home/nvidia/dev/img_new/resnet_v1_with_params.onnx")

# Print the model's graph
print(onnx.helper.print_model(model))

AttributeError: module 'onnx.helper' has no attribute 'print_model'

In [2]:
import onnx
"""
onnx.onnx_ml_pb2.ModelProto is a Python class that represents an 
ONNX model in Protocol Buffer (protobuf) format.

When you load an ONNX model using the onnx.load() function, it returns an 
instance of the ModelProto class, which contains the model's graph, 
inputs, outputs, and other properties.
"""
model = onnx.load("/home/nvidia/dev/img_new/resnet_v1_with_params.onnx")
type(model)

onnx.onnx_ml_pb2.ModelProto

In [3]:
# Get the input and output nodes
input_nodes = model.graph.input
output_nodes = model.graph.output

# Print the input and output nodes
for node in input_nodes:
    print(f"Input Node: {node.name}")
    print(f"  Type: {node.type}")
    print(f"  Shape: {node.type.tensor_type.shape.dim}")

for node in output_nodes:
    print(f"Output Node: {node.name}")
    print(f"  Type: {node.type}")
    print(f"  Shape: {node.type.tensor_type.shape.dim}")

Input Node: input
  Type: tensor_type {
  elem_type: 1
  shape {
    dim {
      dim_param: "input_dynamic_axes_1"
    }
    dim {
      dim_value: 3
    }
    dim {
      dim_value: 160
    }
    dim {
      dim_value: 160
    }
  }
}

  Shape: [dim_param: "input_dynamic_axes_1"
, dim_value: 3
, dim_value: 160
, dim_value: 160
]
Output Node: embedding
  Type: tensor_type {
  elem_type: 1
  shape {
    dim {
      dim_param: "embedding_dynamic_axes_1"
    }
    dim {
      dim_param: "Divembedding_dim_1"
    }
  }
}

  Shape: [dim_param: "embedding_dynamic_axes_1"
, dim_param: "Divembedding_dim_1"
]


In [17]:
triton_url = 'http://localhost:8000/resnet'
from img_xtend.utils.triton import TritonRemoteModel 
model = TritonRemoteModel(triton_url)

img = np.ones((3,160,160),dtype=np.float32)
output_triton = model(img)

In [18]:
np.linalg.norm(output_wo_params - output_triton)

0.019887049

In [84]:
import onnxruntime
import numpy as np
onnx_path = '/home/nvidia/dev/img_new/resnet_v1_with_params.onnx'

ort_session = onnxruntime.InferenceSession(onnx_path, providers=["CPUExecutionProvider"])

dummy_input = np.random.rand(1, 3, 160, 160).astype(np.float32)

def to_numpy(tensor):
    return tensor.detach().cpu().numpy() if tensor.requires_grad else tensor.cpu().numpy()

# compute ONNX Runtime output prediction
# ort_inputs = {ort_session.get_inputs()[0].name: to_numpy(dummy_input)}
ort_inputs = {ort_session.get_inputs()[0].name: dummy_input}
ort_outs = ort_session.run(None, ort_inputs)

output_triton = model(dummy_input[0])
print(len(output_triton))

# compare ONNX Runtime and PyTorch results
np.testing.assert_allclose(output_triton, ort_outs[0],rtol=1e-4, atol=1e-03)

print("Exported model has been tested with ONNXRuntime, and the result looks good!")

1
Exported model has been tested with ONNXRuntime, and the result looks good!


In [98]:
dummy_input = np.random.rand(1, 3, 160, 160).astype(np.float32)
print(dummy_input[0,0,:5,:5])

[[0.13819055 0.40305853 0.3603325  0.04665537 0.7711553 ]
 [0.35251063 0.42294842 0.61401653 0.5975801  0.20294605]
 [0.5354726  0.20468417 0.94154245 0.1505218  0.10396036]
 [0.34416866 0.9045293  0.5529458  0.8818277  0.14427853]
 [0.8166203  0.90845484 0.47745967 0.49778247 0.45150957]]
