In [None]:
# Uncomment and run once if the ONNX packages are not installed yet
# !pip install onnx
# !pip install onnxruntime



In [2]:
import torch
import torch.nn as nn
import time
import torch.onnx
import onnxruntime as ort
import numpy as np

# Report whether PyTorch can see a CUDA device. Informational only: no cell
# shown here moves the model or its inputs to a GPU.
print(torch.cuda.is_available())

False


In [3]:

# Define a simple feedforward network

class SimpleNet(nn.Module):
    """Small fully connected network with layer widths 2 -> 2 -> 3 -> 4.

    The first two linear layers are followed by a sigmoid; the last linear
    layer's result is returned as-is (no activation).
    """

    def __init__(self):
        super().__init__()
        # Layers are created in forward order: fc1 -> fc2 -> fc3
        self.fc1 = nn.Linear(2, 2)  # 2 input features -> 2 hidden units
        self.fc2 = nn.Linear(2, 3)  # 2 hidden units -> 3 hidden units
        self.fc3 = nn.Linear(3, 4)  # 3 hidden units -> 4 outputs

    def forward(self, x):
        """Run ``x`` (last dimension of size 2) through the three layers.

        Returns the un-activated output of ``fc3`` (last dimension of size 4).
        """
        hidden1 = torch.sigmoid(self.fc1(x))
        hidden2 = torch.sigmoid(self.fc2(hidden1))
        return self.fc3(hidden2)

# Seed PyTorch's RNG before the layers are created so the randomly initialised
# weights -- and therefore the printed output -- are identical on every
# Restart & Run All. (Outputs saved from an earlier, unseeded run will differ
# until the notebook is re-executed.)
torch.manual_seed(0)

# Instantiate the model
model = SimpleNet()

# Example input for inference: one sample with two features
example_input = torch.tensor([[1.0, 2.0]])

# Perform inference. Gradients are not needed here, so autograd tracking is
# switched off; the result can then be converted to NumPy without .detach().
with torch.no_grad():
    output = model(example_input)

# Print the inference output
print("PyTorch Inference Output:", output.numpy())


PyTorch Inference Output: [[-0.8365197   0.6591537   0.18199769 -0.04102439]]


In [6]:

# File the exported ONNX graph is written to
onnx_model_path = "simple_model.onnx"

# Export the PyTorch model to ONNX. `example_input` is the sample input handed
# to the exporter; the names chosen here are the keys used later when feeding
# and reading the ONNX model.
torch.onnx.export(
    model,
    example_input,
    onnx_model_path,
    output_names=["output"],
    input_names=["input"],
)

print(f"Model successfully converted to ONNX: {onnx_model_path}")


[torch.onnx] Obtain model graph for `SimpleNet([...]` with `torch.export.export(..., strict=False)`...
[torch.onnx] Obtain model graph for `SimpleNet([...]` with `torch.export.export(..., strict=False)`... ✅
[torch.onnx] Run decomposition...
[torch.onnx] Run decomposition... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
Model successfully converted to ONNX: simple_model.onnx


In [9]:

# Load the exported ONNX model and create an ONNX Runtime inference session
onnx_model_path = "simple_model.onnx"
ort_session = ort.InferenceSession(onnx_model_path)

# Same sample values that were fed to the PyTorch model, as a float32 array
onnx_input = np.array([[1.0, 2.0]], dtype=np.float32)

# Run inference on the ONNX model. Passing None as the output list requests
# all outputs (returned as a list); the feed key must match the input name
# chosen at export time ("input").
onnx_output = ort_session.run(None, {"input": onnx_input})

# Print the ONNX inference result
print("ONNX Inference Output:", onnx_output)

# Verify parity with PyTorch in code rather than by eye: run the same input
# through the PyTorch model and require agreement within float32 round-off.
with torch.no_grad():
    torch_reference = model(torch.from_numpy(onnx_input)).numpy()
np.testing.assert_allclose(onnx_output[0], torch_reference, rtol=1e-4, atol=1e-5)

# PyTorch Inference Output: [[-0.8365197   0.6591537   0.18199769 -0.04102439]]


ONNX Inference Output: [array([[-0.8365197 ,  0.6591537 ,  0.18199769, -0.0410244 ]],
      dtype=float32)]


In [10]:

# Benchmark the PyTorch model: average wall-clock time of one forward pass
n_iters = 1000
n_warmup = 10  # untimed calls so one-time setup cost is not measured
time_sum = 0.0

# Inference only: disable autograd so graph bookkeeping is not part of the
# measured time.
with torch.no_grad():
    for _ in range(n_warmup):
        model(torch.randn(1, 2))

    for _ in range(n_iters):
        # Fresh random input, created outside the timed region. A separate name
        # is used so the `example_input` defined earlier is not overwritten.
        bench_input = torch.randn(1, 2)
        # perf_counter is the monotonic, high-resolution clock intended for
        # measuring short durations (time.time() can be coarse or jump).
        t0 = time.perf_counter()
        output = model(bench_input)  # PyTorch model
        time_sum += time.perf_counter() - t0

# time_sum / n_iters is the mean per-call latency in seconds, not a total
print(f"Average time per inference: {time_sum / n_iters} s")

# 0.00012366795539855958

Total time: 0.00012366795539855958


In [11]:

# Benchmark the ONNX model: average wall-clock time of one session.run() call
n_iters = 1000
n_warmup = 10  # untimed calls so first-run setup of the new session is not measured
time_sum = 0.0

onnx_model_path = "simple_model.onnx"
ort_session = ort.InferenceSession(onnx_model_path)

for _ in range(n_warmup):
    ort_session.run(None, {"input": np.random.randn(1, 2).astype(np.float32)})

for _ in range(n_iters):
    # Fresh float32 input drawn from a standard normal (the distribution
    # torch.randn uses), created outside the timed region.
    onnx_input = np.random.randn(1, 2).astype(np.float32)
    # perf_counter is the monotonic, high-resolution clock intended for
    # measuring short durations (time.time() can be coarse or jump).
    t0 = time.perf_counter()
    onnx_output = ort_session.run(None, {"input": onnx_input})
    time_sum += time.perf_counter() - t0

# time_sum / n_iters is the mean per-call latency in seconds, not a total
print(f"Average time per inference: {time_sum / n_iters} s")

# 0.00003591752

Total time: 3.591752052307129e-05


In [14]:
# Compare the two average run times (seconds per inference). The values are
# copied by hand from the recorded outputs of the two benchmark cells.
# Result: the PyTorch model takes about 3.44 times as long per inference.
pytorch_avg_s = 0.00012366795539855958
onnx_avg_s = 0.00003591752

pytorch_avg_s / onnx_avg_s

3.443109529793805