# 1. Installation


In [1]:
!pip install -q torch torchvision onnx onnxruntime onnxscript tqdm

In [2]:
# Confirm that onnx imports and display the installed version.
import onnx
onnx.__version__

'1.20.1'

In [3]:
# Print the ONNX Runtime version and the execution providers this build offers.
import onnxruntime as ort
print(ort.__version__)
print(ort.get_available_providers())

1.23.2
['AzureExecutionProvider', 'CPUExecutionProvider']


In [4]:
# Display the PyTorch version and whether CUDA is usable from this kernel.
import torch
torch.__version__, torch.cuda.is_available()

('2.9.1+cu128', True)

# 2. Load PyTorch model

In [5]:
import torch
from torchvision import models

# Build ResNet-50 with the IMAGENET1K_V1 weights and put it in eval mode.
# Module.eval() returns the module itself, so the call can be chained.
torch_model = models.resnet50(
    weights=models.ResNet50_Weights.IMAGENET1K_V1
).eval()
torch_model

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

# 3. Convert to ONNX

## Config

In [6]:
# Settings shared by the export and benchmark cells.
OPSET = 18                     # ONNX opset version passed to the exporter
IMG_SIZE = 224                 # side length of the square input tensors
ONNX_PATH = "./resnet50.onnx"  # file the exported model is written to

## Dummy input

In [7]:
# Random example input handed to the exporter: shape (1, 3, IMG_SIZE, IMG_SIZE).
dummy_input = torch.randn((1, 3, IMG_SIZE, IMG_SIZE))

## Export

In [8]:
# Export torch_model to ONNX_PATH, using dummy_input as the example input.
# The graph's input and output are named "input" and "output".
torch.onnx.export(
    model=torch_model,
    args=(dummy_input,),
    f=ONNX_PATH,
    input_names=["input"],
    output_names=["output"],
    opset_version=OPSET,
    export_params=True,
)

[torch.onnx] Obtain model graph for `ResNet([...]` with `torch.export.export(..., strict=False)`...
[torch.onnx] Obtain model graph for `ResNet([...]` with `torch.export.export(..., strict=False)`... ✅
[torch.onnx] Run decomposition...
[torch.onnx] Run decomposition... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
Applied 106 of general pattern rewrite rules.


ONNXProgram(
    model=
        <
            ir_version=10,
            opset_imports={'': 18},
            producer_name='pytorch',
            producer_version='2.9.1+cu128',
            domain=None,
            model_version=None,
        >
        graph(
            name=main_graph,
            inputs=(
                %"input"<FLOAT,[1,3,224,224]>
            ),
            outputs=(
                %"output"<FLOAT,[1,1000]>
            ),
            initializers=(
                %"conv1.weight"<FLOAT,[64,3,7,7]>{Tensor(...)},
                %"layer1.0.conv1.weight"<FLOAT,[64,64,1,1]>{Tensor(...)},
                %"layer1.0.conv2.weight"<FLOAT,[64,64,3,3]>{Tensor(...)},
                %"layer1.0.conv3.weight"<FLOAT,[256,64,1,1]>{Tensor(...)},
                %"layer1.0.downsample.0.weight"<FLOAT,[256,64,1,1]>{Tensor(...)},
                %"layer1.1.conv1.weight"<FLOAT,[64,256,1,1]>{Tensor(...)},
                %"layer1.1.conv2.weight"<FLOAT,[64,64,3,3]>{Tensor(...)},
     

In [9]:
from onnx import checker

# Load the exported file back from disk and run the ONNX checker on it.
onnx_model = onnx.load(ONNX_PATH)
checker.check_model(onnx_model)

# 4. Benchmark (Native PyTorch vs ONNX Runtime)

## Config

In [10]:
import time

import numpy as np
from tqdm import tqdm

# Benchmark settings read by the timing loops.
IMG_SIZE = 224   # side length of each random square input
SAMPLES = 1_000  # number of timed inferences per backend

## PyTorch (CPU)

In [11]:
# Accumulate the elapsed time of SAMPLES single-image forward passes
# through the PyTorch model.
pytorch_total_time = 0.0

for _ in tqdm(range(SAMPLES)):
    # Input generation stays outside the timed region.
    x = torch.randn(1, 3, IMG_SIZE, IMG_SIZE)
    # perf_counter() is a monotonic, high-resolution clock intended for
    # measuring intervals; time.time() can jump if the system clock changes.
    start_time = time.perf_counter()
    with torch.no_grad():
        y = torch_model(x)
    end_time = time.perf_counter()

    pytorch_total_time += (end_time - start_time)

100%|██████████| 1000/1000 [00:31<00:00, 32.03it/s]


In [12]:
# Report the total time accumulated by the PyTorch timing loop.
print(f"Pytorch Total Inference Time for {SAMPLES} samples: {pytorch_total_time:.4f} seconds")

Pytorch Total Inference Time for 1000 samples: 30.5629 seconds


## ONNX Runtime (CPU)

In [14]:
# Create a CPU-only ONNX Runtime session for the exported model.
# ONNX_PATH is reused (instead of repeating the filename) so the session
# always loads the same file the export step wrote.
session = ort.InferenceSession(
    ONNX_PATH,
    providers=["CPUExecutionProvider"],
)

In [15]:
# Accumulate the elapsed time of SAMPLES single-image inferences through
# the ONNX Runtime session.
onnx_total_time = 0.0

for _ in tqdm(range(SAMPLES)):
    # Input generation and the float32 cast stay outside the timed region.
    x = np.random.randn(1, 3, IMG_SIZE, IMG_SIZE).astype(np.float32)
    # perf_counter() is a monotonic, high-resolution clock intended for
    # measuring intervals; time.time() can jump if the system clock changes.
    start_time = time.perf_counter()
    # None requests all model outputs; "input" is the name given at export.
    y = session.run(None, {"input": x})
    end_time = time.perf_counter()
    onnx_total_time += (end_time - start_time)

  0%|          | 0/1000 [00:00<?, ?it/s]

100%|██████████| 1000/1000 [00:13<00:00, 74.52it/s]


In [16]:
# Report the total time accumulated by the ONNX Runtime timing loop.
print(f"ONNX Total Inference Time for {SAMPLES} samples: {onnx_total_time:.4f} seconds")

ONNX Total Inference Time for 1000 samples: 11.0720 seconds
