# PyTorch → ONNX → TensorRT

Simple conversion pipeline for ResNet50:
- Load PyTorch model
- Export to ONNX
- Build TensorRT engine
- Run inference

In [1]:
# Environment check: show the GPU, driver version, and current GPU memory usage.
!nvidia-smi

Sat Jan 24 02:18:12 2026       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 580.95.05              Driver Version: 580.95.05      CUDA Version: 13.0     |
+-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA GeForce RTX 3060        Off |   00000000:01:00.0 Off |                  N/A |
|  0%   43C    P8             19W /  170W |     647MiB /  12288MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+

+----------------------------------------------

In [2]:
# Environment check: print the release of the installed CUDA compiler (nvcc).
!nvcc --version

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2025 NVIDIA Corporation
Built on Tue_Dec_16_07:23:41_PM_PST_2025
Cuda compilation tools, release 13.1, V13.1.115
Build cuda_13.1.r13.1/compiler.37061995_0


## 1. Setup & Configuration

In [3]:
!pip install -q torch torchvision --index-url https://download.pytorch.org/whl/cu130
!pip install -q tensorrt==10.8.0.43 --extra-index-url https://pypi.nvidia.com
!pip install -q onnx pycuda numpy

In [4]:
import torch
import torchvision.models as models
import onnx
import numpy as np
import os

# Configuration: input dimensions and artifact paths used by the export,
# engine-build, and inference cells.
IMG_SIZE = 224
BATCH_SIZE = 1
ONNX_PATH = 'models/resnet50.onnx'
TENSORRT_PATH = 'models/resnet50.engine'

# Both artifacts live in the same directory; create it if it is missing.
os.makedirs(os.path.dirname(ONNX_PATH), exist_ok=True)

# Report the PyTorch build and whether PyTorch can see a CUDA device.
print(f'PyTorch: {torch.__version__}')
print(f'CUDA: {torch.cuda.is_available()}')

PyTorch: 2.7.1+cu118
CUDA: True


## 2. Load Model & Convert to ONNX

In [5]:
# Instantiate torchvision's ResNet50 with the IMAGENET1K_V1 pretrained weights
# and switch it to eval mode before it is exported.
pretrained_weights = models.ResNet50_Weights.IMAGENET1K_V1
model = models.resnet50(weights=pretrained_weights)
model.eval()
print('✅ Loaded ResNet50')

✅ Loaded ResNet50


In [6]:
# Export the model to ONNX, using a random tensor of the configured shape as
# the example input. Axis 0 of both 'input' and 'output' is declared dynamic
# under the name 'batch'.
dummy_input = torch.randn(BATCH_SIZE, 3, IMG_SIZE, IMG_SIZE)
export_options = dict(
    opset_version=18,
    input_names=['input'],
    output_names=['output'],
    dynamic_axes={'input': {0: 'batch'}, 'output': {0: 'batch'}},
)
torch.onnx.export(model.cpu(), dummy_input, ONNX_PATH, **export_options)

# Reload the file that was just written and run the ONNX checker on it.
exported_graph = onnx.load(ONNX_PATH)
onnx.checker.check_model(exported_graph)
print(f'✅ Saved ONNX: {ONNX_PATH}')

✅ Saved ONNX: models/resnet50.onnx


## 3. Convert to TensorRT

### 3.1 Import Libraries

In [7]:
import tensorrt as trt

# Define the TensorRT logger (constructed with the WARNING severity); the
# builder and the ONNX parser created in the following cells both receive it.
logger = trt.Logger(trt.Logger.WARNING)

### 3.2 Define builder

In [8]:
# Builder tied to the shared logger. The cells below use it to create the
# network, the builder config, and the optimization profile, and finally to
# build the serialized engine.
builder = trt.Builder(logger)

### 3.3 Define network

In [9]:
# Create the empty network definition that the ONNX parser will populate,
# requesting the EXPLICIT_BATCH creation flag.
# NOTE(review): TensorRT 10 appears to document EXPLICIT_BATCH as deprecated —
# confirm against the pinned 10.8 wheel whether the flag can simply be dropped.
explicit_batch_flag = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
network = builder.create_network(explicit_batch_flag)


### 3.4 Parse ONNX model

In [10]:
# ONNX parser that fills `network` and reports through the shared logger.
parser = trt.OnnxParser(network, logger)

# Parse the exported ONNX file into `network`. parse() signals failure through
# its boolean return value rather than by raising, so check it here and surface
# the parser's recorded errors instead of letting the engine build fail later.
with open(ONNX_PATH, 'rb') as f:
    parsed_ok = parser.parse(f.read())

if not parsed_ok:
    parse_errors = [str(parser.get_error(i)) for i in range(parser.num_errors)]
    raise RuntimeError(
        f'Failed to parse ONNX model {ONNX_PATH}:\n' + '\n'.join(parse_errors)
    )

### 3.5 Configure engine

In [11]:
# Builder configuration with the WORKSPACE memory pool limited to
# 1 << 30 bytes (1 GiB).
WORKSPACE_LIMIT_BYTES = 1 << 30
config = builder.create_builder_config()
config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, WORKSPACE_LIMIT_BYTES)

### 3.6 Dynamic batch

In [12]:
# Optimization profile for the dynamic batch axis of the 'input' tensor.
# set_shape() takes the minimum, optimum, and maximum shapes in that order:
# batch sizes 1 through 8 are allowed, and the engine is tuned for batch 1.
min_shape = (1, 3, IMG_SIZE, IMG_SIZE)
opt_shape = (1, 3, IMG_SIZE, IMG_SIZE)
max_shape = (8, 3, IMG_SIZE, IMG_SIZE)

profile = builder.create_optimization_profile()
profile.set_shape('input', min_shape, opt_shape, max_shape)
# Last expression: the cell displays the value returned when the profile is added.
config.add_optimization_profile(profile)

0

In [13]:

# Enable FP16 if available
if builder.platform_has_fast_fp16:
    config.set_flag(trt.BuilderFlag.FP16)
    print('✅ FP16 enabled')

# Build the serialized engine. build_serialized_network() returns None when
# the build fails, so check the result BEFORE opening the output file;
# otherwise the file is truncated and f.write(None) raises an unrelated
# TypeError. The result is kept under its own name so it is not confused with
# the deserialized `engine` created in the inference section.
print('Building engine (may take a few minutes)...')
serialized_engine = builder.build_serialized_network(network, config)
if serialized_engine is None:
    raise RuntimeError(
        'TensorRT engine build failed; check the TensorRT logger output for details'
    )

with open(TENSORRT_PATH, 'wb') as f:
    f.write(serialized_engine)

print(f'✅ Saved TensorRT engine: {TENSORRT_PATH}')

✅ FP16 enabled
Building engine (may take a few minutes)...
✅ Saved TensorRT engine: models/resnet50.engine


## 4. Run Inference

### 4.1 Init CUDA

In [14]:
import tensorrt as trt

import pycuda.driver as cuda
import pycuda.autoinit

### 4.2 Load engine

In [None]:
# Deserialize the engine saved by the build step and create an execution
# context for it. The runtime is created outside the file block because it
# does not depend on the open file handle; it stays bound to `runtime` so it
# remains alive alongside the engine.
runtime = trt.Runtime(trt.Logger(trt.Logger.WARNING))
with open(TENSORRT_PATH, 'rb') as f:
    engine = runtime.deserialize_cuda_engine(f.read())

# deserialize_cuda_engine() returns None on failure (e.g. a corrupt or
# incompatible engine file); fail here with a clear message rather than with
# an AttributeError on the next line.
if engine is None:
    raise RuntimeError(f'Failed to deserialize TensorRT engine from {TENSORRT_PATH}')

context = engine.create_execution_context()
if context is None:
    raise RuntimeError('Failed to create a TensorRT execution context')


In [24]:
# Get tensor names.
# Classify every I/O tensor by its declared mode instead of assuming that
# index 0 is the input and index 1 is the output.
io_tensor_names = [engine.get_tensor_name(i) for i in range(engine.num_io_tensors)]
input_names = [
    name for name in io_tensor_names
    if engine.get_tensor_mode(name) == trt.TensorIOMode.INPUT
]
output_names = [
    name for name in io_tensor_names
    if engine.get_tensor_mode(name) == trt.TensorIOMode.OUTPUT
]

# The rest of the notebook handles exactly one input and one output tensor.
if len(input_names) != 1 or len(output_names) != 1:
    raise RuntimeError(
        f'Expected one input and one output tensor, got inputs={input_names}, '
        f'outputs={output_names}'
    )
input_name = input_names[0]
output_name = output_names[0]

print(f'Input tensor name: {input_name}')
print(f'Output tensor name: {output_name}')

Input tensor name: input
Output tensor name: output


### 4.3 Set input shape

In [26]:
# Concrete shape for the engine's dynamic input on this run, built from the
# BATCH_SIZE and IMG_SIZE configuration constants.
input_shape = (BATCH_SIZE, 3, IMG_SIZE, IMG_SIZE)
print(f'Input shape: {input_shape}')

Input shape: (1, 3, 224, 224)


In [27]:
# Fix the dynamic input to the concrete shape for this run. set_input_shape()
# returns a boolean rather than raising, so a rejected shape would otherwise
# only show up as a displayed `False`. Stop here in that case instead of
# running inference with an unset shape.
shape_accepted = context.set_input_shape(input_name, input_shape)
if not shape_accepted:
    raise RuntimeError(
        f'TensorRT rejected input shape {input_shape} for tensor {input_name!r}'
    )
# Last expression: keep displaying the call's result as the cell output.
shape_accepted

True

In [28]:
# Ask the execution context for the output tensor's shape (queried after the
# input shape has been set) and print it; it sizes the output buffer below.
output_shape = context.get_tensor_shape(output_name)

print(f'Output shape: {output_shape}')

Output shape: (1, 1000)


### 4.4 Allocate memory

In [20]:
# Host buffers are page-locked because the inference cell transfers them with
# memcpy_htod_async / memcpy_dtoh_async, which PyCUDA documents as requiring
# page-locked host memory.
# The random input is drawn from a seeded generator so the same input is fed
# to the engine on every run.
input_rng = np.random.default_rng(0)
h_input = cuda.pagelocked_empty(input_shape, np.float32)
h_input[...] = input_rng.standard_normal(input_shape, dtype=np.float32)
# tuple(...) turns the shape object returned by TensorRT into a plain tuple.
h_output = cuda.pagelocked_empty(tuple(output_shape), np.float32)

# Device buffers sized to match the host buffers, plus the stream on which the
# copies and the inference are enqueued.
d_input = cuda.mem_alloc(h_input.nbytes)
d_output = cuda.mem_alloc(h_output.nbytes)
stream = cuda.Stream()

### 4.5 Run inference

In [21]:
# Set tensor addresses: tell the execution context which device buffer backs
# each I/O tensor. set_tensor_address() returns a boolean; check both calls
# (previously the first result was discarded and the second only displayed).
input_bound = context.set_tensor_address(input_name, int(d_input))
output_bound = context.set_tensor_address(output_name, int(d_output))
if not (input_bound and output_bound):
    raise RuntimeError(
        f'Failed to set tensor addresses (input ok: {input_bound}, '
        f'output ok: {output_bound})'
    )
# Last expression: keep displaying the success flag as the cell output.
input_bound and output_bound

True

In [22]:
# Enqueue, on one stream: host-to-device copy of the input, the inference
# itself, and the device-to-host copy of the output; then wait for the stream.
cuda.memcpy_htod_async(d_input, h_input, stream)

# execute_async_v3() returns a boolean instead of raising, so check it;
# otherwise a failed enqueue would leave h_output holding uninitialised data
# that the results cell would print as if it were a prediction.
if not context.execute_async_v3(stream_handle=stream.handle):
    stream.synchronize()  # drain the pending input copy before bailing out
    raise RuntimeError('TensorRT failed to enqueue inference (execute_async_v3 returned False)')

cuda.memcpy_dtoh_async(h_output, d_output, stream)
stream.synchronize()

In [23]:
# Results: print the indices of the five largest output values for the first
# item in the batch, largest first.
print('✅ Inference completed')
top5_classes = np.argsort(h_output[0])[::-1][:5]
print(f'Top-5 classes: {top5_classes}')

✅ Inference completed
Top-5 classes: [904 490 828 556 488]
