# PyTorch → ONNX → TensorRT

Simple conversion pipeline for ResNet50:
- Load PyTorch model
- Export to ONNX
- Build TensorRT engine
- Run inference with the TensorRT engine (on a random input, as a smoke test)

## 1. Setup & Configuration

In [1]:
!pip install -q torch torchvision --index-url https://download.pytorch.org/whl/cu118
!pip install -q tensorrt==10.0.1 --extra-index-url https://pypi.nvidia.com
!pip install -q onnx pycuda

In [2]:
import torch
import torchvision.models as models
import onnx
import numpy as np
import os

# Configuration: artifact locations and the input geometry shared by the
# export, engine-build and inference cells.
ONNX_PATH = 'models/resnet50.onnx'
TENSORRT_PATH = 'models/resnet50.engine'
IMG_SIZE = 224   # height and width of the square model input
BATCH_SIZE = 1   # batch size of the ONNX dummy input and of the inference run

# Create the parent directory of each artifact path instead of a hard-coded
# 'models' folder, so editing the paths above cannot leave a later write
# without a destination directory.
for _artifact_path in (ONNX_PATH, TENSORRT_PATH):
    _parent_dir = os.path.dirname(_artifact_path)
    if _parent_dir:  # a bare filename has no directory to create
        os.makedirs(_parent_dir, exist_ok=True)

# Report the environment so the recorded output documents what was used.
print(f'PyTorch: {torch.__version__}')
print(f'CUDA: {torch.cuda.is_available()}')

PyTorch: 2.7.1+cu118
CUDA: True


## 2. Load Model & Convert to ONNX

In [3]:
# Instantiate ResNet50 with the IMAGENET1K_V1 weights and switch it to
# evaluation mode. Module.eval() returns the module itself, so it is chained.
pretrained_weights = models.ResNet50_Weights.IMAGENET1K_V1
model = models.resnet50(weights=pretrained_weights).eval()
print('✅ Loaded ResNet50')

✅ Loaded ResNet50


In [4]:
# Export the CPU copy of the model to ONNX, using a random input of the
# configured shape as the example argument.
dummy_input = torch.randn(BATCH_SIZE, 3, IMG_SIZE, IMG_SIZE)

# Dimension 0 of both the input and the output is exported as a dynamic
# axis named 'batch'.
export_options = dict(
    opset_version=18,
    input_names=['input'],
    output_names=['output'],
    dynamic_axes={'input': {0: 'batch'}, 'output': {0: 'batch'}},
)
torch.onnx.export(model.cpu(), dummy_input, ONNX_PATH, **export_options)

# Reload the file that was just written and run the ONNX checker on it.
exported_model = onnx.load(ONNX_PATH)
onnx.checker.check_model(exported_model)
print(f'✅ Saved ONNX: {ONNX_PATH}')

✅ Saved ONNX: models/resnet50.onnx


## 3. Convert to TensorRT

In [5]:
import tensorrt as trt

# Build a serialized TensorRT engine from the exported ONNX file and write it
# to TENSORRT_PATH. Raises RuntimeError if parsing or building fails.
logger = trt.Logger(trt.Logger.WARNING)
builder = trt.Builder(logger)
# NOTE(review): the EXPLICIT_BATCH flag is kept from the original cell; it is
# believed to be deprecated/ignored on TensorRT 10 — confirm before removing.
network = builder.create_network(1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
parser = trt.OnnxParser(network, logger)

# Parse ONNX. parse() reports failure through its boolean return value, so it
# must be checked; otherwise a broken network is silently handed to the builder.
with open(ONNX_PATH, 'rb') as f:
    parsed_ok = parser.parse(f.read())
if not parsed_ok:
    parser_errors = '\n'.join(str(parser.get_error(i)) for i in range(parser.num_errors))
    raise RuntimeError(f'Failed to parse ONNX model {ONNX_PATH}:\n{parser_errors}')

# Configure engine: cap the builder workspace at 1 GiB.
config = builder.create_builder_config()
config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, 1 << 30)

# Set dynamic batch profile as (min, opt, max) shapes: batch 1 is both the
# minimum and the shape tuned for, batch 8 is the maximum accepted.
# 'input' matches the input name chosen at ONNX export time.
profile = builder.create_optimization_profile()
profile.set_shape('input', (1, 3, IMG_SIZE, IMG_SIZE), (1, 3, IMG_SIZE, IMG_SIZE), (8, 3, IMG_SIZE, IMG_SIZE))
config.add_optimization_profile(profile)

# Enable FP16 if available
if builder.platform_has_fast_fp16:
    config.set_flag(trt.BuilderFlag.FP16)
    print('✅ FP16 enabled')

# Build and save. The result is the *serialized* engine, not a usable engine
# object, hence the name. A failed build returns None, which would otherwise
# surface as a confusing TypeError from f.write(None).
print('Building engine (may take a few minutes)...')
serialized_engine = builder.build_serialized_network(network, config)
if serialized_engine is None:
    raise RuntimeError('TensorRT engine build failed; see the TensorRT log messages above')
with open(TENSORRT_PATH, 'wb') as f:
    f.write(serialized_engine)

print(f'✅ Saved TensorRT engine: {TENSORRT_PATH}')

✅ FP16 enabled
Building engine (may take a few minutes)...
✅ Saved TensorRT engine: models/resnet50.engine


## 4. Run Inference

In [6]:
import tensorrt as trt
import pycuda.driver as cuda
import pycuda.autoinit

# Run one inference through the saved TensorRT engine on a random input.
# Raises RuntimeError if the engine cannot be loaded, the input shape is
# rejected, or the inference cannot be enqueued.

# Load engine: only the read needs the file open; deserialize afterwards.
with open(TENSORRT_PATH, 'rb') as f:
    serialized_engine = f.read()
runtime = trt.Runtime(trt.Logger(trt.Logger.WARNING))
engine = runtime.deserialize_cuda_engine(serialized_engine)
if engine is None:
    raise RuntimeError(f'Failed to deserialize TensorRT engine: {TENSORRT_PATH}')
context = engine.create_execution_context()

# Get tensor names by I/O mode instead of assuming index 0 is the input and
# index 1 is the output.
io_tensor_names = [engine.get_tensor_name(i) for i in range(engine.num_io_tensors)]
input_tensor_names = [
    name for name in io_tensor_names
    if engine.get_tensor_mode(name) == trt.TensorIOMode.INPUT
]
output_tensor_names = [
    name for name in io_tensor_names
    if engine.get_tensor_mode(name) == trt.TensorIOMode.OUTPUT
]
if len(input_tensor_names) != 1 or len(output_tensor_names) != 1:
    raise RuntimeError(
        f'Expected exactly one input and one output tensor, got '
        f'inputs={input_tensor_names}, outputs={output_tensor_names}'
    )
input_name = input_tensor_names[0]
output_name = output_tensor_names[0]

# Set input shape. The batch dimension is dynamic, so the output shape is only
# known once the input shape has been fixed on the context.
input_shape = (BATCH_SIZE, 3, IMG_SIZE, IMG_SIZE)
if not context.set_input_shape(input_name, input_shape):
    raise RuntimeError(f'Input shape {input_shape} was rejected by the execution context')
output_shape = tuple(context.get_tensor_shape(output_name))

# Allocate memory. Host buffers are page-locked because they are used with the
# asynchronous memcpy calls below.
h_input = cuda.pagelocked_empty(input_shape, np.float32)
h_input[...] = np.random.randn(*input_shape)  # random data: this is a smoke test only
h_output = cuda.pagelocked_empty(output_shape, np.float32)
d_input = cuda.mem_alloc(h_input.nbytes)
d_output = cuda.mem_alloc(h_output.nbytes)
stream = cuda.Stream()

# Set tensor addresses
context.set_tensor_address(input_name, int(d_input))
context.set_tensor_address(output_name, int(d_output))

# Run inference: upload, execute and download are queued on the same stream,
# then the host waits for all three to finish.
cuda.memcpy_htod_async(d_input, h_input, stream)
if not context.execute_async_v3(stream_handle=stream.handle):
    raise RuntimeError('TensorRT failed to enqueue the inference')
cuda.memcpy_dtoh_async(h_output, d_output, stream)
stream.synchronize()

# Results: indices of the five largest outputs for the first batch element,
# highest first.
print(f'✅ Inference completed')
print(f'Top-5 classes: {np.argsort(h_output[0])[-5:][::-1]}')

✅ Inference completed
Top-5 classes: [490 904 828 488 446]
