In [None]:
import torch
import torch.nn as nn
import torchvision.models as models
import torchvision.transforms as transforms

import numpy as np
from PIL import Image
import time

In [None]:
# Preprocessing pipeline applied to every image before it reaches the model:
# resize, take a 224x224 centre crop, convert to a tensor, then normalise each
# channel with the mean/std statistics listed below.
# Background on the torchvision multi-weight API:
# https://pytorch.org/blog/introducing-torchvision-new-multi-weight-support-api/
# NOTE(review): the preset bundled with the pretrained weights may use a different
# resize/crop size than 224 - compare against `weights.transforms()` and confirm.

transform = transforms.Compose(
    [
        transforms.Resize(224),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

# Use CUDA device 0 when a GPU is available, otherwise fall back to the CPU
device = torch.device(0) if torch.cuda.is_available() else torch.device('cpu')

In [None]:
# Instantiate EfficientNet-B1 with the "IMAGENET1K_V2" pretrained weights, then
# move it to the selected device.
# (The name `res_net` is kept as-is because other cells refer to it.)
res_net = models.efficientnet_b1(weights="IMAGENET1K_V2")
res_net = res_net.to(device)

# Switch to evaluation mode before running inference. As the last expression
# of the cell, this call also displays the module in the notebook output.
res_net.eval()

In [None]:
# Load the test image and force it to 3-channel RGB: the preprocessing pipeline
# normalises with three per-channel statistics, so a grayscale, palette or RGBA
# file would otherwise fail further down.
test_image = Image.open("../../data/dog.jpg").convert("RGB")

# Display-only preview: `resize` returns a new image and the result is not
# assigned, so `test_image` itself keeps its original size.
test_image.resize((256, 256))

In [None]:
# Run the preprocessing pipeline on the image and prepend a batch axis (dim 0)
tensor_image = torch.unsqueeze(transform(test_image), dim=0)

In [None]:
# Benchmark the PyTorch forward pass: run it several times and report the
# fastest run.

# Per-run wall-clock durations, in seconds
inference_time = []

# Copy the input to the target device once, outside the timed region, so the
# measurement covers the forward pass only and not the host-to-device transfer.
input_on_device = tensor_image.to(device)

# CUDA kernels are launched asynchronously: without an explicit synchronize the
# clock would be read before the GPU has actually finished the work.
on_cuda = device.type == "cuda"

# Gradients are not needed for inference; disabling autograd avoids building a
# graph on every forward pass (less memory, less overhead).
with torch.no_grad():
  # Perform multiple inference runs (10 in this case)
  for _ in range(10):
    # Make sure no earlier GPU work is still pending when the clock starts
    if on_cuda:
      torch.cuda.synchronize(device)

    # perf_counter is monotonic and high-resolution, unlike time.time()
    start_time = time.perf_counter()

    # Forward pass of model
    out_put = res_net(input_on_device)

    # Wait for the GPU to finish before stopping the clock
    if on_cuda:
      torch.cuda.synchronize(device)
    end_time = time.perf_counter()

    # Store the duration of this run
    inference_time.append(end_time - start_time)

# Print the minimum inference time observed across the runs
print("Minimum inference time %.4fs" % np.min(inference_time))

In [None]:
# Dummy input used to trace the model during export; random values are enough,
# it does not have to be a real image.
test_input = torch.randn((1, 3, 224, 224), device=device)

# Serialise the model to an ONNX file
torch.onnx.export(
    res_net,                 # model to convert
    test_input,              # example input for the export
    "efficientnet_b1.onnx",  # output file name
    # Names given to the graph's input and output
    input_names=['input'],
    output_names=['output'],
    # Axes allowed to vary in length: dim 0 of both tensors, labelled 'batch_size'
    dynamic_axes={
        'input': {0: 'batch_size'},
        'output': {0: 'batch_size'},
    },
    # Store the trained parameter weights inside the ONNX file
    export_params=True,
    # Apply the "constant folding" optimisation during export
    do_constant_folding=True,
    # Version of the ONNX operator set to target
    opset_version=12,
)