# ONNX Format
In this demo we will export our best model to the ONNX format and then run inference on it with ONNX Runtime.

Why do this?
ONNX is an open standard format that enables model interoperability across different frameworks and platforms, making it easier to deploy models in diverse environments such as cloud, edge, or mobile devices. 

ONNX Runtime is highly optimized for performance, providing faster inference speeds through techniques like graph optimizations and support for hardware accelerators, including GPUs, CPUs, and specialized inference chips. This combination allows developers to achieve scalability, portability, and performance improvements, while simplifying integration into non-PyTorch ecosystems.

In [27]:
# Install the required modules
!pip install onnx onnxruntime



In [28]:
#Solution (for your Mac M4)
!pip install onnx onnxruntime-silicon



In [29]:
# RESTART YOUR NOTEBOOK KERNEL FOR THE CHANGES TO TAKE EFFECT

## Load our best model
Before we begin, we must load our best model.


In [30]:
# Import modules
import torch
import torch.nn as nn
from torchvision import models

In [31]:

# Build MobileNetV3-Large initialised with torchvision's default pretrained weights
pretrained_weights = models.MobileNet_V3_Large_Weights.DEFAULT
model = models.mobilenet_v3_large(
    weights=pretrained_weights,
)

In [32]:
# Replace the last classifier layer so the model outputs 2 classes.
# The input width is read from the layer being replaced instead of being
# hard-coded, so it always matches the preceding layer.
model.classifier[-1] = nn.Linear(model.classifier[-1].in_features, 2)

In [33]:
# Load the checkpoint file from disk.
# map_location='cpu' remaps tensors that were saved on another device (e.g. a
# GPU), so loading also works on machines where that device is unavailable.
# weights_only=True restricts unpickling to tensors and other plain data.
checkpoint = torch.load(
    'mobilenet_checkpoint.tar',
    map_location='cpu',
    weights_only=True,
)

In [34]:
# Copy the trained parameters stored under the 'model_state_dict' key of the
# checkpoint into the model. The value returned by load_state_dict is displayed
# as the cell output and reports whether the keys matched.
model.load_state_dict(checkpoint['model_state_dict'])

<All keys matched successfully>

## Export our Model to ONNX format

In [35]:
# Import the module: NOTE that ONNX is built into PyTorch!
import torch.onnx

In [54]:
# Display the built-in documentation of the export function
help(torch.onnx.export)

Help on function export in module torch.onnx:

export(model: 'torch.nn.Module | torch.export.ExportedProgram | torch.jit.ScriptModule | torch.jit.ScriptFunction', args: 'tuple[Any, ...]' = (), f: 'str | os.PathLike | None' = None, *, kwargs: 'dict[str, Any] | None' = None, verbose: 'bool | None' = None, input_names: 'Sequence[str] | None' = None, output_names: 'Sequence[str] | None' = None, opset_version: 'int | None' = None, dynamo: 'bool' = True, external_data: 'bool' = True, dynamic_shapes: 'dict[str, Any] | tuple[Any, ...] | list[Any] | None' = None, custom_translation_table: 'dict[Callable, Callable | Sequence[Callable]] | None' = None, report: 'bool' = False, optimize: 'bool' = True, verify: 'bool' = False, profile: 'bool' = False, dump_exported_program: 'bool' = False, artifacts_dir: 'str | os.PathLike' = '.', fallback: 'bool' = False, export_params: 'bool' = True, keep_initializers_as_inputs: 'bool' = False, dynamic_axes: 'Mapping[str, Mapping[int, str]] | Mapping[str, Sequence

In [37]:
# Create an example input: one random image-shaped tensor laid out as
# [batch_size, channels, height, width]
example_input = torch.randn(size=(1, 3, 224, 224))

In [38]:
# Put the model in inference mode before exporting; this affects layers such
# as dropout and batch normalisation.
model.eval()

# Invoke export
torch.onnx.export(
    model,
    (example_input,),        # positional model inputs, passed as a tuple
    "image_classifier.onnx",
    opset_version=19,        # force opset 19
    dynamo=False,            # use the TorchScript-based exporter
)

  torch.onnx.export(model, example_input, "image_classifier.onnx",  opset_version=19,          # ðŸ‘ˆ force opset 19


In [39]:
# Validate the exported file with the ONNX checker
import onnx

# Read the exported graph back from disk
onnx_model = onnx.load("image_classifier.onnx")

# The checker reports problems by raising an exception and returns nothing,
# so printing its result shows "None" when the model is consistent.
check_result = onnx.checker.check_model(onnx_model)
print(check_result)


None


## Load an example image for inference

In [40]:
# The input image still has to be preprocessed before it is fed to the model
from PIL import Image
from torchvision.transforms import v2

preprocessing_steps = [
    # Resize to the 224x224 resolution used for the example input at export
    v2.Resize((224, 224)),
    # Convert the PIL image to a tensor image
    v2.ToImage(),
    # Cast to float32 and rescale the pixel values
    v2.ToDtype(torch.float32, scale=True),
    # Per-channel normalisation (the commonly used ImageNet statistics)
    v2.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
]
transform = v2.Compose(preprocessing_steps)

In [41]:
# Open an image.
# The context manager closes the underlying file once the pixels are read, and
# convert("RGB") guarantees three channels (grayscale, palette or RGBA inputs
# would otherwise not match the 3-channel normalisation applied later).
image_path = 'sample-input.jpg'
with Image.open(image_path) as source_image:
    image = source_image.convert("RGB")

In [42]:
# Run the preprocessing pipeline on the opened image and display the shape of
# the resulting tensor
transformed_image = transform(image)
transformed_image.shape

torch.Size([3, 224, 224])

In [43]:
# Add the batch dimension the model expects: [batch_size, channels, height, width].
# The guard keeps this cell idempotent: re-running it does not stack a second
# leading dimension onto an already batched tensor.
if transformed_image.ndim == 3:
    transformed_image = transformed_image.unsqueeze(0)
transformed_image.shape

torch.Size([1, 3, 224, 224])

In [44]:
# ONNX Runtime is fed NumPy arrays, so turn the tensor into a float32 array
import numpy as np

# astype returns a fresh array, so image_np does not share memory with the tensor
image_np = transformed_image.numpy().astype(np.float32)

## Run inference using ONNX Runtime
The ONNX Runtime is a high-performance inference engine designed to execute models in the open ONNX format across various platforms and devices. It optimizes model execution through graph-level optimizations and supports hardware accelerators, enabling fast, scalable, and portable deployments in diverse environments.

In [45]:
# Import the runtime
import onnxruntime as ort

In [46]:
# Show which interpreter and which NumPy installation the kernel is using
import sys
import numpy as np

print(f"PYTHON: {sys.executable}")
print(f"NUMPY: {np.__file__} {np.__version__}")

PYTHON: /Users/JABERI/Downloads/PyTorch-main/.venv311/bin/python
NUMPY: /Users/JABERI/Downloads/PyTorch-main/.venv311/lib/python3.11/site-packages/numpy/__init__.py 1.26.4


In [47]:
# Load the exported model file with the onnx package.
# NOTE(review): the inference session created in the next cell reads the file
# path directly, so this object is not what the runtime executes.
import onnx

onnx_model = onnx.load("image_classifier.onnx")

In [48]:
# Start an inference session on the runtime, loading the exported model file
session = ort.InferenceSession("image_classifier.onnx")

In [49]:
# Convert the batched image tensor to a float32 NumPy array
# (the same conversion as in the earlier "Convert our transformed image" cell)
import numpy as np

image_np = transformed_image.numpy().astype(np.float32)

In [50]:
# Run inference on the prepared image

# Feed the image under the name of the model's first declared input
first_input = session.get_inputs()[0]
inputs = {first_input.name: image_np}
# Passing None as the output list asks the runtime for every model output
outputs = session.run(None, inputs)
print(outputs)  # raw outputs (logits) from the final layer

[array([[-0.18219902,  0.07574947]], dtype=float32)]


In [51]:
# Pick the predicted class: index of the largest logit for the single image
# in the batch (first output, first row)
logits = outputs[0][0]
predicted = logits.argmax(0)
print(predicted)

1


In [52]:
# Define our Dataset Class and label encoding
label_encoding = {"malignant": 0, "benign": 1}

In [53]:
# Invert the label encoding (index -> class name) and report the prediction
index_to_class_map = dict(zip(label_encoding.values(), label_encoding.keys()))
predicted_label = index_to_class_map[predicted.item()]
print(f"Predicted Class: {predicted_label}")

Predicted Class: benign
