# Optimize the model Size and Performance for mobile platforms


The objective of this notebook is to convert the pytorch model to format compatible to mobile platforms.
1. convert the PyTorch model to ONNX format
   
   ONNX (Open Neural Network Exchange) is open format for exchange of Deep Learning model between different Frameworks. In other words it is format which is widely used for exchanging one model from one framework to other framework. We will first convert model from Pytorch to ONNX so we can transfer it to another framework.
The Process of converting to ONNX is that we make a random dummy input and pass it through the model in pytorch and then pass it throught the function of onnx export to export it to a .onnx file. The shape of the dummy input should be (1, shape of single images) in detail the input would be (1, number of color channels, width of picture , height of picture )`

In [2]:
# helper functions for this notebook
def gvd(variable):
    print("The data type of this variable is " + str(type(variable)))
import time
def mills():
    return int(round(time.time() * 1000))

In [8]:
import torch
import torch.onnx
import torchvision.models as models
import time
import onnx
import os
from PIL import Image

In [4]:

# Load the trained PyTorch model
model = models.resnet152(pretrained=False)
num_ftrs = model.fc.in_features
out_ftrs = 2  # Binary classification
model.fc = torch.nn.Sequential(
    torch.nn.Linear(num_ftrs, 512),
    torch.nn.ReLU(),
    torch.nn.Linear(512, out_ftrs),
    torch.nn.LogSoftmax(dim=1)
)
model.load_state_dict(torch.load('final_model.pth'))
model.eval()

# Example input with batch size of 1
dummy_input = torch.randn(1, 3, 224, 224)

# Export the model to ONNX format with optimizations
start_time = time.time()
torch.onnx.export(
    model, 
    dummy_input, 
    "model.onnx", 
    verbose=False, 
    export_params=True, 
    do_constant_folding=True
)
end_time = time.time()

print(f"Export time: {end_time - start_time} seconds")




Export time: 3.4193809032440186 seconds



## Run ONNX model with ONNX runtime

In [5]:
# Load the ONNX model
onnx_model = onnx.load("model.onnx")

# Check that the IR is well formed
onnx.checker.check_model(onnx_model)

# Print a human readable representation of the graph
onnx.helper.printable_graph(onnx_model.graph)


'graph main_graph (\n  %input.1[FLOAT, 1x3x224x224]\n) initializers (\n  %fc.0.weight[FLOAT, 512x2048]\n  %fc.0.bias[FLOAT, 512]\n  %fc.2.weight[FLOAT, 2x512]\n  %fc.2.bias[FLOAT, 2]\n  %onnx::Conv_1454[FLOAT, 64x3x7x7]\n  %onnx::Conv_1455[FLOAT, 64]\n  %onnx::Conv_1457[FLOAT, 64x64x1x1]\n  %onnx::Conv_1458[FLOAT, 64]\n  %onnx::Conv_1460[FLOAT, 64x64x3x3]\n  %onnx::Conv_1461[FLOAT, 64]\n  %onnx::Conv_1463[FLOAT, 256x64x1x1]\n  %onnx::Conv_1464[FLOAT, 256]\n  %onnx::Conv_1466[FLOAT, 256x64x1x1]\n  %onnx::Conv_1467[FLOAT, 256]\n  %onnx::Conv_1469[FLOAT, 64x256x1x1]\n  %onnx::Conv_1470[FLOAT, 64]\n  %onnx::Conv_1472[FLOAT, 64x64x3x3]\n  %onnx::Conv_1473[FLOAT, 64]\n  %onnx::Conv_1475[FLOAT, 256x64x1x1]\n  %onnx::Conv_1476[FLOAT, 256]\n  %onnx::Conv_1478[FLOAT, 64x256x1x1]\n  %onnx::Conv_1479[FLOAT, 64]\n  %onnx::Conv_1481[FLOAT, 64x64x3x3]\n  %onnx::Conv_1482[FLOAT, 64]\n  %onnx::Conv_1484[FLOAT, 256x64x1x1]\n  %onnx::Conv_1485[FLOAT, 256]\n  %onnx::Conv_1487[FLOAT, 128x256x1x1]\n  %onnx:

In [6]:
import onnxruntime as nxrun

sess = nxrun.InferenceSession('./model.onnx')
input_name = sess.get_inputs()[0].name
output_name = sess.get_outputs()[0].name
start_time = mills()
# run onnx model with onnx runtime python
result = sess.run(None, {input_name: dummy_input.numpy()})

end_time = mills()
print("model single inference in milliSeconds on onnxruntime", end_time - start_time)
print("Output", result)

model single inference in milliSeconds on onnxruntime 65
Output [array([[-0.634313 , -0.7556603]], dtype=float32)]


## Test integirty

In [10]:
from torch.utils.data import Dataset, DataLoader, SubsetRandomSampler
from torchvision import transforms
import pandas as pd
import numpy as np
# Custom Dataset Class
class CataractDataset(Dataset):
    def __init__(self, csv_file, root_dir, transform=None):
        self.data = pd.read_csv(csv_file)
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        if idx >= len(self.data):
            # Return placeholder data when index is out of bounds
            return self.get_placeholder_data()
        
        img_name = os.path.join(self.root_dir, self.data.iloc[idx, 0])
        try:
            image = Image.open(img_name).convert('RGB')
            label = int(self.data.iloc[idx, 1])
            if label not in [0, 1]:
                raise ValueError("Invalid label")
        except Exception as e:
            print(f"Warning: {e}. Using a blank image.")
            image = Image.new('RGB', (224, 224))
            label = 0  # Default label for blank image

        if self.transform:
            image = self.transform(image)

        return image, label
    def get_placeholder_data(self):
        # Return a blank image and a default label
        blank_image = Image.new('RGB', (224, 224), color='white')
        default_label = 0  # Default label
        if self.transform:
            blank_image = self.transform(blank_image)
        return blank_image, default_label

test_transforms = transforms.Compose([
    transforms.CenterCrop(224),
    transforms.Resize(224),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

# Create Dataset and DataLoader
test_dataset = CataractDataset(csv_file='test2.csv', root_dir='./', transform=test_transforms)

valid_size = 0.2
num_train = len(test_dataset)
indices = list(range(num_train))
np.random.shuffle(indices)
split = int(np.floor(valid_size * num_train))
train_idx, valid_idx = indices[split:], indices[:split]

valid_sampler = SubsetRandomSampler(valid_idx)
testloader = DataLoader(test_dataset, batch_size=1, sampler=valid_sampler)

print(f'Testing examples: {len(test_dataset)}')

Testing examples: 500


In [11]:
# Load the ONNX model
onnx_model_path = "model.onnx"
onnx_model = onnx.load(onnx_model_path)
onnx.checker.check_model(onnx_model)
ort_session = nxrun.InferenceSession(onnx_model_path)

# Function to run inference and check outputs for ONNX model
def check_onnx_model(ort_session, testloader):
    correct = 0
    total = 0
    for images, labels in testloader:
        # Convert images and labels to numpy
        images_np = images.numpy()
        labels_np = labels.numpy()
        
        # Run inference
        ort_inputs = {ort_session.get_inputs()[0].name: images_np}
        ort_outs = ort_session.run(None, ort_inputs)
        
        # Get the predicted class
        outputs = np.array(ort_outs[0])
        predicted = np.argmax(outputs, axis=1)
        
        total += labels_np.size
        correct += (predicted == labels_np).sum().item()
    
    accuracy = correct / total
    print(f'ONNX Model Test Accuracy: {accuracy:.3f}')

# Run the ONNX model integrity check
check_onnx_model(ort_session, testloader)



ONNX Model Test Accuracy: 0.960


## Convert from ONNX to TensorFlow FreezeGraph


We will use onnx-tf to convert model

onnx_tf is the library build by onnx team which is used to transfer the model from onnx to tensorflow it can create a backend to enable the model to run with tensorflow. We will first load the saved .onnx model file with onnx.load and the by using prepare function of onnx_tf prepare that loaded model to be run by tensorflow. and by using the export_graph function of that prepared backend we can export this model in a file format with .tf extension supported by Original Tensorflow


In [38]:
# install onnx and onnx-tf
!pip install --upgrade onnx | tail -n 2
!pip install  onnx-tf==1.5.0 | tail -n 2

The history saving thread hit an unexpected error (OperationalError('attempt to write a readonly database')).History will not be written to the database.


In [52]:
# see versions of all tensorflow libraries installed
!pip list | grep 'flow'

tensorflow                    2.5.0
tensorflow-addons             0.23.0
tensorflow-cpu                2.16.1
tensorflow-datasets           4.9.3
tensorflow-estimator          2.5.0
tensorflow-hub                0.16.1
tensorflow-io-gcs-filesystem  0.37.0
tensorflow-metadata           1.15.0
tensorflow-model-optimization 0.8.0
tensorflow-probability        0.24.0
tensorflow-text               2.15.0


In [53]:
import onnx
import tensorflow as tf

In [54]:
!onnx-tf convert -i "./model.onnx" -o  './final_model.pb' 

2024-05-30 10:54:57.200026: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2024-05-30 10:54:57.200051: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
  from scipy.sparse import issparse  # pylint: disable=g-import-not-at-top
Traceback (most recent call last):
  File "/home/thembo/college/cataract/bin/onnx-tf", line 5, in <module>
    from onnx_tf.cli import main
  File "/home/thembo/college/cataract/lib/python3.9/site-packages/onnx_tf/__init__.py", line 1, in <module>
    from . import backend
  File "/home/thembo/college/cataract/lib/python3.9/site-packages/onnx_tf/backend.py", line 29, in <module>
    from onnx_tf.common.handler_helper import get_all_backend_handlers
  File "/home/thembo/college/cataract/lib/python3.9/site-packages/onnx_tf/common/han