<a href="https://colab.research.google.com/github/nyadla-sys/pytorch_2_tflite/blob/main/pytorch_to_onnx_to_tflite(quantized)_with_imagedata.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Install ONNX and ONNX runtime

In [None]:
!pip install onnx
!pip install onnxruntime
# Some standard imports
import numpy as np
import torch
import torch.onnx
import torchvision.models as models
import onnx
import onnxruntime

Collecting onnx
  Downloading onnx-1.11.0-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (12.8 MB)
[K     |████████████████████████████████| 12.8 MB 8.9 MB/s 
Installing collected packages: onnx
Successfully installed onnx-1.11.0
Collecting onnxruntime
  Downloading onnxruntime-1.10.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (4.9 MB)
[K     |████████████████████████████████| 4.9 MB 5.2 MB/s 
Installing collected packages: onnxruntime
Successfully installed onnxruntime-1.10.0


## Load mobilenetV2 from torch models

In [None]:
model = models.mobilenet_v2(pretrained=True)
model.eval()

Downloading: "https://download.pytorch.org/models/mobilenet_v2-b0353104.pth" to /root/.cache/torch/hub/checkpoints/mobilenet_v2-b0353104.pth


  0%|          | 0.00/13.6M [00:00<?, ?B/s]

MobileNetV2(
  (features): Sequential(
    (0): ConvNormActivation(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU6(inplace=True)
    )
    (1): InvertedResidual(
      (conv): Sequential(
        (0): ConvNormActivation(
          (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
          (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): ReLU6(inplace=True)
        )
        (1): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (2): InvertedResidual(
      (conv): Sequential(
        (0): ConvNormActivation(
          (0): Conv2d(16, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (1): BatchNorm2d(96, eps=1e-05,

##Convert from pytorch to onnx

In [None]:
IMAGE_SIZE = 224
BATCH_SIZE = 1
IMAGE_SIZE = 224
# Input to the model
x = torch.randn(BATCH_SIZE, 3, 224, 224, requires_grad=True)
torch_out = model(x)

# Export the model
torch.onnx.export(model,                     # model being run
                  x,                         # model input (or a tuple for multiple inputs)
                  "mobilenet_v2.onnx",       # where to save the model (can be a file or file-like object)
                  export_params=True,        # store the trained parameter weights inside the model file
                  opset_version=10,          # the ONNX version to export the model to
                  do_constant_folding=True,  # whether to execute constant folding for optimization
                  input_names = ['input'],   # the model's input names
                  output_names = ['output'], # the model's output names
                  dynamic_axes={'input' : {0 : 'BATCH_SIZE'},    # variable length axes
                                'output' : {0 : 'BATCH_SIZE'}})

In [None]:
onnx_model = onnx.load("mobilenet_v2.onnx")
onnx.checker.check_model(onnx_model)

##Compare ONNX Runtime and Pytorch results

In [None]:
ort_session = onnxruntime.InferenceSession("mobilenet_v2.onnx")

def to_numpy(tensor):
    return tensor.detach().cpu().numpy() if tensor.requires_grad else tensor.cpu().numpy()

# compute ONNX Runtime output prediction
ort_inputs = {ort_session.get_inputs()[0].name: to_numpy(x)}
ort_outs = ort_session.run(None, ort_inputs)

# compare ONNX Runtime and PyTorch results
np.testing.assert_allclose(to_numpy(torch_out), ort_outs[0], rtol=1e-03, atol=1e-05)

print("Exported model has been tested with ONNXRuntime, and the result looks good!")

Exported model has been tested with ONNXRuntime, and the result looks good!


##Convert from Onnx to TF saved model

In [None]:
!pip install onnx-tf

from onnx_tf.backend import prepare
import onnx

onnx_model_path = 'mobilenet_v2.onnx'
tf_model_path = 'model_tf'

onnx_model = onnx.load(onnx_model_path)
tf_rep = prepare(onnx_model)
tf_rep.export_graph(tf_model_path)

Collecting onnx-tf
  Downloading onnx_tf-1.9.0-py3-none-any.whl (222 kB)
[K     |████████████████████████████████| 222 kB 5.4 MB/s 
[?25hCollecting tensorflow-addons
  Downloading tensorflow_addons-0.16.1-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (1.1 MB)
[K     |████████████████████████████████| 1.1 MB 34.7 MB/s 
Installing collected packages: tensorflow-addons, onnx-tf
Successfully installed onnx-tf-1.9.0 tensorflow-addons-0.16.1




INFO:tensorflow:Assets written to: model_tf/assets


INFO:tensorflow:Assets written to: model_tf/assets


##Convert from TF saved model to TFLite(float32) model

In [None]:
import tensorflow as tf

saved_model_dir = 'model_tf'
tflite_model_path = 'mobilenet_v2_float32.tflite'

# Convert the model
converter = tf.lite.TFLiteConverter.from_saved_model(saved_model_dir)
tflite_model = converter.convert()

# Save the model
with open(tflite_model_path, 'wb') as f:
    f.write(tflite_model)



##Run inference on TFLite(float32) with random data

In [None]:
import numpy as np
import tensorflow as tf

tflite_model_path = '/content/mobilenet_v2_float32.tflite'
# Load the TFLite model and allocate tensors
interpreter = tf.lite.Interpreter(model_path=tflite_model_path)
interpreter.allocate_tensors()

# Get input and output tensors
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

# Test the model on random input data
input_shape = input_details[0]['shape']
print(input_shape)
input_data = np.array(np.random.random_sample(input_shape), dtype=np.float32)
interpreter.set_tensor(input_details[0]['index'], input_data)

interpreter.invoke()

# get_tensor() returns a copy of the tensor data
# use tensor() in order to get a pointer to the tensor
output_data = interpreter.get_tensor(output_details[0]['index'])

print("Predicted value for [0, 1] normalization. Label index: {}, confidence: {:2.0f}%"
      .format(np.argmax(output_data), 
              100 * output_data[0][np.argmax(output_data)]))

[  1   3 224 224]
Predicted value for [0, 1] normalization. Label index: 892, confidence: 579%


##Run inference on TFLite(float32) with image data

In [None]:
!wget --no-check-certificate \
    https://storage.googleapis.com/mledu-datasets/cats_and_dogs_filtered.zip \
    -O /content/cats_and_dogs_filtered.zip

    

--2022-03-04 05:09:25--  https://storage.googleapis.com/mledu-datasets/cats_and_dogs_filtered.zip
Resolving storage.googleapis.com (storage.googleapis.com)... 74.125.201.128, 173.194.193.128, 173.194.196.128, ...
Connecting to storage.googleapis.com (storage.googleapis.com)|74.125.201.128|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 68606236 (65M) [application/zip]
Saving to: ‘/content/cats_and_dogs_filtered.zip’


2022-03-04 05:09:25 (128 MB/s) - ‘/content/cats_and_dogs_filtered.zip’ saved [68606236/68606236]



In [None]:
import os
import zipfile

local_zip = '/content/cats_and_dogs_filtered.zip'
zip_ref = zipfile.ZipFile(local_zip, 'r')
zip_ref.extractall('/content')
zip_ref.close()

In [None]:
import tensorflow as tf
import numpy as np
tflite_model_path = '/content/mobilenet_v2_float32.tflite'

#tflite_model_path = '/content/model_float32.tflite'
# Load the TFLite model and allocate tensors
interpreter = tf.lite.Interpreter(model_path=tflite_model_path)
interpreter.allocate_tensors()

print("== Input details ==")
print("name:", interpreter.get_input_details()[0]['name'])
print("shape:", interpreter.get_input_details()[0]['shape'])
print("type:", interpreter.get_input_details()[0]['dtype'])

print("\nDUMP INPUT")
print(interpreter.get_input_details()[0])

print("\n== Output details ==")
print("name:", interpreter.get_output_details()[0]['name'])
print("shape:", interpreter.get_output_details()[0]['shape'])
print("type:", interpreter.get_output_details()[0]['dtype'])

print("\nDUMP OUTPUT")
print(interpreter.get_output_details()[0])

# Get input and output tensors
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

# Test the model on image  data
input_shape = input_details[0]['shape']
#print(input_shape)
image = tf.io.read_file('/content/cats_and_dogs_filtered/validation/cats/cat.2000.jpg')

image = tf.io.decode_jpeg(image, channels=3)
image = tf.image.resize(image, [IMAGE_SIZE, IMAGE_SIZE])
image = tf.reshape(image,[3,IMAGE_SIZE,IMAGE_SIZE])
image = tf.expand_dims(image, 0)
print("Real image shape")
print(image.shape)
#print(image)
interpreter.set_tensor(input_details[0]['index'], image)

interpreter.invoke()

# get_tensor() returns a copy of the tensor data
# use tensor() in order to get a pointer to the tensor
output_data = interpreter.get_tensor(output_details[0]['index'])

print("Predicted value . Label index: {}, confidence: {:2.0f}%"
      .format(np.argmax(output_data), 
              100 * output_data[0][np.argmax(output_data)]))

== Input details ==
name: serving_default_input:0
shape: [  1   3 224 224]
type: <class 'numpy.float32'>

DUMP INPUT
{'name': 'serving_default_input:0', 'index': 0, 'shape': array([  1,   3, 224, 224], dtype=int32), 'shape_signature': array([ -1,   3, 224, 224], dtype=int32), 'dtype': <class 'numpy.float32'>, 'quantization': (0.0, 0), 'quantization_parameters': {'scales': array([], dtype=float32), 'zero_points': array([], dtype=int32), 'quantized_dimension': 0}, 'sparsity_parameters': {}}

== Output details ==
name: PartitionedCall:0
shape: [   1 1000]
type: <class 'numpy.float32'>

DUMP OUTPUT
{'name': 'PartitionedCall:0', 'index': 346, 'shape': array([   1, 1000], dtype=int32), 'shape_signature': array([  -1, 1000], dtype=int32), 'dtype': <class 'numpy.float32'>, 'quantization': (0.0, 0), 'quantization_parameters': {'scales': array([], dtype=float32), 'zero_points': array([], dtype=int32), 'quantized_dimension': 0}, 'sparsity_parameters': {}}
Real image shape
(1, 3, 224, 224)
Predict

####Convert from TF saved model to TFLite(quantized) model

In [None]:
# A generator that provides a representative dataset
import tensorflow as tf
from PIL import Image
from torchvision import transforms
saved_model_dir = 'model_tf'
#flowers_dir = '/content/images'
def representative_data_gen():
  dataset_list = tf.data.Dataset.list_files('/content/cats_and_dogs_filtered/train' + '/*/*')
  for i in range(1):
    image = next(iter(dataset_list))
    image = tf.io.read_file(image)
    image = tf.io.decode_jpeg(image, channels=3)
    image = tf.image.resize(image, [IMAGE_SIZE, IMAGE_SIZE])
    image = tf.reshape(image,[3,IMAGE_SIZE,IMAGE_SIZE])
    image = tf.cast(image / 127., tf.float32)
    image = tf.expand_dims(image, 0)
    print(image.shape)    
    yield [image]

from PIL import Image
from torchvision import transforms
# Download an example image from the pytorch website
import urllib
url, filename = ("https://github.com/pytorch/hub/raw/master/images/dog.jpg", "dog.jpg")
try: urllib.URLopener().retrieve(url, filename)
except: urllib.request.urlretrieve(url, filename)

def representative_data_gen_1():
  dataset_list = tf.data.Dataset.list_files('/content/cats_and_dogs_filtered/train' + '/*/*')
  for i in range(100):
    input_image = next(iter(dataset_list))      
    input_image = Image.open(filename)
    preprocess = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])
    input_tensor = preprocess(input_image)
    print(input_tensor.shape)
    input_tensor = tf.expand_dims(input_tensor, 0)
    print("torch input_tensor size")
    print(input_tensor.shape)    
    yield [input_tensor]   
     
#converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter = tf.lite.TFLiteConverter.from_saved_model(saved_model_dir) 
# This enables quantization
converter.optimizations = [tf.lite.Optimize.DEFAULT]
# This sets the representative dataset for quantization
converter.representative_dataset = representative_data_gen_1
# This ensures that if any ops can't be quantized, the converter throws an error
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
# For full integer quantization, though supported types defaults to int8 only, we explicitly declare it for clarity.
converter.target_spec.supported_types = [tf.int8]
# These set the input and output tensors to uint8 (added in r2.3)
converter.inference_input_type = tf.int8
converter.inference_output_type = tf.int8
tflite_model = converter.convert()

with open('mobilenet_v2_1.0_224_quant.tflite', 'wb') as f:
  f.write(tflite_model)

torch.Size([3, 224, 224])
torch input_tensor size
(1, 3, 224, 224)
torch.Size([3, 224, 224])
torch input_tensor size
(1, 3, 224, 224)
torch.Size([3, 224, 224])
torch input_tensor size
(1, 3, 224, 224)
torch.Size([3, 224, 224])
torch input_tensor size
(1, 3, 224, 224)
torch.Size([3, 224, 224])
torch input_tensor size
(1, 3, 224, 224)
torch.Size([3, 224, 224])
torch input_tensor size
(1, 3, 224, 224)
torch.Size([3, 224, 224])
torch input_tensor size
(1, 3, 224, 224)
torch.Size([3, 224, 224])
torch input_tensor size
(1, 3, 224, 224)
torch.Size([3, 224, 224])
torch input_tensor size
(1, 3, 224, 224)
torch.Size([3, 224, 224])
torch input_tensor size
(1, 3, 224, 224)
torch.Size([3, 224, 224])
torch input_tensor size
(1, 3, 224, 224)
torch.Size([3, 224, 224])
torch input_tensor size
(1, 3, 224, 224)
torch.Size([3, 224, 224])
torch input_tensor size
(1, 3, 224, 224)
torch.Size([3, 224, 224])
torch input_tensor size
(1, 3, 224, 224)
torch.Size([3, 224, 224])
torch input_tensor size
(1, 3, 224, 



##Run inference on TFLite(quantized) with image data

In [None]:
import tensorflow as tf
import numpy as np
tflite_model_path = '/content/mobilenet_v2_1.0_224_quant.tflite'
#tflite_model_path = '/content/model_float32.tflite'
# Load the TFLite model and allocate tensors
interpreter = tf.lite.Interpreter(model_path=tflite_model_path)
interpreter.allocate_tensors()

print("== Input details ==")
print("name:", interpreter.get_input_details()[0]['name'])
print("shape:", interpreter.get_input_details()[0]['shape'])
print("type:", interpreter.get_input_details()[0]['dtype'])
 

print("\nDUMP INPUT")
print(interpreter.get_input_details()[0])

print("\n== Output details ==")
print("name:", interpreter.get_output_details()[0]['name'])
print("shape:", interpreter.get_output_details()[0]['shape'])
print("type:", interpreter.get_output_details()[0]['dtype'])

print("\nDUMP OUTPUT")
print(interpreter.get_output_details()[0])

# Get input and output tensors
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
test_details = interpreter.get_input_details()[0]
scale, zero_point = test_details['quantization']
print(scale)
print(zero_point)
# Test the model on image  data
input_shape = input_details[0]['shape']
#print(input_shape)
#image = tf.io.read_file('/content/169371301_d9b91a2a42.jpg')
#image = tf.io.read_file('/content/istockphoto-472306883-1024x1024.jpg')
#image = tf.io.read_file('/content/images/Car-PNG-HD.png')
image = tf.io.read_file('/content/cats_and_dogs_filtered/validation/cats/cat.2000.jpg')

image = tf.io.decode_jpeg(image, channels=3)
#print(image.shape)     
image = tf.image.resize(image, [IMAGE_SIZE, IMAGE_SIZE])
image = tf.reshape(image,[3,IMAGE_SIZE,IMAGE_SIZE])
#print(image.shape)
image = tf.cast(image / 127., tf.int8)
#image = tf.cast(image , tf.float32)
image = tf.expand_dims(image, 0)
print("Real image shape")
print(image.shape)
#image = np.int8(image)
#print(image)
#image = np.float32(image / scale + zero_point)
interpreter.set_tensor(input_details[0]['index'], image)

interpreter.invoke()

# get_tensor() returns a copy of the tensor data
# use tensor() in order to get a pointer to the tensor
output_data = interpreter.get_tensor(output_details[0]['index'])

print("Predicted value . Label index: {}, confidence: {:2.0f}%"
      .format(np.argmax(output_data), 
              100 * output_data[0][np.argmax(output_data)]))

##Run inference on TFLite(float32) model with dog.jpg
"https://github.com/pytorch/hub/raw/master/images/dog.jpg"

In [None]:
# Download an example image from the pytorch website
import urllib
url, filename = ("https://github.com/pytorch/hub/raw/master/images/dog.jpg", "dog.jpg")
try: urllib.URLopener().retrieve(url, filename)
except: urllib.request.urlretrieve(url, filename)

In [None]:
# sample execution (requires torchvision)
from PIL import Image
from torchvision import transforms
input_image = Image.open(filename)
preprocess = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
input_tensor = preprocess(input_image)
print(input_tensor.shape)
#input_tensor = tf.reshape(input_tensor,[3,IMAGE_SIZE,IMAGE_SIZE])
#input_tensor = tf.cast(input_tensor , tf.float32)
input_tensor = tf.expand_dims(input_tensor, 0)
print("torch input_tensor size")
print(input_tensor.shape)

import numpy as np
import tensorflow as tf

tflite_model_path = '/content/mobilenet_v2_float32.tflite'
# Load the TFLite model and allocate tensors
interpreter = tf.lite.Interpreter(model_path=tflite_model_path)
interpreter.allocate_tensors()

# Get input and output tensors
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

# Test the model on random input data
input_shape = input_details[0]['shape']
print(input_shape)
input_data = np.array(np.random.random_sample(input_shape), dtype=np.float32)
interpreter.set_tensor(input_details[0]['index'], input_tensor)

interpreter.invoke()

# get_tensor() returns a copy of the tensor data
# use tensor() in order to get a pointer to the tensor
output_data = interpreter.get_tensor(output_details[0]['index'])

print("Predicted value for [0, 1] normalization. Label index: {}, confidence: {:2.0f}%"
      .format(np.argmax(output_data), 
              100 * output_data[0][np.argmax(output_data)]))

torch.Size([3, 224, 224])
torch input_tensor size
(1, 3, 224, 224)
[  1   3 224 224]
Predicted value for [0, 1] normalization. Label index: 258, confidence: 1436%


##Run inference on TFLite(quantized) model with dog.jpg
"https://github.com/pytorch/hub/raw/master/images/dog.jpg"

In [None]:
import tensorflow as tf
import numpy as np
tflite_model_path = '/content/mobilenet_v2_1.0_224_quant.tflite'
interpreter = tf.lite.Interpreter(model_path=tflite_model_path)
interpreter.allocate_tensors()
# sample execution (requires torchvision)
from PIL import Image
from torchvision import transforms
input_image = Image.open(filename)
preprocess = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
input_tensor = preprocess(input_image)
print(input_tensor.shape)

#input_tensor = tf.reshape(input_tensor,[3,IMAGE_SIZE,IMAGE_SIZE])
#input_tensor = tf.cast(input_tensor , tf.float32)
input_tensor = tf.expand_dims(input_tensor, 0)
print("torch input_tensor size")
print(input_tensor.shape)

# Get input and output tensors
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
test_details = interpreter.get_input_details()[0]

# Test the model on image  data
# sample execution (requires torchvision)
from PIL import Image
from torchvision import transforms
input_image = Image.open(filename)
preprocess = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
input_tensor = preprocess(input_image)
print(input_tensor.shape)
input_tensor = np.int8(input_tensor)
input_tensor = tf.expand_dims(input_tensor, 0)
print("torch input_tensor size:")
print(input_tensor.shape)
print(input_tensor)
interpreter.set_tensor(input_details[0]['index'], input_tensor)

interpreter.invoke()

# get_tensor() returns a copy of the tensor data
# use tensor() in order to get a pointer to the tensor
output_data = interpreter.get_tensor(output_details[0]['index'])

print("Predicted value . Label index: {}, confidence: {:2.0f}%"
      .format(np.argmax(output_data), 
              100 * output_data[0][np.argmax(output_data)]))

torch.Size([3, 224, 224])
torch input_tensor size
(1, 3, 224, 224)
torch.Size([3, 224, 224])
torch input_tensor size:
(1, 3, 224, 224)
tf.Tensor(
[[[[-1 -1 -1 ... -2 -1 -1]
   [-1 -1 -1 ... -1 -1 -1]
   [-1 -1 -1 ... -2 -1 -1]
   ...
   [-1 -1 -1 ...  0 -1 -1]
   [-1 -1 -1 ... -1 -1 -1]
   [-1 -1 -1 ...  0 -1 -1]]

  [[-1 -1 -1 ... -1 -1 -1]
   [-1 -1 -1 ... -1 -1 -1]
   [-1 -1 -1 ... -1 -1 -1]
   ...
   [ 0  0  0 ...  0  0  0]
   [ 0  0 -1 ...  0 -1  0]
   [ 0  0  0 ...  0  0  0]]

  [[-1 -1 -1 ... -1 -1 -1]
   [-1 -1 -1 ... -1 -1 -1]
   [-1 -1 -1 ... -1 -1 -1]
   ...
   [-1 -1 -1 ...  0 -1 -1]
   [-1 -1 -1 ...  0 -1 -1]
   [-1 -1 -1 ...  0 -1 -1]]]], shape=(1, 3, 224, 224), dtype=int8)
Predicted value . Label index: 701, confidence: 100%
