<a href="https://colab.research.google.com/github/pranay8297/ExpNotebooks/blob/main/oonx_to_tflite_and_parseq_to_torchscript.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# ONNX to TFLite

In [None]:
# 1. Clone the repository
!git clone https://github.com/MPolaris/onnx2tflite.git

# 2. Enter the project directory
%cd onnx2tflite

# 3. Install in editable / “development” mode
!pip install -e .


In [None]:
from onnx2tflite import onnx_converter

res = onnx_converter(
    onnx_model_path="./judge_model.onnx",  # path to your exported ONNX
    output_path="./",        # where to write the .tflite
    target_formats=['tflite'],             # only TFLite
    need_simplify=True                     # optional: simplify ONNX graph first
)
print("Generated TFLite model at:", res['tflite'])


Checking 0/1...
shape[0] of input "input" is dynamic, we assume it presents batch size and set it as 1 when testing. If it is not wanted, please set the it manually by --test-input-shape (see `onnxsim -h` for the details).




Generated TFLite model at: ./judge_model.tflite


In [None]:
!pip install tflite-runtime

Collecting tflite-runtime
  Downloading tflite_runtime-2.14.0-cp311-cp311-manylinux2014_x86_64.whl.metadata (1.4 kB)
Downloading tflite_runtime-2.14.0-cp311-cp311-manylinux2014_x86_64.whl (2.4 MB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/2.4 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m2.4/2.4 MB[0m [31m104.2 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.4/2.4 MB[0m [31m57.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: tflite-runtime
Successfully installed tflite-runtime-2.14.0


In [None]:
import tensorflow as tf

# 1. Create the interpreter and load the model
interpreter = tf.lite.Interpreter(model_path="judge_model.tflite")
interpreter.allocate_tensors()

In [None]:
interpreter

<tensorflow.lite.python.interpreter.Interpreter at 0x7808365cb890>

In [None]:
input_details  = interpreter.get_input_details()
output_details = interpreter.get_output_details()

print("Input details:", input_details)
print("Output details:", output_details)

Input details: [{'name': 'serving_default_input_2:0', 'index': 0, 'shape': array([  1,   3, 224, 224], dtype=int32), 'shape_signature': array([  1,   3, 224, 224], dtype=int32), 'dtype': <class 'numpy.float32'>, 'quantization': (0.0, 0), 'quantization_parameters': {'scales': array([], dtype=float32), 'zero_points': array([], dtype=int32), 'quantized_dimension': 0}, 'sparsity_parameters': {}}]
Output details: [{'name': 'StatefulPartitionedCall:0', 'index': 324, 'shape': array([1, 2], dtype=int32), 'shape_signature': array([1, 2], dtype=int32), 'dtype': <class 'numpy.float32'>, 'quantization': (0.0, 0), 'quantization_parameters': {'scales': array([], dtype=float32), 'zero_points': array([], dtype=int32), 'quantized_dimension': 0}, 'sparsity_parameters': {}}]


In [None]:
from PIL import Image
import numpy as np

# 1. Load and resize the image to match model input
img = Image.open("bad.jpg").convert("RGB").resize((224, 224))

# 2. Convert to a NumPy array, float32, NHWC order [H, W, C]
input_data = np.array(img, dtype=np.float32)

# 3. Add the batch dimension: [1, H, W, C]
input_data = np.expand_dims(input_data, axis=0)          # (1, 224, 224, 3)
input_data = input_data.transpose(0, 3, 1, 2) / 255.

In [None]:
input_data.shape, input_data.max(), input_data.min(), input_data.mean(), input_data.std()

((1, 3, 224, 224), 1.0, 0.0, 0.3061117, 0.29185376)

In [None]:
interpreter.set_tensor(input_details[0]['index'], input_data)

# 2. Run the model
interpreter.invoke()

In [None]:
logits = interpreter.get_tensor(output_details[0]['index'])
logits

array([[ 2.8056283, -3.4399908]], dtype=float32)

# ParSEQ direct tensorflow experiments - worked but accuracy did not translate


In [1]:
!pip install --upgrade "python-doctr[tf]==0.8.0"

Collecting python-doctr==0.8.0 (from python-doctr[tf]==0.8.0)
  Downloading python_doctr-0.8.0-py3-none-any.whl.metadata (32 kB)
Collecting numpy<2.0.0,>=1.16.0 (from python-doctr==0.8.0->python-doctr[tf]==0.8.0)
  Downloading numpy-1.26.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (61 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.0/61.0 kB[0m [31m1.2 MB/s[0m eta [36m0:00:00[0m
Collecting pypdfium2<5.0.0,>=4.0.0 (from python-doctr==0.8.0->python-doctr[tf]==0.8.0)
  Downloading pypdfium2-4.30.1-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (48 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m48.2/48.2 kB[0m [31m1.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting pyclipper<2.0.0,>=1.2.0 (from python-doctr==0.8.0->python-doctr[tf]==0.8.0)
  Downloading pyclipper-1.3.0.post6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.0 kB)
Collecting langdetect<2.0.0,>=1.0.9 (from pytho

In [1]:
import doctr
print(doctr.__version__)

v0.8.0


In [2]:

# This creates a TensorFlow model
from doctr.models.recognition.parseq.tensorflow import parseq
model = parseq(pretrained=True)

Downloading https://doctr-static.mindee.com/models?id=v0.6.0/parseq-24cf693e.zip&src=0 to /root/.cache/doctr/models/parseq-24cf693e.zip


  0%|          | 0/87753821 [00:00<?, ?it/s]

In [9]:
model.__class__

In [10]:
import cv2
import numpy as np
import tensorflow as tf
from doctr.models.recognition.parseq.tensorflow import parseq
from doctr.transforms.functional import resize, normalize

# 1. Load the TF-PARSeq model (docTR v0.8.0)
model = parseq(pretrained=True)

# 2. Prepare a list of cropped word images
imgs = [cv2.cvtColor(cv2.imread(path), cv2.COLOR_BGR2RGB) for path in ['/content/21320019885_278_old_meter_final_kwh_evidence.png', '/content/22510058722_544_old_meter_final_kwh_evidence.png']]

# 3. Preprocess: resize to 32×128 px and normalize
processed = [normalize(resize(img, size=(32, 128))) for img in imgs]

# 4. Stack into a batch tensor of shape (batch_size, 32, 128, 3)
batch = tf.stack([tf.convert_to_tensor(inp, dtype=tf.float32) for inp in processed], axis=0)

# 5. Run inference to get raw logits
logits = model(batch, training=False)

# 6. Decode: softmax + argmax to get token IDs
probs = tf.nn.softmax(logits, axis=-1)
token_ids = tf.argmax(probs, axis=-1).numpy()

# 7. Map token IDs → characters using the model’s vocabulary
vocab = model.cfg['vocab']       # List of characters
eos_idx = model.cfg['eos_token'] # Index of EOS token
decoded = []
for seq in token_ids:
    chars = []
    for idx in seq:
        if idx == eos_idx:
            break
        if idx < len(vocab):
            chars.append(vocab[idx])
    decoded.append(''.join(chars))

print(decoded)  # List of recognized strings per image


ImportError: cannot import name 'resize' from 'doctr.transforms.functional' (/usr/local/lib/python3.11/dist-packages/doctr/transforms/functional/__init__.py)

In [14]:
model.cfg

{'mean': (0.694, 0.695, 0.693),
 'std': (0.299, 0.296, 0.301),
 'input_shape': (32, 128, 3),
 'vocab': '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~°£€¥¢฿àâéèêëîïôùûüçÀÂÉÈÊËÎÏÔÙÛÜÇ',
 'url': 'https://doctr-static.mindee.com/models?id=v0.6.0/parseq-24cf693e.zip&src=0'}

In [17]:
from doctr.models import recognition_predictor

# Load TF PARSeq-only predictor
predictor = recognition_predictor('parseq', pretrained=True)

In [20]:
import cv2

# Read and convert a crop (BGR→RGB)
img = cv2.cvtColor(cv2.imread('/content/22510058722_544_old_meter_final_kwh_evidence.png'), cv2.COLOR_BGR2RGB)

# Inference on a single crop
predictions = predictor([img])


In [21]:
predictions

[('00.023', 0.8198116421699524)]

In [23]:
# e.g. a dummy image tensor of shape [1, 3, H, W]
import torch
example = torch.randn(1, 3, 32, 128)

# Torch to ONNX - Failed

In [1]:
!git clone https://github.com/baudm/parseq.git
# !pip install -r parseq/requirements/core.txt
!pip install -e parseq/

Cloning into 'parseq'...
remote: Enumerating objects: 612, done.[K
remote: Counting objects: 100% (311/311), done.[K
remote: Compressing objects: 100% (118/118), done.[K
remote: Total 612 (delta 241), reused 193 (delta 193), pack-reused 301 (from 2)[K
Receiving objects: 100% (612/612), 1.34 MiB | 6.08 MiB/s, done.
Resolving deltas: 100% (343/343), done.
Obtaining file:///content/parseq
  Installing build dependencies ... [?25l[?25hdone
  Checking if build backend supports build_editable ... [?25l[?25hdone
  Getting requirements to build editable ... [?25l[?25hdone
  Preparing editable metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: strhub
  Building editable for strhub (pyproject.toml) ... [?25l[?25hdone
  Created wheel for strhub: filename=strhub-1.2.0-0.editable-py3-none-any.whl size=17093 sha256=1e0dfb4d480213c3bef23681b00a15012bd4bed68157c82a1630201b069839f2
  Stored in directory: /tmp/pip-ephem-wheel-cache-dv8tjdqq/wheels/81/19/76

In [None]:
!pip install pytorch-lightning==2.2.0.post0
!pip install lightning-utilities==0.10.1
!pip install onnx

In [1]:
from strhub.models.utils import load_from_checkpoint
import torch

device = torch.device('cpu')

# To ONNX
onnx_path = "parseq_test.onnx"
img = torch.randn(1, 3, 32, 128)

# parseq = load_from_checkpoint(ckpt_path)
parseq = torch.hub.load('baudm/parseq', 'parseq', pretrained=True).eval()
parseq.refine_iters = 0
parseq.decode_ar = False
parseq = parseq.to(device).eval()

parseq.to_onnx(onnx_path, img, do_constant_folding=True, opset_version=14)  # opset v14 or newer is required


Using cache found in /root/.cache/torch/hub/baudm_parseq_main
  assert condition, message
  if testing and (tgt_in == tokenizer.eos_id).any(dim=-1).all():


In [7]:
import onnx
onnx_model = onnx.load(onnx_path)
onnx.checker.check_model(onnx_model, full_check=True)

In [10]:
# !pip install onnx2tf
# !pip install onnx-graphsurgeon
# !pip install ai-edge-litert
# !pip install sng4onnx

Collecting sng4onnx
  Downloading sng4onnx-1.0.4-py3-none-any.whl.metadata (4.6 kB)
Downloading sng4onnx-1.0.4-py3-none-any.whl (5.9 kB)
Installing collected packages: sng4onnx
Successfully installed sng4onnx-1.0.4


In [None]:
!pip install onnx==1.17.0
!pip install onnxruntime==1.18.1
!pip install onnx-simplifier==0.4.33 #or 0.4.30 (onnx.onnx_cpp2py_export.shape_inference.InferenceError: [ShapeInferenceError] (op_type:Slice, node name: /xxxx/Slice): [ShapeInferenceError] Inferred shape and existing shape differ in rank: (x) vs (y))
!pip install onnx_graphsurgeon
!pip install simple_onnx_processing_tools
!pip install tensorflow==2.19.0 # , Special bugs: #436
!pip install tf-keras==2.19.0
!pip install ai-edge-litert==1.2.0
!pip install psutil==5.9.5
!pip install ml_dtypes==0.5.1
# flatbuffers-compiler (Optional, Only when using the -coion option. Executable file named flatc.)
!pip install flatbuffers>=23.1.21

In [None]:
!onnx2tf -i parseq_test.onnx -o saved_model \
  -prf replace.json \
  -b 1


In [6]:
!git clone https://github.com/PINTO0309/onnx2tf.git



Cloning into 'onnx2tf'...
remote: Enumerating objects: 12431, done.[K
remote: Counting objects: 100% (127/127), done.[K
remote: Compressing objects: 100% (66/66), done.[K
remote: Total 12431 (delta 80), reused 75 (delta 61), pack-reused 12304 (from 3)[K
Receiving objects: 100% (12431/12431), 3.96 MiB | 7.99 MiB/s, done.
Resolving deltas: 100% (9113/9113), done.


In [7]:
%cd onnx2tf
# pip install -e .

/content/onnx2tf


In [8]:
!pip install -e .

Obtaining file:///content/onnx2tf
  Preparing metadata (setup.py) ... [?25l[?25hdone
Installing collected packages: onnx2tf
  Attempting uninstall: onnx2tf
    Found existing installation: onnx2tf 1.27.4
    Uninstalling onnx2tf-1.27.4:
      Successfully uninstalled onnx2tf-1.27.4
  Running setup.py develop for onnx2tf
Successfully installed onnx2tf-1.27.4


In [15]:
!cp /content/parseq_test.onnx /content/drive/MyDrive/OCR

In [11]:

!onnx2tf -i ../parseq_test.onnx -o parseq_tf_saved_model


E0000 00:00:1747299523.556079   28348 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1747299523.563652   28348 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1747299523.584424   28348 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1747299523.584481   28348 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1747299523.584487   28348 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1747299523.584493   28348 computation_placer.cc:177] computation placer already registered. Please check linka

# Parseq to direct torchscript

In [1]:
!pip freeze | grep parseq

In [None]:
!git clone https://github.com/baudm/parseq.git
# !pip install -r parseq/requirements/core.txt
!pip install pytorch-lightning==2.2.0.post0
!pip install lightning-utilities==0.10.1
!pip install onnx




In [3]:
!pip install -e parseq/

Obtaining file:///content/parseq
  Installing build dependencies ... [?25l[?25hdone
  Checking if build backend supports build_editable ... [?25l[?25hdone
  Getting requirements to build editable ... [?25l[?25hdone
  Preparing editable metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: strhub
  Building editable for strhub (pyproject.toml) ... [?25l[?25hdone
  Created wheel for strhub: filename=strhub-1.2.0-0.editable-py3-none-any.whl size=17093 sha256=777a015f48478a8761ae65e414630b849ffa1df5fe34096e34fc05f5f69525cf
  Stored in directory: /tmp/pip-ephem-wheel-cache-mxkzkjtc/wheels/81/19/76/37967e8554a579fd80aaee8b99a0ae0349205b8eae49b433e6
Successfully built strhub
Installing collected packages: strhub
Successfully installed strhub-1.2.0


In [1]:
import torch
from torch import nn
from parseq.strhub.models.parseq.model import PARSeq



In [2]:

par_seq_model = PARSeq(
    num_tokens = 97,
        max_label_length = 25,
        img_size = [32, 128],
        patch_size = [4, 8],
        embed_dim = 384,
        enc_num_heads = 6,
        enc_mlp_ratio = 4,
        enc_depth = 12,
        dec_num_heads = 12,
        dec_mlp_ratio = 4,
        dec_depth = 1,
        decode_ar = False,
        refine_iters = 0,
        dropout = 0.1,
)

initializing my class finally


In [3]:
state_dict = torch.load("./drive/MyDrive/OCR/parseq_weights.pth")
par_seq_model.load_state_dict(state_dict, strict=True)

<All keys matched successfully>

In [4]:
from typing import NamedTuple
from torch import nn
import torch

class Tokenizer(NamedTuple):
    pad_id: int
    bos_id: int
    eos_id: int

class MyModel(nn.Module):
    def __init__(self, base_model: nn.Module):
        super().__init__()
        self.model = base_model
        self.model.decode_ar = False
        self.model.refine_iters = 0
        self.max_length = 14
        # TorchScript‐friendly tokenizer
        self.tokenizer = Tokenizer(pad_id=96, bos_id=95, eos_id=0)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # Now both inputs and attributes are TorchScript types
        return self.model( x, self.max_length)


parseq_my_model = MyModel(par_seq_model).eval()

In [6]:
x = torch.randn(1, 3, 32, 128)
logits = parseq_my_model(x)
logits

tensor([[[-2.0615,  0.1621,  1.6718,  ..., -2.3501, -4.5227, -3.5876],
         [ 1.5534,  1.2274,  0.7286,  ..., -3.9266, -5.8672, -5.1794],
         [ 1.6115,  0.6731,  0.5356,  ..., -4.1488, -5.9314, -5.2665],
         ...,
         [ 4.2428,  0.8883,  0.6430,  ..., -2.8348, -3.8989, -4.5093],
         [ 4.2578,  0.7649,  0.3414,  ..., -3.2632, -4.2993, -4.5722],
         [ 3.9911,  0.6809,  0.4267,  ..., -3.2895, -4.0956, -4.5353]]],
       grad_fn=<ViewBackward0>)

In [7]:
scripted = torch.jit.script(parseq_my_model)



In [8]:
scripted

RecursiveScriptModule(
  original_name=MyModel
  (model): RecursiveScriptModule(
    original_name=PARSeq
    (encoder): RecursiveScriptModule(
      original_name=Encoder
      (patch_embed): RecursiveScriptModule(
        original_name=PatchEmbed
        (proj): RecursiveScriptModule(original_name=Conv2d)
        (norm): RecursiveScriptModule(original_name=Identity)
      )
      (pos_drop): RecursiveScriptModule(original_name=Dropout)
      (patch_drop): RecursiveScriptModule(original_name=Identity)
      (norm_pre): RecursiveScriptModule(original_name=Identity)
      (blocks): RecursiveScriptModule(
        original_name=Sequential
        (0): RecursiveScriptModule(
          original_name=Block
          (norm1): RecursiveScriptModule(original_name=LayerNorm)
          (attn): RecursiveScriptModule(
            original_name=Attention
            (qkv): RecursiveScriptModule(original_name=Linear)
            (q_norm): RecursiveScriptModule(original_name=Identity)
            (k_n

In [9]:
x = torch.randn(1, 3, 32, 128)
scripted(x)

tensor([[[-2.1022, -0.2115,  1.1537,  ..., -2.6167, -4.4884, -3.1685],
         [ 1.2114,  1.4041,  0.8940,  ..., -3.9037, -6.0341, -5.1798],
         [ 1.6055,  0.9235,  0.8611,  ..., -4.2728, -6.2771, -5.2121],
         ...,
         [ 4.4171,  0.6806,  0.6736,  ..., -2.6800, -3.9092, -4.7091],
         [ 4.3226,  0.6456,  0.4431,  ..., -3.1827, -4.3240, -4.7104],
         [ 3.9975,  0.6167,  0.5913,  ..., -3.2849, -4.1735, -4.5974]]],
       grad_fn=<ViewBackward0>)

In [10]:
orig_model = torch.hub.load('baudm/parseq', 'parseq', pretrained=True).eval()

Downloading: "https://github.com/baudm/parseq/zipball/main" to /root/.cache/torch/hub/main.zip


initializing my class finally


Downloading: "https://github.com/baudm/parseq/releases/download/v1.0.0/parseq-bb5792a6.pt" to /root/.cache/torch/hub/checkpoints/parseq-bb5792a6.pt
100%|██████████| 91.0M/91.0M [00:01<00:00, 84.8MB/s]


In [13]:
out = orig_model.model(x, 14)
out

tensor([[[-2.1022, -0.2115,  1.1537,  ..., -2.6167, -4.4884, -3.1685],
         [ 1.2114,  1.4041,  0.8940,  ..., -3.9037, -6.0341, -5.1798],
         [ 1.6055,  0.9235,  0.8611,  ..., -4.2728, -6.2771, -5.2121],
         ...,
         [ 4.4171,  0.6806,  0.6736,  ..., -2.6800, -3.9092, -4.7091],
         [ 4.3226,  0.6456,  0.4431,  ..., -3.1827, -4.3240, -4.7104],
         [ 3.9975,  0.6167,  0.5913,  ..., -3.2849, -4.1735, -4.5974]]],
       grad_fn=<ViewBackward0>)

In [14]:
scripted.save("parseq_scripted.pt")

# 2. Load it back
loaded = torch.jit.load("parseq_scripted.pt").eval()


In [15]:
loaded_out = loaded(x)
loaded_out

tensor([[[-2.1022, -0.2115,  1.1537,  ..., -2.6167, -4.4884, -3.1685],
         [ 1.2114,  1.4041,  0.8940,  ..., -3.9037, -6.0341, -5.1798],
         [ 1.6055,  0.9235,  0.8611,  ..., -4.2728, -6.2771, -5.2121],
         ...,
         [ 4.4171,  0.6806,  0.6736,  ..., -2.6800, -3.9092, -4.7091],
         [ 4.3226,  0.6456,  0.4431,  ..., -3.1827, -4.3240, -4.7104],
         [ 3.9975,  0.6167,  0.5913,  ..., -3.2849, -4.1735, -4.5974]]],
       grad_fn=<ViewBackward0>)

In [16]:
loaded_out.cpu().detach().numpy() == out.cpu().detach().numpy()

array([[[ True,  True,  True, ...,  True,  True,  True],
        [ True,  True,  True, ...,  True,  True,  True],
        [ True,  True,  True, ...,  True,  True,  True],
        ...,
        [ True,  True,  True, ...,  True,  True,  True],
        [ True,  True,  True, ...,  True,  True,  True],
        [ True,  True,  True, ...,  True,  True,  True]]])

In [17]:
!cp /content/parseq_scripted.pt /content/drive/MyDrive/OCR/parseq_model_file/