In [2]:
from typing import cast
import torch
from transformers import AutoImageProcessor, Dinov2WithRegistersConfig
from PIL import Image
import torch.nn.functional as F
from model import CustomDinoV2ClassifierWithReg
from config import BASE_MODEL_NAME, NUM_CLASSES , HIDDEN_DIM

# Directory holding the exported fine-tuned model (config + weights).
checkpoint_path = "./configs/"

# Load the image processor from the base model, and the fine-tuned classifier
# (config + weights) from the checkpoint directory.
processor = AutoImageProcessor.from_pretrained(BASE_MODEL_NAME, cache_dir="./cache", use_fast=True)
config = cast(Dinov2WithRegistersConfig, Dinov2WithRegistersConfig.from_pretrained(checkpoint_path, cache_dir="./cache"))
model = CustomDinoV2ClassifierWithReg.from_pretrained(
    checkpoint_path,
    config=config,
    num_classes=NUM_CLASSES,
    hidden_dim=HIDDEN_DIM,
    cache_dir="./cache"
)

# If the checkpoint holds only the weights (not exported together with the base
# model), load them explicitly instead. Kept as a comment rather than a bare
# string literal so it cannot be mistaken for live code:
#
#   # `load_file` is not imported in this notebook; presumably
#   # `from safetensors.torch import load_file` -- TODO confirm.
#   checkpoint_path = "./backup/results/checkpoint-602/model.safetensors"
#   state_dict = load_file(checkpoint_path)
#   model.load_state_dict(state_dict)

# The export is done in eval mode with both the model and the example input on CPU.
model.eval()
model.to('cpu') # type: ignore

# Preprocess one sample image to serve as the example input for the export.
# Forward slashes keep the relative path valid on both Windows and POSIX, and
# the context manager closes the image file once the RGB copy has been made.
with Image.open("./Dataset/Acridotheres javanicus/Javan Myna_Acridotheres javanicus_1.jpg") as image_file:
    image = image_file.convert("RGB")
inputs = processor(images=image, return_tensors="pt").to('cpu')['pixel_values']  # type: ignore
print(f"{inputs.shape, inputs.dtype}")

# NOTE(review): axis 0 is requested as dynamic below, yet the export output
# recorded for this cell lists `pixel_values` as [1, 3, 224, 224], i.e. a
# static batch axis. Confirm whether an example input with batch > 1 is needed.
torch.onnx.export(
    model,                             # model
    (inputs, ),                         # example input tuple
    "./quant/basefp32.onnx",                     # output path
    export_params=True,
    opset_version=20,
    input_names=["pixel_values"],             # <- match the actual argument name
    output_names=["logits"],
    dynamic_shapes={
        "pixel_values": {0: "batch_size"}, # only inputs are listed here
    },
    dynamo=True,
    optimize=True,
    report=False,
    do_constant_folding=True,
)

(torch.Size([1, 3, 224, 224]), torch.float32)
[torch.onnx] Obtain model graph for `CustomDinoV2ClassifierWithReg([...]` with `torch.export.export(..., strict=False)`...
[torch.onnx] Obtain model graph for `CustomDinoV2ClassifierWithReg([...]` with `torch.export.export(..., strict=False)`... ✅
[torch.onnx] Run decomposition...
[torch.onnx] Run decomposition... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
Applied 6 of general pattern rewrite rules.


ONNXProgram(
    model=
        <
            ir_version=10,
            opset_imports={'': 20},
            producer_name='pytorch',
            producer_version='2.7.1+cu128',
            domain=None,
            model_version=None,
        >
        graph(
            name=main_graph,
            inputs=(
                %"pixel_values"<FLOAT,[1,3,224,224]>
            ),
            outputs=(
                %"logits"<FLOAT,[1,131]>
            ),
            initializers=(
                %"backbone.embeddings.cls_token"<FLOAT,[1,1,768]>{TorchTensor(...)},
                %"backbone.embeddings.register_tokens"<FLOAT,[1,4,768]>{TorchTensor(...)},
                %"backbone.embeddings.position_embeddings"<FLOAT,[1,1370,768]>{TorchTensor(...)},
                %"backbone.embeddings.patch_embeddings.projection.weight"<FLOAT,[768,3,14,14]>{TorchTensor(...)},
                %"backbone.embeddings.patch_embeddings.projection.bias"<FLOAT,[768]>{TorchTensor(...)},
                %"backbon

In [23]:
import onnx

# Read the slimmed classifier graph from disk.
model = onnx.load("dinov2_classify_slimmed.onnx")

# List every node (operator) with its input and output tensor names,
# numbered from 1 in graph order.
for position, node in enumerate(model.graph.node, start=1):
    print(f"#{position}: {node.op_type} - inputs: {node.input} - outputs: {node.output}")


#1: Conv - inputs: ['pixel_values', 'backbone.embeddings.patch_embeddings.projection.weight', 'backbone.embeddings.patch_embeddings.projection.bias'] - outputs: ['conv2d']
#2: Transpose - inputs: ['backbone.encoder.layer.0.attention.attention.key.weight'] - outputs: ['val_91']
#3: Transpose - inputs: ['backbone.encoder.layer.0.attention.attention.value.weight'] - outputs: ['val_95']
#4: Transpose - inputs: ['backbone.encoder.layer.0.attention.attention.query.weight'] - outputs: ['val_98']
#5: Transpose - inputs: ['backbone.encoder.layer.0.attention.output.dense.weight'] - outputs: ['val_129']
#6: Transpose - inputs: ['backbone.encoder.layer.0.mlp.fc1.weight'] - outputs: ['val_133']
#7: Transpose - inputs: ['backbone.encoder.layer.0.mlp.fc2.weight'] - outputs: ['val_142']
#8: Transpose - inputs: ['backbone.encoder.layer.1.attention.attention.key.weight'] - outputs: ['val_146']
#9: Transpose - inputs: ['backbone.encoder.layer.1.attention.attention.value.weight'] - outputs: ['val_149']
#1

In [24]:
# Print the tensor names of the graph inputs, then of the graph outputs,
# each group under its own heading.
for heading, value_infos in (("Inputs:", model.graph.input), ("Outputs:", model.graph.output)):
    print(heading)
    for value_info in value_infos:
        print(f"- {value_info.name}")


Inputs:
- pixel_values
Outputs:
- logits


In [13]:
import onnx

# Optional: run shape inference to enrich shape info
from onnx import shape_inference
model = shape_inference.infer_shapes(model)


def _describe_shape(value_info):
    """Return the dimensions of a graph input/output as a printable list.

    Each entry is the integer ``dim_value`` when that field is set, the
    ``dim_param`` name when that field is set instead, and ``'?'`` when
    neither is present.
    """
    described = []
    for dim in value_info.type.tensor_type.shape.dim:
        if dim.HasField("dim_value"):
            described.append(dim.dim_value)
        elif dim.HasField("dim_param"):
            described.append(f"{dim.dim_param}")
        else:
            described.append("?")
    return described


# Inspect graph inputs, then graph outputs, with one shared formatter instead
# of two copies of the same comprehension.
for heading, value_infos in (
    ("Model Inputs:", model.graph.input),
    ("\nModel Outputs:", model.graph.output),
):
    print(heading)
    for value_info in value_infos:
        print(f" • {value_info.name}: {_describe_shape(value_info)}")


Model Inputs:
 • pixel_values: [1, 3, 224, 224]

Model Outputs:
 • logits: [1, 131]


In [25]:
from onnx import checker

# Run the ONNX checker on the in-memory model; on failure, show the
# checker's message instead of raising.
try:
    checker.check_model(model)
except onnx.checker.ValidationError as validation_error:
    print("❌ Model failed ONNX validation:")
    print(validation_error)
else:
    print("✅ Model passed ONNX validation.")

# Report the IR version and the opset version imported for each domain.
print("IR version:", model.ir_version)
for opset_import in model.opset_import:
    print("Opset version for domain '{}': {}".format(opset_import.domain, opset_import.version))

✅ Model passed ONNX validation.
IR version: 10
Opset version for domain '': 20


In [55]:
import onnx

# Rewrite the model file in place with its IR version field set to 10
# (the original note describes this as a downgrade from 11).
ANTIALIAS_MODEL_PATH = "custom_dinov2_slimmed_antialias.onnx"
TARGET_IR_VERSION = 10

model = onnx.load(ANTIALIAS_MODEL_PATH)
model.ir_version = TARGET_IR_VERSION
onnx.save(model, ANTIALIAS_MODEL_PATH)


In [None]:
import onnxruntime as ort
from transformers import AutoImageProcessor
from PIL import Image
import numpy as np
from config import BASE_MODEL_NAME, NUM_CLASSES , HIDDEN_DIM

# Constants (same sample image as the PyTorch export cell).
# Forward slashes keep the relative path valid on both Windows and POSIX.
IMAGE_PATH = "./Dataset/Acridotheres javanicus/Javan Myna_Acridotheres javanicus_1.jpg"
ONNX_PATH = "dinov2_classify_slimmed.onnx"  # Path to the ONNX model

# Preprocess the image with the HuggingFace processor, returning NumPy arrays.
# NOTE(review): the export cell builds its processor with use_fast=True while
# this one uses use_fast=False -- confirm both yield the same pixel values.
processor = AutoImageProcessor.from_pretrained(BASE_MODEL_NAME, cache_dir="./cache", use_fast=False)
with Image.open(IMAGE_PATH) as image_file:
    # convert() returns an independent copy, so the file can be closed here.
    image = image_file.convert("RGB")
inputs = processor(images=image, return_tensors="np")  # use numpy instead of torch

# The model is fed float32 pixel values.
pixel_values = inputs["pixel_values"].astype(np.float32)

# Run inference
session = ort.InferenceSession(ONNX_PATH, providers=["CPUExecutionProvider"])  # or "CUDAExecutionProvider"
input_name = session.get_inputs()[0].name  # 'pixel_values'
output_name = session.get_outputs()[0].name  # 'logits'

outputs = session.run([output_name], {input_name: pixel_values})  # list of outputs

# Softmax over the class axis (max-subtracted for numerical stability).
logits = np.array(outputs[0])
exp_logits = np.exp(logits - np.max(logits, axis=-1, keepdims=True))
probs = exp_logits / np.sum(exp_logits, axis=-1, keepdims=True)

# Pick the best class of the first (only) sample. Taking argmax on row 0
# rather than on the flattened array keeps the class id and the confidence
# lookup consistent even if more than one row is ever returned.
pred_id = int(np.argmax(probs[0]))
confidence = float(probs[0][pred_id])

print(f"Predicted class ID: {pred_id}, Confidence: {confidence:.4f}")


Predicted class ID: 0, Confidence: 0.7195


In [29]:
import onnx
import onnx_graphsurgeon as gs

# Import the exported FP32 model as an editable graph-surgeon graph.
model_path = "./quant/basefp32.onnx"
graph = gs.import_onnx(onnx.load(model_path))

# Set antialias=1 on every Resize node that carries the attribute,
# logging the value before and after the change.
resize_nodes = [node for node in graph.nodes if node.op == "Resize" and "antialias" in node.attrs]
for resize_node in resize_nodes:
    print(f"Before: {resize_node.name} -> antialias = {resize_node.attrs['antialias']}")
    resize_node.attrs["antialias"] = 1
    print(f"After:  {resize_node.name} -> antialias = {resize_node.attrs['antialias']}")

# Clean the graph up and write the patched model to a new file.
graph.cleanup()
onnx.save(gs.export_onnx(graph), "./quant/basefp32_slimmed.onnx")


Before: node_Resize_57 -> antialias = 0
After:  node_Resize_57 -> antialias = 1


In [52]:
from onnx import checker
# Reload the patched model, both as a ModelProto and as an editable graph.
model_path = "./quant/basefp32_slimmed.onnx"
model = onnx.load(model_path)
graph = gs.import_onnx(model)

# Show every node that carries an `antialias` attribute, with all its attributes.
antialias_nodes = [node for node in graph.nodes if "antialias" in node.attrs]
for antialias_node in antialias_nodes:
    print(antialias_node.name, antialias_node.op, antialias_node.attrs)

# Full checker pass (custom domains included); print the error instead of raising.
try:
    checker.check_model(model, full_check=True, check_custom_domain=True)
except onnx.checker.ValidationError as validation_error:
    print("❌ Model failed ONNX validation:")
    print(validation_error)
else:
    print("✅ Model passed ONNX validation.")
print("IR version:", model.ir_version)



node_Resize_57 Resize OrderedDict({'mode': 'cubic', 'cubic_coeff_a': -0.75, 'coordinate_transformation_mode': 'pytorch_half_pixel', 'exclude_outside': 0, 'nearest_mode': 'floor', 'antialias': 1, 'extrapolation_value': 0.0, 'keep_aspect_ratio_policy': 'stretch'})
✅ Model passed ONNX validation.
IR version: 10


In [51]:

# Serialise the graph-surgeon graph (from an earlier cell) back to a ModelProto.
exported_model = gs.export_onnx(graph)

# Pin the IR version to 10. The opset imports are left exactly as exported;
# the two disabled lines below would instead copy them from `model`.
exported_model.ir_version = 10
#del exported_model.opset_import[:]
#exported_model.opset_import.extend(model.opset_import)

# Validate the exported model; print the checker's message on failure.
try:
    onnx.checker.check_model(exported_model)
except onnx.checker.ValidationError as validation_error:
    print("❌ Model failed ONNX validation:")
    print(validation_error)
else:
    print("✅ Model passed ONNX validation.")


print("IR version:", exported_model.ir_version)
for opset_import in exported_model.opset_import:
    print("Opset version for domain '{}': {}".format(opset_import.domain, opset_import.version))

# The model is written whether or not validation passed.
onnx.save(exported_model, "./quant/basefp32_slimmed.onnx")


✅ Model passed ONNX validation.
IR version: 10
Opset version for domain '': 20


In [15]:
import onnx
import onnx_graphsurgeon as gs

# Import the FP16 model as a graph-surgeon graph for inspection.
model_path = "model_fp16.onnx"
graph = gs.import_onnx(onnx.load(model_path))

# Dump every graph input.
print("input")
for graph_input in graph.inputs:
    print(graph_input)

# Dump each node named "node_Conv_1".
print("node conv1: ")
first_conv_nodes = [node for node in graph.nodes if node.name == "node_Conv_1"]
for conv_node in first_conv_nodes:
    print(conv_node)



input
Variable (pixel_values): (shape=[1, 3, 224, 224], dtype=float32)
node conv1: 
node_Conv_1 (Conv)
	Inputs: [
		Variable (graph_input_cast_0): (shape=[1, 3, 224, 224], dtype=float16)
		Constant (backbone.embeddings.patch_embeddings.projection.weight): (shape=[768, 3, 14, 14], dtype=float16)
		Constant (backbone.embeddings.patch_embeddings.projection.bias): (shape=[768], dtype=float16)
	]
	Outputs: [
		Variable (conv2d): (shape=[1, 768, 16, 16], dtype=float16)
	]
Attributes: OrderedDict({'auto_pad': 'NOTSET', 'dilations': [1, 1], 'group': 1, 'pads': [0, 0, 0, 0], 'strides': [14, 14]})


In [18]:
from onnx import checker

# Load the NHWC-input model and run the ONNX checker on it,
# printing the checker's message instead of raising on failure.
model = onnx.load("model_nhwc_input.onnx")
try:
    checker.check_model(model)
except onnx.checker.ValidationError as validation_error:
    print("❌ Model failed ONNX validation:")
    print(validation_error)
else:
    print("✅ Model passed ONNX validation.")

# Report the IR version and the opset version imported for each domain.
print("IR version:", model.ir_version)
for opset_import in model.opset_import:
    print("Opset version for domain '{}': {}".format(opset_import.domain, opset_import.version))

✅ Model passed ONNX validation.
IR version: 10
Opset version for domain '': 20


In [25]:
import onnx
import onnx_graphsurgeon as gs
import numpy as np

def insert_nhwc_input_transpose(model_path="model_fp16.onnx", output_path="model_nhwc_fixed.onnx"):
    """Make a model accept NHWC input by transposing ahead of its input Cast.

    The first graph input (which must be named ``pixel_values``) is re-declared
    as float32 NHWC. A ``Transpose`` with perm ``[0, 3, 1, 2]`` is inserted to
    restore NCHW, and the Cast node whose first output is named
    ``graph_input_cast_0`` is rewired to read the transposed tensor. The result
    is written to ``output_path``.

    The NHWC shape is derived from the input's declared 4-D NCHW shape, so other
    resolutions and symbolic batch dimensions are preserved. When no 4-D shape
    is declared, ``[1, 3, 224, 224]`` is assumed.

    Args:
        model_path: Path of the ONNX model to read.
        output_path: Path the modified ONNX model is written to.

    Raises:
        AssertionError: If the first graph input is not ``pixel_values`` or no
            Cast node produces ``graph_input_cast_0``.
    """
    # Load model and graph
    model = onnx.load(model_path)
    graph = gs.import_onnx(model)

    # Find the original input
    input_var = graph.inputs[0]  # Assumes only one input
    assert input_var.name == "pixel_values", f"Expected input named 'pixel_values', got {input_var.name}"

    # Derive the layouts from the declared NCHW shape instead of hard-coding them.
    declared_shape = input_var.shape
    if declared_shape is not None and len(declared_shape) == 4:
        nchw_shape = list(declared_shape)
    else:
        nchw_shape = [1, 3, 224, 224]
    batch, channels, height, width = nchw_shape

    # Update input shape to NHWC
    input_var.shape = [batch, height, width, channels]
    input_var.dtype = np.float32  # keep as float32

    # Find the original Cast node
    cast_node = next((n for n in graph.nodes if n.op == "Cast" and n.outputs[0].name == "graph_input_cast_0"), None)
    assert cast_node is not None, "Original Cast node not found."

    # Intermediate tensor carrying the data back in NCHW layout
    nhwc_to_nchw_out = gs.Variable(name="transposed_nchw", dtype=np.float32, shape=nchw_shape)

    # Create new transpose node (NHWC → NCHW)
    transpose_node = gs.Node(
        op="Transpose",
        name="Transpose_NHWC_to_NCHW",
        inputs=[input_var],
        outputs=[nhwc_to_nchw_out],
        attrs={"perm": [0, 3, 1, 2]}
    )

    # Patch Cast node to take Transpose output
    cast_node.inputs[0] = nhwc_to_nchw_out

    # Insert Transpose node before Cast
    graph.nodes.insert(0, transpose_node)

    # Cleanup and export
    graph.cleanup().toposort()
    onnx.save(gs.export_onnx(graph), output_path)
    print(f"✅ Fixed model saved to: {output_path}")

if __name__ == "__main__":
    insert_nhwc_input_transpose()


✅ Fixed model saved to: model_nhwc_fixed.onnx


In [44]:
import onnx
import onnx_graphsurgeon as gs
import numpy as np

def insert_nhwc_to_nchw_transpose(
    model_path="./quant/basefp32_slimmed.onnx",
    output_path="./quant/basefp32_slimmed.onnx",
    input_name="pixel_values"
):
    """Make a model accept NHWC input by inserting a Transpose after its input.

    The graph input ``input_name`` is re-declared as float32 NHWC. A
    ``Transpose`` with perm ``[0, 3, 1, 2]`` restores NCHW, and every node that
    consumed the input is rewired to the transposed tensor. The result is
    written to ``output_path``.

    With the default arguments the file is rewritten in place, so the function
    must be safe to run twice. If the input already feeds a Transpose node
    named ``Transpose_NHWC_to_NCHW``, the graph is left unchanged instead of
    stacking a second transpose and a duplicate tensor name.

    The NHWC shape is derived from the input's declared 4-D NCHW shape;
    ``[1, 3, 224, 224]`` is assumed when no 4-D shape is declared.

    Args:
        model_path: Path of the ONNX model to read.
        output_path: Path the (possibly modified) ONNX model is written to.
        input_name: Name of the graph input to convert to NHWC.

    Raises:
        AssertionError: If no graph input is named ``input_name``.
    """
    transpose_name = "Transpose_NHWC_to_NCHW"

    # Load model and graph
    model = onnx.load(model_path)
    graph = gs.import_onnx(model)

    # Find input
    input_var = next((inp for inp in graph.inputs if inp.name == input_name), None)
    assert input_var is not None, f"❌ Input '{input_name}' not found."

    # Detect a model that an earlier run has already patched.
    already_patched = any(
        node.op == "Transpose"
        and node.name == transpose_name
        and any(inp is input_var for inp in node.inputs)
        for node in graph.nodes
    )

    if already_patched:
        print(f"Input '{input_name}' already feeds {transpose_name}; graph left unchanged.")
    else:
        # Derive the layouts from the declared NCHW shape instead of hard-coding them.
        declared_shape = input_var.shape
        if declared_shape is not None and len(declared_shape) == 4:
            nchw_shape = list(declared_shape)
        else:
            nchw_shape = [1, 3, 224, 224]
        batch, channels, height, width = nchw_shape

        # Change input shape to NHWC
        input_var.shape = [batch, height, width, channels]
        input_var.dtype = np.float32

        # Tensor carrying the data back in NCHW layout
        transposed = gs.Variable(name="pixel_values_nchw", dtype=np.float32, shape=nchw_shape)

        # Create transpose node (NHWC → NCHW)
        transpose_node = gs.Node(
            op="Transpose",
            name=transpose_name,
            inputs=[input_var],
            outputs=[transposed],
            attrs={"perm": [0, 3, 1, 2]},
        )

        # Redirect all nodes that used input_var to use transposed. This runs
        # BEFORE the transpose node joins the graph so that the transpose
        # itself keeps reading the raw NHWC input.
        for node in graph.nodes:
            node.inputs = [transposed if inp is input_var else inp for inp in node.inputs]

        # Add transpose to graph
        graph.nodes.insert(0, transpose_node)

    # Optional: clean graph
    graph.cleanup().toposort()

    # Save the model (written even when nothing had to change, so that
    # output_path always exists after the call)
    onnx.save(gs.export_onnx(graph), output_path)
    print(f"✅ Saved transposed model to: {output_path}")

if __name__ == "__main__":
    insert_nhwc_to_nchw_transpose()


✅ Saved transposed model to: ./quant/basefp32_slimmed.onnx


In [42]:
import onnx
import onnx_graphsurgeon as gs

def _find_forward_path(consumers_by_tensor, start_tensors, target_name):
    """Depth-first search for a chain of nodes leading to ``target_name``.

    Args:
        consumers_by_tensor: Mapping of tensor name to the nodes reading it.
        start_tensors: Tensor names to start from, tried in order.
        target_name: Name of the node to reach.

    Returns:
        The nodes on the chain from a start tensor to the target (target
        last), or ``None`` when the target cannot be reached.
    """
    visited = set()
    for start in start_tensors:
        path = []
        # One iterator per depth level, holding the candidates still to try.
        # An explicit stack avoids hitting the recursion limit on deep graphs.
        pending = [iter(consumers_by_tensor.get(start, ()))]
        while pending:
            node = next(pending[-1], None)
            if node is None:
                # Level exhausted: backtrack, dropping the dead-end node.
                pending.pop()
                if path:
                    path.pop()
                continue
            if id(node) in visited:
                continue
            visited.add(id(node))
            path.append(node)

            if node.name == target_name:
                return path  # Stop once we reach target

            # Continue through the consumers of this node's outputs
            downstream = [
                consumer
                for out in node.outputs
                for consumer in consumers_by_tensor.get(out.name, ())
            ]
            pending.append(iter(downstream))
    return None


def forward_trace_to_target(model_path="./quant/basefp32_slimmed.onnx", target_name="node_Conv_1"):
    """Print the chain of nodes from a model input to the node ``target_name``.

    The graph is walked forward, depth first, from each graph input in turn.
    Only nodes on the chain that actually reaches the target are printed;
    explored dead-end branches are discarded. For each node the inputs,
    outputs (name, shape, dtype) and attributes are shown. When the target
    cannot be reached from any input, a short notice is printed instead.

    Args:
        model_path: Path of the ONNX model to read.
        target_name: Name of the node to trace to.
    """
    model = onnx.load(model_path)
    graph = gs.import_onnx(model)

    # Map input name → consumer node(s)
    input_to_consumers = {}
    for node in graph.nodes:
        for inp in node.inputs:
            input_to_consumers.setdefault(inp.name, []).append(node)

    # Start from all model inputs
    path = _find_forward_path(
        input_to_consumers,
        [inp.name for inp in graph.inputs],
        target_name,
    )
    if path is None:
        print(f"\nNo path from any model input to `{target_name}` was found.")
        return

    # Print path
    print(f"\n🧵 Path from model input to `{target_name}`:")
    for node in path:
        print(f"\n🔹 Node: {node.name} ({node.op})")
        for i, inp in enumerate(node.inputs):
            print(f"  Input[{i}]: {inp.name}, shape={inp.shape}, dtype={inp.dtype}")
        for i, out in enumerate(node.outputs):
            print(f"  Output[{i}]: {out.name}, shape={out.shape}, dtype={out.dtype}")
        if node.attrs:
            print(f"  Attributes: {dict(node.attrs)}")

if __name__ == "__main__":
    forward_trace_to_target()



🧵 Path from model input to `node_Conv_1`:

🔹 Node: node_Conv_1 (Conv)
  Input[0]: pixel_values, shape=[1, 3, 224, 224], dtype=float32
  Input[1]: backbone.embeddings.patch_embeddings.projection.weight, shape=[768, 3, 14, 14], dtype=float32
  Input[2]: backbone.embeddings.patch_embeddings.projection.bias, shape=[768], dtype=float32
  Output[0]: conv2d, shape=[1, 768, 16, 16], dtype=float32
  Attributes: {'group': 1, 'auto_pad': 'NOTSET', 'dilations': [1, 1], 'strides': [14, 14], 'pads': [0, 0, 0, 0]}
