# Compilation du modèle avec ONNX

## 1 - Classes et fonctions

In [1]:
import torch
import torchvision
from collections import OrderedDict

In [3]:
class ReshapeToBatchChannelFirst(torch.nn.Module):
    def __init__(self):
        super(ReshapeToBatchChannelFirst, self).__init__()

    def forward(self, x):
        # Ensure the input is of shape (224, 224, 3)
        #assert x.dim() == 3 and x.shape[-1] == 3, "Input must be (224, 224, 3)"
        
        # Permute dimensions from (H, W, C) to (C, H, W)
        x = x.permute(2, 0, 1)
        
        # Add a batch dimension at the beginning: (1, C, H, W)
        x = x.unsqueeze(0)
        return x

# Example usage
x = torch.rand(224, 224, 3)  # Example input
layer = ReshapeToBatchChannelFirst()
output = layer(x)
print(output.shape)  # Expected: torch.Size([1, 3, 224, 224])

torch.Size([1, 3, 224, 224])


In [14]:
class FixedNormLayer(torch.nn.Module):
    def __init__(self, scale=torch.tensor([1 / 256]), mean=torch.tensor([0.485, 0.456, 0.406]), std=torch.tensor([0.229, 0.224, 0.225])):
        """
        Args:
            mean (torch.Tensor): Precomputed mean for normalization.
            std (torch.Tensor): Precomputed standard deviation for normalization.
        """
        super(FixedNormLayer, self).__init__()
        self.register_buffer("mean", mean[:, None, None])
        self.register_buffer("std", std[:, None, None])
        self.register_buffer("scale", scale)

    def forward(self, x):
        return (self.scale * x - self.mean) / self.std

# Example usage
layer = FixedNormLayer()

# Test with a sample input
x = torch.rand(1, 3, 1, 1)  # Example input
output = layer(x)
print(output)

tensor([[[[-2.1140]],

         [[-2.0340]],

         [[-1.7985]]]])


In [19]:
class InferenceModel(torch.nn.Module):
    def __init__(self, model, scale=torch.tensor([1 / 256]), mean=torch.tensor([0.485, 0.456, 0.406]), std=torch.tensor([0.229, 0.224, 0.225])):
        super(InferenceModel, self).__init__()
        self.preprocess = torch.nn.Sequential(
            OrderedDict(
                [
                    ("reshape", ReshapeToBatchChannelFirst()),
                    ("normalize", FixedNormLayer(scale, mean, std)),
                ]
            )
        )
        self.model = model  # The main model
        self.postprocess = torch.nn.Softmax(1)

    def forward(self, x):
        x = self.preprocess(x)  # Apply reshaping and normalization
        x = self.model(x)  # Pass to the main model
        return self.postprocess(x)

In [20]:
class InferenceModelEncoder(torch.nn.Module):
    def __init__(self, model, scale=torch.tensor([1 / 256]), mean=torch.tensor([0.485, 0.456, 0.406]), std=torch.tensor([0.229, 0.224, 0.225])):
        super(InferenceModelEncoder, self).__init__()
        self.preprocess = torch.nn.Sequential(
            OrderedDict(
                [
                    ("reshape", ReshapeToBatchChannelFirst()),
                    ("normalize", FixedNormLayer(scale, mean, std)),
                ]
            )
        )
        self.model = model  # The main model

    def forward(self, x):
        x = self.preprocess(x)  # Apply reshaping and normalization
        x = self.model(x)  # Pass to the main model
        return x

In [7]:
class InferenceModelDecoder(torch.nn.Module):
    def __init__(self, model):
        super(InferenceModelDecoder, self).__init__()
        self.model = model  # The main model last layer
        self.postprocess = torch.nn.Softmax(1)

    def forward(self, x):
        x = self.model(x)  # Pass to the main model
        return self.postprocess(x)

In [25]:
import pathlib
from onnxruntime.tools import convert_onnx_models_to_ort as convert_onnx

def export_to_onnx(model, model_input, model_name, path=".", target_platform="arm"):
    path = pathlib.Path(path)

    # Export to ONNX format
    onnx_program = torch.onnx.export(model, model_input, dynamo=True)
    onnx_program.save((path / model_name).with_suffix(".onnx"))

    # Export to ORT format for mobile
    convert_onnx.convert_onnx_models_to_ort(
        (path / model_name).with_suffix(".onnx"),
        output_dir=pathlib.Path("models"),
        optimization_styles=[convert_onnx.OptimizationStyle.Fixed],
        target_platform=target_platform,
    )

In [27]:
import PIL
import numpy as np

def single_image_pipeline(image_path, dtype="float32"):
    # Load image into numpy float array
    image = np.array(
        PIL.Image.open(image_path).convert("RGB").resize((224, 224)), dtype=dtype
    )

    return image

## 2 - Modèle de tomates (2025-02-28)

Modification du modèle pour avoir le bon nombre de sortie dans la dernière couche et
pour calculer le softmax sur les sorties du modèle pour avoir directement les probabilités.

In [16]:
# Load pretrained ViT
num_labels = 11  # Get number of labels (e.g., 8)

model = torchvision.models.vit_b_16(weights="IMAGENET1K_V1")  # Load a pretrained model
model.heads.head = torch.nn.Linear(model.heads.head.in_features, num_labels)

In [17]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model.load_state_dict(
    torch.load(
        "models/tomato_model_2025_02_28_v2.pt",
        map_location=device,
        weights_only=True,
    )
)

<All keys matched successfully>

In [23]:
# Create inference model
inference_model = InferenceModel(model)
inference_model.eval()

model_input = torch.randn(224, 224, 3)
inference_model(model_input)

tensor([[0.0214, 0.6054, 0.2210, 0.0181, 0.0034, 0.0022, 0.0020, 0.0814, 0.0186,
         0.0066, 0.0199]], grad_fn=<SoftmaxBackward0>)

Exportation du modèle, en incluant un tenseur aléatoire pour fournir la bonne taille de
tenseur en entrée.

In [24]:
export_to_onnx(inference_model, model_input, "tomato_model_2025_02_28_v2", "models")

  param_schemas = callee.param_schemas()
  param_schemas = callee.param_schemas()


[torch.onnx] Obtain model graph for `InferenceModel([...]` with `torch.export.export`...
[torch.onnx] Obtain model graph for `InferenceModel([...]` with `torch.export.export`... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
Converting models with optimization style 'Fixed' and level 'all'
Converting optimized ONNX model /home/maxime/Documents/Code/happybud/training/models/tomato_model_2025_02_28_v2.onnx to ORT format model /home/maxime/Documents/Code/happybud/training/models/tomato_model_2025_02_28_v2.ort


[0;93m2025-03-24 11:39:21.050759422 [W:onnxruntime:, graph.cc:109 MergeShapeInfo] Error merging shape info for output. '_scaled_dot_product_flash_attention_for_cpu_11__1' source:{1,197,32} target:{1,12,197}. Falling back to lenient merge.[m
[0;93m2025-03-24 11:39:21.051144272 [W:onnxruntime:, graph.cc:109 MergeShapeInfo] Error merging shape info for output. '_scaled_dot_product_flash_attention_for_cpu_10__1' source:{1,197,32} target:{1,12,197}. Falling back to lenient merge.[m
[0;93m2025-03-24 11:39:21.051419147 [W:onnxruntime:, graph.cc:109 MergeShapeInfo] Error merging shape info for output. '_scaled_dot_product_flash_attention_for_cpu_9__1' source:{1,197,32} target:{1,12,197}. Falling back to lenient merge.[m
[0;93m2025-03-24 11:39:21.051678545 [W:onnxruntime:, graph.cc:109 MergeShapeInfo] Error merging shape info for output. '_scaled_dot_product_flash_attention_for_cpu_8__1' source:{1,197,32} target:{1,12,197}. Falling back to lenient merge.[m
[0;93m2025-03-24 11:39:21.051

Converted 1/1 models successfully.
Generating config file from ORT format models with optimization style 'Fixed' and level 'all'


2025-03-24 11:39:24,795 ort_format_model.utils [INFO] - Created config in /home/maxime/Documents/Code/happybud/training/models/tomato_model_2025_02_28_v2.required_operators.config


Validation de l'exécution du modèle

In [28]:
import onnxruntime

ort_session = onnxruntime.InferenceSession(
    "models/tomato_model_2025_02_28_v2.ort", providers=["CPUExecutionProvider"]
)

In [None]:
# Exécution du modèle avec ONNX
onnx_input = single_image_pipeline(
    "dataset/tomato/88614302-e6d2-4327-a4fb-a3db9c9ea72e___YLCV_NREC_2861.JPG"
)

onnxruntime_outputs = ort_session.run(None, {"x": onnx_input})
onnxruntime_outputs

[array([[1.4499956e-06, 9.2473856e-06, 2.0949049e-06, 8.7807521e-06,
         1.5942251e-05, 7.3268457e-06, 1.2185792e-06, 9.3832959e-06,
         7.4934615e-06, 1.7394861e-05, 9.9991953e-01]], dtype=float32)]

In [33]:
# Exécution du modèle avec pytorch
torch_input = torch.tensor(onnx_input)

inference_model(torch_input)

tensor([[1.4500e-06, 9.2474e-06, 2.0949e-06, 8.7807e-06, 1.5942e-05, 7.3268e-06,
         1.2186e-06, 9.3833e-06, 7.4934e-06, 1.7395e-05, 9.9992e-01]],
       grad_fn=<SoftmaxBackward0>)

## 2 - Tomates - Séparation de l'encodeur et du décodeur

In [34]:
# Load pretrained ViT
num_labels = 11  # Get number of labels (e.g., 8)

model = torchvision.models.vit_b_16(weights="IMAGENET1K_V1")  # Load a pretrained model
model.heads.head = torch.nn.Linear(model.heads.head.in_features, num_labels)

In [35]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model.load_state_dict(
    torch.load(
        "models/tomato_model_2025_02_28_v2.pt",
        map_location=device,
        weights_only=True,
    )
)

<All keys matched successfully>

In [36]:
#Extract last layer
last_layer = torch.nn.Linear(model.heads.head.in_features, num_labels)
last_layer.weight = model.heads.head.weight
last_layer.weight

Parameter containing:
tensor([[ 0.0183, -0.0336, -0.0224,  ..., -0.0234, -0.0337,  0.0222],
        [ 0.0133,  0.0046,  0.0376,  ..., -0.0338, -0.0195, -0.0039],
        [ 0.0117,  0.0335, -0.0085,  ...,  0.0160, -0.0332, -0.0282],
        ...,
        [ 0.0254, -0.0356, -0.0124,  ...,  0.0298,  0.0253, -0.0077],
        [ 0.0218,  0.0312, -0.0024,  ..., -0.0325,  0.0179,  0.0299],
        [-0.0239, -0.0136,  0.0181,  ...,  0.0369, -0.0023, -0.0013]],
       requires_grad=True)

In [37]:
# Remove last layer from model
model.heads.head = torch.nn.Identity()

In [38]:
# Define encoder and decoder inference models
inference_model_encoder = InferenceModelEncoder(model)
inference_model_decoder = InferenceModelDecoder(last_layer)

In [40]:
# Test inference model encoder and decoder
torch_input = torch.randn(224, 224, 3)

encoded = inference_model_encoder(torch_input)
decoded = inference_model_decoder(encoded)
decoded

tensor([[0.0228, 0.5928, 0.2303, 0.0178, 0.0034, 0.0024, 0.0021, 0.0838, 0.0181,
         0.0065, 0.0201]], grad_fn=<SoftmaxBackward0>)

In [43]:
# Export to ONNX
export_to_onnx(inference_model_encoder, torch_input, "tomato_model_2025_02_28_v2_encoder", "models")
export_to_onnx(inference_model_decoder, encoded, "tomato_model_2025_02_28_v2_decoder", "models")

[torch.onnx] Obtain model graph for `InferenceModelEncoder([...]` with `torch.export.export`...
[torch.onnx] Obtain model graph for `InferenceModelEncoder([...]` with `torch.export.export`... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
Converting models with optimization style 'Fixed' and level 'all'
Converting optimized ONNX model /home/maxime/Documents/Code/happybud/training/models/tomato_model_2025_02_28_v2_encoder.onnx to ORT format model /home/maxime/Documents/Code/happybud/training/models/tomato_model_2025_02_28_v2_encoder.ort
Converted 1/1 models successfully.
Generating config file from ORT format models with optimization style 'Fixed' and level 'all'


[0;93m2025-03-24 11:54:14.855933978 [W:onnxruntime:, graph.cc:109 MergeShapeInfo] Error merging shape info for output. '_scaled_dot_product_flash_attention_for_cpu_11__1' source:{1,197,32} target:{1,12,197}. Falling back to lenient merge.[m
[0;93m2025-03-24 11:54:14.856286110 [W:onnxruntime:, graph.cc:109 MergeShapeInfo] Error merging shape info for output. '_scaled_dot_product_flash_attention_for_cpu_10__1' source:{1,197,32} target:{1,12,197}. Falling back to lenient merge.[m
[0;93m2025-03-24 11:54:14.856528573 [W:onnxruntime:, graph.cc:109 MergeShapeInfo] Error merging shape info for output. '_scaled_dot_product_flash_attention_for_cpu_9__1' source:{1,197,32} target:{1,12,197}. Falling back to lenient merge.[m
[0;93m2025-03-24 11:54:14.856755973 [W:onnxruntime:, graph.cc:109 MergeShapeInfo] Error merging shape info for output. '_scaled_dot_product_flash_attention_for_cpu_8__1' source:{1,197,32} target:{1,12,197}. Falling back to lenient merge.[m
[0;93m2025-03-24 11:54:14.856

[torch.onnx] Obtain model graph for `InferenceModelDecoder([...]` with `torch.export.export`...
[torch.onnx] Obtain model graph for `InferenceModelDecoder([...]` with `torch.export.export`... ✅
[torch.onnx] Translate the graph into ONNX...


2025-03-24 11:54:18,157 ort_format_model.utils [INFO] - Created config in /home/maxime/Documents/Code/happybud/training/models/tomato_model_2025_02_28_v2_decoder.required_operators.config


[torch.onnx] Translate the graph into ONNX... ✅
Converting models with optimization style 'Fixed' and level 'all'
Converting optimized ONNX model /home/maxime/Documents/Code/happybud/training/models/tomato_model_2025_02_28_v2_decoder.onnx to ORT format model /home/maxime/Documents/Code/happybud/training/models/tomato_model_2025_02_28_v2_decoder.ort
Converted 1/1 models successfully.
Generating config file from ORT format models with optimization style 'Fixed' and level 'all'


Validation de l'exécution du modèle avec ONNX runtime (encoder decoder)

In [44]:
import onnxruntime

ort_session_encoder = onnxruntime.InferenceSession(
    "models/tomato_model_2025_02_28_v2_encoder.ort", providers=["CPUExecutionProvider"]
)

ort_session_decoder = onnxruntime.InferenceSession(
    "models/tomato_model_2025_02_28_v2_decoder.ort", providers=["CPUExecutionProvider"]
)

In [45]:
# Exécution du modèle
onnx_input = single_image_pipeline(
    "dataset/tomato/88614302-e6d2-4327-a4fb-a3db9c9ea72e___YLCV_NREC_2861.JPG"
)

encoded_image = ort_session_encoder.run(None, {"x": onnx_input})[0]
encoded_image

array([[-9.07714486e-01,  3.03961895e-02,  8.70138347e-01,
        -8.18296134e-01, -2.94012278e-01,  3.38016897e-01,
        -1.66627228e-01,  2.50272691e-01,  9.25460935e-01,
        -1.28882408e-01, -4.14819241e-01,  5.26395202e-01,
         3.60564172e-01, -9.18535769e-01,  1.99042767e-01,
         3.08376225e-03,  4.41153377e-01, -9.58642066e-01,
        -7.41269588e-02, -6.60064757e-01, -8.72934103e-01,
         1.00470901e+00, -4.35145259e-01,  5.68119287e-01,
         4.17836487e-01, -1.07043183e+00,  3.48927267e-02,
         8.65398228e-01, -1.04337978e+00,  7.23374665e-01,
         1.12749267e+00, -8.93599689e-02, -9.41086590e-01,
        -1.05196118e+00,  7.97558486e-01, -3.89004469e-01,
         2.09360659e-01,  2.86927879e-01,  8.63989294e-01,
        -1.43604070e-01, -1.26661167e-01, -1.56991780e-01,
        -6.36127830e-01, -1.38813198e-01,  4.89442497e-01,
         6.29252195e-01,  7.34239221e-01, -7.45796412e-02,
        -1.43365824e+00,  4.67331916e-01, -7.53821850e-0

In [46]:
# Decoding
decoded_output = ort_session_decoder.run(None, {"x": encoded_image})
decoded_output

[array([[1.5312373e-06, 9.0133162e-06, 2.1658811e-06, 8.6138307e-06,
         1.5698393e-05, 7.7833420e-06, 1.2598617e-06, 9.6167660e-06,
         7.2367616e-06, 1.6960983e-05, 9.9992025e-01]], dtype=float32)]

In [50]:
# Validation avec pytorch
torch_input = torch.tensor(onnx_input)
encoded_image = inference_model_encoder(torch_input)
decoded = inference_model_decoder(encoded_image)
decoded

tensor([[1.5312e-06, 9.0133e-06, 2.1659e-06, 8.6138e-06, 1.5698e-05, 7.7833e-06,
         1.2599e-06, 9.6167e-06, 7.2367e-06, 1.6961e-05, 9.9992e-01]],
       grad_fn=<SoftmaxBackward0>)

## 3 - Modèle des plantes (MobileNet_v3)

In [51]:
# Load pretrained MobileNet_v3
num_labels = 3  # Get number of labels (e.g., 8)

model = torchvision.models.mobilenet_v3_large(weights="IMAGENET1K_V1")  # Load a pretrained model
model.classifier[3] = torch.nn.Linear(
    model.classifier[3].in_features, num_labels
)  # Modify last layer

In [52]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model.load_state_dict(
    torch.load(
        "models/plant_model_2025_03_21.pt",
        map_location=device,
        weights_only=True,
    )
)

<All keys matched successfully>

In [53]:
# Create inference model
inference_model = InferenceModel(model)
inference_model.eval()

test = torch.randn(224, 224, 3)
inference_model(test)

tensor([[0.5631, 0.0999, 0.3371]], grad_fn=<SoftmaxBackward0>)

In [54]:
torch_input = torch.randn(224, 224, 3)
export_to_onnx(inference_model, torch_input, "plant_model_2025_03_21", "models")

[torch.onnx] Obtain model graph for `InferenceModel([...]` with `torch.export.export`...
[torch.onnx] Obtain model graph for `InferenceModel([...]` with `torch.export.export`... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
Converting models with optimization style 'Fixed' and level 'all'
Converting optimized ONNX model /home/maxime/Documents/Code/happybud/training/models/plant_model_2025_03_21.onnx to ORT format model /home/maxime/Documents/Code/happybud/training/models/plant_model_2025_03_21.ort
Converted 1/1 models successfully.
Generating config file from ORT format models with optimization style 'Fixed' and level 'all'


[0;93m2025-03-24 11:57:08.222956453 [W:onnxruntime:, graph.cc:109 MergeShapeInfo] Error merging shape info for output. '_native_batch_norm_legit_no_training__1' source:{16} target:{0}. Falling back to lenient merge.[m
[0;93m2025-03-24 11:57:08.222985784 [W:onnxruntime:, graph.cc:109 MergeShapeInfo] Error merging shape info for output. '_native_batch_norm_legit_no_training__2' source:{16} target:{0}. Falling back to lenient merge.[m
[0;93m2025-03-24 11:57:08.223138959 [W:onnxruntime:, graph.cc:109 MergeShapeInfo] Error merging shape info for output. '_native_batch_norm_legit_no_training_1__1' source:{16} target:{0}. Falling back to lenient merge.[m
[0;93m2025-03-24 11:57:08.223153063 [W:onnxruntime:, graph.cc:109 MergeShapeInfo] Error merging shape info for output. '_native_batch_norm_legit_no_training_1__2' source:{16} target:{0}. Falling back to lenient merge.[m
[0;93m2025-03-24 11:57:08.223303115 [W:onnxruntime:, graph.cc:109 MergeShapeInfo] Error merging shape info for outp

In [55]:
import onnxruntime

ort_session = onnxruntime.InferenceSession(
    "models/plant_model_2025_03_21.ort", providers=["CPUExecutionProvider"]
)

In [56]:
# Exécution du modèle
onnx_input = single_image_pipeline(
    "dataset/tomato/88614302-e6d2-4327-a4fb-a3db9c9ea72e___YLCV_NREC_2861.JPG"
)

onnxruntime_outputs = ort_session.run(None, {"x": onnx_input})
onnxruntime_outputs

[array([[8.8753900e-08, 9.9999964e-01, 2.4354483e-07]], dtype=float32)]

In [57]:
# Exécution sur pytorch pour comparer
torch_input = torch.tensor(onnx_input)
inference_model(torch_input)

tensor([[8.8754e-08, 1.0000e+00, 2.4355e-07]], grad_fn=<SoftmaxBackward0>)

## 4 - Modèle des plantes (encoder - decoder)

In [58]:
# Load pretrained MobileNet_v3
num_labels = 3  # Get number of labels (e.g., 8)

model = torchvision.models.mobilenet_v3_large(weights="IMAGENET1K_V1")  # Load a pretrained model
model.classifier[3] = torch.nn.Linear(
    model.classifier[3].in_features, num_labels
)  # Modify last layer

In [59]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model.load_state_dict(
    torch.load(
        "models/plant_model_2025_03_21.pt",
        map_location=device,
        weights_only=True,
    )
)

<All keys matched successfully>

In [60]:
model

MobileNetV3(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
      (2): Hardswish()
    )
    (1): InvertedResidual(
      (block): Sequential(
        (0): Conv2dNormActivation(
          (0): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=16, bias=False)
          (1): BatchNorm2d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
          (2): ReLU(inplace=True)
        )
        (1): Conv2dNormActivation(
          (0): Conv2d(16, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (1): BatchNorm2d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
        )
      )
    )
    (2): InvertedResidual(
      (block): Sequential(
        (0): Conv2dNormActivation(
          (0): Conv2d(16, 64, kernel_size=(1, 1), stride=(1, 1), bi

In [None]:
# Extract classifier
classifier = torch.nn.Sequential(
    torch.nn.Linear(in_features=960, out_features=1280, bias=True),
    torch.nn.Hardswish(),
    torch.nn.Dropout(p=0.2, inplace=True),
    torch.nn.Linear(in_features=1280, out_features=3, bias=True),
)

for i in range(4):
    if hasattr(classifier[i], "weight"):
        classifier[i].weight = model.classifier[i].weight

In [65]:
# Set model classifier to identity
model.classifier = torch.nn.Identity()

In [83]:
# Define encoder and decoder inference models
inference_model_encoder = InferenceModelEncoder(model)
inference_model_decoder = InferenceModelDecoder(classifier)

inference_model_encoder.eval()
inference_model_decoder.eval()

InferenceModelDecoder(
  (model): Sequential(
    (0): Linear(in_features=960, out_features=1280, bias=True)
    (1): Hardswish()
    (2): Dropout(p=0.2, inplace=True)
    (3): Linear(in_features=1280, out_features=3, bias=True)
  )
  (postprocess): Softmax(dim=1)
)

In [84]:
# Test inference model encoder and decoder
torch_input = torch.randn(224, 224, 3)

encoded = inference_model_encoder(torch_input)
decoded = inference_model_decoder(encoded)
decoded

tensor([[0.5725, 0.0806, 0.3469]], grad_fn=<SoftmaxBackward0>)

In [85]:
# Export to ONNX
export_to_onnx(inference_model_encoder, torch_input, "plant_model_2025_03_21_encoder", "models")
export_to_onnx(inference_model_decoder, encoded, "plant_model_2025_03_21_decoder", "models")

[torch.onnx] Obtain model graph for `InferenceModelEncoder([...]` with `torch.export.export`...
[torch.onnx] Obtain model graph for `InferenceModelEncoder([...]` with `torch.export.export`... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
Converting models with optimization style 'Fixed' and level 'all'
Converting optimized ONNX model /home/maxime/Documents/Code/happybud/training/models/plant_model_2025_03_21_encoder.onnx to ORT format model /home/maxime/Documents/Code/happybud/training/models/plant_model_2025_03_21_encoder.ort


[0;93m2025-03-24 12:13:21.028230552 [W:onnxruntime:, graph.cc:109 MergeShapeInfo] Error merging shape info for output. '_native_batch_norm_legit_no_training__1' source:{16} target:{0}. Falling back to lenient merge.[m
[0;93m2025-03-24 12:13:21.028278056 [W:onnxruntime:, graph.cc:109 MergeShapeInfo] Error merging shape info for output. '_native_batch_norm_legit_no_training__2' source:{16} target:{0}. Falling back to lenient merge.[m
[0;93m2025-03-24 12:13:21.028451079 [W:onnxruntime:, graph.cc:109 MergeShapeInfo] Error merging shape info for output. '_native_batch_norm_legit_no_training_1__1' source:{16} target:{0}. Falling back to lenient merge.[m
[0;93m2025-03-24 12:13:21.028467221 [W:onnxruntime:, graph.cc:109 MergeShapeInfo] Error merging shape info for output. '_native_batch_norm_legit_no_training_1__2' source:{16} target:{0}. Falling back to lenient merge.[m
[0;93m2025-03-24 12:13:21.028612897 [W:onnxruntime:, graph.cc:109 MergeShapeInfo] Error merging shape info for outp

Converted 1/1 models successfully.
Generating config file from ORT format models with optimization style 'Fixed' and level 'all'
[torch.onnx] Obtain model graph for `InferenceModelDecoder([...]` with `torch.export.export`...
[torch.onnx] Obtain model graph for `InferenceModelDecoder([...]` with `torch.export.export`... ✅
[torch.onnx] Translate the graph into ONNX...


2025-03-24 12:13:22,200 ort_format_model.utils [INFO] - Created config in /home/maxime/Documents/Code/happybud/training/models/plant_model_2025_03_21_decoder.required_operators.config


[torch.onnx] Translate the graph into ONNX... ✅
Converting models with optimization style 'Fixed' and level 'all'
Converting optimized ONNX model /home/maxime/Documents/Code/happybud/training/models/plant_model_2025_03_21_decoder.onnx to ORT format model /home/maxime/Documents/Code/happybud/training/models/plant_model_2025_03_21_decoder.ort
Converted 1/1 models successfully.
Generating config file from ORT format models with optimization style 'Fixed' and level 'all'


Validation de l'exécution du modèle avec ONNX runtime (encoder decoder)

In [91]:
import onnxruntime

ort_session_encoder = onnxruntime.InferenceSession(
    "models/plant_model_2025_03_21_encoder.ort", providers=["CPUExecutionProvider"]
)

ort_session_decoder = onnxruntime.InferenceSession(
    "models/plant_model_2025_03_21_decoder.ort", providers=["CPUExecutionProvider"]
)

In [92]:
# Exécution du modèle
onnx_input = single_image_pipeline(
    "dataset/tomato/88614302-e6d2-4327-a4fb-a3db9c9ea72e___YLCV_NREC_2861.JPG"
)

encoded_image = ort_session_encoder.run(None, {"x": onnx_input})[0]
encoded_image

array([[ 1.20295122e-01,  1.10822761e+00,  2.06167065e-02,
         5.83795369e-01,  4.89636898e-01,  1.52733281e-01,
         5.86454690e-01,  5.68893135e-01,  9.34672773e-01,
         1.25026953e+00,  2.63027608e-01,  9.86482501e-01,
        -1.86756060e-01, -9.32499319e-02,  1.06694221e+00,
         2.20941994e-02,  9.75903496e-02,  4.26993161e-01,
         5.14855921e-01,  2.88239986e-01,  5.93492568e-01,
         1.33377290e+00,  7.44929969e-01,  1.84211638e-02,
         7.39451051e-01,  1.00868680e-01,  5.96427202e-01,
        -1.47122145e-01, -6.02106117e-02,  5.33028424e-01,
         7.59140551e-02,  3.94630909e-01,  1.37586761e-02,
         1.46330446e-01,  2.03672722e-02,  7.63610303e-01,
         1.22969377e+00, -1.37557238e-01,  2.83678591e-01,
         2.23187178e-01,  1.00482710e-01,  6.73192292e-02,
         1.07468821e-01, -8.61597806e-02,  9.62428987e-01,
         1.76388443e+00,  6.88523173e-01, -1.79807290e-01,
         1.05469506e-02,  5.82355917e-01,  6.89149229e-0

In [93]:
# Decoding
decoded_output = ort_session_decoder.run(None, {"x": encoded_image})
decoded_output

[array([[8.7371554e-08, 9.9999952e-01, 3.8503146e-07]], dtype=float32)]

In [90]:
# Validation avec pytorch
torch_input = torch.tensor(onnx_input)
encoded_image = inference_model_encoder(torch_input)
decoded = inference_model_decoder(encoded_image)
decoded

tensor([[8.7371e-08, 1.0000e+00, 3.8503e-07]], grad_fn=<SoftmaxBackward0>)