In [1]:
# Environment setup for the ONNX export. Each line is a separate pip invocation, executed
# in order, and every one of them passes -U (upgrade).
# NOTE(review): because the installs are sequential, the transformers==4.54.1 pin below is
# presumably not guaranteed to survive the later `-U` installs of optimum / transformers[torch];
# confirm the final versions with `pip list` after running this cell.
# NOTE(review): the captured output of this cell reports a dependency conflict
# (datasets 3.6.0 vs. fsspec 2025.5.1) — check whether `datasets` is needed here.
!pip install "transformers==4.54.1" -qU
# Optimum with its ONNX Runtime extra (provides optimum.exporters.onnx used below).
!pip install optimum[onnxruntime] -qU
# transformers with its torch extra.
!pip install transformers[torch] -qU
# Re-runs the optimum install without extras.
# NOTE(review): looks redundant with the optimum[onnxruntime] line above, but it may re-align
# dependency versions after the transformers[torch] upgrade — confirm before removing.
!pip install optimum -qU
# Pre-release (nightly) PyTorch wheels from the cu126 nightly index.
!pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu126 -qU
# The upgrade flag appears twice on this line (`-U` and the `U` in `-qU`).
!pip install -U bitsandbytes -qU
# --no-deps: install/upgrade timm itself without touching its dependencies.
!pip install --no-deps --upgrade timm -qU # Only for Gemma 3N

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m41.7/41.7 kB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m11.2/11.2 MB[0m [31m35.2 MB/s[0m eta [36m0:00:00[0m00:01[0m0:01[0mm
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m558.8/558.8 kB[0m [31m20.4 MB/s[0m eta [36m0:00:00[0m
[?25h[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
datasets 3.6.0 requires fsspec[http]<=2025.3.0,>=2023.1.0, but you have fsspec 2025.5.1 which is incompatible.[0m[31m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m40.9/40.9 kB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m16.5/16.5 MB[0m [31m65.4 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m 

Ran out of memory on Kaggle. A system with considerably more RAM would be able to complete the conversion.

In [None]:
import torch
from pathlib import Path
from transformers import AutoConfig, AutoModelForCausalLM, PretrainedConfig
from optimum.exporters.onnx import export as onnx_export
from optimum.exporters.onnx.config import TextDecoderOnnxConfig
from optimum.utils import (
    DummyVisionInputGenerator,
    DummyTextInputGenerator,
    DummyPastKeyValuesGenerator,
    NormalizedTextConfig,
    NormalizedVisionConfig,
)
from typing import Dict
import gc

# --- Configuration ---
# Both names below are module-level settings read later in this cell: the first is passed to
# AutoConfig / AutoModelForCausalLM `from_pretrained`, the second is wrapped in `Path(...)` and
# handed to `onnx_export(output=...)`.
# Path to the PyTorch model on Kaggle
pytorch_model_path = "/kaggle/input/model-name-auramind-maize-expert-e2b/pytorch/default/1/AuraMind-E2B-Finetuned-Sliced"
# Path where the final ONNX model will be saved
# NOTE(review): this value is passed unchanged as the export `output`; Optimum presumably treats
# that as the ONNX file path itself rather than a directory — confirm the resulting layout.
onnx_output_path = "./onnx_multimodal_model"

# Echo the effective settings so they appear in the cell output.
print(f"PyTorch model path: {pytorch_model_path}")
print(f"ONNX output path: {onnx_output_path}")

# ==============================================================================
# CELL 1: DEFINE THE CUSTOM ONNX CONFIGURATION FOR IMAGE-TO-TEXT
# ==============================================================================
print("\nStep 1: Defining a custom ONNX configuration for the image-to-text model...")

class CustomGemma3NImageToTextOnnxConfig(TextDecoderOnnxConfig):
    """
    ONNX export configuration for the Gemma3N model on image-to-text tasks.

    The text decoder configuration drives the parent ``TextDecoderOnnxConfig``;
    this subclass additionally declares a ``pixel_values`` input and produces
    dummy inputs for both the vision (pixel_values) and text (input_ids,
    attention_mask) modalities.
    """

    NORMALIZED_CONFIG_CLASS = NormalizedTextConfig.with_args(
        num_layers="num_hidden_layers",
        num_attention_heads="num_attention_heads",
        hidden_size="hidden_size",
    )

    def __init__(self, config: PretrainedConfig, task: str = "default", **kwargs):
        """
        Args:
            config: Full multimodal model config; must expose ``text_config`` and
                ``vision_config``.
            task: Export task name forwarded to the parent class.
            **kwargs: Forwarded unchanged to ``TextDecoderOnnxConfig.__init__``.
        """
        # The parent is initialised with the text sub-config only.
        super().__init__(config=config.text_config, task=task, **kwargs)
        self.text_config = config.text_config
        self.vision_config = config.vision_config
        # Full config is kept for the image-token fields read in generate_dummy_inputs.
        self.config = config

    @property
    def inputs(self) -> Dict[str, Dict[int, str]]:
        """Dynamic-axes mapping: ``pixel_values`` first, then the parent's text inputs."""
        text_inputs = super().inputs
        vision_inputs = {
            "pixel_values": {0: "batch_size", 1: "num_channels", 2: "height", 3: "width"}
        }
        return {**vision_inputs, **text_inputs}

    def generate_dummy_inputs(self, batch_size: int = 1, sequence_length: int = 260, **kwargs) -> Dict[str, torch.Tensor]:
        """
        Generates dummy inputs for vision and text modalities.
        A batch_size of 1 is used to reduce memory consumption during export.

        Args:
            batch_size: Number of dummy samples.
            sequence_length: Length of the dummy token sequence; must be at least
                ``config.vision_soft_tokens_per_image``.
            **kwargs: Accepted for call compatibility (e.g. ``framework``) and ignored;
                tensors are always generated with the "pt" framework.

        Returns:
            Dict with ``input_ids``, ``attention_mask`` and ``pixel_values``; when
            ``self.use_past`` is set it also holds ``past_key_values`` (the value
            returned by ``DummyPastKeyValuesGenerator.generate``).

        Raises:
            ValueError: If ``sequence_length`` is shorter than the number of image tokens.
        """
        # 1. Generate Text Inputs
        text_input_generator = DummyTextInputGenerator(
            self.task,
            self._normalized_config,
            batch_size=batch_size,
            sequence_length=sequence_length,
            **self.text_config.to_dict(),
        )
        dummy_inputs = {
            "input_ids": text_input_generator.generate(input_name="input_ids", framework="pt"),
            "attention_mask": text_input_generator.generate(input_name="attention_mask", framework="pt"),
        }

        # 2. Inject Special Image Tokens
        image_token_id = self.config.image_token_id
        tokens_per_image = self.config.vision_soft_tokens_per_image
        if sequence_length < tokens_per_image:
            raise ValueError(f"Sequence length must be at least {tokens_per_image} to hold image tokens.")
        # Every row starts with `tokens_per_image` image placeholder tokens.
        dummy_inputs["input_ids"][:, :tokens_per_image] = image_token_id

        # 3. Generate Past Key-Values if needed
        if self.use_past:
            past_key_values_generator = DummyPastKeyValuesGenerator(
                self.task,
                self._normalized_config,
                batch_size=batch_size,
                sequence_length=sequence_length,
            )
            # Optimum's generators take the input name as the first argument of `generate`,
            # and the past-key-values generator returns per-layer (key, value) pairs rather
            # than a dict, so the result is stored under its input name instead of being
            # merged with `dict.update`.
            # NOTE(review): attention_mask is not extended to cover the past length here —
            # confirm whether that is needed before exporting with use_past=True.
            dummy_inputs["past_key_values"] = past_key_values_generator.generate(
                input_name="past_key_values", framework="pt"
            )

        # 4. Generate Vision Inputs
        normalized_vision_config = NormalizedVisionConfig(self.vision_config)
        # Fall back to 224x224 RGB when the vision config does not declare these fields.
        image_size = getattr(self.vision_config, "image_size", 224)
        num_channels = getattr(self.vision_config, "num_channels", 3)
        vision_input_generator = DummyVisionInputGenerator(
            self.task,
            normalized_vision_config,
            batch_size=batch_size,
            num_channels=num_channels,
            height=image_size,
            width=image_size,
        )
        dummy_inputs["pixel_values"] = vision_input_generator.generate(input_name="pixel_values", framework="pt")

        return dummy_inputs

print("   - CustomGemma3NImageToTextOnnxConfig defined.")

# ==============================================================================
# CELL 2: PREPARE AND RUN THE ONNX EXPORT
# ==============================================================================
print("\nStep 2: Preparing and running the ONNX export...")

try:
    # --- Step 2.1: load the config and strip attributes left over from fine-tuning ---
    print("   - Cleaning the model configuration...")
    cleaned_config = AutoConfig.from_pretrained(pytorch_model_path, trust_remote_code=True)

    # Drop the quantization / Unsloth markers if the saved config carries them.
    for stale_attr in ("quantization_config", "unsloth_fixed"):
        if hasattr(cleaned_config, stale_attr):
            delattr(cleaned_config, stale_attr)

    print("   - Configuration cleaned successfully.")

    # --- Step 2.2: instantiate the model from the cleaned config ---
    print("   - Loading model with the cleaned configuration...")
    # float16 weights keep the memory footprint down.
    model = AutoModelForCausalLM.from_pretrained(
        pytorch_model_path, config=cleaned_config, torch_dtype=torch.float16, trust_remote_code=True
    )
    # Evaluation mode must be set before the export.
    model.eval()
    print("   - Model loaded successfully!")

    # --- Step 2.3: build the export configuration ---
    print("   - Configuring the ONNX export for the image-to-text model...")
    custom_onnx_config = CustomGemma3NImageToTextOnnxConfig(config=model.config, task="text-generation")

    # --- Step 2.4: release whatever memory can be released, then export ---
    print("   - Cleaning up memory before starting export...")
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

    print("   - Starting ONNX export...")
    onnx_export(model=model, config=custom_onnx_config, output=Path(onnx_output_path), opset=14)
    print("\n✅ ONNX conversion process completed successfully!")
    print(f"   The exported model is saved in: {Path(onnx_output_path).resolve()}")

except Exception as e:
    # Report the failure and the full traceback instead of aborting the cell.
    print(f"\n❌ An error occurred during the ONNX conversion process: {e}")
    import traceback
    traceback.print_exc()

In [None]:
import torch
from pathlib import Path
from transformers import AutoConfig, AutoModelForCausalLM, PretrainedConfig
from optimum.exporters.onnx import export as onnx_export
from optimum.exporters.onnx.config import TextDecoderOnnxConfig
from optimum.utils import (
    DummyVisionInputGenerator,
    DummyTextInputGenerator,
    DummyPastKeyValuesGenerator,
    NormalizedTextConfig,
    NormalizedVisionConfig,
)
from typing import Dict
import gc

# --- Configuration ---
# Both names below are module-level settings read later in this cell: the first is passed to
# AutoConfig / AutoModelForCausalLM `from_pretrained`, the second is wrapped in `Path(...)` and
# handed to `onnx_export(output=...)`.
# Path to the SLICED model from your second notebook. This is the only one that will fit in memory.
pytorch_model_path = "/kaggle/input/model-name-auramind-maize-expert-e2b/pytorch/default/1/AuraMind-E2B-Finetuned-Sliced"

# Path where the final ONNX model will be saved
# NOTE(review): this value is passed unchanged as the export `output`; Optimum presumably treats
# that as the ONNX file path itself rather than a directory — confirm the resulting layout.
onnx_output_path = "./onnx_multimodal_model"

# Echo the effective settings so they appear in the cell output.
print(f"Targeting SLICED model path: {pytorch_model_path}")
print(f"ONNX output path: {onnx_output_path}")

# ==============================================================================
# CELL 1: DEFINE THE CUSTOM ONNX CONFIGURATION FOR IMAGE-TO-TEXT
# ==============================================================================
print("\nStep 1: Defining a custom ONNX configuration for the image-to-text model...")

class CustomGemma3NImageToTextOnnxConfig(TextDecoderOnnxConfig):
    """
    ONNX export configuration for the Gemma3N image-to-text model.

    The parent ``TextDecoderOnnxConfig`` is driven by the text sub-config; this
    subclass adds a ``pixel_values`` input and builds dummy text and vision inputs.

    NOTE: a class with this same name is also defined in the previous cell; running
    this cell rebinds the name to this definition.
    """

    NORMALIZED_CONFIG_CLASS = NormalizedTextConfig.with_args(
        num_layers="num_hidden_layers",
        num_attention_heads="num_attention_heads",
        hidden_size="hidden_size",
    )

    def __init__(self, config: PretrainedConfig, task: str = "default", **kwargs):
        """
        Args:
            config: Full multimodal model config; must expose ``text_config`` and
                ``vision_config``.
            task: Export task name forwarded to the parent class.
            **kwargs: Forwarded unchanged to ``TextDecoderOnnxConfig.__init__``.
        """
        # The parent is initialised with the text sub-config only.
        super().__init__(config=config.text_config, task=task, **kwargs)
        self.text_config = config.text_config
        self.vision_config = config.vision_config
        # Full config is kept for the image-token fields read in generate_dummy_inputs.
        self.config = config

    @property
    def inputs(self) -> Dict[str, Dict[int, str]]:
        """Dynamic-axes mapping: ``pixel_values`` first, then the parent's text inputs."""
        text_inputs = super().inputs
        vision_inputs = {
            "pixel_values": {0: "batch_size", 1: "num_channels", 2: "height", 3: "width"}
        }
        return {**vision_inputs, **text_inputs}

    # Using a minimal sequence length to conserve memory during export
    def generate_dummy_inputs(self, batch_size: int = 1, sequence_length: int = 260, **kwargs) -> Dict[str, torch.Tensor]:
        """
        Build dummy text and vision inputs for the export trace.

        Args:
            batch_size: Number of dummy samples.
            sequence_length: Length of the dummy token sequence; must be at least
                ``config.vision_soft_tokens_per_image``.
            **kwargs: Accepted for call compatibility (e.g. ``framework``) and ignored;
                tensors are always generated with the "pt" framework.

        Returns:
            Dict with ``input_ids``, ``attention_mask`` and ``pixel_values``; when
            ``self.use_past`` is set it also holds ``past_key_values`` (the value
            returned by ``DummyPastKeyValuesGenerator.generate``).

        Raises:
            ValueError: If ``sequence_length`` is shorter than the number of image tokens.
        """
        text_input_generator = DummyTextInputGenerator(
            self.task, self._normalized_config, batch_size=batch_size, sequence_length=sequence_length, **self.text_config.to_dict(),
        )
        dummy_inputs = {
            "input_ids": text_input_generator.generate(input_name="input_ids", framework="pt"),
            "attention_mask": text_input_generator.generate(input_name="attention_mask", framework="pt"),
        }

        image_token_id = self.config.image_token_id
        tokens_per_image = self.config.vision_soft_tokens_per_image
        if sequence_length < tokens_per_image:
            raise ValueError(f"Sequence length must be at least {tokens_per_image} to hold image tokens.")
        # Every row starts with `tokens_per_image` image placeholder tokens.
        dummy_inputs["input_ids"][:, :tokens_per_image] = image_token_id

        if self.use_past:
            past_key_values_generator = DummyPastKeyValuesGenerator(
                self.task, self._normalized_config, batch_size=batch_size, sequence_length=sequence_length,
            )
            # Optimum's generators take the input name as the first argument of `generate`,
            # and the past-key-values generator returns per-layer (key, value) pairs rather
            # than a dict, so the result is stored under its input name instead of being
            # merged with `dict.update`.
            # NOTE(review): attention_mask is not extended to cover the past length here —
            # confirm whether that is needed before exporting with use_past=True.
            dummy_inputs["past_key_values"] = past_key_values_generator.generate(
                input_name="past_key_values", framework="pt"
            )

        normalized_vision_config = NormalizedVisionConfig(self.vision_config)
        # Fixed 3x224x224 dummy image is requested here.
        # NOTE(review): presumably chosen to keep export memory low — confirm it matches
        # what the vision tower expects.
        vision_input_generator = DummyVisionInputGenerator(
            self.task, normalized_vision_config, batch_size=batch_size, num_channels=3, height=224, width=224,
        )
        dummy_inputs["pixel_values"] = vision_input_generator.generate(input_name="pixel_values", framework="pt")
        return dummy_inputs

print("   - CustomGemma3NImageToTextOnnxConfig defined.")

# ==============================================================================
# CELL 2: PREPARE AND RUN THE ONNX EXPORT
# ==============================================================================
print("\nStep 2: Preparing and running the ONNX export...")

try:
    # --- Step 2.1: read the sliced model's config, then clean and patch it ---
    print("   - Loading and fixing configuration for the SLICED model...")
    config = AutoConfig.from_pretrained(pytorch_model_path, trust_remote_code=True)

    # Strip the quantization and Unsloth leftovers when present, reporting each removal.
    if hasattr(config, "quantization_config"):
        delattr(config, "quantization_config")
        print("   - Removed 'quantization_config'.")
    if hasattr(config, "unsloth_fixed"):
        delattr(config, "unsloth_fixed")
        print("   - Removed 'unsloth_fixed'.")

    # Force the model type so the model is recognised when loading.
    config.model_type = "gemma3n"
    print(f"   - Manually set model_type to: {config.model_type}")

    # --- Step 2.2: instantiate the sliced model from the patched config ---
    print("   - Loading SLICED model with the corrected configuration...")
    # The patched config object is passed explicitly; float16 keeps the footprint small.
    model = AutoModelForCausalLM.from_pretrained(
        pytorch_model_path, config=config, torch_dtype=torch.float16, trust_remote_code=True
    )
    model.eval()
    print("   - Sliced model loaded successfully!")

    # --- Step 2.3: build the export configuration ---
    print("   - Configuring the ONNX export...")
    custom_onnx_config = CustomGemma3NImageToTextOnnxConfig(config=model.config, task="text-generation")

    # --- Step 2.4: release whatever memory can be released, then export ---
    print("   - Cleaning up memory before starting export...")
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

    print("   - Starting ONNX export...")
    onnx_export(model=model, config=custom_onnx_config, output=Path(onnx_output_path), opset=14)
    print("\n✅ ONNX conversion process completed successfully!")
    print(f"   The exported model is saved in: {Path(onnx_output_path).resolve()}")

except Exception as e:
    # Report the failure and the full traceback instead of aborting the cell.
    print(f"\n❌ An error occurred during the ONNX conversion process: {e}")
    import traceback
    traceback.print_exc()


2025-08-05 01:33:19.909590: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1754357600.214134      36 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1754357600.304432      36 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'



Targeting SLICED model path: /kaggle/input/model-name-auramind-maize-expert-e2b/pytorch/default/1/AuraMind-E2B-Finetuned-Sliced
ONNX output path: ./onnx_multimodal_model

Step 1: Defining a custom ONNX configuration for the image-to-text model...
   - CustomGemma3NImageToTextOnnxConfig defined.

Step 2: Preparing and running the ONNX export...
   - Loading and fixing configuration for the SLICED model...
   - Removed 'quantization_config'.
   - Removed 'unsloth_fixed'.
   - Manually set model_type to: gemma3n
   - Loading SLICED model with the corrected configuration...
   - Sliced model loaded successfully!
   - Configuring the ONNX export...
   - Cleaning up memory before starting export...
   - Starting ONNX export...


  if feat_size[0] < high_resolution[0] or feat_size[1] < high_resolution[1]:
  if high_resolution[0] != self.output_resolution[0] or high_resolution[1] != self.output_resolution[1]:
  high_resolution[0] % self.output_resolution[0] != 0 or
  if not is_torchdynamo_compiling() and inputs_embeds[special_image_mask].numel() != image_features.numel():
  if per_layer_projection.shape != per_layer_inputs.shape:
  if (padding_length := kv_length + kv_offset - attention_mask.shape[-1]) > 0:
  if padding_mask is not None and padding_mask.shape[-1] > kv_length:
  epsilon_tensor = torch.tensor(1e-5)
  is_causal = query.shape[2] > 1 and attention_mask is None and getattr(module, "is_causal", True)
  target_sparsity_tensor = torch.tensor(self.activation_sparsity, dtype=torch.float32, device=inputs.device)
