In [9]:
import torch
from onnxruntime_extensions import gen_processing_models
from onnxruntime_extensions import get_library_path
import onnx
import onnxruntime as ort
import numpy as np
from transformers import RobertaForSequenceClassification, RobertaTokenizer

# Step 1: Load the Huggingface Roberta tokenizer and model
input_text = "A test text!"
model_type = "roberta-base"

model = RobertaForSequenceClassification.from_pretrained(model_type, num_labels=3)
model.load_state_dict(
    torch.load("best_roberta_model_ultimate.pt", map_location=torch.device("cpu"))
)
model.eval()

tokenizer = RobertaTokenizer.from_pretrained(model_type)


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [13]:
# Step 2: Export the tokenizer to ONNX using gen_processing_models
onnx_tokenizer_path = "tokenizer.onnx"

# Generate the tokenizer ONNX model
tokenizer_onnx_model, _ = gen_processing_models(tokenizer, pre_kwargs={})
assert tokenizer_onnx_model is not None

# Save the tokenizer ONNX model
with open(onnx_tokenizer_path, "wb") as f:
    f.write(tokenizer_onnx_model.SerializeToString())

# Step 3: Export the Huggingface Roberta model to ONNX
onnx_model_path = "malware_classifier.onnx"
dummy_input = tokenizer(
    "This is a dummy input",
    return_tensors="pt",
    truncation=True,
    padding="max_length",
    max_length=512,
)


# 5. Export the model to ONNX
torch.onnx.export(
    model,  # model to be exported
    (
        dummy_input["input_ids"],
        dummy_input["attention_mask"],
    ),  # model input (dummy input)
    onnx_model_path,  # where to save the ONNX model
    input_names=["input_ids", "attention_mask_input"],  # input tensor name
    output_names=["logits"],  # output tensor names
    dynamic_axes={
        "input_ids": {0: "batch_size", 1: "sequence_length"},  # dynamic axes
        "logits": {0: "batch_size"},
    },
)

# Step 4: Merge the tokenizer and model ONNX files into one
onnx_combined_model_path = "combined_malware_classifier.onnx"

# Load the tokenizer and model ONNX files
tokenizer_onnx_model = onnx.load(onnx_tokenizer_path)
model_onnx_model = onnx.load(onnx_model_path)

# Inspect the ONNX models to find the correct input/output names
print(
    "Tokenizer Model Inputs:", [node.name for node in tokenizer_onnx_model.graph.input]
)
print(
    "Tokenizer Model Outputs:",
    [node.name for node in tokenizer_onnx_model.graph.output],
)
print("Model Inputs:", [node.name for node in model_onnx_model.graph.input])
print("Model Outputs:", [node.name for node in model_onnx_model.graph.output])

# Merge the tokenizer and model ONNX files
combined_model = onnx.compose.merge_models(
    tokenizer_onnx_model,
    model_onnx_model,
    io_map=[("input_ids", "input_ids"), ("attention_mask", "attention_mask_input")],
)
# Save the combined model
onnx.save(combined_model, onnx_combined_model_path)

# Step 5: Test the combined ONNX model using an Inference session with ONNX Runtime Extensions
# Initialize ONNX Runtime SessionOptions and load custom ops library
sess_options = ort.SessionOptions()
sess_options.register_custom_ops_library(get_library_path())

# Initialize ONNX Runtime Inference session with Extensions
session = ort.InferenceSession(
    onnx_combined_model_path,
    sess_options=sess_options,
    providers=["CPUExecutionProvider"],
)

# Prepare dummy input text
input_feed = {
    "input_text": np.asarray([input_text])
}  # Assuming "input_text" is the input expected by the tokenizer

# Run the model
outputs = session.run(None, input_feed)

# Print the outputs
print("logits:", outputs[1][0])

Tokenizer Model Inputs: ['input_text']
Tokenizer Model Outputs: ['input_ids', 'attention_mask', 'offset_mapping']
Model Inputs: ['input_ids', 'attention_mask_input']
Model Outputs: ['logits']


[1;31m2025-03-27 14:26:47.067884944 [E:onnxruntime:, sequential_executor.cc:572 ExecuteKernel] Non-zero status code returned while running Expand node. Name:'/roberta/Expand_1' Status Message: invalid expand shape[m


InvalidArgument: [ONNXRuntimeError] : 2 : INVALID_ARGUMENT : Non-zero status code returned while running Expand node. Name:'/roberta/Expand_1' Status Message: invalid expand shape