In [1]:
import os
from pathlib import Path

import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

In [2]:
# Locate the project root by walking up from the working directory until a
# directory whose name ends with the project name is found, then make it the
# working directory so the relative data paths below resolve.
project_dir_name = "xlm-roberta-base-cls-depression"
start_dir = Path().resolve()
current_dir = start_dir
while not current_dir.name.endswith(project_dir_name):
    # At the filesystem root `parent` returns the root itself; without this
    # guard the loop would spin forever when the notebook is started outside
    # the project tree.
    if current_dir.parent == current_dir:
        raise FileNotFoundError(
            f"No directory ending with '{project_dir_name}' found above {start_dir}"
        )
    current_dir = current_dir.parent

os.chdir(current_dir)

# Inputs: validation split and the fine-tuned checkpoint directory.
input_val_data = current_dir / "data/clean/val.csv"
input_model_dir = current_dir / "data/models/xlm-roberta-base-cls-depression"

# Outputs: distribution directory and the two ONNX artifact paths.
output_model_dir = current_dir / "data/dist/xlm-roberta-base-cls-depression"
output_model_base_filename = output_model_dir / "model.onnx"
output_model_quantized_filename = output_model_dir / "model.quant.onnx"

output_model_dir.mkdir(parents=True, exist_ok=True)

### ONNX export

In [3]:
# Load the fine-tuned classifier and put it in eval mode before exporting.
model = AutoModelForSequenceClassification.from_pretrained(input_model_dir)
model.eval()

# NOTE(review): the tokenizer is loaded from the base checkpoint id rather than
# from input_model_dir — confirm the fine-tuned model uses the same vocabulary.
tokenizer = AutoTokenizer.from_pretrained("FacebookAI/xlm-roberta-base")

# Dummy example used only to drive the export; padded/truncated to 512 tokens.
text = "Sample text"
encoding = tokenizer(text, return_tensors="pt", padding="max_length", max_length=512, truncation=True)

# Token inputs vary in both batch size and sequence length.
symbolic_names = {0: 'batch_size', 1: 'max_seq_len'}
# Logits carry one score per class for each example, so only the batch axis is
# dynamic. Reusing `symbolic_names` here would mislabel the class axis as
# 'max_seq_len' in the exported graph.
logits_axes = {0: 'batch_size'}

torch.onnx.export(
    model,
    (encoding["input_ids"], encoding["attention_mask"]),
    output_model_base_filename,
    input_names=['input_ids', 'attention_mask'],
    output_names=['logits'],
    dynamic_axes={
        'input_ids': symbolic_names,
        'attention_mask': symbolic_names,
        'logits': logits_axes
    },
    opset_version=16,
    do_constant_folding=True
)

### ONNX quantization (dynamic)

In [3]:
from optimum.onnxruntime.configuration import AutoQuantizationConfig
from optimum.onnxruntime import ORTQuantizer, ORTModelForSequenceClassification

# Load the checkpoint from input_model_dir as an ONNX Runtime model.
# export=True is passed so the checkpoint is converted on load
# (presumably a fresh ONNX export, independent of model.onnx — confirm).
onnx_model = ORTModelForSequenceClassification.from_pretrained(
    input_model_dir,
    export=True,
)

In [4]:
# Build the quantizer from the ONNX Runtime model created in the previous cell.
quantizer = ORTQuantizer.from_pretrained(onnx_model)

In [5]:
# Quantization settings from the avx512_vnni preset: dynamic
# (is_static=False) with per-channel quantization enabled.
dqconfig = AutoQuantizationConfig.avx512_vnni(
    per_channel=True,
    is_static=False,
)

In [6]:
# Run quantization with the config defined above and keep the returned path.
# NOTE(review): `save_dir` receives output_model_quantized_filename, a path
# ending in "model.quant.onnx". If quantize() treats save_dir as a directory
# (as the parameter name suggests), the artifacts land inside a directory with
# that name rather than in a single file — confirm this is intended.
model_quantized_path = quantizer.quantize(
    save_dir=output_model_quantized_filename,
    quantization_config=dqconfig,
)