# Saving a model to the ONNX format

This demo illustrates how to save a model that we trained in chapter 1 to the ONNX format. 


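# Checking the environment

In chapter 1, the first step in training the model was to train the tokenizer. Both the tokenizer and the model are already trained at this point, so here we only check that CUDA is available on this computer before loading them.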

In [1]:
# Environment check: can PyTorch see a CUDA device on this computer?
import torch

cuda_available = torch.cuda.is_available()
cuda_available  # last expression of the cell, so the notebook displays the boolean

True

## Loading the model from file

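Here, we load the trained model and its tokenizer from the `./decBERTa` directory on the hard drive and run a quick fill-mask prediction to check that the model works.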

In [4]:
from pprint import pprint
from transformers import pipeline

# Build the inference pipeline. Both the model weights and the tokenizer are
# read from the local ./decBERTa directory.
# NOTE: despite its name, `feature_extraction` is a "fill-mask" pipeline.
feature_extraction = pipeline(
    task="fill-mask",
    tokenizer="./decBERTa",
    model="./decBERTa",
)

# Sanity check: let the model fill in the masked token of a tiny code snippet.
strPredicted = feature_extraction("int i = <mask>;")

Device set to use cuda:0


In [6]:
# Display the first entry of the fill-mask result for "int i = <mask>;".
# NOTE(review): presumably the highest-scoring candidate -- confirm the
# ordering against the pipeline documentation.
strPredicted[0]

{'score': 0.9335035681724548,
 'token': 266,
 'token_str': ' 0',
 'sequence': 'int i = 0;'}

In [None]:
from pathlib import Path
import transformers
from transformers.onnx import FeaturesManager
from transformers import AutoConfig, AutoTokenizer, AutoModelForMaskedLM

# Export the trained masked-language model from ./decBERTa to an ONNX file.
# NOTE(review): the `transformers.onnx` package is reported as deprecated in
# recent transformers releases in favour of Optimum -- confirm that the
# installed version still provides it before relying on this cell.

# Load model and tokenizer
model_id = "./decBERTa"
feature = "masked-lm"
model = AutoModelForMaskedLM.from_pretrained(model_id)
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Load config: look up the ONNX configuration class for this model/feature
# pair (raises if the combination is not supported) and instantiate it.
model_kind, model_onnx_config = FeaturesManager.check_supported_model_or_raise(model, feature=feature)
onnx_config = model_onnx_config(model.config)

# Define the output location once so that the export call and the success
# message below can never disagree about the file name.
onnx_path = Path("decBERTa.onnx")

# Export
onnx_inputs, onnx_outputs = transformers.onnx.export(
    preprocessor=tokenizer,
    model=model,
    config=onnx_config,
    opset=13,
    output=onnx_path,
)

# Only report success if the exported file is actually on disk.
if not onnx_path.is_file():
    raise FileNotFoundError(f"ONNX export did not produce {onnx_path}")

print(f"ONNX model exported successfully to {onnx_path}")