# Convert Finetuned FLAN-T5 Model from PyTorch format to ONNX

Export the finetuned checkpoint for production deployment by saving it in a common standard format: ONNX.

ONNX is compatible with multiple serving runtimes: it acts as an intermediate representation that can be run independently of the toolkit/framework in which the original model was written.

`ONNX` is the acronym for `Open Neural Network Exchange`.

In [None]:
# import libraries
try:
    import torch
    import os
    from dotenv import dotenv_values
    from optimum.onnxruntime import ORTModelForSeq2SeqLM
    from transformers import AutoTokenizer
except ImportError as e:
    print(f"Exception during library import {e}")

# Load key/value settings from the local dotenv file named "localenv".
# NOTE: dotenv_values maps a key that is declared without a value to None, so
# dict.get(key, default) alone would still return None (or "") for such keys.
# Using `or` applies the default to missing, None and empty values alike.
config_env: dict = dotenv_values("localenv")

# Configuration parameters, each with a fallback default.
# CONFIG_FILE: name of the parameters file (not used within this cell).
CONFIG_FILE: str = config_env.get("PARAMETER_FILE") or "parameters.yaml"
# OUTPUT_DIR: directory of the finetuned checkpoint; must be a non-empty
# string because later cells build paths from it.
OUTPUT_DIR: str = config_env.get("OUTPUT_DIR") or "flan-finetuned-ita"

## 1. Load & Convert Model via Optimum

In [None]:
# Directory that receives the exported ONNX artifacts: an "onnx" folder
# nested inside the checkpoint directory. Nothing happens if it already exists.
ONNX_DIR: str = "/".join((OUTPUT_DIR, "onnx"))
os.makedirs(name=ONNX_DIR, exist_ok=True)

In [None]:
# Load the finetuned checkpoint from the local OUTPUT_DIR through Optimum,
# requesting an ONNX export (export=True), and load the matching tokenizer
# from the same directory.
try:
    onnx_network_model = ORTModelForSeq2SeqLM.from_pretrained(OUTPUT_DIR, export=True)
    onnx_tokenizer = AutoTokenizer.from_pretrained(OUTPUT_DIR)
except Exception as e:
    print(f"Exception during model export: {e}")
    # Re-raise so execution stops here: continuing would leave
    # onnx_network_model / onnx_tokenizer undefined and hide the real cause
    # behind a NameError in the cells that use them.
    raise

## 2. Save The Model!

In [None]:
# Write the exported ONNX model and its tokenizer into ONNX_DIR.
try:
    onnx_network_model.save_pretrained(ONNX_DIR)
    onnx_tokenizer.save_pretrained(ONNX_DIR)
except Exception as e:
    # Say which step failed; a bare print(e) gives no context.
    print(f"Exception during model save: {e}")
    # Re-raise so a failed save is not mistaken for a successful run.
    raise