In [1]:
import torch
import onnxruntime
from fastT5 import export_and_get_onnx_model
from transformers import T5Config, T5Tokenizer, T5ForConditionalGeneration

In [3]:
# Build the T5 configuration explicitly; the keyword values below are passed
# to T5Config unchanged and in the same order as before.
t5_config = T5Config(
    # vocabulary and layer dimensions
    vocab_size=32128,
    d_model=768,
    d_kv=64,
    d_ff=2048,
    num_layers=12,
    num_decoder_layers=12,
    num_heads=12,
    relative_attention_num_buckets=32,
    # regularisation / initialisation settings
    dropout_rate=0.1,
    layer_norm_epsilon=1e-6,
    initializer_factor=1.0,
    feed_forward_proj="gated-gelu",
    is_encoder_decoder=True,
    use_cache=True,
    # special-token ids
    pad_token_id=0,
    eos_token_id=1,
    decoder_start_token_id=0,
    tie_word_embeddings=False,
    torch_dtype="float32",
    gradient_checkpointing=False,
)

# Instantiate the model from the config (weights are filled in below) and
# fetch the tokenizer that later cells use for encoding/decoding.
model = T5ForConditionalGeneration(t5_config)
tokenizer = T5Tokenizer.from_pretrained("Langboat/mengzi-t5-base")

# Read the fine-tuned weights onto the CPU and copy them into the model.
# NOTE(review): torch.load unpickles the file, so only point it at
# checkpoints from a trusted source.
state_dict = torch.load(
    "../serving/trained_model/20220327_kaggle/pytorch_model.bin",
    map_location=torch.device('cpu'),
)
model.load_state_dict(state_dict)

# Write the model back out with save_pretrained so it can be loaded by path.
model.save_pretrained("../serving/trained_model/GuwenNet")

In [4]:
from fastT5 import (
    OnnxT5,
    get_onnx_runtime_sessions,
    generate_onnx_representation,
    quantize,
)


# Same path string that was passed to model.save_pretrained earlier in this
# notebook; reused by the later session/OnnxT5 cells.
model_or_model_path = "../serving/trained_model/GuwenNet"

# Step 1: convert the Hugging Face T5 model at that path to ONNX and keep
# the returned paths for the following steps.
onnx_model_paths = generate_onnx_representation(model_or_model_path)

Exporting to onnx... |################################| 3/3
[?25h

In [1]:
# Display the value returned by generate_onnx_representation. The name only
# exists once the export cell has been run in the current kernel; running
# this cell first in a fresh kernel raises NameError.
onnx_model_paths

NameError: name 'onnx_model_paths' is not defined

In [5]:
# Build the runtime sessions from the exported (not yet quantized) ONNX paths.
model_sessions = get_onnx_runtime_sessions(onnx_model_paths)
# Rebinds `model`: from this point on it refers to the OnnxT5 object rather
# than the T5ForConditionalGeneration instance created in the setup cell.
model = OnnxT5(model_or_model_path, model_sessions)

In [None]:
def run_onnx_model(model, text=None, max_length=100):
    """Run ``model`` on one sentence and return the decoded output string.

    The notebook-level ``tokenizer`` is used both to encode ``text`` and to
    decode the generated ids.

    Args:
        model: Object exposing ``generate(input_ids=..., attention_mask=...,
            max_length=...)``, e.g. the ``OnnxT5`` instance built in this
            notebook.
        text: Sentence to feed to the model. When omitted, the built-in
            sample sentence is used, which keeps ``run_onnx_model(model)``
            working exactly as before.
        max_length: Value forwarded to ``model.generate``; defaults to 100.

    Returns:
        The string produced by ``tokenizer.decode`` with special tokens
        skipped.
    """
    if text is None:
        text = "先帝开创的事业没有完成一半，却中途去世了。现在天下分裂成三个国家。蜀汉民力困乏，这实在是危急存亡的时候啊。"

    encoded = tokenizer(text, return_tensors='pt')
    generated = model.generate(
        input_ids=encoded['input_ids'],
        attention_mask=encoded['attention_mask'],
        max_length=max_length,
    )
    # squeeze() collapses the batch axis before decoding; this assumes a
    # single input sentence per call.
    return tokenizer.decode(generated.squeeze(), skip_special_tokens=True)

In [10]:
# Produce the result in this cell so it does not rely on an `output`
# variable left over from an earlier kernel session (no other cell assigns
# it at notebook scope).
output = run_onnx_model(model)
output

'先帝创业未尽,而中道卒,今天下分裂为三国,蜀汉困乏,此诚危亡之时也。'

In [None]:
# Step 2 (recommended): quantize the converted model for faster inference and a smaller model size.
quant_model_paths = quantize(onnx_model_paths)

# Step 3: set up the ONNX Runtime sessions, this time from the quantized paths.
# NOTE: rebinds `model_sessions`, replacing the sessions built from the
# unquantized export.
model_sessions = get_onnx_runtime_sessions(quant_model_paths)

# Step 4: build the OnnxT5 model on top of the quantized sessions.
# NOTE: rebinds `model`, so later calls such as run_onnx_model(model) use the
# quantized model.
model = OnnxT5(model_or_model_path, model_sessions)