# generate_bert.py
# %% ---------------------------------------------
import os

import torch
import torch.onnx
from transformers import AutoTokenizer, AutoModelForMaskedLM
# Load the pretrained tokenizer and masked-LM model. torchscript=True makes
# the model return plain tuples, which torch.jit.trace requires.
tokenizer = AutoTokenizer.from_pretrained("bert-large-uncased")
model = AutoModelForMaskedLM.from_pretrained("bert-large-uncased", torchscript=True)
model.eval()
# Dummy input: random token ids drawn from BERT's 30,522-entry vocabulary,
# one sequence at the maximum length of 512.
dummy_tensor = torch.randint(0, 30522, (1, 512))
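# %% ---------------------------------------------
# Optional sanity check (a minimal sketch, not part of the export flow):
# the trace does not depend on the token values, but running a real sentence
# confirms the checkpoint loaded correctly. The example text is an arbitrary
# choice.
inputs = tokenizer("Paris is the [MASK] of France.", return_tensors="pt")
with torch.no_grad():
    logits = model(inputs["input_ids"])[0]  # tuple output under torchscript=True
mask_pos = (inputs["input_ids"][0] == tokenizer.mask_token_id).nonzero().item()
print(tokenizer.decode([logits[0, mask_pos].argmax().item()]))  # typically "capital"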
# %% ---------------------------------------------
# Trace the model into TorchScript and serialize it. The output directory
# must exist before torch.jit.save is called.
os.makedirs("model", exist_ok=True)
with torch.no_grad():
    traced_model = torch.jit.trace(model, dummy_tensor)
torch.jit.save(traced_model, "model/large_lm.pt")
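# %% ---------------------------------------------
# Optional round trip (a sketch, assuming the save above succeeded): reload
# the TorchScript archive and check it reproduces the eager model's logits.
loaded = torch.jit.load("model/large_lm.pt")
with torch.no_grad():
    eager_logits = model(dummy_tensor)[0]
    traced_logits = loaded(dummy_tensor)[0]
assert torch.allclose(eager_logits, traced_logits, atol=1e-5)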
# %% ---------------------------------------------
batch_size = 1  # the export below marks the batch axis as dynamic
with torch.no_grad():
    torch_out = model(dummy_tensor)  # reference output for validating the export
torch.onnx.export(model,                    # model being run
                  dummy_tensor,             # model input (or a tuple for multiple inputs)
                  "bert_onnx.onnx",         # where to save the model (an .onnx file, not .pt)
                  export_params=True,       # store the trained parameter weights inside the model file
                  opset_version=10,         # the ONNX opset version to export the model to
                  do_constant_folding=True, # fold constant subgraphs at export time
                  input_names=['input'],    # the model's input names
                  output_names=['output'],  # the model's output names
                  dynamic_axes={'input': {0: 'batch_size'},    # variable-length axes
                                'output': {0: 'batch_size'}})
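# %% ---------------------------------------------
# Optional check (a sketch; assumes the onnxruntime and numpy packages are
# installed): run the exported graph and compare it against torch_out above.
import numpy as np
import onnxruntime as ort

session = ort.InferenceSession("bert_onnx.onnx", providers=["CPUExecutionProvider"])
onnx_out = session.run(None, {'input': dummy_tensor.numpy()})[0]
np.testing.assert_allclose(torch_out[0].numpy(), onnx_out, rtol=1e-3, atol=1e-5)
print("ONNX output matches the PyTorch reference")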
# %%