In [1]:
from models import RobertaSpanNer
from transformers import RobertaConfig
import torch
import numpy as np

model_checkpoint = 'roberta-base'
batch_size = 1
short_label_list = ['O', 'substitute', 'before-insertions', 'after-insertions', 'revocation']
num_labels = len(short_label_list)
config = RobertaConfig.from_pretrained(model_checkpoint, num_labels=num_labels)
config.loss_type = 'ce'

import torch.nn as nn
class CustomModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.model = RobertaSpanNer.from_pretrained('./roberta-base-finetuned-span/checkpoint-4380', config=config)
    def forward(self, input_ids, attention_mask):
        start_logit, end_logit = self.model(input_ids, attention_mask)
        return start_logit, end_logit
    
cu_model = CustomModel()

In [3]:
# Some standard imports
import io
import numpy as np

from torch import nn
import torch.utils.model_zoo as model_zoo
import torch.onnx

# Super Resolution model definition in PyTorch
import torch.nn as nn
import torch.nn.init as init

cu_model.eval()

# Input to the model
x1 = torch.randint(1, 100, (batch_size, 512), dtype=torch.int32)
x2 = torch.randint(0, 2, (batch_size, 512), dtype=torch.int32)
print(x2)
#x = torch.tensor([[42]*8])
torch_out = cu_model(x1, x2)
# Export the model

torch.onnx.export(cu_model,               # model being run
                  (x1, x2),                         # model input (or a tuple for multiple inputs)
                  "span_ner.onnx",   # where to save the model (can be a file or file-like object)
                  export_params=True,        # store the trained parameter weights inside the model file
                  opset_version=12,          # the ONNX version to export the model to
                  do_constant_folding=True,  # whether to execute constant folding for optimization
                  input_names = ['input_ids', 'attention_mask'],   # the model's input names
                  output_names = ['start_logit', 'end_logit'], # the model's output names
                  dynamic_axes={'input_ids' : {0 : 'batch_size', 1:'sequence'},    # variable length axes
                                'attention_mask' : {0 : 'batch_size', 1:'sequence'},
                                'start_logit' : {0 : 'batch_size',1:'sequence'},
                                'end_logit' : {0 : 'batch_size',1:'sequence'}}
                  )


tensor([[0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0,
         0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0,
         1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1,
         1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1,
         0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0,
         0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0,
         1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0,
         1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1,
         0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0,
         0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0,
         1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1,
         1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1,
         1, 1, 0, 1, 1, 1, 1

In [9]:
x1 = torch.randint(1, 100, (batch_size, 512))
x1.dtype

torch.int64

In [3]:
import onnx
import onnxruntime

onnx_model = onnx.load("span_ner.onnx")
onnx.checker.check_model(onnx_model)

ort_session = onnxruntime.InferenceSession("span_ner.onnx")

# Input to the model
batch_size = 1
x = torch.randint(1, 100, (batch_size, 512))
torch_out = cu_model(x1, x2)[0]
print(torch_out.size())

def to_numpy(tensor):
    return tensor.detach().cpu().numpy() if tensor.requires_grad else tensor.cpu().numpy()

# compute ONNX Runtime output prediction
ort_inputs = {ort_session.get_inputs()[0].name: to_numpy(x1),
              ort_session.get_inputs()[1].name: to_numpy(x2)}
ort_outs = ort_session.run(None, ort_inputs)

# compare ONNX Runtime and PyTorch results
np.testing.assert_allclose(to_numpy(torch_out), ort_outs[0], rtol=1e-03, atol=1e-05)

print("Exported model has been tested with ONNXRuntime, and the result looks good!")

torch.Size([1, 512, 5])
Exported model has been tested with ONNXRuntime, and the result looks good!


In [None]:
import os

import torch
import torch.nn as nn
from transformers import AutoModelForSeq2SeqLM
from models.seq2seq import *

os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
# model = AutoModelForSeq2SeqLM.from_pretrained('./weights/annotation_gen_BART')
# tokenizer = AutoTokenizer.from_pretrained('./weights/annotation_gen_BART')
#
# ARTICLE_TO_SUMMARIZE = "In Article 8 (civil-military coordination), in each of paragraphs 1 to 3 for “Member States” substitute “The Secretary of State”."
# inputs = tokenizer([ARTICLE_TO_SUMMARIZE], max_length=1024, return_tensors='pt')
#
# # Generate Summary
# summary_ids = model.generate(inputs['input_ids'], num_beams=4, max_length=30, early_stopping=True)
# [tokenizer.decode(g, skip_special_tokens=True, clean_up_tokenization_spaces=False) for g in summary_ids]
class MyModel(nn.Module):
    def __init__(self):
        super(MyModel, self).__init__()
        self.model = AutoModelForSeq2SeqLM.from_pretrained('./weights/annotation_gen_BART/')

    def forward(self, input_ids):
        return self.model.generate(input_ids, num_beams=4, max_length=30, early_stopping=True)


def export_seq2seq_onnx_representation(model_path='./weights/annotation_gen_BART', save_path='./weights/bart_onnx'):
    model = AutoModelForSeq2SeqLM.from_pretrained(model_path)
    model.eval()
    encoder = model.model.encoder
    decoder = model.model.decoder
    lm_head = model.lm_head

    decoder_with_lm_head = CombinedDecoder(decoder, lm_head, model.config)
    simplified_encoder = SimplifiedT5Encoder(encoder)
    # Example sequence
    input_ids = torch.tensor([[42] * 10])

    # Exports to ONNX
    _ = torch.onnx.export(
        decoder_with_lm_head.eval(),
        (input_ids, simplified_encoder(input_ids)),
        f"{save_path}/decoder.onnx",
        export_params=True,
        opset_version=12,
        input_names=['input_ids', 'encoder_hidden_states'],
        output_names=['hidden_states'],
        dynamic_axes={
            'input_ids': {0: 'batch', 1: 'sequence'},
            'encoder_hidden_states': {0: 'batch', 1: 'sequence'},
            'hidden_states': {0: 'batch', 1: 'sequence'},
        })

    _ = torch.onnx._export(
        simplified_encoder.eval(),
        input_ids,
        f"{save_path}/encoder.onnx",
        export_params=True,
        opset_version=12,
        input_names=['input_ids'],
        output_names=['hidden_states'],
        dynamic_axes={
            'input_ids': {0: 'batch', 1: 'sequence'},
            'hidden_states': {0: 'batch', 1: 'sequence'},
        }
    )


# import onnxruntime
# ort_session = onnxruntime.InferenceSession("./weights/bart_onnx/bart.onnx")
#
# def to_numpy(tensor):
#     return tensor.detach().cpu().numpy() if tensor.requires_grad else tensor.cpu().numpy()
#
# # compute ONNX Runtime output prediction
# x = torch.randint(high=100, size=(1, 256))
# ort_inputs = {ort_session.get_inputs()[0].name: to_numpy(x)}
# ort_outs = ort_session.run(None, ort_inputs)
if __name__ == '__main__':
    export_seq2seq_onnx_representation()
    print("Model exported at ", './weights/bart_onnx')

# TorchScript

In [3]:
# Input to the model
x = torch.randint(1, 100, (batch_size, 512))
traced_cell = torch.jit.trace(cu_model, x)

In [4]:
print(traced_cell.code)

def forward(self,
    input_ids: Tensor) -> Tuple[Tensor, Tensor]:
  _0, _1, = (self.model).forward(input_ids, )
  return (_0, _1)

