In [2]:
import torch
import onnxruntime
from transformers import BertTokenizerFast
import numpy as np
# Create the ONNX Runtime session (requesting the CUDA execution provider),
# load the 'bert-base-uncased' tokenizer, and create an I/O binding object on
# the session.
ONNX_MODEL_PATH = 'onnx/bert-base-uncased/bert-base-uncased-emotion.onnx'
TOKENIZER_NAME = 'bert-base-uncased'

ses = onnxruntime.InferenceSession(
    ONNX_MODEL_PATH,
    providers=["CUDAExecutionProvider"],
)
tokenizer = BertTokenizerFast.from_pretrained(TOKENIZER_NAME)
io_binding = ses.io_binding()  # used by the inference cell to bind inputs and outputs

In [49]:
# Classify a batch of sentences with the ONNX model through I/O binding and
# print, for each sentence, the top label and its softmax probability.
texts = ['This is fun.', 'I am very angry.', 'I am sad.']
labels = ['sadness', 'joy', 'love', 'anger', 'fear', 'surprise']

inputs = tokenizer(texts, padding=True)

for input_name in inputs.keys():
    # Convert each padded tokenizer field to a NumPy array and bind it as a CPU input.
    io_binding.bind_cpu_input(input_name, np.array(inputs[input_name]))

io_binding.bind_output('output')  # bind the output named 'output' (default device arguments)
ses.run_with_iobinding(io_binding)  # run the session with the bound inputs/outputs
outputs = io_binding.copy_outputs_to_cpu()  # fetch the bound outputs back to the host

# Only one output is bound, so take it directly instead of wrapping the whole
# list in a tensor and calling squeeze(): squeeze() would also drop the batch
# axis when `texts` holds a single sentence, and the per-sentence loop would
# then iterate over individual logits. Reshaping to (len(texts), -1) keeps
# exactly one row per sentence.
# NOTE(review): assumes the model emits one row of class logits per input
# sentence, in the same order as `labels` — confirm against the exported model.
logits = torch.as_tensor(outputs[0], dtype=torch.float32).reshape(len(texts), -1)

# One batched softmax + max yields both the winning class and its probability
# (softmax is monotonic, so the index equals the argmax of the raw logits).
top_scores, top_indices = logits.softmax(dim=-1).max(dim=-1)
output_labels = [labels[index] for index in top_indices.tolist()]
output_scores = top_scores.tolist()

print('\n'.join(f'{text} => {label}: {score*100:.2f}%' for text, label, score in zip(texts, output_labels, output_scores)))

This is fun. => joy: 99.88%
I am very angry. => anger: 99.63%
I am sad. => sadness: 99.80%
