In [34]:
import numpy as np
import tensorflow as tf
from tqdm import tqdm

import transformers
import onnxruntime as rt

In [84]:
model_path = "XLM-RoBERTA - CLPD/10_langs-XLM-RoBERTa-base-100/XLM-RoBERTa-base.onnx"

MAX_LEN = 512
fast_tokenizer = transformers.XLMRobertaTokenizer.from_pretrained('xlm-roberta-base')

In [85]:
def efficiency(labels, predictions, th=0.5):
    """Returns accuracy, recall, precision, f1 according to actual labels and predictions"""
    acc = tf.keras.metrics.BinaryAccuracy(threshold=th)
    acc.update_state(labels, predictions[:, ])
    acc_res = acc.result().numpy()
    rec = tf.keras.metrics.Recall(thresholds=th)
    rec.update_state(labels, predictions[:, ])
    rec_res = rec.result().numpy()
    prec = tf.keras.metrics.Precision(thresholds=th)
    prec.update_state(labels, predictions[:, ])
    prec_res = prec.result().numpy()

    f1_res = 2 * ((rec_res*prec_res)/(rec_res+prec_res))

    return acc_res, rec_res, prec_res, f1_res


def fast_encode(texts, tokenizer, chunk_size=256, maxlen=512):
    """Encodes texts using given tokenizer"""
    input_ids = []
    attention_masks = []

    for i in tqdm(range(0, len(texts), chunk_size)):
        chunk_ids = []
        chunk_attention_mask = []
        text_chunk = texts[i:i+chunk_size].tolist()
        for s in text_chunk:
            encoded_dict = tokenizer.encode_plus(
                s,
                add_special_tokens=True,
                max_length=maxlen,
                pad_to_max_length=True,
                return_attention_mask=True
            )
            chunk_ids.append(encoded_dict['input_ids'])
            chunk_attention_mask.append(encoded_dict['attention_mask'])
        input_ids.extend(chunk_ids)
        attention_masks.extend(chunk_attention_mask)
        
    return np.array(input_ids), np.array(attention_masks)


def encode_input(sent1, sent2):
    """Encode the texts from two arrays of sentences"""
    two_sent = np.char.add(np.char.add(sent1, " [SEP] "), sent2)
    x = fast_encode(two_sent, fast_tokenizer, maxlen=MAX_LEN)
    
    return x

In [86]:
sent1 = np.array(["На каждой «руке» расположено до десяти тысяч вкусовых рецепторов, определяющих съедобность или несъедобность предмета.", 
                  "На каждой «руке» расположено до десяти тысяч вкусовых рецепторов, определяющих съедобность или несъедобность предмета."
                ])
sent2 = np.array(["On each ""hand"" is up to ten thousand taste buds that determine edible or inedible object.", 
                  "The taste buds of the octopus is located on the hands, or rather on the suction cups."])
y = np.array([1, 0])

x = encode_input(sent1, sent2)

sess = rt.InferenceSession(model_path, providers=rt.get_available_providers())

input_name_0 = sess.get_inputs()[0].name
input_name_1 = sess.get_inputs()[1].name

input = {
    input_name_0: np.array(x[0]).astype(np.int64),
    input_name_1: np.array(x[1]).astype(np.int64)
    }

y_pred = sess.run(None, input)[0]

  0%|          | 0/1 [00:00<?, ?it/s]Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
100%|██████████| 1/1 [00:00<00:00, 656.90it/s]


In [87]:
print(y_pred)
print(y)

efficiency(y, y_pred, 0.5)

[[0.679295  ]
 [0.06985122]]
[1 0]


(1.0, 1.0, 1.0, 1.0)