In [1]:
import numpy as np
import pandas as pd

import spacy
from alibi.explainers import AnchorText
from alibi.datasets import fetch_movie_sentiment
from alibi.utils.download import spacy_model

import torch

from simpletransformers.model import TransformerModel

In [2]:
# Fetch the movie-review sentiment dataset through alibi's dataset helper.
movies = fetch_movie_sentiment()
# Display the fields exposed by the returned object.
movies.keys()

dict_keys(['data', 'target', 'target_names'])

In [3]:
# Pull the dataset fields out into separate names.
# presumably: data = review texts, labels = class ids, target_names = class label strings — TODO confirm
data = movies.data
labels = movies.target
target_names = movies.target_names

In [4]:
# Name of the spaCy pipeline used by the text explainer. It gets its own
# variable so that (re-)running this cell never rebinds `model`, the name used
# for the classifier object elsewhere in the notebook.
spacy_model_name = 'en_core_web_md'
# alibi helper from alibi.utils.download — presumably fetches the pipeline when
# it is not installed yet (TODO confirm); spacy.load then loads it.
spacy_model(model=spacy_model_name)
nlp = spacy.load(spacy_model_name)

In [5]:
# Build the simpletransformers wrapper for the 'roberta-base' RoBERTa model,
# with the fp16 option switched off through `args`.
model = TransformerModel('roberta', 'roberta-base', args=({'fp16': False}))

In [6]:
# Load the fine-tuned weights into the wrapped network.
# map_location='cpu' lets the checkpoint deserialize on machines without the
# device it was saved from; load_state_dict then copies the values into the
# model's existing parameters.
# NOTE: torch.load unpickles the file — only use checkpoints from a trusted source.
state_dict = torch.load('pytorch_model.bin', map_location='cpu')
model.model.load_state_dict(state_dict)

<All keys matched successfully>

In [7]:
def predict_fn(x):
    """Return one predicted class index per input in ``x``.

    Parameters
    ----------
    x
        Inputs forwarded unchanged to ``model.predict`` (called with lists of
        strings elsewhere in this notebook).

    Returns
    -------
    The ``argmax`` along axis 1 of the second element returned by
    ``model.predict`` — presumably the per-class raw model outputs (TODO confirm).
    """
    return model.predict(x)[1].argmax(axis=1)

In [8]:

# Create the anchor explainer from the loaded spaCy pipeline and the prediction function.
explainer = AnchorText(nlp, predict_fn)

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

  0%|          | 0/1 [00:00<?, ?it/s]




100%|██████████| 1/1 [00:00<00:00,  3.59it/s]


In [9]:
# Label names, indexed by predicted class id when results are printed.
class_names = movies.target_names

In [10]:
# Example review that is classified and explained in the following cells.
text = "This film is so bad"
print(text)

This film is so bad


In [11]:
# Run the classifier once on the example and reuse the index for both lookups.
pred_idx = predict_fn([text])[0]
pred = class_names[pred_idx]
# `1 - index` selects the other label; this is only valid for exactly two classes.
alternative = class_names[1 - pred_idx]
print('Prediction: %s' % pred)

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

  0%|          | 0/1 [00:00<?, ?it/s]




100%|██████████| 1/1 [00:00<00:00,  4.03it/s]


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

  0%|          | 0/1 [00:00<?, ?it/s]




100%|██████████| 1/1 [00:00<00:00,  4.19it/s]

Prediction: negative





In [14]:
# Seed NumPy's global RNG before explaining (presumably the anchor search draws
# its perturbation samples from it — TODO confirm).
np.random.seed(0)
# Compute an anchor explanation for `text`. With use_unk=True the perturbed
# examples printed further down contain "UNK" tokens. The exact meaning of
# `threshold` and `sample_proba` should be checked against alibi's AnchorText docs.
explanation = explainer.explain(text, threshold=0.95, use_unk=True, sample_proba=0.5)

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

  0%|          | 0/1 [00:00<?, ?it/s]




100%|██████████| 1/1 [00:00<00:00,  4.17it/s]


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

  0%|          | 0/1 [00:00<?, ?it/s]




100%|██████████| 1/1 [00:00<00:00,  4.12it/s]


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

  0%|          | 0/13 [00:00<?, ?it/s]




100%|██████████| 13/13 [00:21<00:00,  1.63s/it]


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

  0%|          | 0/13 [00:00<?, ?it/s]




100%|██████████| 13/13 [00:21<00:00,  1.65s/it]


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

  0%|          | 0/13 [00:00<?, ?it/s]




100%|██████████| 13/13 [00:20<00:00,  1.60s/it]


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

  0%|          | 0/1 [00:00<?, ?it/s]




100%|██████████| 1/1 [00:00<00:00,  4.44it/s]


In [20]:

# NOTE(review): the seed call is commented out, so this run is not re-seeded.
#np.random.seed(0)
# Same call as the use_unk=True cell, but with use_unk=False.
# NOTE(review): the saved output of this cell is
# "ValueError: probabilities do not sum to 1", so `explanation` is presumably
# not reassigned here and still holds the result of the last successful
# explain() call — TODO confirm and fix or remove this cell.
explanation = explainer.explain(text, threshold=0.95, use_unk=False, sample_proba=0.5)

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

  0%|          | 0/1 [00:00<?, ?it/s]




100%|██████████| 1/1 [00:00<00:00,  1.75it/s]


ValueError: probabilities do not sum to 1

In [21]:

# Summary of the anchor: its words and the precision stored with the explanation.
print('Anchor: %s' % explanation['names'])
print('Precision: %.2f' % explanation['precision'])

# For the last entry of the raw example list, print the perturbed texts grouped
# by the label reported for them: first the predicted label, then the other one.
for label, bucket in ((pred, 'covered_true'), (alternative, 'covered_false')):
    print('\nExamples where anchor applies and model predicts %s:' % label)
    samples = explanation['raw']['examples'][-1][bucket]
    # Each sample is a sequence whose first element is the text to show.
    print('\n'.join(sample[0] for sample in samples))

Anchor: ['bad']
Precision: 1.00

Examples where anchor applies and model predicts negative:
UNK film is UNK bad
This UNK is UNK bad
This UNK is so bad
This film UNK UNK bad
This UNK UNK so bad
UNK film is UNK bad
UNK film is UNK bad
UNK film UNK UNK bad
This film is UNK bad
This film is UNK bad

Examples where anchor applies and model predicts positive:



In [16]:
import os
import sys
import csv
import numpy
import time
import timeit
from datetime import datetime
import argparse
import logging
import coloredlogs
import torch
import onnx
from enum import Enum
from packaging import version

# Logger used by the benchmark code in this notebook.
# NOTE(review): no handler or level is configured in the visible cells, so
# logger.info() messages are presumably not displayed — TODO confirm.
logger = logging.getLogger(__name__)

def get_latency_result(runtimes, batch_size):
    """Summarise a series of measured run times as formatted latency statistics.

    Parameters
    ----------
    runtimes
        Non-empty sequence of per-run durations; each is multiplied by 1000
        for the millisecond fields.
    batch_size
        Number of items processed per run, used to scale the throughput.

    Returns
    -------
    dict
        ``test_times`` (int, number of runs) plus ``latency_variance``, the
        90/95/99 ``latency_*_percentile`` values, ``average_latency_ms`` and
        ``QPS``, each formatted as a string with two decimals.

    Raises
    ------
    ZeroDivisionError
        If ``runtimes`` is empty or the mean run time is zero.
    """
    run_count = len(runtimes)
    mean_ms = sum(runtimes) / float(run_count) * 1000.0
    # Variance of the raw run times, scaled by 1000 (not by 1000 squared).
    scaled_variance = numpy.var(runtimes, dtype=numpy.float64) * 1000.0
    qps = batch_size * (1000.0 / mean_ms)

    def _two_decimals(value):
        return "{:.2f}".format(value)

    summary = {
        "test_times": run_count,
        "latency_variance": _two_decimals(scaled_variance),
    }
    for pct in (90, 95, 99):
        key = "latency_{}_percentile".format(pct)
        summary[key] = _two_decimals(numpy.percentile(runtimes, pct) * 1000.0)
    summary["average_latency_ms"] = _two_decimals(mean_ms)
    summary["QPS"] = _two_decimals(qps)
    return summary

def inference_ort(ort_session, ort_inputs, result_template, repeat_times, batch_size):
    """Time ``ort_session.run`` and return the template merged with latency stats.

    Parameters
    ----------
    ort_session
        Object exposing ``run(output_names, inputs)``.
    ort_inputs
        Inputs passed as the second argument of ``run``.
    result_template
        Key/value pairs copied into the returned dict first.
    repeat_times
        How many single-call timings to collect.
    batch_size
        Forwarded to ``get_latency_result`` for the throughput figure.

    Returns
    -------
    dict
        The template entries, ``"io_binding": False``, and the statistics
        produced by ``get_latency_result``.
    """
    def _run_once():
        # None as output names, as in every call made by this helper.
        return ort_session.run(None, ort_inputs)

    # One call per measurement, repeated `repeat_times` times.
    timings = timeit.repeat(_run_once, number=1, repeat=repeat_times)

    report = dict(result_template)
    report["io_binding"] = False
    report.update(get_latency_result(timings, batch_size))
    return report

In [18]:
from transformers import RobertaTokenizer
import torch 

import sys
import os

# BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath("benchmark_helper")))
# sys.path.append(BASE_DIR)

#from benchmark_helper import inference_ort

def to_numpy(tensor):
    """Return ``tensor`` as a NumPy array after moving it to the CPU.

    Tensors with ``requires_grad`` set are detached before the conversion.
    """
    if tensor.requires_grad:
        tensor = tensor.detach()
    return tensor.cpu().numpy()

import onnxruntime

# Path of the exported ONNX model. Defined once so that the file that is loaded
# and the name written to the log and the result record cannot drift apart.
ONNX_MODEL_PATH = "roberta_fp16.onnx"

ort_session = onnxruntime.InferenceSession(ONNX_MODEL_PATH)

# Placeholder for IO-binding output buffers; nothing in this cell reads it
# (only the plain, non-IO-binding run is benchmarked here).
output_buffers = {"last_state": None, "pooler": None}

results = []

# Label stored in the result record only; it is not passed to the session.
device = "cpu"
tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
# Token ids of the example sentence (special tokens included); unsqueeze(0)
# adds a leading dimension of size 1, which is read back as the batch size below.
input_ids = torch.tensor(tokenizer.encode("This film is so bad", add_special_tokens=True)).unsqueeze(0)

# Only the first input declared by the session is fed.
ort_inputs = {ort_session.get_inputs()[0].name: to_numpy(input_ids)}
batch_size = input_ids.shape[0]
sequence_length = input_ids.shape[1]

# Metadata merged into every result record by inference_ort.
result_template = {
    "engine": "onnxruntime",
    "version": onnxruntime.__version__,
    "device": device,
    "model_name": ONNX_MODEL_PATH,
    "batch_size": batch_size,
    "sequence_length": sequence_length,
    "datetime": str(datetime.now()),
}
logger.info("Run onnxruntime on {} with input shape {}".format(ONNX_MODEL_PATH,
                                                               [batch_size, sequence_length]))
# repeat_times is 1, so every reported latency statistic comes from a single
# timed call and the variance is 0.
result = inference_ort(ort_session, ort_inputs, result_template, 1, batch_size)
logger.info(result)
results.append(result)

In [19]:
# Show the collected benchmark records (one dict per appended inference_ort result).
print(results)

[{'engine': 'onnxruntime', 'version': '1.3.0', 'device': 'cpu', 'model_name': 'roberta_fp16.onnx', 'batch_size': 1, 'sequence_length': 7, 'datetime': '2020-07-21 23:53:30.953123', 'io_binding': False, 'test_times': 1, 'latency_variance': '0.00', 'latency_90_percentile': '555.32', 'latency_95_percentile': '555.32', 'latency_99_percentile': '555.32', 'average_latency_ms': '555.32', 'QPS': '1.80'}]
