In [2]:
import tensorflow as tf
# import tensorflow_hub as hub  # Not needed once model is downloaded/saved
from transformers import BertTokenizer

In [6]:
# In the example code a non-existing tokenizer is used. I choose the squad one which is also for Q&A
tokenizer = BertTokenizer.from_pretrained('bert-large-uncased-whole-word-masking-finetuned-squad')
new_model = tf.keras.models.load_model('model/1')  # New directory (also not really needed)



In [7]:
new_model.compile()  # for local inference

## Example code from tf hub

In [8]:
questions = [
    'How long did it take to find the answer?',
    'What\'s the answer to the great question?',
    'What\'s the name of the computer?']
paragraph = '''<p>The computer is named Deep Thought.</p>.
               <p>After 46 million years of training it found the answer.</p>
               <p>However, nobody was amazed. The answer was 42.</p>'''

for question in questions:
  question_tokens = tokenizer.tokenize(question)
  paragraph_tokens = tokenizer.tokenize(paragraph)
  tokens = ['[CLS]'] + question_tokens + ['[SEP]'] + paragraph_tokens + ['[SEP]']
  input_word_ids = tokenizer.convert_tokens_to_ids(tokens)
  input_mask = [1] * len(input_word_ids)
  input_type_ids = [0] * (1 + len(question_tokens) + 1) + [1] * (len(paragraph_tokens) + 1)

  input_word_ids, input_mask, input_type_ids = map(lambda t: tf.expand_dims(
      tf.convert_to_tensor(t, dtype=tf.int32), 0), (input_word_ids, input_mask, input_type_ids))
  outputs = new_model([input_word_ids, input_mask, input_type_ids])
    
  print(outputs)
  print(outputs.shape)
  # using `[1:]` will enforce an answer. `outputs[0][0][0]` is the ignored '[CLS]' token logit
  short_start = tf.argmax(outputs[0][0][1:]) + 1
  short_end = tf.argmax(outputs[1][0][1:]) + 1
  answer_tokens = tokens[short_start: short_end + 1]
  answer = tokenizer.convert_tokens_to_string(answer_tokens)
  print(f'Question: {question}')
  print(f'Answer: {answer}')

[<tf.Tensor: shape=(1, 64), dtype=float32, numpy=
array([[ 7.5831766, -7.126385 , -8.315767 , -8.040369 , -7.9417977,
        -8.029746 , -7.820639 , -8.331517 , -8.144661 , -9.128356 ,
        -8.238922 , -6.824143 , -6.851987 , -8.14085  , -7.275723 ,
        -5.711597 , -6.488315 , -8.637387 , -8.109914 , -4.772206 ,
        -8.957758 , -8.470302 , -6.1216087, -8.032608 , -7.8976817,
        -8.5412445, -6.8471313, -4.765982 , -7.4818387, -6.6101646,
         1.5490808,  4.649346 , -4.1442738, -3.5947165, -7.098928 ,
        -3.7559516, -6.139193 , -6.8271527, -6.8225207, -8.005595 ,
        -8.341232 , -7.9348207, -8.764536 , -8.705311 , -9.039872 ,
        -7.3882203, -8.58604  , -8.926344 , -6.628455 , -8.407904 ,
        -6.4927077, -8.317963 , -8.328851 , -7.268306 , -4.6447043,
        -6.8083863, -6.0284085, -1.6590921, -8.3523   , -8.886298 ,
        -8.954616 , -8.860009 , -9.228969 , -7.570626 ]], dtype=float32)>, <tf.Tensor: shape=(1, 64), dtype=float32, numpy=
array([[ 7

AttributeError: 'list' object has no attribute 'shape'

In [20]:
questions = ['How long did it take to find the answer?']
paragraph = '''<p>The computer is named Deep Thought.</p>.
               <p>After 46 million years of training it found the answer.</p>
               <p>However, nobody was amazed. The answer was 42.</p>'''

question_tokens = tokenizer.tokenize(question)
paragraph_tokens = tokenizer.tokenize(paragraph)
tokens = ['[CLS]'] + question_tokens + ['[SEP]'] + paragraph_tokens + ['[SEP]']
input_word_ids = tokenizer.convert_tokens_to_ids(tokens)
input_mask = [1] * len(input_word_ids)
input_type_ids = [0] * (1 + len(question_tokens) + 1) + [1] * (len(paragraph_tokens) + 1)

input_word_ids, input_mask, input_type_ids = map(lambda t: tf.expand_dims(
  tf.convert_to_tensor(t, dtype=tf.int32), 0), (input_word_ids, input_mask, input_type_ids))

In [31]:
import requests
import json

In [56]:
payload = {
    "inputs": {    
        "input_word_ids": input_word_ids.numpy().tolist(),
        "input_mask": input_mask.numpy().tolist(),
        "input_type_ids": input_type_ids.numpy().tolist()
    }
}

In [51]:
tf_serving_url = "http://localhost:8501/v1/models/bert_qa:predict"
r = requests.post(tf_serving_url, data=json.dumps(payload))

In [54]:
answer = r.json()["outputs"]["tf_bert_for_natural_question_answering_1"]

In [55]:
short_start = tf.argmax(outputs[0][0][1:]) + 1
short_end = tf.argmax(outputs[1][0][1:]) + 1
answer_tokens = tokens[short_start: short_end + 1]
answer = tokenizer.convert_tokens_to_string(answer_tokens)
print(f'Answer: {answer}')

Answer: deep thought
