# Inference from saved model

In [1]:
tag_index  ={'I-gpe': 0,
            'B-art': 1,
            'B-org': 2,
            'I-org': 3,
            'I-per': 4,
            'I-tim': 5,
            'I-nat': 6,
            'I-geo': 7,
            'B-nat': 8,
            'B-per': 9,
            'B-geo': 10,
            'I-eve': 11,
            'B-tim': 12,
            'I-art': 13,
            'B-gpe': 14,
            'B-eve': 15,
            'O': 16}

In [2]:
def convert_sentence_for_inference(sent):

    tokenized_words = sent.split(' ')
    max_len = 50
    words_sentence_padded = []
    for i in range(max_len):
        try:
            # append word on the sentence
            words_sentence_padded.append(tokenized_words[i])
        except:
            # If sequence is less than 50 tokens, add padding manually
            words_sentence_padded.append('__PAD__')
   
    return words_sentence_padded   

In [3]:
import tensorflow_hub as hub
import tensorflow as tf

import tensorflow.compat.v1 as tf1
tf1.disable_eager_execution()
tf1.disable_v2_behavior()

import warnings
warnings.filterwarnings('ignore')

Instructions for updating:
non-resource variables are not supported in the long term


In [4]:
print(f'Using Device: {tf.config.list_physical_devices("GPU")}')

Using Device: [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [12]:
elmo = hub.Module("https://tfhub.dev/google/elmo/2", trainable=True)
batch_size = 1
max_len = 50

from cachetools import TTLCache,cached
cache = TTLCache(maxsize=100, ttl=21600)

# @cached(cache)
# def ElmoEmbedding(x):
#     return elmo(tf.reshape(tf.cast(x,tf.string),[-1]),signature='default',as_dict=True)['elmo']
@cached(cache)
def ElmoEmbedding(x):
    return elmo(inputs={
                            "tokens": tf.squeeze(tf.cast(x, tf.string)),
                            "sequence_len": tf.constant(batch_size*[max_len])
                      },
                      signature="tokens",
                      as_dict=True)["elmo"]

In [13]:
## Model Architecture

# input text is a sequence of 50 tokens
def get_model(max_len=50):
    input_text = tf.keras.layers.Input(shape=(max_len,), dtype='string',name='input_text')
    embedding = tf.keras.layers.Lambda(ElmoEmbedding,output_shape = (None,max_len,1024), name = 'elmo_embedding')(input_text)

    x = tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(512,return_sequences=True,recurrent_dropout=0.2,dropout=0.2))(embedding)
    x_rnn = tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(512,return_sequences=True,recurrent_dropout=0.2,dropout=0.2))(x)

    #   residual connection
    x = tf.keras.layers.Add()([x,x_rnn])

    out = tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(17,activation='softmax'))(x)

    model =tf.keras.models.Model(input_text,out)
    model.compile(optimizer='adam',loss='sparse_categorical_crossentropy',metrics=['accuracy'])

    return model

In [14]:
import numpy as np
with tf.compat.v1.Session() as sess:
    sess.run(tf.compat.v1.global_variables_initializer())
    sess.run(tf.compat.v1.tables_initializer())
    model = get_model()
    model.load_weights('../Models/NER_ELMo_biLSTM.h5')
    text = convert_sentence_for_inference('Joe Biden, the new president elect of the United States will take the office on 9th November')
    X_tr_arr = np.array(text)
    X_tr_t = X_tr_arr.reshape(1,50)
    test_pred = model.predict(X_tr_t, verbose=1,batch_size=32)

INFO:tensorflow:Saver not created because there are no variables in the graph to restore
INFO:tensorflow:Saver not created because there are no variables in the graph to restore


InvalidArgumentError: 2 root error(s) found.
  (0) Invalid argument: slice index 1 of dimension 0 out of bounds.
	 [[{{node elmo_embedding_3/module_2_apply_tokens/strided_slice_1}}]]
	 [[time_distributed_3/Reshape_1/_491]]
  (1) Invalid argument: slice index 1 of dimension 0 out of bounds.
	 [[{{node elmo_embedding_3/module_2_apply_tokens/strided_slice_1}}]]
0 successful operations.
0 derived errors ignored.

In [None]:
 text = convert_sentence_for_inference('Joe Biden, the new president elect of the United States will take the office on 9th November')

In [51]:
text = np.array(text)

In [52]:
text.reshape(1,50)

array([['Joe', 'Biden,', 'the', 'new', 'president', 'elect', 'of', 'the',
        'United', 'States', 'will', 'take', 'the', 'office', 'on', '9th',
        'November', '__PAD__', '__PAD__', '__PAD__', '__PAD__',
        '__PAD__', '__PAD__', '__PAD__', '__PAD__', '__PAD__', '__PAD__',
        '__PAD__', '__PAD__', '__PAD__', '__PAD__', '__PAD__', '__PAD__',
        '__PAD__', '__PAD__', '__PAD__', '__PAD__', '__PAD__', '__PAD__',
        '__PAD__', '__PAD__', '__PAD__', '__PAD__', '__PAD__', '__PAD__',
        '__PAD__', '__PAD__', '__PAD__', '__PAD__', '__PAD__']],
      dtype='<U9')