In [1]:
# These are the core modules we'll be using later. Make sure you can import them
# before proceeding further. (Section-specific modules such as tensorflow_hub
# are imported in the cells that use them.)
import numpy as np
import os
import time
import random
import tensorflow as tf

# Set this before any GPU work is done: per the TensorFlow GPU guide, it asks TF
# to grow its GPU memory allocation on demand instead of reserving it all up front.
%env TF_FORCE_GPU_ALLOW_GROWTH=true

env: TF_FORCE_GPU_ALLOW_GROWTH=true


## Using pre-trained ELMo Model

### Downloading the ELMo Model from TFHub

In [2]:
import tensorflow_hub as hub
import tensorflow.keras.backend as K

# Reset Keras' global state before building the layer.
K.clear_session()

# The "tokens" signature is fed the {"tokens", "sequence_len"} dict built by
# format_text_for_elmo; signature_outputs_as_dict=True makes the layer return
# a dict of named outputs rather than a single tensor.
ELMO_HUB_URL = "https://tfhub.dev/google/elmo/3"
elmo_layer = hub.KerasLayer(
    ELMO_HUB_URL,
    signature="tokens",
    signature_outputs_as_dict=True,
)

### Formatting the input for ELMo

In [3]:
def format_text_for_elmo(texts, lower=True, split=" "):
    """Tokenize a batch of texts and pack them into the input dict for ELMo.

    Each text is split into word tokens with Keras' `text_to_word_sequence`.
    Shorter token lists are right-padded with empty strings so the batch forms
    a rectangular (num_texts, max_len) array.

    Args:
        texts: Iterable of strings to tokenize. May be empty.
        lower: Forwarded to `text_to_word_sequence`; lowercases the text when True.
        split: Forwarded to `text_to_word_sequence`; the token separator.

    Returns:
        A dict with two entries:
          "tokens": tf.string tensor of shape (num_texts, max_len) holding the
              padded tokens.
          "sequence_len": tf.int32 tensor of shape (num_texts,) holding the
              unpadded token count of each text.
    """
    token_inputs = [
        tf.keras.preprocessing.text.text_to_word_sequence(text, lower=lower, split=split)
        for text in texts
    ]
    token_lengths = [len(tokens) for tokens in token_inputs]

    # default=0 keeps an empty batch from raising inside max().
    max_len = max(token_lengths, default=0)

    # Right-pad every row with "" so all rows share the same length.
    padded_inputs = [tokens + [""] * (max_len - len(tokens)) for tokens in token_inputs]

    # Pin dtype (and the rank-2 shape of the tokens) explicitly. Left to
    # inference, tf.constant falls back to float32 when there are no tokens at
    # all (empty batch, or only empty texts), which is not a valid token input.
    return {
        "tokens": tf.constant(
            padded_inputs, dtype=tf.string, shape=(len(padded_inputs), max_len)
        ),
        "sequence_len": tf.constant(token_lengths, dtype=tf.int32),
    }


# Quick sanity check: two texts of different lengths, so the padding is visible.
demo_inputs = format_text_for_elmo(["the cat sat on the mat", "the mat sat"])
print(demo_inputs)

{'tokens': <tf.Tensor: shape=(2, 6), dtype=string, numpy=
array([[b'the', b'cat', b'sat', b'on', b'the', b'mat'],
       [b'the', b'mat', b'sat', b'', b'', b'']], dtype=object)>, 'sequence_len': <tf.Tensor: shape=(2,), dtype=int32, numpy=array([6, 3], dtype=int32)>}


In [5]:
# One sentence each from 001.txt - 005.txt in bbc/business
# (presumably the articles' headlines).
bbc_business_texts = [
    "Ad sales boost Time Warner profit",
    "Dollar gains on Greenspan speech",
    "Yukos unit buyer faces loan claim",
    "High fuel prices hit BA's profits",
    "Pernod takeover talk lifts Domecq",
]
elmo_inputs = format_text_for_elmo(bbc_business_texts)

# Run the batch through ELMo; the layer returns a dict of named output tensors.
elmo_result = elmo_layer(elmo_inputs)

# Report the shape of every output the layer produced.
for output_name, output_tensor in elmo_result.items():
    print("Tensor under key={} is a {} shaped Tensor".format(output_name, output_tensor.shape))

Tensor under key=sequence_len is a (5,) shaped Tensor
Tensor under key=word_emb is a (5, 6, 512) shaped Tensor
Tensor under key=lstm_outputs2 is a (5, 6, 1024) shaped Tensor
Tensor under key=elmo is a (5, 6, 1024) shaped Tensor
Tensor under key=default is a (5, 1024) shaped Tensor
Tensor under key=lstm_outputs1 is a (5, 6, 1024) shaped Tensor


## Using pre-trained FastText