In [2]:
from allennlp.modules.elmo import Elmo, batch_to_ids

from allennlp.commands.elmo import ElmoEmbedder

In [None]:
# Locations of the pretrained ELMo files for the
# 2x4096_512_2048cnn_2xhighway configuration (options JSON + HDF5 weights).
# Both are remote URLs, so the first use triggers a download.
options_file = (
    "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/"
    "2x4096_512_2048cnn_2xhighway/"
    "elmo_2x4096_512_2048cnn_2xhighway_options.json"
)
weight_file = (
    "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/"
    "2x4096_512_2048cnn_2xhighway/"
    "elmo_2x4096_512_2048cnn_2xhighway_weights.hdf5"
)

In [1]:
# Adapted from the AllenNLP ELMo tutorial:
# https://github.com/allenai/allennlp/blob/master/tutorials/how_to/elmo.md

# Request two output representations per token. Each representation is a
# linearly weighted combination of ELMo's 3 layers (the character CNN and
# the outputs of the two BiLSTM layers).
elmo = Elmo(
    options_file,
    weight_file,
    num_output_representations=2,
    dropout=0,
)

# A batch of two pre-tokenized sentences of different lengths;
# batch_to_ids turns the tokens into character ids for the model.
sentences = [
    ['Test', 'sheep', '.'],
    ['Another', '.'],
]
character_ids = batch_to_ids(sentences)

embeddings = elmo(character_ids)

Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex.


100%|██████████| 336/336 [00:00<00:00, 84866.08B/s]
100%|██████████| 374434792/374434792 [08:56<00:00, 698392.55B/s] 


{'elmo_representations': [tensor([[[-0.0471, -0.3260, -0.6423,  ..., -0.0660,  0.3173,  0.3172],
           [ 0.6056, -0.1005,  0.1127,  ...,  0.5817,  0.3201,  0.7672],
           [-0.4786, -0.4143, -0.6049,  ..., -0.0803,  0.0361,  0.1128]],
  
          [[ 0.2603, -0.4437,  0.2726,  ..., -0.0830, -0.1522, -0.1361],
           [-0.7772, -0.4294, -0.2651,  ..., -0.0803,  0.0361,  0.1128],
           [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000]]],
         grad_fn=<CopySlices>),
  tensor([[[-0.0471, -0.3260, -0.6423,  ..., -0.0660,  0.3173,  0.3172],
           [ 0.6056, -0.1005,  0.1127,  ...,  0.5817,  0.3201,  0.7672],
           [-0.4786, -0.4143, -0.6049,  ..., -0.0803,  0.0361,  0.1128]],
  
          [[ 0.2603, -0.4437,  0.2726,  ..., -0.0830, -0.1522, -0.1361],
           [-0.7772, -0.4294, -0.2651,  ..., -0.0803,  0.0361,  0.1128],
           [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000]]],
         grad_fn=<CopySlices>)],
 'mask': tensor([[1, 1,

In [32]:
# embeddings['elmo_representations'] is a list of two tensors, one per
# output representation requested when the Elmo module was built. Each
# tensor is a weighted combination of the ELMo layers and has shape
# (2, 3, 1024).
#   2    - the batch size
#   3    - the sequence length of the batch (the longest sentence; the
#          shorter sentence shows up as an all-zero last row in the output)
#   1024 - the length of each ELMo vector

embeddings["elmo_representations"]

[tensor([[[-0.0471, -0.3260, -0.6423,  ..., -0.0660,  0.3173,  0.3172],
          [ 0.6056, -0.1005,  0.1127,  ...,  0.5817,  0.3201,  0.7672],
          [-0.4786, -0.4143, -0.6049,  ..., -0.0803,  0.0361,  0.1128]],
 
         [[ 0.2603, -0.4437,  0.2726,  ..., -0.0830, -0.1522, -0.1361],
          [-0.7772, -0.4294, -0.2651,  ..., -0.0803,  0.0361,  0.1128],
          [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000]]],
        grad_fn=<CopySlices>),
 tensor([[[-0.0471, -0.3260, -0.6423,  ..., -0.0660,  0.3173,  0.3172],
          [ 0.6056, -0.1005,  0.1127,  ...,  0.5817,  0.3201,  0.7672],
          [-0.4786, -0.4143, -0.6049,  ..., -0.0803,  0.0361,  0.1128]],
 
         [[ 0.2603, -0.4437,  0.2726,  ..., -0.0830, -0.1522, -0.1361],
          [-0.7772, -0.4294, -0.2651,  ..., -0.0803,  0.0361,  0.1128],
          [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000]]],
        grad_fn=<CopySlices>)]

In [None]:
# Second example: the ElmoEmbedder helper class.
# It is constructed without arguments here, so no options/weight files are
# passed explicitly and the library's defaults are used.

ee = ElmoEmbedder()

In [30]:
# The ElmoEmbedder class returns three vectors for each word, each vector corresponding to a layer in the
# ELMo LSTM output. The first layer corresponds to the context insensitive token representation, followed
# by the two LSTM layers. See the ELMo paper or follow up work at EMNLP 2018 for a description of what types
# of information are captured in each layer.

# Embed the sentence once and reuse the result: the embedding does not depend on the loop
# variable, so calling embed_sentence inside the loop would re-run the model three times.
flight_vectors = ee.embed_sentence(['i', 'am', 'flying', 'to', 'new', 'york'])

# NOTE(review): per the AllenNLP docs the first axis of the result indexes the layer and the
# second the token, so [0][i] prints the first-layer vectors of the first three tokens rather
# than the three layer vectors of one token. If the latter was intended, use
# flight_vectors[i][0] -- confirm intent before changing, as it alters the printed output.
for i in range(3):
    print(flight_vectors[0][i])

[-0.3816778   0.2601955  -0.27387285 ...  0.21414772 -0.31411317
 -0.46676502]
[ 0.47849342 -0.40329033 -0.27019072 ...  0.47313985  0.06390661
 -0.24718228]
[ 0.49110138 -0.40165097  0.26552275 ... -0.39823037  0.06944599
  0.5665358 ]


In [31]:
# Embed the same tokens as the first sentence of the Elmo-module example and
# display the raw result: a float32 NumPy array (see the output below).
ee.embed_sentence(["Test", "sheep", "."])

array([[[-0.8134119 , -0.63056827,  0.5182505 , ..., -0.70603675,
          0.56682426,  0.01298006],
        [-0.5621832 , -0.27352428, -0.07766137, ...,  1.0250176 ,
         -0.20382307, -0.15430188],
        [-0.88715035, -0.20039932, -1.060133  , ..., -0.2655458 ,
          0.21145992,  0.19772941]],

       [[ 0.25827685, -0.19399653, -1.1400886 , ...,  0.09824269,
         -0.18094376,  0.0101051 ],
        [ 1.0442259 , -0.48662874, -0.19318259, ...,  0.39772868,
          0.4329107 ,  0.5663998 ],
        [-0.36431175, -0.6467032 ,  0.11072873, ..., -0.02805123,
         -0.01774108,  0.05961663]],

       [[ 0.19796309, -0.46005875, -1.8209531 , ...,  0.4071253 ,
          0.54252553,  0.922896  ],
        [ 1.4947687 ,  0.2033867 ,  0.20443833, ...,  0.35850075,
          0.73835933,  1.8798531 ],
        [-0.26518375, -0.66888607, -1.253718  , ..., -0.00874215,
         -0.08153988,  0.07605419]]], dtype=float32)