In [1]:
import os

In [2]:
# Restrict this process to GPU 1. The variable CUDA reads is
# CUDA_VISIBLE_DEVICES (plural); the singular spelling is silently ignored,
# so the restriction never took effect. It must be set before any library
# that initializes CUDA is imported, which is why it precedes the imports below.
os.environ['CUDA_VISIBLE_DEVICES'] = "1"


from allennlp.commands.elmo import ElmoEmbedder
from github_search import lm_utils

In [3]:
%%time
# Build an ElmoEmbedder with its default arguments. The cell is timed because
# construction is slow (about 20 s wall time in the recorded run); presumably
# it loads pretrained weights -- TODO confirm against the allennlp docs.
elmo = ElmoEmbedder()

CPU times: user 11.5 s, sys: 247 ms, total: 11.7 s
Wall time: 19.8 s


In [4]:
# Embed one tokenized sentence and sanity-check the shape of the result.
tokens = ["I", "ate", "an", "apple", "for", "breakfast"]
vectors = elmo.embed_sentence(tokens)

assert len(vectors) == 3  # one for each layer in the ELMo output
assert len(vectors[0]) == len(tokens)  # the vector elements correspond with the input tokens

# Import the submodule explicitly: a bare `import scipy` does not guarantee that
# `scipy.spatial.distance` is loaded, so the attribute lookup below could fail
# on a fresh kernel unless another library happened to import it first.
# This form still binds the name `scipy`, as the original import did.
import scipy.spatial.distance
vectors2 = elmo.embed_sentence(["I", "ate", "a", "carrot", "for", "breakfast"])
# Index 2 selects the last of the three layers; index 3 is the fourth token
# ("apple" in the first sentence, "carrot" in the second).
scipy.spatial.distance.cosine(vectors[2][3], vectors2[2][3])  # cosine distance between "apple" and "carrot" in the last layer

0.18020617961883545

In [5]:
from allennlp.modules.elmo import Elmo, batch_to_ids

# URL of the ELMo options file (JSON) passed to the Elmo constructor.
options_file = (
    "https://allennlp.s3.amazonaws.com/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_options.json"
)
# URL of the matching ELMo weights file (HDF5).
weight_file = (
    "https://allennlp.s3.amazonaws.com/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_weights.hdf5"
)

# Compute two different representations for each token.
# Each representation is a linear weighted combination of the
# 3 layers in ELMo (i.e., the char CNN and the outputs of the two BiLSTMs).

In [6]:
%%time
elmo = Elmo(options_file, weight_file, 2, dropout=0)

CPU times: user 11.5 s, sys: 291 ms, total: 11.8 s
Wall time: 19.9 s


In [7]:
# A small batch of pre-tokenized sentences of different lengths, converted to
# character ids with batch_to_ids so it can be fed to the Elmo module.
sentences = [
    ['First', 'sentence', '.'],
    ['Another', '.'],
    ['Yet', 'another', 'sentence', '.'],
]
character_ids = batch_to_ids(sentences)

In [8]:
# Run the Elmo module on the character-id batch and keep the result.
# NOTE(review): presumably a dict with 'elmo_representations' and 'mask' keys,
# per the allennlp docs -- confirm for the installed version.
embeddings = elmo(character_ids)

In [9]:
# Reach through two levels of private attributes to get the inner LSTM object.
# NOTE(review): underscore-prefixed attributes are not public API and may
# change between allennlp versions.
elmo_lstm = elmo._elmo_lstm._elmo_lstm

In [24]:
# Display the inner LSTM's private `_states` attribute; in the recorded output
# it is a tuple of two tensors.
# NOTE(review): presumably the hidden and memory states kept from the last
# forward pass -- confirm.
elmo_lstm._states

(tensor([[[-0.4064, -1.2748, -1.7271,  ...,  1.0071,  1.3740,  0.8320],
          [-0.2691, -1.1981, -1.4729,  ...,  0.9367,  1.4424,  0.8069],
          [-0.3681, -1.2980, -1.8114,  ...,  0.9643,  1.2113,  1.0951]],
 
         [[-0.6784, -0.6880, -1.6490,  ..., -0.2163, -1.6589, -0.3530],
          [-0.8312, -1.0318, -1.7815,  ...,  0.2974, -1.2278, -0.3933],
          [-0.4312, -0.8275, -1.4842,  ..., -0.1300, -1.5446, -0.6574]]]),
 tensor([[[ 2.0361e-10,  3.6427e-06,  4.3772e-11,  ..., -1.3933e-03,
           -4.7057e-06, -9.9355e-01],
          [ 7.2584e-11,  2.0872e-05,  3.8905e-11,  ..., -3.1245e-03,
           -1.6488e-06, -9.9408e-01],
          [ 4.9811e-11,  2.0812e-06,  4.4098e-11,  ..., -1.3348e-03,
           -6.3097e-06, -9.9413e-01]],
 
         [[-4.5433e-01,  2.3301e-04,  2.6471e-01,  ...,  8.6803e-02,
            6.6469e-03, -2.1674e-01],
          [-5.3327e-01, -9.4982e-04,  4.9209e-01,  ...,  3.0989e-01,
            1.8197e-01, -2.6571e-01],
          [-3.9661e-01, 

In [11]:
# Wrap the Elmo module in the project's AllenELMoWrapper; no tokenizer is
# passed, so the wrapper's default tokenizer is used.
elmo_wrapper = lm_utils.AllenELMoWrapper(elmo)

In [12]:
# Show the signature and source of AllenELMoWrapper.__init__ (IPython `??`).
# NOTE(review): interactive debugging aid; consider removing before sharing.
??lm_utils.AllenELMoWrapper.__init__

Signature:
lm_utils.AllenELMoWrapper.__init__(
    self,
    elmo,
    tokenizer=<function AllenELMoWrapper.<lambda> at 0x7f05286b82f0>,
) -> None
Docstring: Initialize self.  See help(type(self)) for accurate signature.
Source:   
def __init__(self, elmo, tokenizer=attr_dict['tokenizer'].default):
    self.elmo = elmo
    self

In [13]:
# Join each token list back into a single space-separated string.
texts = list(map(' '.join, sentences))

In [20]:
# Run the wrapper on the joined sentences and display the result. In the
# recorded output the visible values match those shown for
# `elmo_lstm._states`, so this presumably returns the LSTM's final states for
# the batch -- confirm against lm_utils.
elmo_wrapper.get_last_hiddens_batch(texts)

(tensor([[[-0.4064, -1.2748, -1.7271,  ...,  1.0071,  1.3740,  0.8320],
          [-0.2691, -1.1981, -1.4729,  ...,  0.9367,  1.4424,  0.8069],
          [-0.3681, -1.2980, -1.8114,  ...,  0.9643,  1.2113,  1.0951]],
 
         [[-0.6784, -0.6880, -1.6490,  ..., -0.2163, -1.6589, -0.3530],
          [-0.8312, -1.0318, -1.7815,  ...,  0.2974, -1.2278, -0.3933],
          [-0.4312, -0.8275, -1.4842,  ..., -0.1300, -1.5446, -0.6574]]]),
 tensor([[[ 2.0361e-10,  3.6427e-06,  4.3772e-11,  ..., -1.3933e-03,
           -4.7057e-06, -9.9355e-01],
          [ 7.2584e-11,  2.0872e-05,  3.8905e-11,  ..., -3.1245e-03,
           -1.6488e-06, -9.9408e-01],
          [ 4.9811e-11,  2.0812e-06,  4.4098e-11,  ..., -1.3348e-03,
           -6.3097e-06, -9.9413e-01]],
 
         [[-4.5433e-01,  2.3301e-04,  2.6471e-01,  ...,  8.6803e-02,
            6.6469e-03, -2.1674e-01],
          [-5.3327e-01, -9.4982e-04,  4.9209e-01,  ...,  3.0989e-01,
            1.8197e-01, -2.6571e-01],
          [-3.9661e-01, 