In [1]:
from __future__ import print_function
import numpy as np

from keras.preprocessing import sequence
from keras.models import Sequential
from keras.layers import Dense, Dropout, Embedding, LSTM, Bidirectional
from keras.datasets import imdb


# Vocabulary size: only the `max_features` most frequent words are kept
# when the IMDB reviews are loaded.
max_features = 20000
# Target sequence length handed to pad_sequences; every review ends up
# with exactly this many token ids (see the printed shapes below).
maxlen = 100
batch_size = 32

print('Loading data...')
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)
print(len(x_train), 'train sequences')
print(len(x_test), 'test sequences')

print('Pad sequences (samples x time)')
# Bring both splits to a fixed width of `maxlen` in one assignment.
x_train, x_test = (
    sequence.pad_sequences(split, maxlen=maxlen)
    for split in (x_train, x_test)
)
print('x_train shape:', x_train.shape)
print('x_test shape:', x_test.shape)
# Make sure the labels are plain NumPy arrays before training.
y_train, y_test = np.array(y_train), np.array(y_test)

# Binary sentiment classifier, layers applied in order:
#   token ids -> 128-d embedding -> Bidirectional(LSTM(64)) -> dropout -> sigmoid
model = Sequential([
    Embedding(max_features, 128, input_length=maxlen),
    Bidirectional(LSTM(64)),
    Dropout(0.5),
    Dense(1, activation='sigmoid'),
])

# try using different optimizers and different optimizer configs
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

print('Train...')
# `validation_data` is documented as a tuple `(x_val, y_val)`. A list is not
# unpacked the same way by every Keras / tf.keras release (it can be taken
# as a multi-input `x` with no targets), so pass a tuple explicitly.
# NOTE: the test split is reused as validation data here, so the reported
# val_* metrics are not an untouched held-out estimate.
# The returned History object is kept in `history` so the per-epoch metrics
# stay available and the cell does not end with a bare object repr.
history = model.fit(x_train, y_train,
                    batch_size=batch_size,
                    epochs=4,
                    validation_data=(x_test, y_test))

Using TensorFlow backend.


Loading data...
Downloading data from https://s3.amazonaws.com/text-datasets/imdb.npz
25000 train sequences
25000 test sequences
Pad sequences (samples x time)
x_train shape: (25000, 100)
x_test shape: (25000, 100)
Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Train...
Instructions for updating:
Use tf.cast instead.
Train on 25000 samples, validate on 25000 samples
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


<keras.callbacks.History at 0x253455b1cc0>

In [23]:
from elmoformanylangs import Embedder

# Load the pretrained ELMo model from its local directory.
# NOTE(review): the path is relative to the notebook's working directory;
# "179" is presumably the model id of the downloaded package - confirm.
e = Embedder("../../Lib/ELMoForManyLangs-master/179/")

# Pre-tokenised input: one list of tokens per sentence. The first two
# sentences carry explicit <s> / </s> boundary tokens, the third repeats
# the second without them.
sents = [
    ['<s>', '今', '天', '</s>'],
    ['<s>', '今', '早', '</s>'],
    ['今', '早'],
    ['潮水', '退', '了', '就', '知道', '谁', '没', '穿', '裤子'],
]

# One embedding result per input sentence, in the same order as `sents`.
ret = e.sents2elmo(sents)

2019-02-23 16:33:02,338 INFO: char embedding size: 15889
2019-02-23 16:33:03,203 INFO: word embedding size: 140384
2019-02-23 16:33:11,821 INFO: Model(
  (token_embedder): ConvTokenEmbedder(
    (word_emb_layer): EmbeddingLayer(
      (embedding): Embedding(140384, 100, padding_idx=3)
    )
    (char_emb_layer): EmbeddingLayer(
      (embedding): Embedding(15889, 50, padding_idx=15886)
    )
    (convolutions): ModuleList(
      (0): Conv1d(50, 32, kernel_size=(1,), stride=(1,))
      (1): Conv1d(50, 32, kernel_size=(2,), stride=(1,))
      (2): Conv1d(50, 64, kernel_size=(3,), stride=(1,))
      (3): Conv1d(50, 128, kernel_size=(4,), stride=(1,))
      (4): Conv1d(50, 256, kernel_size=(5,), stride=(1,))
      (5): Conv1d(50, 512, kernel_size=(6,), stride=(1,))
      (6): Conv1d(50, 1024, kernel_size=(7,), stride=(1,))
    )
    (highways): Highway(
      (_layers): ModuleList(
        (0): Linear(in_features=2048, out_features=4096, bias=True)
        (1): Linear(in_features=2048, out

In [25]:
# Show the embeddings of the first three sentences for a visual comparison
# (the fourth sentence is intentionally not printed here).
print("s1:", ret[0])
print("s2:", ret[1])
print("s3:", ret[2])

# Last expression of the cell: shape of the first sentence's embedding
# matrix (rows correspond to its tokens).
ret[0].shape

s1: [[-0.20985287 -0.08913825  0.1913069  ... -0.01044362  0.0020753
  -0.09127604]
 [-0.2858292   0.3970922   0.5750546  ... -0.04289003  0.1665876
  -0.33300433]
 [ 0.03281761  0.26703635  0.28829154 ... -0.5020779   0.34220958
  -0.20063843]
 [ 0.060148    0.02095515  0.03092937 ... -0.15841854 -0.14785396
  -0.11840415]]
s2: [[-0.20985287 -0.08913825  0.1913069  ...  0.04952222 -0.03935048
  -0.1509148 ]
 [-0.2858292   0.3970922   0.5750546  ...  0.00409336  0.34407106
   0.00501234]
 [ 0.2967701   0.07353837  0.07266765 ... -0.20391579  0.35103118
  -0.0575985 ]
 [-0.0067278  -0.08645117  0.06460712 ... -0.15841854 -0.14785396
  -0.11840415]]
s3: [[-0.32071355  0.37477794  0.44618526 ...  0.02332092  0.24520528
  -0.24297166]
 [ 0.31469     0.0853551  -0.02907125 ... -0.00058746  0.31512263
  -0.00843669]]


(4, 1024)