In [1]:
from __future__ import print_function
import numpy as np

from keras.preprocessing import sequence
from keras.models import Sequential
from keras.layers import Dense, Dropout, Embedding, LSTM, Bidirectional
from keras.datasets import imdb


# Vocabulary cap: passed to the IMDB loader as `num_words` and used as the
# input dimension of the Embedding layer, so the two always agree.
max_features = 20000
# Every review is padded / cut to exactly this many tokens
# (among the top `max_features` most common words).
maxlen = 100
batch_size = 32

print('Loading data...')
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)
print(len(x_train), 'train sequences')
print(len(x_test), 'test sequences')

print('Pad sequences (samples x time)')
x_train = sequence.pad_sequences(x_train, maxlen=maxlen)
x_test = sequence.pad_sequences(x_test, maxlen=maxlen)
print('x_train shape:', x_train.shape)
print('x_test shape:', x_test.shape)
y_train = np.array(y_train)
y_test = np.array(y_test)

# Embedding -> bidirectional LSTM -> dropout -> single sigmoid unit
# (binary classifier over the padded token-id sequences).
model = Sequential()
model.add(Embedding(max_features, 128, input_length=maxlen))
model.add(Bidirectional(LSTM(64)))
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))

# try using different optimizers and different optimizer configs
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

print('Train...')
# `validation_data` is given as a tuple (x_val, y_val), the form documented by
# Keras; a list can be interpreted as a multi-input `x` by some Keras versions.
model.fit(x_train, y_train,
          batch_size=batch_size,
          epochs=4,
          validation_data=(x_test, y_test))

Using TensorFlow backend.


Loading data...
Downloading data from https://s3.amazonaws.com/text-datasets/imdb.npz
25000 train sequences
25000 test sequences
Pad sequences (samples x time)
x_train shape: (25000, 100)
x_test shape: (25000, 100)
Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Train...
Instructions for updating:
Use tf.cast instead.
Train on 25000 samples, validate on 25000 samples
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


<keras.callbacks.History at 0x253455b1cc0>

In [5]:
from elmoformanylangs import Embedder

# Load the ELMoForManyLangs model from its local directory.
# Only forward slashes are used: a backslash followed by "E" is an invalid
# escape sequence in a regular string literal and is not a path separator
# outside Windows.
e = Embedder("../Lib/ELMoForManyLangs-master/179/")

# Two pre-tokenized sentences; each inner list holds one sentence's tokens.
sents = [
    ['今', '天', '天气', '真', '好', '阿'],
    ['潮水', '退', '了', '就', '知道', '谁', '没', '穿', '裤子'],
]

# `ret` is a list with one embedding array per input sentence.
ret = e.sents2elmo(sents)

2019-02-22 18:36:00,243 INFO: char embedding size: 15889
2019-02-22 18:36:01,041 INFO: word embedding size: 140384
2019-02-22 18:36:08,988 INFO: Model(
  (token_embedder): ConvTokenEmbedder(
    (word_emb_layer): EmbeddingLayer(
      (embedding): Embedding(140384, 100, padding_idx=3)
    )
    (char_emb_layer): EmbeddingLayer(
      (embedding): Embedding(15889, 50, padding_idx=15886)
    )
    (convolutions): ModuleList(
      (0): Conv1d(50, 32, kernel_size=(1,), stride=(1,))
      (1): Conv1d(50, 32, kernel_size=(2,), stride=(1,))
      (2): Conv1d(50, 64, kernel_size=(3,), stride=(1,))
      (3): Conv1d(50, 128, kernel_size=(4,), stride=(1,))
      (4): Conv1d(50, 256, kernel_size=(5,), stride=(1,))
      (5): Conv1d(50, 512, kernel_size=(6,), stride=(1,))
      (6): Conv1d(50, 1024, kernel_size=(7,), stride=(1,))
    )
    (highways): Highway(
      (_layers): ModuleList(
        (0): Linear(in_features=2048, out_features=4096, bias=True)
        (1): Linear(in_features=2048, out

In [10]:
# Dump the list of per-sentence embedding arrays produced by sents2elmo.
print(ret)

# Trailing expression: the notebook displays the shape of the first
# sentence's embedding array.
ret[0].shape

[array([[-0.32071367,  0.37477803,  0.44618535, ..., -0.19431628,
         0.06710131, -0.34152016],
       [ 0.1147376 ,  0.33311912,  0.14604583, ..., -0.64240247,
         0.138606  , -0.23962545],
       [ 0.49329403, -0.05017295,  0.24366517, ..., -0.5028746 ,
         0.08223604, -0.27072644],
       [-0.32768497, -0.41654024,  0.03987445, ...,  0.3921158 ,
        -0.05817861,  0.05169688],
       [-0.12619333, -0.17445414, -0.15186244, ...,  0.04713022,
         0.09232438,  0.21923883],
       [ 0.0106816 , -0.46860933, -0.21014905, ..., -0.1560423 ,
        -0.46452352,  0.2652532 ]], dtype=float32), array([[ 0.26687804, -0.46576166, -0.0368228 , ..., -0.2980199 ,
         0.58324593, -0.00627147],
       [ 0.1143209 ,  0.14118378,  0.32629177, ..., -0.09103835,
         0.0233464 ,  0.00290355],
       [ 0.37816277, -0.2545798 , -0.20076199, ..., -0.49050307,
        -0.2362246 , -0.22558017],
       ...,
       [ 0.16999005,  0.8668627 ,  0.0985984 , ...,  0.11408808,
     

(6, 1024)