In [10]:
import numpy as np
from keras.utils.np_utils import to_categorical
from keras.layers import Input, Dense, Embedding, Masking, Bidirectional
from keras.layers.recurrent import LSTM
from keras.models import Model
from keras.layers.wrappers import TimeDistributed
import keras

# Toy data

In [2]:
samples = [
    (u"autót", "BEEEB"),
    (u"autót", "BEEEB"),
    (u"autót", "BEEEB"),
    (u"autókat", "BEEEBEB"),
]

## Featurizing the toy dataset

In [3]:
maxlen = max(len(s[0]) for s in samples)
vocab_x = {'PAD': 0}
vocab_y = {'PAD': 0}

def pad_sample(sample):
    return [0] * (maxlen - len(sample)) + sample

data_x = [pad_sample([vocab_x.setdefault(c, len(vocab_x)) for c in sample[0]]) for sample in samples]
data_y = [to_categorical(pad_sample([vocab_y.setdefault(c, len(vocab_y)) for c in sample[1]])) for sample in samples]

data_x = np.array(data_x)
data_y = np.array(data_y)

# Model parameters

In [4]:
timesteps = maxlen
batch_size = 4
vocab_size = len(vocab_x)
embedding_size = 10
seq_size = 50
mlp_size = len(vocab_y)

# Model definition

In [5]:
xin = Input(batch_shape=(batch_size, timesteps), dtype='int32')
xemb = Embedding(vocab_size, embedding_size)(xin)
xemb = Masking(mask_value=0.)(xemb)
seq = Bidirectional(LSTM(seq_size, return_sequences=True))(xemb)
mlp = TimeDistributed(Dense(mlp_size, activation='softmax'))(seq)
model = Model(inputs=xin, outputs=mlp)
model.compile(optimizer='Adam', loss='categorical_crossentropy')

# Training and testing

In [6]:
model.fit(data_x, data_y, epochs=500, verbose=0)

<keras.callbacks.History at 0x7fd4f8bbcfd0>

In [8]:
model.evaluate(data_x, data_y)



0.00095214252360165119

In [9]:
model.save('/tmp/toy_model')

In [12]:
m = keras.models.load_model('/tmp/toy_model')
m.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (4, 7)                    0         
_________________________________________________________________
embedding_1 (Embedding)      (4, 7, 10)                60        
_________________________________________________________________
masking_1 (Masking)          (4, 7, 10)                0         
_________________________________________________________________
bidirectional_1 (Bidirection (4, 7, 100)               24400     
_________________________________________________________________
time_distributed_1 (TimeDist (4, 7, 3)                 303       
Total params: 24,763
Trainable params: 24,763
Non-trainable params: 0
_________________________________________________________________


In [13]:
m.predict(data_x)

array([[[  9.99483585e-01,   4.77622438e-04,   3.86894644e-05],
        [  9.99053895e-01,   8.44825641e-04,   1.01257952e-04],
        [  4.58437018e-04,   9.99414802e-01,   1.26775732e-04],
        [  8.96619881e-08,   1.07046770e-04,   9.99892831e-01],
        [  3.00282021e-11,   3.59821314e-07,   9.99999642e-01],
        [  2.60060840e-09,   3.54139105e-04,   9.99645829e-01],
        [  2.98374744e-07,   9.98308778e-01,   1.69083721e-03]],

       [[  9.99483585e-01,   4.77622438e-04,   3.86894644e-05],
        [  9.99053895e-01,   8.44825641e-04,   1.01257952e-04],
        [  4.58437018e-04,   9.99414802e-01,   1.26775732e-04],
        [  8.96619881e-08,   1.07046770e-04,   9.99892831e-01],
        [  3.00282021e-11,   3.59821314e-07,   9.99999642e-01],
        [  2.60060840e-09,   3.54139105e-04,   9.99645829e-01],
        [  2.98374744e-07,   9.98308778e-01,   1.69083721e-03]],

       [[  9.99483585e-01,   4.77622438e-04,   3.86894644e-05],
        [  9.99053895e-01,   8.44825