In [18]:
from __future__ import print_function
from keras.models import Sequential, Model
from keras.layers import Dense, Activation, Dropout
from keras.layers import LSTM, GRU
from keras.layers import merge, Input
from keras.layers.wrappers import TimeDistributed
from keras.optimizers import Nadam, RMSprop
from keras.utils.data_utils import get_file
from datetime import datetime
import numpy as np
import rethinkdb as r
import random
import sys

In [19]:
# Connect to the RethinkDB server on localhost (database "robot_does_x")
# and call .repl() on the resulting connection.
r.connect("localhost", 28015, db="robot_does_x").repl()

# Where the most recent model weights are stored
file_path = "model_latest.hdf5"

# Identify this run: a fixed experiment prefix followed by the start time
started_at = datetime.now()
instance_id = 'single_lstm_rmsprop_60_' + started_at.strftime('%Y-%m-%d %H:%M:%S')
print("instance_id: ", instance_id)

# Load the corpus and lowercase it. The context manager closes the file
# handle as soon as the text has been read, instead of leaving the open
# handle for the garbage collector to clean up.
with open('../data/test.txt') as corpus_file:
    text = corpus_file.read().lower()
print('corpus length:', len(text))

# Vocabulary: every distinct character in the corpus, in sorted order
chars = sorted(set(text))
print('total chars:', len(chars))
# Lookup tables between a character and its position in `chars`
char_indices = {c: i for i, c in enumerate(chars)}
indices_char = {i: c for i, c in enumerate(chars)}

# Slice the corpus into overlapping windows of `maxlen` characters, advancing
# `step` characters between windows. Each window is paired with the single
# character that immediately follows it in the text.
maxlen = 40
step = 3
window_starts = range(0, len(text) - maxlen, step)
sentences = [text[start: start + maxlen] for start in window_starts]
next_chars = [text[start + maxlen] for start in window_starts]
print('nb sequences:', len(sentences))

print('Vectorization...')
# One-hot encode inputs and targets as (sequence, timestep, character) arrays.
# The builtin `bool` is used instead of `np.bool`: the latter was only an
# alias for the builtin, was deprecated, and is removed in NumPy >= 1.24.
X = np.zeros((len(sentences), maxlen, len(chars)), dtype=bool)
y = np.zeros((len(sentences), maxlen, len(chars)), dtype=bool)
for i, sentence in enumerate(sentences):
    # NOTE(review): every timestep of sequence i receives the same target,
    # namely the character that follows the whole window. Since targets are
    # stored per timestep, a shifted target (the character at t + 1) may have
    # been intended — confirm before relying on the earlier timesteps.
    target_index = char_indices[next_chars[i]]  # same for all timesteps of i
    for t, char in enumerate(sentence):
        X[i, t, char_indices[char]] = 1
        y[i, t, target_index] = 1

instance_id:  single_lstm_rmsprop_60_2016-12-21 14:46:15
corpus length: 384
total chars: 31
nb sequences: 115
Vectorization...


In [20]:
print('Build model...')
# Two stacked LSTMs that emit an output at every timestep, followed by a
# per-timestep dense projection back to vocabulary size.
input_1 = Input(shape=(maxlen, len(chars)))
lstm_1 = LSTM(128, return_sequences=True, init='orthogonal')(input_1)
lstm_2 = LSTM(128, return_sequences=True, init='orthogonal')(lstm_1)
timedistributed_1 = TimeDistributed(Dense(len(chars), init='orthogonal'))(lstm_2)
# Skip connection: sum the one-hot input with the projection (both carry
# len(chars) features per timestep), then apply a per-timestep softmax layer.
merge_1 = merge([input_1, timedistributed_1], mode='sum')
timedistributed_2 = TimeDistributed(Dense(len(chars), activation='softmax', init='orthogonal'))(merge_1)
model = Model(input=input_1, output=timedistributed_2)

optimizer = RMSprop(lr=0.01)
model.compile(loss='categorical_crossentropy', optimizer=optimizer)

# Print model summary. summary() writes the table itself and returns None,
# so wrapping it in print() would emit a stray "None" line after the table.
model.summary()

# Train for a single epoch on the vectorized corpus
model.fit(X, y, nb_epoch=1)

Build model...
____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
input_1 (InputLayer)             (None, 40, 31)        0                                            
____________________________________________________________________________________________________
lstm_7 (LSTM)                    (None, 40, 128)       81920       input_1[0][0]                    
____________________________________________________________________________________________________
lstm_8 (LSTM)                    (None, 40, 128)       131584      lstm_7[0][0]                     
____________________________________________________________________________________________________
timedistributed_7 (TimeDistribute(None, 40, 31)        3999        lstm_8[0][0]                     
____________________________________________________________________________

<keras.callbacks.History at 0x113604ba8>

In [28]:

# Use the last training window as the seed. It is read from `sentences`
# explicitly rather than from the `sentence` variable left bound by the
# vectorization loop, so this cell no longer depends on leaked loop state.
seed_sentence = sentences[-1]

# One-hot encode the seed as a batch containing a single sequence
x = np.zeros((1, maxlen, len(chars)))
for t, char in enumerate(seed_sentence):
    x[0, t, char_indices[char]] = 1.

print(x.shape)
# Keep only the output for the final timestep of the single sequence
preds = model.predict(x, verbose=0)[0][-1]
print(preds.shape)

(1, 40, 31)
(31,)
