In [252]:
import numpy as np
from tensorflow import keras
from keras.models import Model
from keras.layers import Input, Dense
from keras.layers.recurrent import LSTM

In [253]:
# Experiment configuration.
num_texts = 10000   # number of sentence pairs taken from the start of the corpus
latent_dim = 256    # size of the LSTM hidden/cell state
epochs = 100        # training-length setting
batch_size = 64     # mini-batch size passed to model.fit

In [263]:
# Load tab-separated sentence pairs: column 0 is the source sentence and
# column 1 the target sentence.
inp_text, out_text = [], []
# Explicit UTF-8 so accented characters decode identically on every platform
# instead of depending on the locale's default encoding.
with open('spa.txt', encoding='utf-8') as f:
    for line in f:
        fields = line.rstrip('\n').split('\t')
        if len(fields) < 2:
            # Blank or malformed line: nothing to pair up.
            continue
        # Any columns after the first two (e.g. attribution) are ignored.
        inp_text.append(fields[0])
        # Every target keeps a trailing '\n'; it doubles as the
        # end-of-sequence character for the decoder.
        out_text.append(fields[1] + '\n')

In [264]:
# Peek at the first target sentence; the recorded output shows that the
# trailing newline read from the file is part of the string.
out_text[0]

'Ve.\n'

In [265]:
# Character vocabularies. Index 0 is always the tab character; the remaining
# entries are the distinct characters of the space-joined corpus, sorted.
inp_chars = ['\t'] + sorted(set(' '.join(inp_text)))
out_chars = ['\t'] + sorted(set(' '.join(out_text)))

In [266]:
# Vocabulary sizes (source, target), each counting the leading tab entry.
len(inp_chars), len(out_chars)

(91, 114)

In [267]:
# Longest source and target sentence (in characters) among the first
# `num_texts` pairs; these fix the time dimension of the one-hot tensors.
inp_max_len = max(map(len, inp_text[:num_texts]))
out_max_len = max(map(len, out_text[:num_texts]))

In [268]:
# Display the maximum source / target sentence lengths computed above.
inp_max_len, out_max_len

(17, 41)

In [269]:
# Character -> vocabulary index lookups used for one-hot encoding.
inp_c_ind = dict((ch, idx) for idx, ch in enumerate(inp_chars))
out_c_ind = dict((ch, idx) for idx, ch in enumerate(out_chars))

In [270]:
# One-hot tensors, laid out as (sample, time step, character index).
# float32 instead of NumPy's default float64: the arrays only ever hold 0/1
# and this halves the memory needed for the three large tensors.
# The decoder tensors have one extra time step for the leading start token.
inp_data = np.zeros((num_texts, inp_max_len, len(inp_chars)), dtype='float32')
out_data = np.zeros((num_texts, out_max_len+1, len(out_chars)), dtype='float32')
out_target = np.zeros((num_texts, out_max_len+1, len(out_chars)), dtype='float32')

In [271]:
# One-hot encode the first `num_texts` sentence pairs.
# Source sentences are left-aligned (character j goes to time step j); the
# previously computed but unused right-alignment offset has been removed.
for i, s in enumerate(inp_text[:num_texts]):
    for j, c in enumerate(s):
        inp_data[i, j, inp_c_ind[c]] = 1.
    # Decoder input starts with vocabulary index 0 (the tab start token).
    out_data[i, 0, 0] = 1.
    for j, c in enumerate(out_text[i]):
        # Teacher forcing: the decoder input is the target shifted right by
        # one step, so at step j it sees character j-1 and must predict j.
        out_data[i, j+1, out_c_ind[c]] = 1.
        out_target[i, j, out_c_ind[c]] = 1.

In [272]:
from keras.layers import Input, Dense
from keras.layers.recurrent import LSTM

In [273]:
# Symbolic inputs for the encoder (source one-hots) and the decoder
# (start-token-prefixed target one-hots, hence the extra time step).
enc_input_shape = (inp_max_len, len(inp_chars))
dec_input_shape = (out_max_len+1, len(out_chars))
enc_input = Input(shape=enc_input_shape)
dec_input = Input(shape=dec_input_shape)

In [274]:
# Encoder LSTM: return_state=True makes the call return the output together
# with the final hidden (h) and cell (c) states.
encoder = LSTM(units=latent_dim, return_state=True)
enc_output, enc_h, enc_c = encoder(enc_input)

In [275]:
# Decoder LSTM: emits an output at every time step and also returns its states.
decoder = LSTM(units=latent_dim, return_sequences=True, return_state=True)

In [276]:
# Run the decoder over the target input, starting from the encoder's final states.
enc_states = [enc_h, enc_c]
dec_output, dec_h, dec_c = decoder(dec_input, initial_state=enc_states)

In [277]:
# Softmax layer mapping each decoder output to a distribution over the
# target character vocabulary.
dec_dense = Dense(units=len(out_chars), activation='softmax')
dec_chars = dec_dense(dec_output)

In [278]:
from keras.models import Model

In [279]:
# Training model: (source one-hots, decoder input one-hots) -> per-step
# character distributions.
model = Model(inputs=[enc_input, dec_input], outputs=dec_chars)

In [280]:
# Targets are one-hot vectors, hence categorical cross-entropy.
model.compile(loss='categorical_crossentropy', optimizer='rmsprop')

In [281]:
# Print the layer-by-layer summary of the training model.
model.summary()

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
input_35 (InputLayer)            (None, 17, 91)        0                                            
____________________________________________________________________________________________________
input_36 (InputLayer)            (None, 42, 114)       0                                            
____________________________________________________________________________________________________
lstm_11 (LSTM)                   [(None, 256), (None,  356352      input_35[0][0]                   
____________________________________________________________________________________________________
lstm_12 (LSTM)                   [(None, 42, 256), (No 379904      input_36[0][0]                   
                                                                   lstm_11[0][1]           

In [None]:
# Train with teacher forcing, holding out 10% of the samples for validation.
# NOTE(review): epochs is hard-coded to 50 here; the `epochs` constant from
# the configuration cell is not used by this call.
model.fit([inp_data, out_data], out_target, batch_size=batch_size, epochs=50, validation_split=0.1)

In [None]:
# Second, identical fit call on the same model object.
# NOTE(review): presumably intended to continue training for another 50
# epochs (100 in total) — confirm this repetition is deliberate.
model.fit([inp_data, out_data], out_target, batch_size=batch_size, epochs=50, validation_split=0.1)

In [283]:
# Inference-time encoder: maps a one-hot source sentence to the encoder's
# final [hidden, cell] states. The outputs are passed as a list, the same
# container type used for the inputs of the training model.
enc_model = Model(enc_input, [enc_h, enc_c])

In [284]:
# Inference-time decoder. It reuses the trained `decoder` and `dec_dense`
# layers but takes its initial hidden/cell state from explicit inputs, so the
# encoder states can be fed in at prediction time.
dec_sample_h = Input(shape=(latent_dim,))
dec_sample_c = Input(shape=(latent_dim,))
# initial_state is given as a list, matching the training-time decoder call.
# The states returned by the LSTM are discarded: the sampler is always run on
# a whole decoder-input sequence rather than one step at a time.
dec_sample_lstm, _, _ = decoder(dec_input, initial_state=[dec_sample_h, dec_sample_c])
dec_sample_char = dec_dense(dec_sample_lstm)
dec_sampler = Model([dec_input, dec_sample_h, dec_sample_c], dec_sample_char)

In [285]:
s = 'Hello!'
def convert_to_inp_data(s):
    """One-hot encode a single source sentence for the encoder.

    Characters are left-aligned (character j is written to time step j),
    which is the same layout the training tensor `inp_data` is filled with.

    Args:
        s: source sentence, at most `inp_max_len` characters long.

    Returns:
        A new array of shape (1, inp_max_len, len(inp_chars)). No global
        array is modified.

    Raises:
        ValueError: if `s` is longer than `inp_max_len`.
        KeyError: if `s` contains a character missing from `inp_c_ind`.
    """
    if len(s) > inp_max_len:
        raise ValueError(
            'input sentence has %d characters; at most %d are supported'
            % (len(s), inp_max_len))
    res = np.zeros((1, inp_max_len, len(inp_chars)))
    for j, c in enumerate(s):
        # Write into the freshly allocated batch, not the training data.
        res[0, j, inp_c_ind[c]] = 1.
    return res

In [286]:
# Sanity check: the encoded sentence is a batch of one with shape
# (1, inp_max_len, len(inp_chars)).
convert_to_inp_data(s).shape

(1, 17, 91)

In [287]:
# Run the inference encoder on a sample sentence to obtain its two state arrays.
cur_h, cur_c = enc_model.predict(convert_to_inp_data('Hello!'))

In [288]:
# The hidden state has one row per input sentence and `latent_dim` columns.
cur_h.shape

(1, 256)

In [289]:
def convert_to_out_data(s):
    """One-hot encode a partial translation as decoder input.

    Time step 0 holds the start token (vocabulary index 0) and character j
    of `s` is written to time step j + 1, i.e. the same right-shifted layout
    the training tensor `out_data` is filled with.

    Args:
        s: the characters decoded so far (may be empty), at most
            `out_max_len` characters long.

    Returns:
        A new array of shape (1, out_max_len + 1, len(out_chars)). No global
        array is modified.

    Raises:
        ValueError: if `s` is longer than `out_max_len`.
        KeyError: if `s` contains a character missing from `out_c_ind`.
    """
    if len(s) > out_max_len:
        raise ValueError(
            'decoded prefix has %d characters; at most %d are supported'
            % (len(s), out_max_len))
    res = np.zeros((1, out_max_len+1, len(out_chars)))
    res[0, 0, 0] = 1.
    for j, c in enumerate(s):
        # Write into the freshly allocated batch, not the training tensors.
        res[0, j+1, out_c_ind[c]] = 1.
    return res

In [290]:
# Sanity check: the decoder input is a batch of one with shape
# (1, out_max_len + 1, len(out_chars)).
convert_to_out_data('').shape

(1, 42, 114)

In [291]:
def translate_sentence(s):
    """Greedily decode a translation of `s`, one character at a time.

    The source sentence is encoded once. At each step the characters decoded
    so far are fed back through `dec_sampler` with the encoder states, and
    the most probable character at the next position is appended.

    Decoding stops at the end-of-sequence character '\\n' (every training
    target ends with it) or after `out_max_len` characters. The start token
    '\\t' is also treated as a stop character, as it was previously.

    Args:
        s: source sentence, passed to `convert_to_inp_data`.

    Returns:
        The decoded string, without the terminating stop character.
    """
    stop_chars = ('\n', '\t')
    cur_h, cur_c = enc_model.predict(convert_to_inp_data(s))
    out_string = ''
    while len(out_string) < out_max_len:
        x = convert_to_out_data(out_string)
        model_output = dec_sampler.predict([x, cur_h, cur_c])
        # The decoder input is shifted right by one, so the prediction for the
        # next character sits at time step len(out_string).
        y = out_chars[np.argmax(model_output[0, len(out_string), :])]
        if y in stop_chars:
            break
        out_string = out_string + y
    return out_string

In [None]:
# Try the translator on a short sample sentence.
translate_sentence("Hi!")

In [None]:
# Step-by-step greedy decoding written out inline, with the encoder states
# printed for inspection.
s = 'Hi!'
cur_h, cur_c = enc_model.predict(convert_to_inp_data(s))
print(cur_h, cur_c)
out_string = ''
while len(out_string) < out_max_len:
    # Re-encode everything decoded so far and re-run the decoder on it.
    x = convert_to_out_data(out_string)
    model_output = dec_sampler.predict([x, cur_h, cur_c])
    # The next-character distribution sits at time step len(out_string).
    next_step_probs = model_output[0, len(out_string), :]
    y = out_chars[next_step_probs.argmax()]
    if y == '\n':
        # End-of-sequence character: stop decoding.
        break
    out_string += y
print(out_string)