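Load the dataset. It contains English-to-Spanish translation pairs, so swap the order of each pair to get Spanish inputs and English targets. Also add `\t` and `\n` as the start and stop tokens in the target sequences, and keep only the pairs whose Spanish phrase is shorter than 45 characters.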

In [1]:
# Markers wrapped around every target (English) sequence.
start_token = "\t"
stop_token = "\n"

with open("data/spa.txt", "r", encoding="utf-8") as f:
    raw_lines = f.read().split("\n")

# Skip blank lines; every remaining line is split into its tab-separated
# fields, which the next step unpacks as (English, Spanish).
stripped_lines = (line.strip() for line in raw_lines)
pairs = [line.split("\t") for line in stripped_lines if line]

# Reorder to (Spanish input, English target), wrap the target in the
# start/stop tokens, and keep only pairs with a short Spanish phrase.
samples = [
    (es, f"{start_token}{en}{stop_token}")
    for en, es in pairs
    if len(es) < 45
]

In [2]:
len(samples)

99423

In [3]:
print(samples[:2])

[('Ve.', '\tGo.\n'), ('Vete.', '\tGo.\n')]


Split data into train and validation sets.

In [4]:
from sklearn.model_selection import train_test_split

# 80% of the pairs go to training, the rest to validation; random_state is
# fixed so the split is the same on every run.
train_samples, valid_samples = train_test_split(
    samples,
    train_size=0.8,
    random_state=42,
)



In [5]:
len(train_samples)

79538

In [6]:
len(valid_samples)

19885

Determine the training vocabulary. Those are the only tokens you can trust the model will know how to handle. 

In [7]:
# Character-level vocabularies, collected from the training split only.
in_vocab = {char for in_seq, _ in train_samples for char in in_seq}
out_vocab = {char for _, out_seq in train_samples for char in out_seq}

in_vocab_size, out_vocab_size = len(in_vocab), len(out_vocab)

print("Input vocab size:", in_vocab_size)
print("Output vocab size:", out_vocab_size)

Input vocab size: 101
Output vocab size: 87


In [8]:
print(sorted(in_vocab))

[' ', '!', '"', '$', '%', "'", '(', ')', '+', ',', '-', '.', '/', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '?', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '¡', '«', '°', 'º', '»', '¿', 'Á', 'É', 'Ó', 'Ú', 'á', 'è', 'é', 'í', 'ñ', 'ó', 'ö', 'ú', 'ü', 'ś', 'с', '—', '€']


In [9]:
print(sorted(out_vocab))

['\t', '\n', ' ', '!', '"', '$', '%', "'", ',', '-', '.', '/', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '?', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '°', 'á', 'ã', 'è', 'é', 'ö', '‘', '’', '₂', '€']


Go through validation set and remove any tokens not present in the training set.

In [10]:
# Rebuild every validation pair, keeping only the characters that also occur
# in the corresponding training vocabulary.
valid_samples = [
    (
        "".join(char for char in in_seq if char in in_vocab),
        "".join(char for char in out_seq if char in out_vocab),
    )
    for in_seq, out_seq in valid_samples
]

Build sequence-to-sequence model with bidirectional encoder.

In [11]:
import keras
from keras.layers import Concatenate, Dense, Input, GRU, Masking
from keras.models import Model

Using TensorFlow backend.


Create encoder with two GRU layers, each processing the input in a different direction

In [12]:
# Number of units in each of the two encoder GRUs.
latent_dim = 512

# One-hot encoded input characters; the sequence length is left open (None).
encoder_in = Input(shape=(None, in_vocab_size), name="encoder_in")
# NOTE(review): Masking is created with its default mask value — presumably so
# the all-zero padding steps produced by encode_batch are ignored; confirm
# against the Keras Masking documentation.
encoder_mask = Masking(name="encoder_mask")(encoder_in)

# Two independent GRUs read the same masked input; rev_enc_gru sets
# go_backwards=True so it reads the sequence in the opposite direction.
fwd_enc_gru = GRU(latent_dim, recurrent_dropout=0.3, name="fwd_enc_gru")
rev_enc_gru = GRU(latent_dim, go_backwards=True, recurrent_dropout=0.3, name="rev_enc_gru")
fwd_enc_out = fwd_enc_gru(encoder_mask)
rev_enc_out = rev_enc_gru(encoder_mask)

# Join both directions into one vector of size 2 * latent_dim; the decoder
# uses it as its initial state.
encoder_out = Concatenate(name="encoder_out")([fwd_enc_out, rev_enc_out])

The decoder's hidden state is twice the size of each encoder GRU's output because the forward and reverse encoder outputs are concatenated to form the decoder's initial state.

In [13]:
# Must equal the width of the concatenated forward + reverse encoder output,
# which is passed to the decoder GRU as its initial state.
decoder_latent_dim = 2 * latent_dim

In [14]:
# One-hot encoded target characters fed to the decoder (see encode_batch);
# the sequence length is left open (None).
decoder_in = Input(shape=(None, out_vocab_size), name="decoder_in")

decoder_mask = Masking(name="decoder_mask")(decoder_in)
# return_sequences=True yields an output for every time step; return_state=True
# additionally returns the final hidden state, which is discarded in this
# training graph but used when the layer is reused for inference.
decoder_gru = GRU(decoder_latent_dim, return_sequences=True,
                  return_state=True, dropout=0.2, recurrent_dropout=0.3,
                  name="decoder_gru")
# The concatenated encoder output seeds the decoder's hidden state.
decoder_gru_out, _ = decoder_gru(decoder_mask, initial_state=encoder_out)
# Softmax over the output vocabulary, applied to the GRU output sequence.
decoder_dense = Dense(out_vocab_size, activation="softmax", name="decoder_out")
decoder_out = decoder_dense(decoder_gru_out)

In [15]:
# Training model: takes the encoder input and the decoder input and produces
# the decoder's softmax output sequence.
seq2seq_model = Model([encoder_in, decoder_in], decoder_out)
# Targets are one-hot encoded (see encode_batch), hence categorical cross-entropy.
seq2seq_model.compile(optimizer="rmsprop", loss="categorical_crossentropy")

In [16]:
seq2seq_model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
encoder_in (InputLayer)         (None, None, 101)    0                                            
__________________________________________________________________________________________________
encoder_mask (Masking)          (None, None, 101)    0           encoder_in[0][0]                 
__________________________________________________________________________________________________
decoder_in (InputLayer)         (None, None, 87)     0                                            
__________________________________________________________________________________________________
fwd_enc_gru (GRU)               (None, 512)          943104      encoder_mask[0][0]               
__________________________________________________________________________________________________
rev_enc_gr

Create maps to convert characters to and from ints. 

In [17]:
# Each character's index is its position in the sorted vocabulary, so the
# mapping does not depend on set iteration order.
in_tokens = sorted(in_vocab)
out_tokens = sorted(out_vocab)

in_token2int = dict(zip(in_tokens, range(len(in_tokens))))
out_token2int = dict(zip(out_tokens, range(len(out_tokens))))
# Reverse lookup for turning predicted indices back into characters.
out_int2token = dict(enumerate(out_tokens))

Create helper functions for one-hot encoding sequences for use with the model.

In [18]:
import numpy as np

def make_batch_storage(batch_size, in_seq_len, out_seq_len):
    """Allocate zero-filled float32 arrays for one batch of one-hot sequences.

    Args:
        batch_size: number of samples in the batch.
        in_seq_len: number of time steps in the encoder input array.
        out_seq_len: number of time steps in both decoder arrays.

    Returns:
        A tuple ``(enc_in_seqs, dec_in_seqs, dec_out_seqs)``. The first has
        shape ``(batch_size, in_seq_len, in_vocab_size)``; the other two are
        separate arrays of shape ``(batch_size, out_seq_len, out_vocab_size)``.
    """
    encoder_shape = (batch_size, in_seq_len, in_vocab_size)
    decoder_shape = (batch_size, out_seq_len, out_vocab_size)

    enc_in_seqs = np.zeros(encoder_shape, dtype=np.float32)
    # Two independent buffers: one for decoder inputs, one for decoder targets.
    dec_in_seqs, dec_out_seqs = (
        np.zeros(decoder_shape, dtype=np.float32) for _ in range(2))

    return enc_in_seqs, dec_in_seqs, dec_out_seqs


In [19]:
def encode_batch(samples):
    """One-hot encode a list of (input, target) string pairs.

    Every array is sized to the longest sequence in the batch; positions past
    the end of a shorter sequence stay all-zero.

    Args:
        samples: list of ``(in_seq, out_seq)`` string pairs.

    Returns:
        A tuple ``(enc_in_seqs, dec_in_seqs, dec_out_seqs)``: the encoded
        inputs, the encoded targets, and the encoded targets shifted one step
        earlier (so step ``t`` holds the target's character ``t + 1``).
    """
    longest_in = max(len(src) for src, _ in samples)
    longest_out = max(len(tgt) for _, tgt in samples)

    enc_in_seqs, dec_in_seqs, dec_out_seqs = make_batch_storage(
        len(samples), longest_in, longest_out)

    for row, (src, tgt) in enumerate(samples):
        for step, char in enumerate(src):
            enc_in_seqs[row, step, in_token2int[char]] = 1

        for step, char in enumerate(tgt):
            column = out_token2int[char]
            dec_in_seqs[row, step, column] = 1
            # The expected output at step t - 1 is the decoder input at step t;
            # the first character has no predecessor.
            if step > 0:
                dec_out_seqs[row, step - 1, column] = 1

    return enc_in_seqs, dec_in_seqs, dec_out_seqs

Train model

In [20]:
from seq2seq_util import Seq2SeqBatchGenerator

batch_size = 64

# Both splits use the same batch size and the same encoding function.
train_generator, valid_generator = (
    Seq2SeqBatchGenerator(split, batch_size, encode_batch)
    for split in (train_samples, valid_samples)
)

### Warning:

Running the following cell can take a long time. If you are just experimenting, consider changing the `epochs` value to something small, from `1` to `10`.

In [21]:
from keras.callbacks import EarlyStopping

# Stop training once the validation loss has gone 5 epochs without improving,
# and restore the weights from the best epoch.
early_stopping = EarlyStopping(monitor="val_loss", patience=5, restore_best_weights=True)

# epochs=500 is only an upper bound; early stopping normally ends the run
# sooner. The returned History object is this cell's displayed output.
seq2seq_model.fit_generator(train_generator, epochs=500,
                            validation_data=valid_generator,
                            callbacks=[early_stopping])

Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500
Epoch 63/500
Epoch 64/500
Epoch 65/500
Epoch 66/500
Epoch 67/500
Epoch 68/500
Epoch 69/500
Epoch 70/500
Epoch 71/500
Epoch 72/500
Epoch 73/500
Epoch 74/500
Epoch 75/500
Epoch 76/500
Epoch 77/500
Epoch 78

<keras.callbacks.History at 0x7f5960cd24e0>

Create encoder/decoder models for inference

In [22]:
# Inference-time encoder: reuses the trained encoder graph to turn an input
# sequence into the vector handed to the decoder as its initial state.
inf_encoder = Model(encoder_in, encoder_out)

In [23]:
inf_encoder.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
encoder_in (InputLayer)         (None, None, 101)    0                                            
__________________________________________________________________________________________________
encoder_mask (Masking)          (None, None, 101)    0           encoder_in[0][0]                 
__________________________________________________________________________________________________
fwd_enc_gru (GRU)               (None, 512)          943104      encoder_mask[0][0]               
__________________________________________________________________________________________________
rev_enc_gru (GRU)               (None, 512)          943104      encoder_mask[0][0]               
__________________________________________________________________________________________________
encoder_ou

In [24]:
# Explicit input for the decoder's hidden state, so the state returned by one
# call can be passed into the next (see translate_sequence).
inf_dec_h_in = Input(shape=(decoder_latent_dim,), name="decoder_h_in")

# Reuse the trained decoder_gru layer. NOTE(review): it is called on
# decoder_in directly here, bypassing the decoder_mask layer that the training
# graph places in front of it.
inf_dec_gru_out, inf_dec_h_out = decoder_gru(
    decoder_in, initial_state=inf_dec_h_in)

# Reuse the trained softmax layer as well.
inf_dec_out = decoder_dense(inf_dec_gru_out)

# Inputs: one-hot character(s) and the current state.
# Outputs: character probabilities and the updated state.
inf_decoder = Model(
    [decoder_in, inf_dec_h_in],
    [inf_dec_out, inf_dec_h_out])

In [25]:
inf_decoder.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
decoder_in (InputLayer)         (None, None, 87)     0                                            
__________________________________________________________________________________________________
decoder_h_in (InputLayer)       (None, 1024)         0                                            
__________________________________________________________________________________________________
decoder_gru (GRU)               [(None, None, 1024), 3416064     decoder_in[0][0]                 
                                                                 decoder_h_in[0][0]               
__________________________________________________________________________________________________
decoder_out (Dense)             (None, None, 87)     89175       decoder_gru[1][0]                
Total para

Test trained model on the first 100 samples from both the training and validation sets.

In [26]:
# Longest target sequence (start/stop tokens included) across all samples;
# translate_sequence uses it as the cap on decoded length.
max_out_seq_len = max(len(target) for _, target in samples)
print("Max output length: ", max_out_seq_len)

# Vocabulary indices of the two special tokens.
start_token_idx = out_token2int[start_token]
stop_token_idx = out_token2int[stop_token]

Max output length:  87


In [27]:
def translate_sequence(one_hot_seq, encoder, decoder):
    """Greedily decode a translation one character at a time.

    Args:
        one_hot_seq: encoder input, passed straight to ``encoder.predict``.
        encoder: object whose ``predict`` returns the decoder's initial state.
        decoder: object whose ``predict([step_input, state])`` returns
            ``(probabilities, new_state)``.

    Returns:
        The decoded text, without the start and stop tokens. Decoding ends at
        the first predicted stop token or once the text reaches
        ``max_out_seq_len`` characters, whichever comes first.
    """
    state = encoder.predict(one_hot_seq)

    # One-sample, one-step one-hot buffer that is reused for every decoder call.
    step_input = np.zeros((1, 1, out_vocab_size), dtype=np.float32)

    translated_text = ''
    current_idx = start_token_idx
    while True:
        # Feed the previous character, then clear it so the buffer is reusable.
        step_input[0, 0, current_idx] = 1
        probabilities, state = decoder.predict([step_input, state])
        step_input[0, 0, current_idx] = 0

        # Greedy choice: the most probable next character.
        current_idx = np.argmax(probabilities[0, -1, :])
        if current_idx == stop_token_idx:
            break

        translated_text += out_int2token[current_idx]
        if len(translated_text) >= max_out_seq_len:
            break

    return translated_text

Validation samples:

In [28]:
from seq2seq_util import test_predictions

test_predictions(valid_samples[:100], inf_encoder, inf_decoder, encode_batch, translate_sequence)

-----------------------------------------
Input sentence: A todos nos gusta montar en bici.
Dataset translation: 	We all like cycling.

Model output: We like to like bicycle.
-----------------------------------------
Input sentence: Tom se rió de todos los chistes de Mary.
Dataset translation: 	Tom laughed at all of Mary's jokes.

Model output: Tom laughed at Mary all the games.
-----------------------------------------
Input sentence: Tom es un asqueroso.
Dataset translation: 	Tom is a creep.

Model output: Tom is a scream.
-----------------------------------------
Input sentence: ¿Cuál es tu meta en la vida?
Dataset translation: 	What's your aim in life?

Model output: What's your life in the life?
-----------------------------------------
Input sentence: Ella le escucha, aunque nadie más lo haga.
Dataset translation: 	She listens to him even though no one else does.

Model output: She listens to him anything but I don't know.
-----------------------------------------
Input sentence:

-----------------------------------------
Input sentence: Si quieres tu dinero de vuelta, solo dilo.
Dataset translation: 	If you want your money back, just say so.

Model output: If you want my money, I'll blame you alone.
-----------------------------------------
Input sentence: Nadie me dijo nada.
Dataset translation: 	No one said anything to me.

Model output: No one told me anything.
-----------------------------------------
Input sentence: A Tom le gusta estar rodeado de gente.
Dataset translation: 	Tom likes having people around.

Model output: Tom likes to be proud of people.
-----------------------------------------
Input sentence: Tu madre se encuentra en estado crítico.
Dataset translation: 	Your mother is in critical condition.

Model output: Your mother is coming to criminal than crime.
-----------------------------------------
Input sentence: Hay un gato debajo de la mesa.
Dataset translation: 	There's a cat under the table.

Model output: There is a cat on the table.
---

-----------------------------------------
Input sentence: Tom bebe.
Dataset translation: 	Tom drinks.

Model output: Tom drinks.
-----------------------------------------
Input sentence: Cuando lo oyó, le entraron ganas de llorar.
Dataset translation: 	When she heard that, she felt like crying.

Model output: When he could hear it, but come in crying.


Training samples:

In [29]:
test_predictions(train_samples[:100], inf_encoder, inf_decoder, encode_batch, translate_sequence)

-----------------------------------------
Input sentence: Después de una larga espera pudimos entrar.
Dataset translation: 	We got in after a long wait.

Model output: After a long time, we can get in the world.
-----------------------------------------
Input sentence: Lo siento, pero es imposible.
Dataset translation: 	I'm sorry, but it's impossible.

Model output: I'm sorry, but it's impossible.
-----------------------------------------
Input sentence: Parecía satisfecho.
Dataset translation: 	He looked pleased.

Model output: He looked satisfied.
-----------------------------------------
Input sentence: Saqué el pastel del horno.
Dataset translation: 	I took the cake out of the oven.

Model output: I took the cake on the hotel.
-----------------------------------------
Input sentence: Es un trabajo muy difícil.
Dataset translation: 	That's a very tough job.

Model output: It's a very difficult job.
-----------------------------------------
Input sentence: Dijiste que no entendías.
D

-----------------------------------------
Input sentence: Tienes que responder a la pregunta.
Dataset translation: 	You need to answer the question.

Model output: You need to answer the question.
-----------------------------------------
Input sentence: ¿Has vivido aquí?
Dataset translation: 	Did you live here?

Model output: Did you live here?
-----------------------------------------
Input sentence: He decidido hacer eso solo.
Dataset translation: 	I've decided to do that by myself.

Model output: I've decided to do that by myself.
-----------------------------------------
Input sentence: Ella le vio comerse un sándwich.
Dataset translation: 	She saw him eating a sandwich.

Model output: She saw him eat a sandwich.
-----------------------------------------
Input sentence: No quiero esta camisa.
Dataset translation: 	I don't want this shirt.

Model output: I don't want this shirt.
-----------------------------------------
Input sentence: Él se marchó hace diez minutos.
Dataset transl

-----------------------------------------
Input sentence: ¿Quién se robó mi canasto con la carne?
Dataset translation: 	Who stole my basket with the meat?

Model output: Who stole my key to the passport?
-----------------------------------------
Input sentence: No estaba manejando tan rápido.
Dataset translation: 	I wasn't driving all that fast.

Model output: I wasn't driving so fast.
-----------------------------------------
Input sentence: Están muy lejos.
Dataset translation: 	They are very far away.

Model output: They're very far.
-----------------------------------------
Input sentence: Ella hizo un viaje a Europa el mes pasado.
Dataset translation: 	She made a trip to Europe last month.

Model output: She made a large train last month.
-----------------------------------------
Input sentence: No tengo tiempo para juegos.
Dataset translation: 	I don't have time for games.

Model output: I don't have time for time.
-----------------------------------------
Input sentence: Compré 

Export model in Core ML format.

In [30]:
# Rebuild the encoder for export. Compared with the trained encoder it uses the
# same layer names and sizes, but has no Masking layer and no recurrent dropout.
coreml_enc_in = Input(shape=(None, in_vocab_size), name="encoder_in")

coreml_fwd_enc_gru = GRU(latent_dim, name="fwd_enc_gru")
coreml_rev_enc_gru = GRU(latent_dim, go_backwards=True, name="rev_enc_gru")

coreml_fwd_out = coreml_fwd_enc_gru(coreml_enc_in)
coreml_rev_out = coreml_rev_enc_gru(coreml_enc_in)

coreml_enc_out = Concatenate(name="encoder_out")([coreml_fwd_out, coreml_rev_out])

coreml_encoder_model = Model(coreml_enc_in, coreml_enc_out)
# NOTE(review): presumably a compatibility shim — the coremltools Keras
# converter appears to expect a public `output_layers` attribute; confirm.
coreml_encoder_model.output_layers = coreml_encoder_model._output_layers

# Copy the trained weights into the rebuilt model by round-tripping them
# through an HDF5 file.
inf_encoder.save_weights("Es2EnBidirGruCharEncoderWeights.h5")
coreml_encoder_model.load_weights("Es2EnBidirGruCharEncoderWeights.h5")

In [31]:
import coremltools

# Convert the rebuilt Keras encoder to Core ML and name its input and output.
# NOTE(review): "decodersIntialState" is misspelled ("Intial"); it is a runtime
# name baked into the .mlmodel, so any consumer has to use this exact string.
coreml_encoder = coremltools.converters.keras.convert(
    coreml_encoder_model,
    input_names="oneHotEncodedSeq",
    output_names="decodersIntialState")

coreml_encoder.save("Es2EnBidirGruCharEncoder.mlmodel")

0 : encoder_in, <keras.engine.input_layer.InputLayer object at 0x7f58da6f3828>
1 : fwd_enc_gru, <keras.layers.recurrent.GRU object at 0x7f58da6f3860>
2 : rev_enc_gru, <keras.layers.recurrent.GRU object at 0x7f58da6f3908>
3 : encoder_out, <keras.layers.merge.Concatenate object at 0x7f58da6f3a20>


In [32]:
# Rebuild the decoder for export: no Masking layer, no dropout, and — unlike
# inf_decoder — no explicit initial-state input and only the softmax output.
# NOTE(review): presumably the Core ML converter exposes the GRU's state
# input/output on its own; confirm against the coremltools documentation.
coreml_dec_in = Input(shape=(None, out_vocab_size))

coreml_dec_gru = GRU(decoder_latent_dim, return_sequences=True, return_state=True, name="decoder_gru")
coreml_dec_gru_out, _ = coreml_dec_gru(coreml_dec_in)
coreml_dec_dense = Dense(out_vocab_size, activation="softmax")
coreml_dec_out = coreml_dec_dense(coreml_dec_gru_out)

coreml_decoder_model = Model(coreml_dec_in, coreml_dec_out)
# NOTE(review): presumably a compatibility shim — the coremltools Keras
# converter appears to expect a public `output_layers` attribute; confirm.
coreml_decoder_model.output_layers = coreml_decoder_model._output_layers

# Copy the trained weights across via an HDF5 file. NOTE(review): the Dense
# layer here is unnamed while the trained one is "decoder_out", and
# load_weights is called without by_name — weights are presumably matched by
# layer order rather than by name; confirm.
inf_decoder.save_weights("Es2EnBidirGruCharDecoderWeights.h5")
coreml_decoder_model.load_weights("Es2EnBidirGruCharDecoderWeights.h5")

In [33]:
# Convert the rebuilt Keras decoder to Core ML and name its input and output.
coreml_decoder = coremltools.converters.keras.convert(
    coreml_decoder_model,
    input_names="encodedChar",
    output_names="nextCharProbs")

coreml_decoder.save("Es2EnBidirGruCharDecoder.mlmodel")

0 : input_1, <keras.engine.input_layer.InputLayer object at 0x7f58da21a278>
1 : decoder_gru, <keras.layers.recurrent.GRU object at 0x7f58da21a2b0>
2 : dense_1, <keras.layers.core.Dense object at 0x7f58da21a4a8>
3 : dense_1__activation__, <keras.layers.core.Activation object at 0x7f58da27ee10>


Convert weights to 16bit floats. This shouldn't hurt performance much, if at all, and it reduces the app's download size.

In [34]:
def convert_to_fp16(mlmodel_filename):
    """Write a copy of a Core ML model with its weights converted to 16-bit floats.

    The copy is saved next to the original as ``<name>16Bit.mlmodel``, where
    ``<name>`` is ``mlmodel_filename`` without its ``.mlmodel`` extension. If
    the filename does not end in ``.mlmodel``, the whole filename is used as
    ``<name>``.

    Args:
        mlmodel_filename: path of the existing Core ML model file.
    """
    suffix = ".mlmodel"
    # Strip the extension only when it is really there; slicing by length
    # alone would chop the last eight characters off any other filename.
    if mlmodel_filename.endswith(suffix):
        basename = mlmodel_filename[:-len(suffix)]
    else:
        basename = mlmodel_filename

    spec = coremltools.utils.load_spec(mlmodel_filename)
    spec_16bit = \
      coremltools.utils.convert_neural_network_spec_weights_to_fp16(spec)
    coremltools.utils.save_spec(spec_16bit, f"{basename}16Bit.mlmodel")

In [35]:
# Produce a 16-bit copy of each exported model.
for mlmodel_path in ("Es2EnBidirGruCharEncoder.mlmodel",
                     "Es2EnBidirGruCharDecoder.mlmodel"):
    convert_to_fp16(mlmodel_path)

In [36]:
import json

# Write the character -> index table for Spanish input and the
# index -> character table for English output as JSON files.
for json_path, mapping in (("esCharToInt.json", in_token2int),
                           ("intToEnChar.json", out_int2token)):
    with open(json_path, "w") as f:
        json.dump(mapping, f)