In [1]:
input_texts = [
    "hello there",
    "how are you",
    "i am fine",
]

target_texts = [
    "hola allí",
    "cómo estás",
    "estoy bien",
]

# for decoder we need to add starting symbol in inputs and ouput symbol in outputs
target_texts_in = ["<SOS> " + t for t in target_texts]
target_texts_out = [t + " <EOS>" for t in target_texts]

In [85]:
from tensorflow import keras
from tensorflow.keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences

# encoder tokenizer
encoder_tokenizer=Tokenizer(filters='')
encoder_tokenizer.fit_on_texts(input_texts)
encoder_vocab=len(encoder_tokenizer.word_index)+1

# decoder tokenizer
decoder_tokenizer=Tokenizer(filters='')
decoder_tokenizer.fit_on_texts(target_texts_in+target_texts_out)
decoder_vocab=len(decoder_tokenizer.word_index)+1

In [87]:
encoder_input_sequences=encoder_tokenizer.texts_to_sequences(input_texts)
decoder_input_sequences=decoder_tokenizer.texts_to_sequences(target_texts_in)
decoder_output_sequences=decoder_tokenizer.texts_to_sequences(target_texts_out)

In [88]:
max_encoder_len = max(len(s) for s in encoder_input_sequences)
max_decoder_len = max(len(s) for s in decoder_input_sequences)

encoder_input_sequences = pad_sequences(encoder_input_sequences,maxlen=max_encoder_len,padding='post')
decoder_input_sequences = pad_sequences(decoder_input_sequences,maxlen=max_decoder_len,padding='post')
decoder_output_sequences = pad_sequences(decoder_output_sequences,maxlen=max_decoder_len,padding='post')

In [89]:
max_encoder_len,max_decoder_len

(3, 3)

In [121]:
from keras.utils import to_categorical
from keras.models import Model
from keras.layers import Input,LSTM,Embedding,Dense
import numpy as np


n_units=128
embed_dim=10

# encoder
encoder_inputs=Input(shape=(None,))
encoder_embeddings=Embedding(encoder_vocab,embed_dim)(encoder_inputs)

encoder_lstm=LSTM(n_units,return_state=True)
encoder_outputs, state_h, state_c=encoder_lstm(encoder_embeddings)
encoder_states=[state_h,state_c]  # this is context vector


# decoder
decoder_inputs = Input(shape=(None,))
decoder_embedding_layer = Embedding(decoder_vocab, embed_dim)

decoder_embeddings = decoder_embedding_layer(decoder_inputs)
decoder_lstm=LSTM(n_units,return_state=True,return_sequences=True)
decoder_outputs, _, _ = decoder_lstm(decoder_embeddings,initial_state=encoder_states)
decoder_dense=Dense(decoder_vocab,activation="softmax")
decoder_outputs=decoder_dense(decoder_outputs)

# model for training
model=Model([encoder_inputs,decoder_inputs],decoder_outputs)
model.compile(optimizer="adam", loss="sparse_categorical_crossentropy")

model.summary()

In [122]:
# Inference / prediction

# encoder model
encoder_model = Model(encoder_inputs, encoder_states)

# decoder model
decoder_state_input_h = Input(shape=(n_units,))
decoder_state_input_c = Input(shape=(n_units,))

decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]

decoder_embeddings_inf = decoder_embedding_layer(decoder_inputs)
decoder_outputs_inf,state_h_inf,state_c_inf=decoder_lstm(decoder_embeddings_inf,initial_state=decoder_states_inputs)
decoder_outputs_inf=decoder_dense(decoder_outputs_inf)
decoder_states = [state_h_inf, state_c_inf]


decoder_Model=Model([decoder_inputs]+decoder_states_inputs, [decoder_outputs_inf]+decoder_states)

In [123]:
def decode_sentences(input_seq):
  states=encoder_model.predict(input_seq)

  target_seq=np.zeros((1,1))
  target_seq[0,0]=decoder_tokenizer.word_index['<sos>']

  decoded=[]
  while True:
    output_tokens,h,c=decoder_Model.predict([target_seq]+states)
    sampled=np.argmax(output_tokens[0,-1,:])
    decoded.append(sampled)

    if sampled==decoder_tokenizer.word_index['<eos>'] or len(decoded)>max_decoder_len:
      break

    target_seq = np.array([[sampled]])
    states = [h, c]

  sent=decoder_tokenizer.sequences_to_texts([decoded])
  return sent

In [124]:
model.fit(x=[encoder_input_sequences,decoder_input_sequences],y=decoder_output_sequences,batch_size=32)



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5s/step - loss: 2.1971


<keras.src.callbacks.history.History at 0x7d99fc855cd0>

In [125]:
decode_sentences(encoder_input_sequences[1:2])

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 209ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 202ms/step


['<eos>']

In [120]:
decoder_tokenizer.word_index

{'<sos>': 1,
 '<eos>': 2,
 'hola': 3,
 'allí': 4,
 'cómo': 5,
 'estás': 6,
 'estoy': 7,
 'bien': 8}

# Multi layer encoder decoder

In [126]:
from keras.utils import to_categorical
from keras.models import Model
from keras.layers import Input,LSTM,Embedding,Dense
import numpy as np


n_units=128
embed_dim=10

# encoder
encoder_inputs=Input(shape=(None,))
encoder_embeddings=Embedding(encoder_vocab,embed_dim)(encoder_inputs)

encoder_lstm1=LSTM(n_units,return_sequences=True)
encoder_lstm2=LSTM(n_units,return_sequences=True)
encoder_lstm3=LSTM(n_units,return_sequences=True)
encoder_lstm4=LSTM(n_units,return_state=True)

encoder_outputs=encoder_lstm1(encoder_embeddings)
encoder_outputs=encoder_lstm2(encoder_outputs)
encoder_outputs=encoder_lstm3(encoder_outputs)
encoder_outputs, state_h, state_c=encoder_lstm4(encoder_outputs)

encoder_states=[state_h,state_c]  # this is context vector


# decoder
decoder_inputs = Input(shape=(None,))
decoder_embedding_layer = Embedding(decoder_vocab, embed_dim)

decoder_embeddings = decoder_embedding_layer(decoder_inputs)
decoder_lstm1=LSTM(n_units,return_sequences=True)
decoder_lstm2=LSTM(n_units,return_sequences=True)
decoder_lstm3=LSTM(n_units,return_sequences=True)
decoder_lstm4=LSTM(n_units,return_state=True,return_sequences=True)

decoder_outputs= decoder_lstm1(decoder_embeddings)
decoder_outputs= decoder_lstm2(decoder_outputs)
decoder_outputs= decoder_lstm3(decoder_outputs)
decoder_outputs, _, _ = decoder_lstm4(decoder_outputs,initial_state=encoder_states)

decoder_dense=Dense(decoder_vocab,activation="softmax")
decoder_outputs=decoder_dense(decoder_outputs)

# model for training
model=Model([encoder_inputs,decoder_inputs],decoder_outputs)
model.compile(optimizer="adam", loss="sparse_categorical_crossentropy")

model.summary()

In [127]:
# Inference / prediction

# encoder model
encoder_model = Model(encoder_inputs, encoder_states)

# decoder model
decoder_state_input_h = Input(shape=(n_units,))
decoder_state_input_c = Input(shape=(n_units,))

decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]

decoder_inputs = Input(shape=(1,))
decoder_embeddings_inf = decoder_embedding_layer(decoder_inputs)

decoder_outputs_inf= decoder_lstm1(decoder_embeddings_inf)
decoder_outputs_inf= decoder_lstm2(decoder_outputs_inf)
decoder_outputs_inf= decoder_lstm3(decoder_outputs_inf)
decoder_outputs_inf,state_h_inf,state_c_inf=decoder_lstm4(decoder_outputs_inf,initial_state=decoder_states_inputs)

decoder_outputs_inf=decoder_dense(decoder_outputs_inf)
decoder_states = [state_h_inf, state_c_inf]


decoder_Model=Model([decoder_inputs]+decoder_states_inputs, [decoder_outputs_inf]+decoder_states)

In [128]:
model.fit(x=[encoder_input_sequences,decoder_input_sequences],y=decoder_output_sequences,batch_size=32)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 15s/step - loss: 2.1972


<keras.src.callbacks.history.History at 0x7d99f760b980>

In [131]:
decode_sentences(encoder_input_sequences[1:2])

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 81ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step


['<eos>']