# Training Encoder Decoder Model

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import os

In [2]:
df= pd.read_csv('deu.txt', sep='\t', usecols=[0, 1])

In [3]:
df = df.iloc[:50000]

In [4]:
df.tail()

Unnamed: 0,Go.,Geh.
49995,No one encouraged Tom.,Niemand bestärkte Tom.
49996,No one encouraged her.,Niemand ermutigte sie.
49997,No one has that right.,Niemand hat dieses Recht.
49998,No one has that right.,Dieses Recht hat niemand.
49999,No one here has a car.,Hier hat niemand ein Auto.


In [5]:
df.rename(columns = {'Go.':'English', 'Geh.':'German'}, inplace = True)

In [6]:
df.tail()

Unnamed: 0,English,German
49995,No one encouraged Tom.,Niemand bestärkte Tom.
49996,No one encouraged her.,Niemand ermutigte sie.
49997,No one has that right.,Niemand hat dieses Recht.
49998,No one has that right.,Dieses Recht hat niemand.
49999,No one here has a car.,Hier hat niemand ein Auto.


In [7]:
df.German = df.German.apply(lambda x: 'sos '+ x +' eos')

In [8]:
eng_vocab= set()
for sent in df.English:
    for word in sent.split():
        if word not in eng_vocab:
            eng_vocab.add(word)

In [9]:
ger_vocab= set()
for sent in df.German:
    for word in sent.split():
        if word not in ger_vocab:
            ger_vocab.add(word)

In [10]:
len(eng_vocab), len(ger_vocab), 

(10152, 16637)

In [11]:
# for zero padding add 1 in them
ENG_VOCAB_SIZE= len(eng_vocab)+1
GER_VOCAB_SIZE= len(ger_vocab)+1
print(ENG_VOCAB_SIZE)
print(GER_VOCAB_SIZE)


10153
16638


#### First we need sorted words list

In [12]:
eng_words = sorted(list(eng_vocab))
ger_words = sorted(list(ger_vocab))

#### Word to number

In [13]:
eng_word_index = dict((w, i) for i, w in enumerate(eng_words))
ger_word_index = dict((w, i) for i, w in enumerate(ger_words))

#### Number to word


In [15]:
eng_index_word = dict((i, w) for i, w in enumerate(eng_words))
ger_index_word = dict((i,w) for i, w in enumerate(ger_words))

### Train test split

In [17]:
X_train, X_test, y_train, y_test= train_test_split(df.English, df.German, test_size=0.1, random_state=0)

In [18]:
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((45000,), (5000,), (45000,), (5000,))

In [19]:

eng_len_list=df.English.apply(lambda x: len(x.split())).to_list()

ger_len_list=df.German.apply(lambda x: len(x.split())).to_list()


In [20]:
np.max(ger_len_list), np.max(eng_len_list)

(17, 7)

In [21]:
BATCH_SIZE= 64
max_eng_len =  np.max(eng_len_list)
max_ger_len =  np.max(ger_len_list)

In [22]:
max_eng_len, max_ger_len

(7, 17)

In [23]:
def data_batch_generator(x, y, batch_size=BATCH_SIZE):
    while True:
        for i in range(0, len(x), batch_size):
            encoder_input_data = np.zeros((batch_size,max_eng_len ), dtype='float32')
            decoder_input_data = np.zeros((batch_size, max_ger_len), dtype='float32')
            decoder_target_data = np.zeros((batch_size, max_ger_len, GER_VOCAB_SIZE), dtype='float32' )
            for j, (eng_text, ger_text) in enumerate(zip(x[i:i+batch_size], y[i:i+batch_size])):
                for t , word in enumerate(eng_text.split()):
                    encoder_input_data[j,t] = eng_word_index[word]
                for t, word in enumerate(ger_text.split()):
                    if t < len(ger_text.split()) - 1:
                        decoder_input_data[j,t]= ger_word_index[word]
                    if t>0: 
                        decoder_target_data[j,t-1, ger_word_index[word]] = 1.
            yield ([encoder_input_data, decoder_input_data], decoder_target_data)      
            

In [24]:
from tensorflow.keras.layers import LSTM, Dropout, Dense, Embedding
from tensorflow.keras import Input, Model

In [25]:

encoder_input = Input(shape=(None, ))
encoder_embd = Embedding(ENG_VOCAB_SIZE,100, mask_zero=True)(encoder_input)
encoder_lstm = LSTM(100, return_state=True)
encoder_output,state_h, state_c = encoder_lstm(encoder_embd)
encoder_states= [state_h, state_c]





decoder_input = Input(shape=(None,))
decoder_embd = Embedding(GER_VOCAB_SIZE, 100, mask_zero=True)
decoder_embedding= decoder_embd(decoder_input)
decoder_lstm = LSTM(100, return_state=True,return_sequences=True )
decoder_outputs, _, _= decoder_lstm(decoder_embedding, initial_state=encoder_states)
decoder_dense= Dense(GER_VOCAB_SIZE, activation='softmax')
decoder_outputs = decoder_dense(decoder_outputs)
model1 = Model([encoder_input, decoder_input], decoder_outputs)

In [27]:
from tensorflow.keras.utils import  plot_model
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
plot_model(model1,show_shapes=True)

You must install pydot (`pip install pydot`) and install graphviz (see instructions at https://graphviz.gitlab.io/download/) for plot_model/model_to_dot to work.


In [28]:
model1.compile(optimizer='adam', loss='categorical_crossentropy', metrics='accuracy')

In [29]:
checkpoint = ModelCheckpoint("/content/drive/MyDrive/rnn/machine_translation/Encoder_Decoder/model_checkpoints/", monitor='val_accuracy')

early_stopping = EarlyStopping(monitor='val_accuracy', patience=5)

callbacks_list = [checkpoint, early_stopping]

In [30]:
steps_per_epoch= np.ceil(len(X_train)/BATCH_SIZE)
steps_per_epoch_val = np.ceil(len(X_train)/BATCH_SIZE)

In [31]:
steps_per_epoch

704.0

In [32]:
EPOCHS= 10 #@param {type:'slider',min:10,max:100, step:10 }
EPOCHS

10

In [33]:
history1= model1.fit(data_batch_generator(X_train,y_train), 
                       epochs=EPOCHS,
                       steps_per_epoch= steps_per_epoch,
                     validation_data=data_batch_generator(X_test, y_test, BATCH_SIZE),
                       validation_steps=steps_per_epoch_val,
                     callbacks=callbacks_list)

Epoch 1/10



INFO:tensorflow:Assets written to: /content/drive/MyDrive/rnn/machine_translation/Encoder_Decoder/model_checkpoints\assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/rnn/machine_translation/Encoder_Decoder/model_checkpoints\assets


Epoch 2/10



INFO:tensorflow:Assets written to: /content/drive/MyDrive/rnn/machine_translation/Encoder_Decoder/model_checkpoints\assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/rnn/machine_translation/Encoder_Decoder/model_checkpoints\assets


Epoch 3/10



INFO:tensorflow:Assets written to: /content/drive/MyDrive/rnn/machine_translation/Encoder_Decoder/model_checkpoints\assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/rnn/machine_translation/Encoder_Decoder/model_checkpoints\assets


Epoch 4/10



INFO:tensorflow:Assets written to: /content/drive/MyDrive/rnn/machine_translation/Encoder_Decoder/model_checkpoints\assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/rnn/machine_translation/Encoder_Decoder/model_checkpoints\assets


Epoch 5/10



INFO:tensorflow:Assets written to: /content/drive/MyDrive/rnn/machine_translation/Encoder_Decoder/model_checkpoints\assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/rnn/machine_translation/Encoder_Decoder/model_checkpoints\assets


Epoch 6/10



INFO:tensorflow:Assets written to: /content/drive/MyDrive/rnn/machine_translation/Encoder_Decoder/model_checkpoints\assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/rnn/machine_translation/Encoder_Decoder/model_checkpoints\assets


Epoch 7/10



INFO:tensorflow:Assets written to: /content/drive/MyDrive/rnn/machine_translation/Encoder_Decoder/model_checkpoints\assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/rnn/machine_translation/Encoder_Decoder/model_checkpoints\assets


Epoch 8/10



INFO:tensorflow:Assets written to: /content/drive/MyDrive/rnn/machine_translation/Encoder_Decoder/model_checkpoints\assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/rnn/machine_translation/Encoder_Decoder/model_checkpoints\assets


Epoch 9/10



INFO:tensorflow:Assets written to: /content/drive/MyDrive/rnn/machine_translation/Encoder_Decoder/model_checkpoints\assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/rnn/machine_translation/Encoder_Decoder/model_checkpoints\assets


Epoch 10/10



INFO:tensorflow:Assets written to: /content/drive/MyDrive/rnn/machine_translation/Encoder_Decoder/model_checkpoints\assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/rnn/machine_translation/Encoder_Decoder/model_checkpoints\assets




In [34]:
model1.save_weights(r'G:\My Drive\Spring 2022\ML\German Translator\saved_models/model1_weights.h5')

In [35]:
model1.load_weights(r'G:\My Drive\Spring 2022\ML\German Translator\saved_models/model1_weights.h5')

# Model2
Now we will try to improve its accurcy with changing some units

In [36]:
encoder_input = Input(shape=(None, ))
encoder_embd = Embedding(ENG_VOCAB_SIZE,1000, mask_zero=True)(encoder_input)
encoder_lstm = LSTM(250, return_state=True)
encoder_output,state_h, state_c = encoder_lstm(encoder_embd)
encoder_states= [state_h, state_c]



decoder_input = Input(shape=(None,))
decoder_embd = Embedding(GER_VOCAB_SIZE, 1000, mask_zero=True)
decoder_embedding= decoder_embd(decoder_input)
decoder_lstm = LSTM(250, return_state=True,return_sequences=True )
decoder_outputs, _, _= decoder_lstm(decoder_embedding, initial_state=encoder_states)
decoder_dense= Dense(GER_VOCAB_SIZE, activation='softmax')
decoder_outputs = decoder_dense(decoder_outputs)
model2 = Model([encoder_input, decoder_input], decoder_outputs)

In [37]:
from tensorflow.keras.utils import  plot_model
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
plot_model(model2,show_shapes=True)

You must install pydot (`pip install pydot`) and install graphviz (see instructions at https://graphviz.gitlab.io/download/) for plot_model/model_to_dot to work.


In [38]:
model2.compile(optimizer='adam', loss='categorical_crossentropy', metrics='accuracy')

In [39]:
checkpoint = ModelCheckpoint("/content/drive/MyDrive/rnn/machine_translation/Encoder_Decoder/model_checkpoints/model2/", monitor='val_accuracy')

early_stopping = EarlyStopping(monitor='val_accuracy', patience=5)

callbacks_list = [checkpoint, early_stopping]

In [40]:
steps_per_epoch= np.ceil(len(X_train)/BATCH_SIZE)
steps_per_epoch_val = np.ceil(len(X_train)/BATCH_SIZE)

In [41]:
EPOCHS= 10 #@param {type:'slider',min:10,max:100, step:10 }
EPOCHS

10

In [42]:
history2= model2.fit(data_batch_generator(X_train,y_train), 
                       epochs=EPOCHS,
                       steps_per_epoch= steps_per_epoch,
                     validation_data=data_batch_generator(X_test, y_test, BATCH_SIZE),
                       validation_steps=steps_per_epoch_val,
                     callbacks=callbacks_list)

Epoch 1/10



INFO:tensorflow:Assets written to: /content/drive/MyDrive/rnn/machine_translation/Encoder_Decoder/model_checkpoints/model2\assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/rnn/machine_translation/Encoder_Decoder/model_checkpoints/model2\assets


Epoch 2/10



INFO:tensorflow:Assets written to: /content/drive/MyDrive/rnn/machine_translation/Encoder_Decoder/model_checkpoints/model2\assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/rnn/machine_translation/Encoder_Decoder/model_checkpoints/model2\assets


Epoch 3/10



INFO:tensorflow:Assets written to: /content/drive/MyDrive/rnn/machine_translation/Encoder_Decoder/model_checkpoints/model2\assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/rnn/machine_translation/Encoder_Decoder/model_checkpoints/model2\assets


Epoch 4/10



INFO:tensorflow:Assets written to: /content/drive/MyDrive/rnn/machine_translation/Encoder_Decoder/model_checkpoints/model2\assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/rnn/machine_translation/Encoder_Decoder/model_checkpoints/model2\assets


Epoch 5/10



INFO:tensorflow:Assets written to: /content/drive/MyDrive/rnn/machine_translation/Encoder_Decoder/model_checkpoints/model2\assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/rnn/machine_translation/Encoder_Decoder/model_checkpoints/model2\assets


Epoch 6/10



INFO:tensorflow:Assets written to: /content/drive/MyDrive/rnn/machine_translation/Encoder_Decoder/model_checkpoints/model2\assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/rnn/machine_translation/Encoder_Decoder/model_checkpoints/model2\assets


Epoch 7/10



INFO:tensorflow:Assets written to: /content/drive/MyDrive/rnn/machine_translation/Encoder_Decoder/model_checkpoints/model2\assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/rnn/machine_translation/Encoder_Decoder/model_checkpoints/model2\assets


Epoch 8/10



INFO:tensorflow:Assets written to: /content/drive/MyDrive/rnn/machine_translation/Encoder_Decoder/model_checkpoints/model2\assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/rnn/machine_translation/Encoder_Decoder/model_checkpoints/model2\assets


Epoch 9/10



INFO:tensorflow:Assets written to: /content/drive/MyDrive/rnn/machine_translation/Encoder_Decoder/model_checkpoints/model2\assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/rnn/machine_translation/Encoder_Decoder/model_checkpoints/model2\assets


Epoch 10/10



INFO:tensorflow:Assets written to: /content/drive/MyDrive/rnn/machine_translation/Encoder_Decoder/model_checkpoints/model2\assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/rnn/machine_translation/Encoder_Decoder/model_checkpoints/model2\assets




In [43]:
model2.save_weights(r'G:\My Drive\Spring 2022\ML\German Translator\saved_models/2_model_weights.h5')

In [44]:
model2.load_weights(r'G:\My Drive\Spring 2022\ML\German Translator\saved_models/2_model_weights.h5')

In [45]:
encoder_model = Model(encoder_input, encoder_states)

In [46]:
decoder_state_input_h = Input(shape=(250,))
decoder_state_input_c= Input(shape=(250,))
decoder_states_input= [decoder_state_input_h, decoder_state_input_c]
dec_embd2 = decoder_embd(decoder_input)



In [None]:
decoder_output2,state_h2, state_c2 = decoder_lstm(dec_embd2, initial_state=decoder_states_input)
deccoder_states2= [state_h2, state_c2]
decoder_output2 = decoder_dense(decoder_output2)
decoder_model = Model(
                      [decoder_input]+decoder_states_input,
                      [decoder_output2]+ deccoder_states2)

In [47]:
def get_predicted_sentence(input_seq):
    states_value = encoder_model.predict(input_seq)
    target_seq = np.zeros((1,1))
    target_seq[0, 0] = ger_word_index['sos']
    stop_condition = False
    decoded_sentence = ''
    
    while not stop_condition:
        output_tokens, h, c = decoder_model.predict([target_seq] + states_value)
        sampled_token_index = np.argmax(output_tokens[0, -1, :])
        sampled_char = ger_index_word[sampled_token_index]
        decoded_sentence += ' '+sampled_char
        if (sampled_char == 'eos' or len(decoded_sentence) > 50):
            stop_condition = True
        target_seq = np.zeros((1,1))
        target_seq[0, 0] = sampled_token_index
        states_value = [h, c]   
    return decoded_sentence

In [48]:
test_gen= data_batch_generator(X_test,y_test,batch_size=1)

In [49]:
Actual_test_sent = X_test.to_list()
Actual_test_trans= y_test.to_list()

In [50]:
test_inputs=[]
test_outputs=[]

In [51]:
from tqdm import tqdm
for (input, output),_ in tqdm(iter(test_gen)):
    test_inputs.append(input)
    test_outputs.append(output)

45382666it [1:41:01, 7487.10it/s]


KeyboardInterrupt: 

In [52]:
test_inputs[0]

array([[ 580., 7349., 9053., 2108.,    0.,    0.,    0.]], dtype=float32)

In [None]:
#translation 1

In [82]:
Actual_test_sent[786]

'Tom should be angry.'

In [83]:
Actual_test_trans[786] [4:-4]

'Tom müsste böse sein.'

In [84]:
get_predicted_sentence(test_inputs[786])[:-4]

' Tom sollte sich unsicher.'

In [None]:
#translation 2

In [85]:
Actual_test_sent[420]

"Isn't Tom handsome?"

In [86]:
Actual_test_trans[420] [4:-4]

'Ist Tom nicht schön?'

In [87]:
get_predicted_sentence(test_inputs[420])[:-4]

' Ist Tom nicht gutaussehend?'