# Training Encoder Decoder Model

In [1]:
from google.colab import drive
drive.mount('/content/drive/')

Mounted at /content/drive/


In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import os

In [3]:
df = pd.read_csv('/content/drive/MyDrive/6156 Project/eng_jpn.txt',sep='\t',names=['Japanese','English']) 

In [4]:
df.head()

Unnamed: 0,Japanese,English
0,行け。,Go.
1,行きなさい。,Go.
2,こんにちは。,Hi.
3,もしもし。,Hi.
4,やっほー。,Hi.


In [5]:
df.Japanese = df.Japanese.apply(lambda x: 'sos '+ x +' eos')

In [6]:
jap_vocab= set()
for sent in df.Japanese:
    for word in sent.split():
        if word not in jap_vocab:
            jap_vocab.add(word)

In [7]:
eng_vocab= set()
for sent in df.English:
    for word in sent.split():
        if word not in eng_vocab:
            eng_vocab.add(word)

In [8]:
len(jap_vocab), len(eng_vocab), 

(52194, 18524)

In [9]:
JAP_VOCAB_SIZE= len(jap_vocab)+1
ENG_VOCAB_SIZE= len(eng_vocab)+1
print(JAP_VOCAB_SIZE)
print(ENG_VOCAB_SIZE)


52195
18525


In [10]:
jap_words = sorted(list(jap_vocab))
eng_words = sorted(list(eng_vocab))

In [11]:
# create japanese and english dictionaries
jap_word_index = dict((w, i) for i, w in enumerate(jap_words))
eng_word_index = dict((w, i) for i, w in enumerate(eng_words))

In [12]:
jap_word_index

{'"password"': 0,
 '100ドルまで使っていいですよ。': 1,
 '10時の約束なのに、未だに何の連絡もないのはおかしいな。どこかで事故にでも遭ったんじゃないだろうか。': 2,
 '10時まで仕事するつもりなの？': 3,
 '10月20日の昼の２時半はどうかしら？': 4,
 '10月20日午後２時３０分はどう？': 5,
 '10月からトムを見かけてないよ。': 6,
 '10月にまた会おう。': 7,
 '10月にトムと会った。': 8,
 '10月に僕はここにいなかった。': 9,
 '10月に君らにまた会うのを待ちきれない。': 10,
 '10月の最初の週に、ボストンに行くよ。': 11,
 '10月はボストンにいた。': 12,
 '10月以来、トムとは会ってないよ。': 13,
 '12時45分とかどう？': 14,
 '12時から1時まで、1時間の昼休みがある。': 15,
 '12時間寝たのにまだ眠い。': 16,
 '12月24日はクリスマスイブです。': 17,
 '12歳です。': 18,
 '16歳以下の子供を車に乗せて喫煙することは法律違反です。': 19,
 '1853年、ペリーはついに東京湾へ入港した。': 20,
 '1912年、タイタニック号は処女航海中に沈没した。': 21,
 '1941年に戦争が始まった。': 22,
 '1959年9月26日に、日本観測史上最大の台風が名古屋を襲った。': 23,
 '1962年にアルジェリアはフランスから独立した。': 24,
 '19時までに投函すれば、明日には届きますよ。': 25,
 '1つ分からないことがあります。': 26,
 '1人でいるの？': 27,
 '1分は60秒です。': 28,
 '1年間の留学でトムのフランス語力は飛躍的に向上した。': 29,
 '1日5語ずつ英単語を覚えれば、1年で1825語、100年で182620語覚えることができます。': 30,
 '1日か2日かかります。': 31,
 '1日や2日で東京を見て回るなんて出来ない。': 32,
 '1時間で終わらせます。': 33,
 '1時間は60分です。': 34,
 '1時間ほどお待ちいただけますか？': 35,
 '1月に試験を受ける予定です。': 36,
 '1番目が一番

In [13]:
jap_index_word = dict((i, w) for i, w in enumerate(jap_words))
eng_index_word = dict((i,w) for i, w in enumerate(eng_words))

In [14]:
eng_index_word

{0: '"4',
 1: '"About',
 2: '"Actually,',
 3: '"All',
 4: '"And',
 5: '"Anytime',
 6: '"Anywhere',
 7: '"Are',
 8: '"At',
 9: '"Because',
 10: '"Beispiel?"',
 11: '"By',
 12: '"Certainly',
 13: '"Certainly."',
 14: '"Could',
 15: '"Do',
 16: '"Don\'t',
 17: '"Everything."',
 18: '"For',
 19: '"Forgotten',
 20: '"French."',
 21: '"Friendship".',
 22: '"Get',
 23: '"Go',
 24: '"He',
 25: '"Here',
 26: '"Hold',
 27: '"How',
 28: '"I',
 29: '"I\'d',
 30: '"I\'ll',
 31: '"I\'m',
 32: '"I\'ve',
 33: '"If',
 34: '"In',
 35: '"Is',
 36: '"It',
 37: '"It\'s',
 38: '"It\'s"',
 39: '"Itch."',
 40: '"Jailhouse',
 41: '"Just',
 42: '"Keep',
 43: '"Let\'s',
 44: '"Me',
 45: '"Me,',
 46: '"Neither',
 47: '"No',
 48: '"No,',
 49: '"Not',
 50: '"Nothing',
 51: '"Nothing."',
 52: '"OK."',
 53: '"Of',
 54: '"Oh,',
 55: '"On',
 56: '"Only',
 57: '"Pretty',
 58: '"Rain,',
 59: '"Really?"',
 60: '"She',
 61: '"She\'s',
 62: '"So',
 63: '"Sorry,',
 64: '"Stay',
 65: '"Stop',
 66: '"Sure',
 67: '"Sure!"',
 68

In [15]:
X_train, X_test, y_train, y_test= train_test_split(df.Japanese, df.English, test_size=0.1, random_state=0)

In [16]:
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((56238,), (6249,), (56238,), (6249,))

In [17]:
jap_len_list=df.Japanese.apply(lambda x: len(x.split())).to_list()

eng_len_list=df.English.apply(lambda x: len(x.split())).to_list()


In [18]:
np.max(eng_len_list), np.max(jap_len_list)

(44, 10)

In [19]:
BATCH_SIZE= 64
max_jap_len =  np.max(jap_len_list)
max_eng_len =  np.max(eng_len_list)

In [20]:
max_jap_len, max_eng_len

(10, 44)

In [21]:
def data_batch_generator(x, y, batch_size=BATCH_SIZE):
    while True:
        for i in range(0, len(x), batch_size):
            encoder_input_data = np.zeros((batch_size,max_jap_len ), dtype='float32')
            decoder_input_data = np.zeros((batch_size, max_eng_len), dtype='float32')
            decoder_target_data = np.zeros((batch_size, max_eng_len, ENG_VOCAB_SIZE), dtype='float32' )
            for j, (jap_text, eng_text) in enumerate(zip(x[i:i+batch_size], y[i:i+batch_size])):
                for t , word in enumerate(jap_text.split()):
                    encoder_input_data[j,t] = jap_word_index[word]
                for t, word in enumerate(eng_text.split()):
                    if t < len(eng_text.split()) - 1:
                        decoder_input_data[j,t]= eng_word_index[word]
                    if t>0:
                    
                        decoder_target_data[j,t-1, eng_word_index[word]] = 1.
            yield ([encoder_input_data, decoder_input_data], decoder_target_data)      
            

In [22]:
from tensorflow.keras.layers import LSTM, Dropout, Dense, Embedding
from tensorflow.keras import Input, Model

In [23]:
# Eoncoder
encoder_input = Input(shape=(None, ))
encoder_embd = Embedding(JAP_VOCAB_SIZE,100, mask_zero=True)(encoder_input)
encoder_lstm = LSTM(100, return_state=True)
encoder_output,state_h, state_c = encoder_lstm(encoder_embd)

encoder_states= [state_h, state_c]

# Decoder
decoder_input = Input(shape=(None,))
decoder_embd = Embedding(ENG_VOCAB_SIZE, 100, mask_zero=True)
decoder_embedding= decoder_embd(decoder_input)
decoder_lstm = LSTM(100, return_state=True,return_sequences=True )
decoder_outputs, _, _= decoder_lstm(decoder_embedding, initial_state=encoder_states)
decoder_dense= Dense(ENG_VOCAB_SIZE, activation='softmax')
decoder_outputs = decoder_dense(decoder_outputs)

model1 = Model([encoder_input, decoder_input], decoder_outputs)

In [25]:
model1.compile(optimizer='adam', loss='categorical_crossentropy', metrics='accuracy')

In [26]:
checkpoint = ModelCheckpoint("/content/drive/MyDrive/Bhomik - Copy/model_checkpoints/", monitor='val_accuracy')

early_stopping = EarlyStopping(monitor='val_accuracy', patience=5)

callbacks_list = [checkpoint, early_stopping]

In [27]:
steps_per_epoch= np.ceil(len(X_train)/BATCH_SIZE)
steps_per_epoch_val = np.ceil(len(X_train)/BATCH_SIZE)

In [28]:
steps_per_epoch

879.0

In [29]:
EPOCHS= 30 #@param {type:'slider',min:10,max:100, step:10 }
EPOCHS

30

In [None]:
history1= model1.fit(data_batch_generator(X_train,y_train), 
                       epochs=EPOCHS,
                       steps_per_epoch= steps_per_epoch,
                     validation_data=data_batch_generator(X_test, y_test, BATCH_SIZE),
                       validation_steps=steps_per_epoch_val,
                     callbacks=callbacks_list)

Epoch 1/30



INFO:tensorflow:Assets written to: /content/drive/MyDrive/Bhomik - Copy/model_checkpoints/assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/Bhomik - Copy/model_checkpoints/assets


Epoch 2/30



INFO:tensorflow:Assets written to: /content/drive/MyDrive/Bhomik - Copy/model_checkpoints/assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/Bhomik - Copy/model_checkpoints/assets


Epoch 3/30



INFO:tensorflow:Assets written to: /content/drive/MyDrive/Bhomik - Copy/model_checkpoints/assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/Bhomik - Copy/model_checkpoints/assets


Epoch 4/30



INFO:tensorflow:Assets written to: /content/drive/MyDrive/Bhomik - Copy/model_checkpoints/assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/Bhomik - Copy/model_checkpoints/assets


Epoch 5/30



INFO:tensorflow:Assets written to: /content/drive/MyDrive/Bhomik - Copy/model_checkpoints/assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/Bhomik - Copy/model_checkpoints/assets


Epoch 6/30



INFO:tensorflow:Assets written to: /content/drive/MyDrive/Bhomik - Copy/model_checkpoints/assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/Bhomik - Copy/model_checkpoints/assets


Epoch 7/30



INFO:tensorflow:Assets written to: /content/drive/MyDrive/Bhomik - Copy/model_checkpoints/assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/Bhomik - Copy/model_checkpoints/assets


Epoch 8/30



INFO:tensorflow:Assets written to: /content/drive/MyDrive/Bhomik - Copy/model_checkpoints/assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/Bhomik - Copy/model_checkpoints/assets


Epoch 9/30



INFO:tensorflow:Assets written to: /content/drive/MyDrive/Bhomik - Copy/model_checkpoints/assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/Bhomik - Copy/model_checkpoints/assets


Epoch 10/30



INFO:tensorflow:Assets written to: /content/drive/MyDrive/Bhomik - Copy/model_checkpoints/assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/Bhomik - Copy/model_checkpoints/assets


Epoch 11/30



INFO:tensorflow:Assets written to: /content/drive/MyDrive/Bhomik - Copy/model_checkpoints/assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/Bhomik - Copy/model_checkpoints/assets


Epoch 12/30



INFO:tensorflow:Assets written to: /content/drive/MyDrive/Bhomik - Copy/model_checkpoints/assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/Bhomik - Copy/model_checkpoints/assets


Epoch 13/30



INFO:tensorflow:Assets written to: /content/drive/MyDrive/Bhomik - Copy/model_checkpoints/assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/Bhomik - Copy/model_checkpoints/assets


Epoch 14/30



INFO:tensorflow:Assets written to: /content/drive/MyDrive/Bhomik - Copy/model_checkpoints/assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/Bhomik - Copy/model_checkpoints/assets


Epoch 15/30



INFO:tensorflow:Assets written to: /content/drive/MyDrive/Bhomik - Copy/model_checkpoints/assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/Bhomik - Copy/model_checkpoints/assets


Epoch 16/30



INFO:tensorflow:Assets written to: /content/drive/MyDrive/Bhomik - Copy/model_checkpoints/assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/Bhomik - Copy/model_checkpoints/assets


Epoch 17/30



INFO:tensorflow:Assets written to: /content/drive/MyDrive/Bhomik - Copy/model_checkpoints/assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/Bhomik - Copy/model_checkpoints/assets


Epoch 18/30



INFO:tensorflow:Assets written to: /content/drive/MyDrive/Bhomik - Copy/model_checkpoints/assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/Bhomik - Copy/model_checkpoints/assets


Epoch 19/30



INFO:tensorflow:Assets written to: /content/drive/MyDrive/Bhomik - Copy/model_checkpoints/assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/Bhomik - Copy/model_checkpoints/assets


Epoch 20/30



INFO:tensorflow:Assets written to: /content/drive/MyDrive/Bhomik - Copy/model_checkpoints/assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/Bhomik - Copy/model_checkpoints/assets


Epoch 21/30



INFO:tensorflow:Assets written to: /content/drive/MyDrive/Bhomik - Copy/model_checkpoints/assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/Bhomik - Copy/model_checkpoints/assets


Epoch 22/30



INFO:tensorflow:Assets written to: /content/drive/MyDrive/Bhomik - Copy/model_checkpoints/assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/Bhomik - Copy/model_checkpoints/assets


Epoch 23/30



INFO:tensorflow:Assets written to: /content/drive/MyDrive/Bhomik - Copy/model_checkpoints/assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/Bhomik - Copy/model_checkpoints/assets


Epoch 24/30



INFO:tensorflow:Assets written to: /content/drive/MyDrive/Bhomik - Copy/model_checkpoints/assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/Bhomik - Copy/model_checkpoints/assets


Epoch 25/30



INFO:tensorflow:Assets written to: /content/drive/MyDrive/Bhomik - Copy/model_checkpoints/assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/Bhomik - Copy/model_checkpoints/assets


Epoch 26/30



INFO:tensorflow:Assets written to: /content/drive/MyDrive/Bhomik - Copy/model_checkpoints/assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/Bhomik - Copy/model_checkpoints/assets


Epoch 27/30



INFO:tensorflow:Assets written to: /content/drive/MyDrive/Bhomik - Copy/model_checkpoints/assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/Bhomik - Copy/model_checkpoints/assets


Epoch 28/30



INFO:tensorflow:Assets written to: /content/drive/MyDrive/Bhomik - Copy/model_checkpoints/assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/Bhomik - Copy/model_checkpoints/assets


Epoch 29/30



INFO:tensorflow:Assets written to: /content/drive/MyDrive/Bhomik - Copy/model_checkpoints/assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/Bhomik - Copy/model_checkpoints/assets


Epoch 30/30



INFO:tensorflow:Assets written to: /content/drive/MyDrive/Bhomik - Copy/model_checkpoints/assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/Bhomik - Copy/model_checkpoints/assets




In [None]:
model1.save_weights('/content/drive/MyDrive/6156 Project/model1_weights.h5')

In [30]:
model1.load_weights('/content/drive/MyDrive/6156 Project/model1_weights.h5')

In [31]:
# Eoncoder
encoder_input = Input(shape=(None, ))
encoder_embd = Embedding(JAP_VOCAB_SIZE,1000, mask_zero=True)(encoder_input)
encoder_lstm = LSTM(250, return_state=True)
encoder_output,state_h, state_c = encoder_lstm(encoder_embd)

encoder_states= [state_h, state_c]

# Decoder
decoder_input = Input(shape=(None,))
decoder_embd = Embedding(ENG_VOCAB_SIZE, 1000, mask_zero=True)
decoder_embedding= decoder_embd(decoder_input)
decoder_lstm = LSTM(250, return_state=True,return_sequences=True )
decoder_outputs, _, _= decoder_lstm(decoder_embedding, initial_state=encoder_states)
decoder_dense= Dense(ENG_VOCAB_SIZE, activation='softmax')
decoder_outputs = decoder_dense(decoder_outputs)

model2 = Model([encoder_input, decoder_input], decoder_outputs)

In [33]:
model2.compile(optimizer='adam', loss='categorical_crossentropy', metrics='accuracy')

In [34]:
checkpoint = ModelCheckpoint("/content/drive/MyDrive/6156 Project/model_checkpoints/model2/", monitor='val_accuracy')

early_stopping = EarlyStopping(monitor='val_accuracy', patience=5)

callbacks_list = [checkpoint, early_stopping]

In [35]:
steps_per_epoch= np.ceil(len(X_train)/BATCH_SIZE)
steps_per_epoch_val = np.ceil(len(X_train)/BATCH_SIZE)

In [36]:
EPOCHS= 30 #@param {type:'slider',min:10,max:100, step:10 }
EPOCHS

30

In [None]:
history2= model2.fit(data_batch_generator(X_train,y_train), 
                       epochs=EPOCHS,
                       steps_per_epoch= steps_per_epoch,
                     validation_data=data_batch_generator(X_test, y_test, BATCH_SIZE),
                       validation_steps=steps_per_epoch_val,
                     callbacks=callbacks_list)

Epoch 1/30



INFO:tensorflow:Assets written to: /content/drive/MyDrive/Bhomik - Copy/model_checkpoints/model2/assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/Bhomik - Copy/model_checkpoints/model2/assets


Epoch 2/30



INFO:tensorflow:Assets written to: /content/drive/MyDrive/Bhomik - Copy/model_checkpoints/model2/assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/Bhomik - Copy/model_checkpoints/model2/assets


Epoch 3/30



INFO:tensorflow:Assets written to: /content/drive/MyDrive/Bhomik - Copy/model_checkpoints/model2/assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/Bhomik - Copy/model_checkpoints/model2/assets


Epoch 4/30



INFO:tensorflow:Assets written to: /content/drive/MyDrive/Bhomik - Copy/model_checkpoints/model2/assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/Bhomik - Copy/model_checkpoints/model2/assets


Epoch 5/30



INFO:tensorflow:Assets written to: /content/drive/MyDrive/Bhomik - Copy/model_checkpoints/model2/assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/Bhomik - Copy/model_checkpoints/model2/assets


Epoch 6/30



INFO:tensorflow:Assets written to: /content/drive/MyDrive/Bhomik - Copy/model_checkpoints/model2/assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/Bhomik - Copy/model_checkpoints/model2/assets


Epoch 7/30



INFO:tensorflow:Assets written to: /content/drive/MyDrive/Bhomik - Copy/model_checkpoints/model2/assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/Bhomik - Copy/model_checkpoints/model2/assets


Epoch 8/30



INFO:tensorflow:Assets written to: /content/drive/MyDrive/Bhomik - Copy/model_checkpoints/model2/assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/Bhomik - Copy/model_checkpoints/model2/assets


Epoch 9/30



INFO:tensorflow:Assets written to: /content/drive/MyDrive/Bhomik - Copy/model_checkpoints/model2/assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/Bhomik - Copy/model_checkpoints/model2/assets


Epoch 10/30



INFO:tensorflow:Assets written to: /content/drive/MyDrive/Bhomik - Copy/model_checkpoints/model2/assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/Bhomik - Copy/model_checkpoints/model2/assets


Epoch 11/30



INFO:tensorflow:Assets written to: /content/drive/MyDrive/Bhomik - Copy/model_checkpoints/model2/assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/Bhomik - Copy/model_checkpoints/model2/assets


Epoch 12/30



INFO:tensorflow:Assets written to: /content/drive/MyDrive/Bhomik - Copy/model_checkpoints/model2/assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/Bhomik - Copy/model_checkpoints/model2/assets


Epoch 13/30



INFO:tensorflow:Assets written to: /content/drive/MyDrive/Bhomik - Copy/model_checkpoints/model2/assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/Bhomik - Copy/model_checkpoints/model2/assets


Epoch 14/30



INFO:tensorflow:Assets written to: /content/drive/MyDrive/Bhomik - Copy/model_checkpoints/model2/assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/Bhomik - Copy/model_checkpoints/model2/assets


Epoch 15/30



INFO:tensorflow:Assets written to: /content/drive/MyDrive/Bhomik - Copy/model_checkpoints/model2/assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/Bhomik - Copy/model_checkpoints/model2/assets


Epoch 16/30



INFO:tensorflow:Assets written to: /content/drive/MyDrive/Bhomik - Copy/model_checkpoints/model2/assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/Bhomik - Copy/model_checkpoints/model2/assets


Epoch 17/30



INFO:tensorflow:Assets written to: /content/drive/MyDrive/Bhomik - Copy/model_checkpoints/model2/assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/Bhomik - Copy/model_checkpoints/model2/assets


Epoch 18/30



INFO:tensorflow:Assets written to: /content/drive/MyDrive/Bhomik - Copy/model_checkpoints/model2/assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/Bhomik - Copy/model_checkpoints/model2/assets


Epoch 19/30



INFO:tensorflow:Assets written to: /content/drive/MyDrive/Bhomik - Copy/model_checkpoints/model2/assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/Bhomik - Copy/model_checkpoints/model2/assets


Epoch 20/30



INFO:tensorflow:Assets written to: /content/drive/MyDrive/Bhomik - Copy/model_checkpoints/model2/assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/Bhomik - Copy/model_checkpoints/model2/assets


Epoch 21/30



INFO:tensorflow:Assets written to: /content/drive/MyDrive/Bhomik - Copy/model_checkpoints/model2/assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/Bhomik - Copy/model_checkpoints/model2/assets


Epoch 22/30



INFO:tensorflow:Assets written to: /content/drive/MyDrive/Bhomik - Copy/model_checkpoints/model2/assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/Bhomik - Copy/model_checkpoints/model2/assets


Epoch 23/30



INFO:tensorflow:Assets written to: /content/drive/MyDrive/Bhomik - Copy/model_checkpoints/model2/assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/Bhomik - Copy/model_checkpoints/model2/assets


Epoch 24/30



INFO:tensorflow:Assets written to: /content/drive/MyDrive/Bhomik - Copy/model_checkpoints/model2/assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/Bhomik - Copy/model_checkpoints/model2/assets




In [None]:
model2.save_weights('/content/drive/MyDrive/6156 Project/2_model_weights.h5')

In [37]:
model2.load_weights('/content/drive/MyDrive/6156 Project/2_model_weights.h5')

In [38]:
encoder_model = Model(encoder_input, encoder_states)

In [39]:
decoder_state_input_h = Input(shape=(250,))
decoder_state_input_c= Input(shape=(250,))
decoder_states_input= [decoder_state_input_h, decoder_state_input_c]

dec_embd2 = decoder_embd(decoder_input)

decoder_output2,state_h2, state_c2 = decoder_lstm(dec_embd2, initial_state=decoder_states_input)
deccoder_states2= [state_h2, state_c2]

decoder_output2 = decoder_dense(decoder_output2)

decoder_model = Model(
                      [decoder_input]+decoder_states_input,
                      [decoder_output2]+ deccoder_states2)

In [40]:
def get_predicted_sentence(input_seq):
    states_value = encoder_model.predict(input_seq)
    
    target_seq = np.zeros((1,1))
    
    target_seq[0, 0] = jap_word_index['sos']
    
   
    stop_condition = False
    decoded_sentence = ''
    
    while not stop_condition:
        output_tokens, h, c = decoder_model.predict([target_seq] + states_value)
        sampled_token_index = np.argmax(output_tokens[0, -1, :])
        sampled_char = eng_index_word[sampled_token_index]
        decoded_sentence += ' '+sampled_char
        
        if (sampled_char == 'eos' or len(decoded_sentence) > 50):
            stop_condition = True
        
        target_seq = np.zeros((1,1))
        target_seq[0, 0] = sampled_token_index
        
        states_value = [h, c]
    
    return decoded_sentence

In [41]:
test_gen= data_batch_generator(X_test,y_test,batch_size=1)


In [42]:
Actual_test_sent = X_test.to_list()
Actual_test_trans= y_test.to_list()

In [43]:
test_inputs=[]
test_outputs=[]

In [44]:
from tqdm.notebook import tqdm_notebook
import time
for (input, output),_ in tqdm_notebook(iter(test_gen)):
    test_inputs.append(input)
    test_outputs.append(output)
    time.sleep(0.5) 

0it [00:00, ?it/s]

KeyboardInterrupt: ignored

In [45]:
test_inputs[0]

array([[  117., 38951.,   109.,     0.,     0.,     0.,     0.,     0.,
            0.,     0.]], dtype=float32)

In [46]:
test_inputs[1]

array([[ 117., 2032.,  109.,    0.,    0.,    0.,    0.,    0.,    0.,
           0.]], dtype=float32)

In [47]:
print("Japanese Sentece:", Actual_test_sent[10])

Japanese Sentece: sos トムが家まで後をつけてメアリーを殺したと彼らは思っている。 eos


In [48]:
print("Actual English Sentence:", Actual_test_trans[10])


Actual English Sentence: They think Tom followed Mary home and killed her.


In [49]:
print("Predicted English Translation:", get_predicted_sentence(test_inputs[10]))


Predicted English Translation:  think that Tom followed Mary home and killed her. I
