# Baseline Model: Word Level

## Loading Libraries

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import re
import datetime
import numpy as np
from tqdm import tqdm
from sklearn.model_selection import train_test_split
import nltk.translate.bleu_score as bleu
import tensorflow as tf
from tensorflow.keras.layers import Input, Softmax, RNN, Dense, Embedding, LSTM, Flatten, Activation, GRU, Bidirectional, Concatenate
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.callbacks import TensorBoard, ModelCheckpoint, EarlyStopping
import nltk.translate.bleu_score as bleu
import matplotlib.ticker as ticker
import warnings
warnings.filterwarnings("ignore")

In [None]:
# Mount Google Drive at /content/drive (Colab-only) so the paths used below resolve.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## Loading Dataset

In [None]:
# data_path: Drive folder the input CSV is read from.
# model_path: Drive folder the trained weights are saved under.
data_path = '/content/drive/MyDrive/CS2/2.Datasets/'
model_path = '/content/drive/MyDrive/CS2/3.Models/3_2_Baseline-Word/'

In [None]:
# Read the sentence pairs (input_text / output_text columns) and preview them.
data = pd.read_csv(data_path + 'final_data.csv')
print(data.shape)
data.head()

(101717, 2)


Unnamed: 0,input_text,output_text
0,"Ofcouse , I love cheap fashion , fast fashion ...","Of course , I love cheap , fast fashion like f..."
1,If he want to listen to music that I do n't li...,If he wants to listen to music that I do n't l...
2,This happened because of the cultural differen...,This happened because of the cultural differen...
3,I 'm gon na earn much money to study abroad .,I 'm gon na earn enough money to study abroad .
4,It is not difficult for me but answer phones a...,It is not difficult for me but answering the p...


## Preprocess Data

In [None]:
def clean(text):
    """Remove bracketed spans, special symbols and digits from a sentence.

    Steps, in order:
      1. delete every ``<...>``, ``(...)``, ``[...]`` and ``{...}`` span,
      2. delete any leftover special characters (including unmatched brackets
         and backslashes),
      3. delete all ASCII digits.

    Whitespace is left untouched, so removed spans may leave double spaces.

    Args:
        text: the sentence to clean (a ``str``).

    Returns:
        The cleaned string.
    """
    # Non-greedy quantifiers: each bracketed span is removed on its own.  A
    # greedy `.*` would run from the first opener to the LAST closer and wipe
    # out the ordinary text sitting between two separate spans.
    for span_pattern in (r'<.*?>', r'\(.*?\)', r'\[.*?\]', r'\{.*?\}'):
        text = re.sub(span_pattern, '', text)

    # Leftover symbols, unmatched brackets and backslashes.
    text = re.sub(r"[-+@#^/|*(){}$~`<>=_\\\[\]]", "", text)
    text = re.sub(r"[0-9]", "", text)
    return text

# Run the cleaner over both sides of every sentence pair.
for column in ('input_text', 'output_text'):
    data[column] = data[column].apply(clean)

In [None]:
def preprocessing(data, max_len=25):
    """Filter out long sentence pairs and build the decoder input/target columns.

    A pair is kept only when BOTH its input and its output have fewer than
    ``max_len`` whitespace-separated tokens.  For the kept rows two columns are
    derived from ``output_text``:

      * ``output_text_in``  -- ``'<start> ' + output_text`` (decoder input)
      * ``output_text_out`` -- ``output_text + ' <end>'``   (decoder target)

    and ``output_text`` itself is dropped.

    The frame passed in is left untouched: the length columns are computed as
    standalone Series and the derived columns are written to an explicit copy,
    so no helper columns leak into the caller's frame and no assignment is made
    on a filtered slice.

    Args:
        data: DataFrame with string columns ``input_text`` and ``output_text``.
        max_len: exclusive upper bound on the token count of either sentence.

    Returns:
        A new DataFrame (original index preserved for the kept rows).
    """
    input_lengths = data['input_text'].str.split().str.len()
    output_lengths = data['output_text'].str.split().str.len()

    keep = (input_lengths < max_len) & (output_lengths < max_len)
    kept = data.loc[keep].copy()

    target = kept['output_text'].astype(str)
    kept['output_text_in'] = '<start> ' + target
    kept['output_text_out'] = target + ' <end>'

    return kept.drop(columns=['output_text'])

In [None]:
data = preprocessing(data)

# Append ' <end>' to both decoder columns of the first row -- presumably so the
# '<end>' token also occurs in `output_text_in`, the column the output
# tokenizer is fitted on.  The write goes through a single positional `.iloc`
# call: the chained form `data.iloc[0][col] = ...` may assign to a temporary
# copy and silently leave `data` unchanged.
for column in ('output_text_in', 'output_text_out'):
    column_pos = data.columns.get_loc(column)
    data.iloc[0, column_pos] = str(data.iloc[0, column_pos]) + ' <end>'

print(data.shape)
data.head()

(93503, 3)


Unnamed: 0,input_text,output_text_in,output_text_out
0,"Ofcouse , I love cheap fashion , fast fashion ...","<start> Of course , I love cheap , fast fashio...","Of course , I love cheap , fast fashion like f..."
1,If he want to listen to music that I do n't li...,<start> If he wants to listen to music that I ...,If he wants to listen to music that I do n't l...
2,This happened because of the cultural differen...,<start> This happened because of the cultural ...,This happened because of the cultural differen...
3,I 'm gon na earn much money to study abroad .,<start> I 'm gon na earn enough money to study...,I 'm gon na earn enough money to study abroad ...
4,It is not difficult for me but answer phones a...,<start> It is not difficult for me but answeri...,It is not difficult for me but answering the p...


## Train Test Split

In [None]:
# Hold out 10% of the pairs; the fixed random_state keeps the split reproducible.
train, test = train_test_split(data, test_size=0.1, random_state=42)
print('Shape of Train Data:', train.shape)
print('Shape of Test Data:', test.shape)

Shape of Train Data: (84152, 3)
Shape of Test Data: (9351, 3)


## Tokenization

In [None]:
# Word-level, case-preserving tokenizers with no character filtering.
# Both are fitted on the training split only: the input tokenizer on the raw
# sentences, the output tokenizer on the '<start>'-prefixed decoder inputs.
tokenizer_i = Tokenizer(lower=False, char_level=False, filters="")
tokenizer_i.fit_on_texts(train['input_text'].values)

tokenizer_o = Tokenizer(lower=False, char_level=False, filters="")
tokenizer_o.fit_on_texts(train['output_text_in'].values)

# Vocabulary size = number of distinct tokens (ids start at 1).
vocab_size_input = len(tokenizer_i.word_index)
print('Input Vocab Size:', vocab_size_input)

vocab_size_output = len(tokenizer_o.word_index)
print('Output Vocab Size:', vocab_size_output)

Input Vocab Size: 35510
Output Vocab Size: 29350


In [None]:
# token -> integer id maps of the two fitted tokenizers (used to build the embedding matrices).
input_vocab = tokenizer_i.word_index
output_vocab = tokenizer_o.word_index

## Data Pipeline

In [None]:
class Dataset:
    """Indexable view over a frame of sentence pairs that yields padded id sequences.

    ``dataset[i]`` returns ``(encoder_seq, decoder_inp_seq, decoder_out_seq)``,
    each an int32 array of shape ``(1, max_len)`` that is zero-padded at the end.

    Args:
        data: DataFrame with ``input_text``, ``output_text_in`` and
            ``output_text_out`` columns.
        tokenizer_i: fitted tokenizer for the encoder side.
        tokenizer_o: fitted tokenizer for the decoder side.
        max_len_enc: padded length of encoder sequences.
        max_len_dec: padded length of decoder input/target sequences.
    """

    def __init__(self, data, tokenizer_i, tokenizer_o, max_len_enc, max_len_dec):
        self.encoder_inps = data['input_text'].values
        self.decoder_inps = data['output_text_in'].values
        self.decoder_outs = data['output_text_out'].values
        self.tokenizer_o = tokenizer_o
        self.tokenizer_i = tokenizer_i
        self.max_len_enc = max_len_enc
        self.max_len_dec = max_len_dec

    def _encode(self, tokenizer, text, max_len):
        """Tokenise one sentence and post-pad it to ``max_len`` ids."""
        sequence = tokenizer.texts_to_sequences([text])
        return pad_sequences(sequence, maxlen=max_len, dtype='int32', padding='post')

    def __getitem__(self, i):
        # Results are kept in locals: writing per-sample temporaries onto
        # `self` made item access mutate shared state for no benefit.
        encoder_seq = self._encode(self.tokenizer_i, self.encoder_inps[i], self.max_len_enc)
        decoder_inp_seq = self._encode(self.tokenizer_o, self.decoder_inps[i], self.max_len_dec)
        decoder_out_seq = self._encode(self.tokenizer_o, self.decoder_outs[i], self.max_len_dec)
        return encoder_seq, decoder_inp_seq, decoder_out_seq

    def __len__(self):
        return len(self.encoder_inps)

#-------------------------------------------------------------------------------------------------------------------------------------
class Dataloder(tf.keras.utils.Sequence):
    """Keras ``Sequence`` that groups ``Dataset`` samples into fixed-size batches.

    Batch ``i`` is ``([encoder_batch, decoder_input_batch], decoder_target_batch)``.
    Only full batches are counted by ``__len__``; trailing samples that do not
    fill a batch are never served.

    Args:
        dataset: object exposing ``encoder_inps`` and integer ``__getitem__``
            returning a tuple of ``(1, L)`` arrays.
        batch_size: number of samples per batch.
        shuffle: when True, the sample order is re-permuted at the end of every
            epoch.  Defaults to False, which reproduces the fixed sample order
            this loader has always served (the permutation computed in
            ``on_epoch_end`` used to be ignored by ``__getitem__``).
    """

    def __init__(self, dataset, batch_size=1, shuffle=False):
        self.dataset = dataset
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.indexes = np.arange(len(self.dataset.encoder_inps))

    def __getitem__(self, i):
        start = i * self.batch_size
        stop = (i + 1) * self.batch_size
        # Go through `self.indexes` so that a permutation set by
        # `on_epoch_end` actually changes which samples land in the batch.
        data = [self.dataset[j] for j in self.indexes[start:stop]]

        # Each sample component is (1, L): stacking on axis 1 gives (1, B, L),
        # squeezing axis 0 yields the (B, L) batch.
        batch = [np.squeeze(np.stack(samples, axis=1), axis=0) for samples in zip(*data)]
        return tuple([[batch[0], batch[1]], batch[2]])

    def __len__(self):
        return len(self.indexes) // self.batch_size

    def on_epoch_end(self):
        if self.shuffle:
            self.indexes = np.random.permutation(self.indexes)

In [None]:
# Wrap each split in a Dataset (sequences padded to 25 ids on both sides) and
# serve it in batches of 512.
train_dataset, test_dataset = (
    Dataset(split, tokenizer_i, tokenizer_o, 25, 25) for split in (train, test)
)
train_dataloader, test_dataloader = (
    Dataloder(dataset, batch_size=512) for dataset in (train_dataset, test_dataset)
)

# Sanity check: shapes of the first batch of each loader.
for label, loader in (('Train Dataloader:', train_dataloader), ('Test Dataloader:', test_dataloader)):
    (encoder_batch, decoder_in_batch), decoder_out_batch = loader[0]
    print(label, encoder_batch.shape, decoder_in_batch.shape, decoder_out_batch.shape)

Train Dataloader: (512, 25) (512, 25) (512, 25)
Test Dataloader: (512, 25) (512, 25) (512, 25)


## Creating Embedding

### GloVe Embedding

In [None]:
# Download the GloVe 6B archive and unpack it into the working directory.
!wget http://nlp.stanford.edu/data/glove.6B.zip
!unzip glove*.zip

--2021-08-12 07:00:59--  http://nlp.stanford.edu/data/glove.6B.zip
Resolving nlp.stanford.edu (nlp.stanford.edu)... 171.64.67.140
Connecting to nlp.stanford.edu (nlp.stanford.edu)|171.64.67.140|:80... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://nlp.stanford.edu/data/glove.6B.zip [following]
--2021-08-12 07:00:59--  https://nlp.stanford.edu/data/glove.6B.zip
Connecting to nlp.stanford.edu (nlp.stanford.edu)|171.64.67.140|:443... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: http://downloads.cs.stanford.edu/nlp/data/glove.6B.zip [following]
--2021-08-12 07:00:59--  http://downloads.cs.stanford.edu/nlp/data/glove.6B.zip
Resolving downloads.cs.stanford.edu (downloads.cs.stanford.edu)... 171.64.64.22
Connecting to downloads.cs.stanford.edu (downloads.cs.stanford.edu)|171.64.64.22|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 862182613 (822M) [application/zip]
Saving to: ‘glove.6B.zip’


2021-0

In [None]:
# Load the whole embedding file into memory as {word: 300-d float32 vector}.
# `with` guarantees the handle is closed; the encoding is pinned so parsing
# does not depend on the platform default.
embeddings_index = dict()
with open('glove.6B.300d.txt', encoding='utf-8') as f:
    for line in f:
        values = line.split()
        word = values[0]
        coefs = np.asarray(values[1:], dtype='float32')
        embeddings_index[word] = coefs
print('Loaded word vectors =', len(embeddings_index))

# Weight matrix for the encoder vocabulary; row 0 stays zero (padding id).
# The tokenizers keep case while the glove.6B vocabulary is lower-cased, so a
# token missing in its original casing is retried in lower case before being
# left as an all-zero row.
encoder_embedding_matrix_glove = np.zeros((len(input_vocab)+1, 300))
for word, i in input_vocab.items():
    embedding_vector = embeddings_index.get(word)
    if embedding_vector is None:
        embedding_vector = embeddings_index.get(word.lower())
    if embedding_vector is not None:
        encoder_embedding_matrix_glove[i] = embedding_vector
print('Shape of Encoder Embedding Matrix =', encoder_embedding_matrix_glove.shape)

# Same construction for the decoder vocabulary.
decoder_embedding_matrix_glove = np.zeros((len(output_vocab)+1, 300))
for word, i in output_vocab.items():
    embedding_vector = embeddings_index.get(word)
    if embedding_vector is None:
        embedding_vector = embeddings_index.get(word.lower())
    if embedding_vector is not None:
        decoder_embedding_matrix_glove[i] = embedding_vector
print('Shape of Decoder Embedding Matrix =', decoder_embedding_matrix_glove.shape)

Loaded word vectors = 400000
Shape of Encoder Embedding Matrix = (35511, 300)
Shape of Decoder Embedding Matrix = (29351, 300)


### FastText Embedding

In [None]:
# Download the fastText wiki-news 300-d vectors and unpack them into the working directory.
!wget https://dl.fbaipublicfiles.com/fasttext/vectors-english/wiki-news-300d-1M.vec.zip
!unzip wiki-news-300d-1M.vec.zip

--2021-08-12 07:22:05--  https://dl.fbaipublicfiles.com/fasttext/vectors-english/wiki-news-300d-1M.vec.zip
Resolving dl.fbaipublicfiles.com (dl.fbaipublicfiles.com)... 104.22.74.142, 172.67.9.4, 104.22.75.142, ...
Connecting to dl.fbaipublicfiles.com (dl.fbaipublicfiles.com)|104.22.74.142|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 681808098 (650M) [application/zip]
Saving to: ‘wiki-news-300d-1M.vec.zip’


2021-08-12 07:22:26 (31.9 MB/s) - ‘wiki-news-300d-1M.vec.zip’ saved [681808098/681808098]

Archive:  wiki-news-300d-1M.vec.zip
  inflating: wiki-news-300d-1M.vec   


In [None]:
# Load the whole embedding file into memory as {word: 300-d float32 vector}.
# `with` guarantees the handle is closed; the encoding is pinned so parsing
# does not depend on the platform default.
embeddings_index = dict()
with open('wiki-news-300d-1M.vec', encoding='utf-8') as f:
    for line in f:
        values = line.split()
        if len(values) == 2:
            # The .vec format starts with a "<vector count> <dimension>" header
            # line; it is not a word vector and must not enter the index.
            continue
        word = values[0]
        coefs = np.asarray(values[1:], dtype='float32')
        embeddings_index[word] = coefs
print('Loaded word vectors =', len(embeddings_index))

# Weight matrix for the encoder vocabulary; row 0 stays zero (padding id).
# A token missing in its original casing is retried in lower case before being
# left as an all-zero row.
encoder_embedding_matrix_fast = np.zeros((len(input_vocab)+1, 300))
for word, i in input_vocab.items():
    embedding_vector = embeddings_index.get(word)
    if embedding_vector is None:
        embedding_vector = embeddings_index.get(word.lower())
    if embedding_vector is not None:
        encoder_embedding_matrix_fast[i] = embedding_vector
print('Shape of Encoder Embedding Matrix =', encoder_embedding_matrix_fast.shape)

# Same construction for the decoder vocabulary.
decoder_embedding_matrix_fast = np.zeros((len(output_vocab)+1, 300))
for word, i in output_vocab.items():
    embedding_vector = embeddings_index.get(word)
    if embedding_vector is None:
        embedding_vector = embeddings_index.get(word.lower())
    if embedding_vector is not None:
        decoder_embedding_matrix_fast[i] = embedding_vector
print('Shape of Decoder Embedding Matrix =', decoder_embedding_matrix_fast.shape)

Loaded word vectors = 999995
Shape of Encoder Embedding Matrix = (35511, 300)
Shape of Decoder Embedding Matrix = (29351, 300)


# LSTM Model

In [None]:
class Encoder(tf.keras.Model):
    '''
    Encoder model -- embeds a batch of input token ids and runs it through one LSTM.

    Arguments:
        in_vocab_size -- number of rows of the embedding table
        embedding_dim -- size of each embedding vector
        enc_units     -- number of LSTM units
        input_length  -- length of the (padded) input sequences
        embed         -- 'scratch' (trainable, randomly initialised embedding),
                         'glove' or 'fast' (frozen embedding initialised from the
                         module-level encoder_embedding_matrix_glove /
                         encoder_embedding_matrix_fast)

    call() returns encoder_output (the per-step LSTM outputs), encoder_state_h
    and encoder_state_c (the final LSTM states).
    '''
    def __init__(self,in_vocab_size,embedding_dim,enc_units,input_length,embed,name='Encoder'):
        super().__init__(name=name)
        self.in_vocab_size = in_vocab_size
        self.embedding_dim = embedding_dim
        self.input_length = input_length
        self.enc_units = enc_units
        self.embed = embed

    def build(self, input_shape):
        # Arguments shared by every embedding variant; id 0 is padding and is masked.
        embedding_args = dict(input_dim=self.in_vocab_size, output_dim=self.embedding_dim, input_length=self.input_length, mask_zero=True, name="Encoder_Embedding")

        if self.embed == 'scratch':
            self.embedding = Embedding(**embedding_args)
        elif self.embed == 'glove':
            self.embedding = Embedding(weights=[encoder_embedding_matrix_glove], trainable=False, **embedding_args)
        elif self.embed == 'fast':
            self.embedding = Embedding(weights=[encoder_embedding_matrix_fast], trainable=False, **embedding_args)
        else:
            # Fail here with a clear message instead of an AttributeError on
            # `self.embedding` at the first forward pass.
            raise ValueError("embed must be 'scratch', 'glove' or 'fast', got %r" % (self.embed,))

        self.lstm = LSTM(self.enc_units, return_state=True, return_sequences=True, name="Encoder_LSTM")

    def call(self,input_sentences,training=True):
        # `training` is accepted for interface compatibility; it is not used here.
        input_embed = self.embedding(input_sentences)
        encoder_output, encoder_state_h, encoder_state_c = self.lstm(input_embed)
        return encoder_output, encoder_state_h, encoder_state_c

#-------------------------------------------------------------------------------------------------------------------------------------
class Decoder(tf.keras.Model):
    '''
    Decoder model -- embeds a batch of target token ids and runs it through one
    LSTM that starts from the supplied initial states.

    Arguments:
        out_vocab_size -- number of rows of the embedding table
        embedding_dim  -- size of each embedding vector
        dec_units      -- number of LSTM units
        input_length   -- length of the (padded) decoder input sequences
        embed          -- 'scratch' (trainable, randomly initialised embedding),
                          'glove' or 'fast' (frozen embedding initialised from the
                          module-level decoder_embedding_matrix_glove /
                          decoder_embedding_matrix_fast)

    call() returns decoder_output (the per-step LSTM outputs) and the final
    hidden and cell states.
    '''
    def __init__(self,out_vocab_size,embedding_dim,dec_units,input_length,embed,name='Decoder'):
        super().__init__(name=name)
        self.out_vocab_size = out_vocab_size
        self.embedding_dim = embedding_dim
        self.dec_units = dec_units
        self.input_length = input_length
        self.embed = embed

    def build(self, input_shape):
        # Arguments shared by every embedding variant; id 0 is padding and is masked.
        embedding_args = dict(input_dim=self.out_vocab_size, output_dim=self.embedding_dim, input_length=self.input_length, mask_zero=True, name="Decoder_Embedding")

        if self.embed == 'scratch':
            self.embedding = Embedding(**embedding_args)
        elif self.embed == 'glove':
            self.embedding = Embedding(weights=[decoder_embedding_matrix_glove], trainable=False, **embedding_args)
        elif self.embed == 'fast':
            self.embedding = Embedding(weights=[decoder_embedding_matrix_fast], trainable=False, **embedding_args)
        else:
            # Fail here with a clear message instead of an AttributeError on
            # `self.embedding` at the first forward pass.
            raise ValueError("embed must be 'scratch', 'glove' or 'fast', got %r" % (self.embed,))

        self.lstm = LSTM(self.dec_units, return_sequences=True, return_state=True, name="Decoder_LSTM")

    def call(self,target_sentences,initial_states):
        # initial_states is the [hidden, cell] pair the LSTM starts from.
        target_embedd = self.embedding(target_sentences)
        decoder_output, decoder_final_state_h, decoder_final_state_c = self.lstm(target_embedd, initial_state=initial_states)
        return decoder_output, decoder_final_state_h, decoder_final_state_c

#-------------------------------------------------------------------------------------------------------------------------------------
class Encoder_Decoder(tf.keras.Model):
    '''
    Sequence-to-sequence model: Encoder -> Decoder (initialised with the
    encoder's final states) -> softmax over the output vocabulary.

    in_vocab_size / out_vocab_size are the tokenizer vocabulary sizes.  Token
    ids run from 1 to the vocabulary size (0 is padding), so both embedding
    tables AND the softmax layer need vocabulary size + 1 entries; with only
    out_vocab_size classes the highest token id would be an out-of-range label
    for the sparse categorical cross-entropy loss.
    '''

    def __init__(self, encoder_inputs_length, decoder_inputs_length, in_vocab_size, out_vocab_size, embedding_dim, enc_units, dec_units, embed, name='Encoder-Decoder'):
        super().__init__(name=name)
        self.encoder = Encoder(in_vocab_size=in_vocab_size+1, embedding_dim=embedding_dim, enc_units=enc_units, input_length=encoder_inputs_length, embed=embed)
        self.decoder = Decoder(out_vocab_size=out_vocab_size+1, embedding_dim=embedding_dim, dec_units=dec_units, input_length=decoder_inputs_length, embed=embed)
        self.dense   = Dense(out_vocab_size+1, activation='softmax', name='Dense')

    def call(self, data):
        # data = [encoder token ids, decoder input token ids]
        encoder_tokens, decoder_tokens = data[0], data[1]

        encoder_output, encoder_h, encoder_c = self.encoder(encoder_tokens)
        decoder_output, decoder_h, decoder_c = self.decoder(decoder_tokens, [encoder_h, encoder_c])
        # Per-step probability distribution over the output token ids.
        return self.dense(decoder_output)

#-------------------------------------------------------------------------------------------------------------------------------------
def build_model_lstm(embed, name):
  """Create an untrained LSTM Encoder_Decoder for the notebook's vocabularies.

  embed selects the embedding variant ('scratch', 'glove' or 'fast'); name is
  the Keras model name.  Sequence length 25, embedding size 300 and 100 LSTM
  units on each side are fixed here.
  """
  return Encoder_Decoder(
      encoder_inputs_length=25,
      decoder_inputs_length=25,
      in_vocab_size=vocab_size_input,
      out_vocab_size=vocab_size_output,
      embedding_dim=300,
      enc_units=100,
      dec_units=100,
      embed=embed,
      name=name,
  )

In [None]:
def train_model(model, model_name):
    """Compile `model` and fit it on the notebook's train/test dataloaders.

    Uses Adam with sparse categorical cross-entropy, at most 20 epochs, and
    early stopping on `val_loss` (patience 3, min_delta 0.001) that restores
    the best weights.  Prints the model summary when training ends.

    Args:
        model: an uncompiled Keras model accepting the dataloader batches.
        model_name: label for the run; currently unused by this function.
    """
    es = EarlyStopping(patience=3, verbose=1, min_delta=0.001, monitor='val_loss', mode='min', restore_best_weights=True)

    # Take the step counts from the loaders themselves so they stay correct if
    # the batch size is changed where the loaders are created.
    train_steps = len(train_dataloader)
    test_steps = len(test_dataloader)

    model.compile(optimizer=tf.keras.optimizers.Adam(), loss='sparse_categorical_crossentropy')

    # Prefer the first GPU, but fall back to the CPU when none is visible
    # instead of requesting a device that does not exist.
    device = '/device:GPU:0' if tf.config.list_physical_devices('GPU') else '/device:CPU:0'
    with tf.device(device):
        model.fit(train_dataloader, steps_per_epoch=train_steps, epochs=20, validation_data=test_dataloader, validation_steps=test_steps, callbacks=[es])

    model.summary()

In [None]:
def predict(input_sentence, model):
  """Greedy-decode the model's output for one input sentence.

  The sentence is tokenised with `tokenizer_i` and padded to 25 ids, encoded
  once, and the decoder is then stepped one token at a time (at most 25
  steps), always feeding back the most probable token.

  Args:
      input_sentence: raw input text.
      model: a trained model whose `layers[0]`, `layers[1]` and `layers[2]` are
          the encoder, the decoder (exposing `.embedding` and `.lstm`) and the
          output projection.

  Returns:
      The predicted tokens joined as ' tok1 tok2 ...' (leading space).  The
      string ends with '<end>' when the model emitted it; otherwise decoding
      stopped at the length limit or on a padding prediction.
  """
  DECODER_SEQ_LEN = 25
  # Look the special ids up rather than assuming '<start>' has id 1.
  start_id = tokenizer_o.word_index['<start>']
  end_id = tokenizer_o.word_index['<end>']

  predict_word_idx = np.full((1, 1), start_id, dtype='int32')
  predicted_sentence = ''

  input_sequence = tokenizer_i.texts_to_sequences([input_sentence])
  inputs = pad_sequences(input_sequence, maxlen=25, padding='post')
  inputs = tf.convert_to_tensor(inputs)

  encoder, decoder, dense = model.layers[0], model.layers[1], model.layers[2]
  enc_output, enc_state_h, enc_state_c = encoder(inputs)
  states_values = [enc_state_h, enc_state_c]

  for _ in range(DECODER_SEQ_LEN):
    predict_emb = decoder.embedding(predict_word_idx)
    dec_output, dec_state_h, dec_state_c = decoder.lstm(predict_emb, initial_state=states_values)
    dec_output = dense(dec_output)
    states_values = [dec_state_h, dec_state_c]

    # argmax over the flattened (1, 1, classes) output is the token id.
    next_id = int(np.argmax(dec_output))
    if next_id not in tokenizer_o.index_word:
      # Id 0 (padding) has no word; stop instead of raising KeyError.
      break
    predicted_sentence += ' ' + tokenizer_o.index_word[next_id]

    if next_id == end_id:
      return predicted_sentence
    predict_word_idx = np.full((1, 1), next_id, dtype='int32')

  return predicted_sentence

#------------------------------------------------------------------------------------------------------------------
def predict_result(data, model):
    """Print input, reference and predicted text for the first (up to) three rows of `data`.

    `data` must have `input_text` and `output_text_out` columns; the trailing
    '<end>' marker is removed from the reference and from the prediction
    before printing.
    """
    for i in range(len(data[:3])):
        reference_tokens = data['output_text_out'].iloc[i].split()[:-1]

        predicted_tokens = predict(data['input_text'].iloc[i], model).split()
        # Only drop the last token when it really is the end marker: a
        # prediction cut off at the length limit ends with a real word.
        if predicted_tokens and predicted_tokens[-1] == '<end>':
            predicted_tokens = predicted_tokens[:-1]

        print("Input Text:", data['input_text'].iloc[i])
        print("Output Text:", ' '.join(reference_tokens))
        print("Predicted Text:", ' '.join(predicted_tokens))
        print('='*100)

#------------------------------------------------------------------------------------------------------------------
def _average_bleu(frame, model, limit):
    """Mean sentence-level BLEU of `model` over the first `limit` rows of `frame` (fewer if the frame is shorter)."""
    count = min(limit, len(frame))
    if count == 0:
        return 0.0

    total_bleu = 0
    for i in range(count):
        # Reference: the target sentence without its trailing '<end>'.
        reference = frame['output_text_out'].iloc[i].split()[:-1]

        hypothesis = predict(frame['input_text'].iloc[i], model).split()
        # Strip the end marker only if present; a prediction that hit the
        # length limit ends with a real word that must be scored.
        if hypothesis and hypothesis[-1] == '<end>':
            hypothesis = hypothesis[:-1]

        total_bleu += bleu.sentence_bleu([reference], hypothesis)
    return total_bleu / count


def get_BLEU(train, test, model, input_range=100):
    """Print the average sentence BLEU of `model` on the head of the train and test frames.

    Args:
        train, test: DataFrames with `input_text` and `output_text_out` columns.
        model: trained model passed on to `predict`.
        input_range: maximum number of leading rows scored per frame.
    """
    train_avg_bleu = _average_bleu(train, model, input_range)
    test_avg_bleu = _average_bleu(test, model, input_range)

    print('='*50)
    print('Avg. Train BLEU Score:', train_avg_bleu)
    print('Avg. Test BLEU Score:', test_avg_bleu)
    print('='*50)

## Encoder-Decoder: Scratch Embed

### Model Training

In [None]:
# LSTM encoder-decoder with embeddings learned from scratch.
model_L1 = build_model_lstm(embed='scratch', name='LSTM_Encoder-Decoder_Scratch')
train_model(model_L1, 'LSTM_Encoder-Decoder_Scratch')

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Model: "LSTM_Encoder-Decoder_Scratch"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
Encoder (Encoder)            multiple                  10813700  
_________________________________________________________________
Decoder (Decoder)            multiple                  8965700   
_________________________________________________________________
Dense (Dense)                multiple                  2964350   
Total params: 22,743,750
Trainable params: 22,743,750
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Save the trained weights (TensorFlow checkpoint format) under model_path.
model_L1.save_weights(model_path + '1_1_LSTM_Scratch/' + '1_1_LSTM_Scratch', save_format='tf') 

### Model Prediction

In [None]:
# Sample predictions on the first training rows.
predict_result(train, model_L1)

Input Text: At the party , the member has variety job .
Output Text: At the party , the members have a variety of jobs .
Predicted Text: At the school , the temperature is a big .
Input Text: Thank you NY .
Output Text: Thank you , NY !
Predicted Text: Thank you for me .
Input Text: Few last days have been strange .
Output Text: The last few days have been strange .
Predicted Text: A few days ago was not .


In [None]:
# Sample predictions on the first held-out rows.
predict_result(test, model_L1)

Input Text: Have you ever over the wall of school ?
Output Text: Have you ever climbed over the wall at school ?
Predicted Text: Have you ever seen the New Year 's house ?
Input Text: It is so narrow that I have to keep my body very fit everytime .
Output Text: It is so narrow that I have to keep my body very fit all the time .
Predicted Text: It is very hot because I have to give me how much it is in a good .
Input Text: You can check the maintenance is finished or not at twitter .
Output Text: You can check whether the maintenance is finished or not on twitter .
Predicted Text: You can not know on the DVD 's house ,


In [None]:
# Average BLEU on the head of the train and test splits.
get_BLEU(train, test, model_L1)

Avg. Train BLEU Score: 0.4479462706390938
Avg. Test BLEU Score: 0.4402298645803892


## Encoder-Decoder: Glove Embed

### Model Training

In [None]:
# LSTM encoder-decoder with frozen GloVe embeddings.
model_L2 = build_model_lstm(embed='glove', name='LSTM_Encoder-Decoder_Glove')
train_model(model_L2, 'LSTM_Encoder-Decoder_Glove')

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Model: "LSTM_Encoder-Decoder_Glove"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
Encoder (Encoder)            multiple                  10813700  
_________________________________________________________________
Decoder (Decoder)            multiple                  8965700   
_________________________________________________________________
Dense (Dense)                multiple                  2964350   
Total params: 22,743,750
Trainable params: 3,285,150
Non-trainable params: 19,458,600
_________________________________________________________________


In [None]:
# Save the trained weights (TensorFlow checkpoint format) under model_path.
model_L2.save_weights(model_path + '1_2_LSTM_Glove/' + '1_2_LSTM_Glove', save_format='tf') 

### Model Prediction

In [None]:
# Sample predictions on the first training rows.
predict_result(train, model_L2)

Input Text: At the party , the member has variety job .
Output Text: At the party , the members have a variety of jobs .
Predicted Text: In the way , the party , I have a job .
Input Text: Thank you NY .
Output Text: Thank you , NY !
Predicted Text: Thank you .
Input Text: Few last days have been strange .
Output Text: The last few days have been strange .
Predicted Text: These days ago .


In [None]:
# Sample predictions on the first held-out rows.
predict_result(test, model_L2)

Input Text: Have you ever over the wall of school ?
Output Text: Have you ever climbed over the wall at school ?
Predicted Text: Do you have the difference of the difference of the hotel .
Input Text: It is so narrow that I have to keep my body very fit everytime .
Output Text: It is so narrow that I have to keep my body very fit all the time .
Predicted Text: It is so I feel that I have to be a very poor .
Input Text: You can check the maintenance is finished or not at twitter .
Output Text: You can check whether the maintenance is finished or not on twitter .
Predicted Text: I can not the internet or not my office , I .


In [None]:
# Average BLEU on the head of the train and test splits.
get_BLEU(train, test, model_L2)

Avg. Train BLEU Score: 0.4487982872512108
Avg. Test BLEU Score: 0.4329829866763378


## Encoder-Decoder: FastText Embed

### Model Training

In [None]:
# LSTM encoder-decoder with frozen fastText embeddings.
model_L3 = build_model_lstm(embed='fast', name='LSTM_Encoder-Decoder_Fast')
train_model(model_L3, 'LSTM_Encoder-Decoder_Fast')

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Model: "LSTM_Encoder-Decoder_Fast"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
Encoder (Encoder)            multiple                  10813700  
_________________________________________________________________
Decoder (Decoder)            multiple                  8965700   
_________________________________________________________________
Dense (Dense)                multiple                  2964350   
Total params: 22,743,750
Trainable params: 3,285,150
Non-trainable params: 19,458,600
_________________________________________________________________


In [None]:
# Save the trained weights (TensorFlow checkpoint format) under model_path.
model_L3.save_weights(model_path + '1_3_LSTM_Fast/' + '1_3_LSTM_Fast', save_format='tf') 

### Model Prediction

In [None]:
# Sample predictions on the first training rows.
predict_result(train, model_L3)

Input Text: At the party , the member has variety job .
Output Text: At the party , the members have a variety of jobs .
Predicted Text: In the other hand , the first day .
Input Text: Thank you NY .
Output Text: Thank you , NY !
Predicted Text: Thank you .
Input Text: Few last days have been strange .
Output Text: The last few days have been strange .
Predicted Text: A few days ago , it is the most .


In [None]:
# Sample predictions on the first held-out rows.
predict_result(test, model_L3)

Input Text: Have you ever over the wall of school ?
Output Text: Have you ever climbed over the wall at school ?
Predicted Text: Have you have a new of the next time .
Input Text: It is so narrow that I have to keep my body very fit everytime .
Output Text: It is so narrow that I have to keep my body very fit all the time .
Predicted Text: It is so I have a little bit that I will be able to improve my English .
Input Text: You can check the maintenance is finished or not at twitter .
Output Text: You can check whether the maintenance is finished or not on twitter .
Predicted Text: You can not get up on the book in the U .


In [None]:
# Average BLEU on the head of the train and test splits.
get_BLEU(train, test, model_L3)

Avg. Train BLEU Score: 0.4390916524172069
Avg. Test BLEU Score: 0.46569867802376036


# Bidirectional LSTM Model

In [None]:
class Encoder(tf.keras.Model):
    '''
    Bidirectional encoder -- embeds a batch of input token ids and runs it
    through a Bidirectional LSTM.

    Arguments:
        in_vocab_size -- number of rows of the embedding table
        embedding_dim -- size of each embedding vector
        enc_units     -- number of LSTM units per direction
        input_length  -- length of the (padded) input sequences
        embed         -- 'scratch' (trainable, randomly initialised embedding),
                         'glove' or 'fast' (frozen embedding initialised from the
                         module-level encoder_embedding_matrix_glove /
                         encoder_embedding_matrix_fast)

    call() returns encoder_output (the per-step outputs), encoder_state_h and
    encoder_state_c, where each state is the forward and backward final state
    concatenated on the last axis (size 2 * enc_units).
    '''
    def __init__(self,in_vocab_size,embedding_dim,enc_units,input_length,embed,name='Encoder'):
        super().__init__(name=name)
        self.in_vocab_size = in_vocab_size
        self.embedding_dim = embedding_dim
        self.input_length = input_length
        self.enc_units = enc_units
        self.embed = embed

    def build(self, input_shape):
        # Arguments shared by every embedding variant; id 0 is padding and is masked.
        embedding_args = dict(input_dim=self.in_vocab_size, output_dim=self.embedding_dim, input_length=self.input_length, mask_zero=True, name="Encoder_Embedding")

        if self.embed == 'scratch':
            self.embedding = Embedding(**embedding_args)
        elif self.embed == 'glove':
            self.embedding = Embedding(weights=[encoder_embedding_matrix_glove], trainable=False, **embedding_args)
        elif self.embed == 'fast':
            self.embedding = Embedding(weights=[encoder_embedding_matrix_fast], trainable=False, **embedding_args)
        else:
            # Fail here with a clear message instead of an AttributeError on
            # `self.embedding` at the first forward pass.
            raise ValueError("embed must be 'scratch', 'glove' or 'fast', got %r" % (self.embed,))

        self.lstm = Bidirectional(LSTM(self.enc_units, return_state=True, return_sequences=True, name="Encoder_LSTM"))

    def call(self, input_sentences, training=True):
        # `training` is accepted for interface compatibility; it is not used here.
        input_embed = self.embedding(input_sentences)
        encoder_output, encoder_state_h_fwd, encoder_state_c_fwd, encoder_state_h_bwd, encoder_state_c_bwd = self.lstm(input_embed)
        # Join the two directions with a plain tensor op rather than creating
        # fresh Concatenate layer objects on every forward pass.
        encoder_state_h = tf.concat([encoder_state_h_fwd, encoder_state_h_bwd], axis=-1)
        encoder_state_c = tf.concat([encoder_state_c_fwd, encoder_state_c_bwd], axis=-1)
        return encoder_output, encoder_state_h, encoder_state_c

#-------------------------------------------------------------------------------------------------------------------------------------
class Decoder(tf.keras.Model):
    '''
    Decoder model -- embeds a batch of target token ids and runs it through one
    LSTM that starts from the supplied initial states.

    Arguments:
        out_vocab_size -- number of rows of the embedding table
        embedding_dim  -- size of each embedding vector
        dec_units      -- base unit count; the LSTM is built with 2 * dec_units
                          units (presumably to match initial states that are two
                          concatenated directions of the same size -- verify
                          against the encoder it is paired with)
        input_length   -- length of the (padded) decoder input sequences
        embed          -- 'scratch' (trainable, randomly initialised embedding),
                          'glove' or 'fast' (frozen embedding initialised from the
                          module-level decoder_embedding_matrix_glove /
                          decoder_embedding_matrix_fast)

    call() returns decoder_output (the per-step LSTM outputs) and the final
    hidden and cell states.
    '''
    def __init__(self,out_vocab_size,embedding_dim,dec_units,input_length,embed,name='Decoder'):
        super().__init__(name=name)
        self.out_vocab_size = out_vocab_size
        self.embedding_dim = embedding_dim
        self.dec_units = dec_units
        self.input_length = input_length
        self.embed = embed

    def build(self, input_shape):
        # Arguments shared by every embedding variant; id 0 is padding and is masked.
        embedding_args = dict(input_dim=self.out_vocab_size, output_dim=self.embedding_dim, input_length=self.input_length, mask_zero=True, name="Decoder_Embedding")

        if self.embed == 'scratch':
            self.embedding = Embedding(**embedding_args)
        elif self.embed == 'glove':
            self.embedding = Embedding(weights=[decoder_embedding_matrix_glove], trainable=False, **embedding_args)
        elif self.embed == 'fast':
            self.embedding = Embedding(weights=[decoder_embedding_matrix_fast], trainable=False, **embedding_args)
        else:
            # Fail here with a clear message instead of an AttributeError on
            # `self.embedding` at the first forward pass.
            raise ValueError("embed must be 'scratch', 'glove' or 'fast', got %r" % (self.embed,))

        self.lstm = LSTM(self.dec_units*2, return_sequences=True, return_state=True, name="Decoder_LSTM")

    def call(self,target_sentences,initial_states):
        # initial_states is the [hidden, cell] pair the LSTM starts from.
        target_embedd = self.embedding(target_sentences)
        decoder_output, decoder_final_state_h, decoder_final_state_c = self.lstm(target_embedd, initial_state=initial_states)
        return decoder_output, decoder_final_state_h, decoder_final_state_c

#-------------------------------------------------------------------------------------------------------------------------------------
class Encoder_Decoder(tf.keras.Model):
    '''
    Sequence-to-sequence model: Encoder -> Decoder (initialised with the
    encoder's final states) -> softmax over the output vocabulary.

    in_vocab_size / out_vocab_size are the tokenizer vocabulary sizes.  Token
    ids run from 1 to the vocabulary size (0 is padding), so both embedding
    tables AND the softmax layer need vocabulary size + 1 entries; with only
    out_vocab_size classes the highest token id would be an out-of-range label
    for the sparse categorical cross-entropy loss.
    '''

    def __init__(self, encoder_inputs_length, decoder_inputs_length, in_vocab_size, out_vocab_size, embedding_dim, enc_units, dec_units, embed, name='Encoder-Decoder'):
        super().__init__(name=name)
        self.encoder = Encoder(in_vocab_size=in_vocab_size+1, embedding_dim=embedding_dim, enc_units=enc_units, input_length=encoder_inputs_length, embed=embed)
        self.decoder = Decoder(out_vocab_size=out_vocab_size+1, embedding_dim=embedding_dim, dec_units=dec_units, input_length=decoder_inputs_length, embed=embed)
        self.dense   = Dense(out_vocab_size+1, activation='softmax', name='Dense')

    def call(self, data):
        # data = [encoder token ids, decoder input token ids]
        encoder_tokens, decoder_tokens = data[0], data[1]

        encoder_output, encoder_h, encoder_c = self.encoder(encoder_tokens)
        decoder_output, decoder_h, decoder_c = self.decoder(decoder_tokens, [encoder_h, encoder_c])
        # Per-step probability distribution over the output token ids.
        return self.dense(decoder_output)

#-------------------------------------------------------------------------------------------------------------------------------------
def build_model_bilstm(embed, name):
  """Create the Encoder_Decoder model for the BiLSTM experiments.

  Args:
      embed: embedding option forwarded to the encoder and decoder
          (the notebook uses 'scratch', 'glove' and 'fast').
      name: Keras model name.

  Returns:
      An Encoder_Decoder built with sequence length 25 on both sides,
      300-dimensional embeddings and 100 encoder/decoder units, using the
      notebook-level ``vocab_size_input`` / ``vocab_size_output``.
  """
  return Encoder_Decoder(
      encoder_inputs_length=25,
      decoder_inputs_length=25,
      in_vocab_size=vocab_size_input,
      out_vocab_size=vocab_size_output,
      embedding_dim=300,
      enc_units=100,
      dec_units=100,
      embed=embed,
      name=name,
  )

## Encoder-Decoder: Scratch Embed

### Model Training

In [None]:
# BiLSTM encoder-decoder with embeddings learned from scratch; train_model is defined earlier in the notebook.
model_BL1 = build_model_bilstm(embed='scratch', name='BiLSTM_Encoder-Decoder_Scratch')
train_model(model_BL1, 'BiLSTM_Encoder-Decoder_Scratch')

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Model: "BiLSTM_Encoder-Decoder_Scratch"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
Encoder (Encoder)            multiple                  10974100  
_________________________________________________________________
Decoder (Decoder)            multiple                  9206100   
_________________________________________________________________
Dense (Dense)                multiple                  5899350   
Total params: 26,079,550
Trainable params: 26,079,550
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Save the trained weights under model_path as a TensorFlow-format checkpoint.
model_BL1.save_weights(model_path + '2_1_BiLSTM_Scratch/' + '2_1_BiLSTM_Scratch', save_format='tf') 

### Model Prediction

In [None]:
# Spot-check: print input, reference and predicted text for sample rows of the training split.
predict_result(train, model_BL1)

Input Text: At the party , the member has variety job .
Output Text: At the party , the members have a variety of jobs .
Predicted Text: At the party , a job has a boyfriend .
Input Text: Thank you NY .
Output Text: Thank you , NY !
Predicted Text: Thank you guys .
Input Text: Few last days have been strange .
Output Text: The last few days have been strange .
Predicted Text: A few days ago , it was not .


In [None]:
# Same spot-check on the held-out test split.
predict_result(test, model_BL1)

Input Text: Have you ever over the wall of school ?
Output Text: Have you ever climbed over the wall at school ?
Predicted Text: Have you ever been over the school of a family ?
Input Text: It is so narrow that I have to keep my body very fit everytime .
Output Text: It is so narrow that I have to keep my body very fit all the time .
Predicted Text: It is so hard to do my best and it 's not free .
Input Text: You can check the maintenance is finished or not at twitter .
Output Text: You can check whether the maintenance is finished or not on twitter .
Predicted Text: You can not get a special service or not in the internet .


In [None]:
# Print the average BLEU score for the train and test splits.
get_BLEU(train, test, model_BL1)

Avg. Train BLEU Score: 0.43750467220453443
Avg. Test BLEU Score: 0.4257227891921617


## Encoder-Decoder: Glove Embed

### Model Training

In [None]:
# BiLSTM encoder-decoder using the 'glove' embedding option; train_model is defined earlier in the notebook.
model_BL2 = build_model_bilstm(embed='glove', name='BiLSTM_Encoder-Decoder_Glove')
train_model(model_BL2, 'BiLSTM_Encoder-Decoder_Glove')

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Model: "BiLSTM_Encoder-Decoder_Glove"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
Encoder (Encoder)            multiple                  10974100  
_________________________________________________________________
Decoder (Decoder)            multiple                  9206100   
_________________________________________________________________
Dense (Dense)                multiple                  5899350   
Total params: 26,079,550
Trainable params: 6,620,950
Non-trainable params: 19,458,600
_________________________________________________________________


In [None]:
# Save the trained weights under model_path as a TensorFlow-format checkpoint.
model_BL2.save_weights(model_path + '2_2_BiLSTM_Glove/' + '2_2_BiLSTM_Glove', save_format='tf') 

### Model Prediction

In [None]:
# Spot-check: print input, reference and predicted text for sample rows of the training split.
predict_result(train, model_BL2)

Input Text: At the party , the member has variety job .
Output Text: At the party , the members have a variety of jobs .
Predicted Text: In the party , the party has a job .
Input Text: Thank you NY .
Output Text: Thank you , NY !
Predicted Text: Thank you .
Input Text: Few last days have been strange .
Output Text: The last few days have been strange .
Predicted Text: The last week has been a little .


In [None]:
# Same spot-check on the held-out test split.
predict_result(test, model_BL2)

Input Text: Have you ever over the wall of school ?
Output Text: Have you ever climbed over the wall at school ?
Predicted Text: Have you ever seen the middle of the office ?
Input Text: It is so narrow that I have to keep my body very fit everytime .
Output Text: It is so narrow that I have to keep my body very fit all the time .
Predicted Text: It is so that I have to be able to keep my body very much .
Input Text: You can check the maintenance is finished or not at twitter .
Output Text: You can check whether the maintenance is finished or not on twitter .
Predicted Text: I can check the next or not the main or I .


In [None]:
# Print the average BLEU score for the train and test splits.
get_BLEU(train, test, model_BL2)

Avg. Train BLEU Score: 0.4293580872742956
Avg. Test BLEU Score: 0.42601507482548406


## Encoder-Decoder: FastText Embed

### Model Training

In [None]:
# BiLSTM encoder-decoder using the 'fast' (FastText) embedding option; train_model is defined earlier in the notebook.
model_BL3 = build_model_bilstm(embed='fast', name='BiLSTM_Encoder-Decoder_Fast')
train_model(model_BL3, 'BiLSTM_Encoder-Decoder_Fast')

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Model: "BiLSTM_Encoder-Decoder_Fast"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
Encoder (Encoder)            multiple                  10974100  
_________________________________________________________________
Decoder (Decoder)            multiple                  9206100   
_________________________________________________________________
Dense (Dense)                multiple                  5899350   
Total params: 26,079,550
Trainable params: 6,620,950
Non-trainable params: 19,458,600
_________________________________________________________________


In [None]:
# Save the trained weights under model_path as a TensorFlow-format checkpoint.
model_BL3.save_weights(model_path + '2_3_BiLSTM_Fast/' + '2_3_BiLSTM_Fast', save_format='tf') 

### Model Prediction

In [None]:
# Spot-check: print input, reference and predicted text for sample rows of the training split.
predict_result(train, model_BL3)

Input Text: At the party , the member has variety job .
Output Text: At the party , the members have a variety of jobs .
Predicted Text: At the other hand , a job is a party .
Input Text: Thank you NY .
Output Text: Thank you , NY !
Predicted Text: Thank you for reading .
Input Text: Few last days have been strange .
Output Text: The last few days have been strange .
Predicted Text: A few days ago .


In [None]:
# Same spot-check on the held-out test split.
predict_result(test, model_BL3)

Input Text: Have you ever over the wall of school ?
Output Text: Have you ever climbed over the wall at school ?
Predicted Text: Have you ever one of the party of the school ?
Input Text: It is so narrow that I have to keep my body very fit everytime .
Output Text: It is so narrow that I have to keep my body very fit all the time .
Predicted Text: It is so much because I will be able to my face .
Input Text: You can check the maintenance is finished or not at twitter .
Output Text: You can check whether the maintenance is finished or not on twitter .
Predicted Text: You can not check the test , I should get the right .


In [None]:
# Print the average BLEU score for the train and test splits.
get_BLEU(train, test, model_BL3)

Avg. Train BLEU Score: 0.4262674491548774
Avg. Test BLEU Score: 0.4179576372366596


# GRU Model

In [None]:
class Encoder(tf.keras.Model):
    '''
    GRU encoder -- embeds an input token sequence and runs it through one GRU layer.

    Returns the GRU output sequence and the final hidden state. A GRU has no
    separate cell state, so exactly two values are returned.

    Args:
        in_vocab_size: number of rows of the embedding table (``input_dim``).
        embedding_dim: embedding vector size (``output_dim``).
        enc_units: number of GRU units.
        input_length: sequence length passed to the embedding layer.
        embed: 'scratch' for a trainable embedding learned from scratch,
            'glove' or 'fast' to load the notebook-level pretrained matrix
            (``encoder_embedding_matrix_glove`` / ``encoder_embedding_matrix_fast``)
            into a frozen embedding.
        name: Keras model name.
    '''
    def __init__(self,in_vocab_size,embedding_dim,enc_units,input_length,embed,name='Encoder'):
        super().__init__(name=name)
        self.in_vocab_size = in_vocab_size
        self.embedding_dim = embedding_dim
        self.input_length = input_length
        self.enc_units = enc_units
        self.embed = embed

    def build(self, input_shape):
        '''Create the embedding and GRU layers; raises ValueError for an unknown ``embed`` option.'''
        # Arguments shared by every embedding variant.
        embedding_kwargs = dict(input_dim=self.in_vocab_size, output_dim=self.embedding_dim, input_length=self.input_length, mask_zero=True, name="Encoder_Embedding")
        if self.embed == 'glove':
            embedding_kwargs.update(weights=[encoder_embedding_matrix_glove], trainable=False)
        elif self.embed == 'fast':
            embedding_kwargs.update(weights=[encoder_embedding_matrix_fast], trainable=False)
        elif self.embed != 'scratch':
            # Previously an unknown option left self.embedding undefined and only
            # failed later with an AttributeError inside call().
            raise ValueError(f"Unknown embed option {self.embed!r}; expected 'scratch', 'glove' or 'fast'")
        self.embedding = Embedding(**embedding_kwargs)

        self.gru = GRU(self.enc_units, return_state=True, return_sequences=True, name="Encoder_GRU")

    def call(self,input_sentences,training=True):
        '''Return ``(encoder_output, encoder_state_h)`` for ``input_sentences``; ``training`` is accepted but not used.'''
        input_embed = self.embedding(input_sentences)
        encoder_output, encoder_state_h = self.gru(input_embed)
        return encoder_output, encoder_state_h

#-------------------------------------------------------------------------------------------------------------------------------------
class Decoder(tf.keras.Model):
    '''
    GRU decoder -- embeds the target token sequence and runs it through one GRU layer.

    Returns the GRU output sequence and the final hidden state.

    Args:
        out_vocab_size: number of rows of the embedding table (``input_dim``).
        embedding_dim: embedding vector size (``output_dim``).
        dec_units: number of GRU units.
        input_length: sequence length passed to the embedding layer.
        embed: 'scratch' for a trainable embedding learned from scratch,
            'glove' or 'fast' to load the notebook-level pretrained matrix
            (``decoder_embedding_matrix_glove`` / ``decoder_embedding_matrix_fast``)
            into a frozen embedding.
        name: Keras model name.
    '''
    def __init__(self,out_vocab_size,embedding_dim,dec_units,input_length,embed,name='Decoder'):
        super().__init__(name=name)
        self.out_vocab_size = out_vocab_size
        self.embedding_dim = embedding_dim
        self.dec_units = dec_units
        self.input_length = input_length
        self.embed = embed

    def build(self, input_shape):
        '''Create the embedding and GRU layers; raises ValueError for an unknown ``embed`` option.'''
        # Arguments shared by every embedding variant.
        embedding_kwargs = dict(input_dim=self.out_vocab_size, output_dim=self.embedding_dim, input_length=self.input_length, mask_zero=True, name="Decoder_Embedding")
        if self.embed == 'glove':
            embedding_kwargs.update(weights=[decoder_embedding_matrix_glove], trainable=False)
        elif self.embed == 'fast':
            embedding_kwargs.update(weights=[decoder_embedding_matrix_fast], trainable=False)
        elif self.embed != 'scratch':
            # Previously an unknown option left self.embedding undefined and only
            # failed later with an AttributeError inside call().
            raise ValueError(f"Unknown embed option {self.embed!r}; expected 'scratch', 'glove' or 'fast'")
        self.embedding = Embedding(**embedding_kwargs)

        self.gru = GRU(self.dec_units, return_sequences=True, return_state=True, name="Decoder_GRU")

    def call(self,target_sentences,initial_states):
        '''Return ``(decoder_output, decoder_final_state_h)``, starting the GRU from ``initial_states``.'''
        target_embedd = self.embedding(target_sentences)
        decoder_output, decoder_final_state_h = self.gru(target_embedd, initial_state=initial_states)
        return decoder_output, decoder_final_state_h

#-------------------------------------------------------------------------------------------------------------------------------------
class Encoder_Decoder(tf.keras.Model):
    """Sequence-to-sequence wrapper used by ``build_model_gru``.

    Chains an ``Encoder``, a ``Decoder`` seeded with the encoder's final
    hidden state, and a softmax ``Dense`` layer applied to the decoder outputs.

    Both embedding vocabularies are sized ``vocab_size + 1``.

    NOTE(review): the Dense layer has ``out_vocab_size`` units while the
    decoder embedding has ``out_vocab_size + 1`` rows -- confirm the highest
    token id can never appear as a training target.
    """

    def __init__(self, encoder_inputs_length, decoder_inputs_length, in_vocab_size, out_vocab_size, embedding_dim, enc_units, dec_units, embed, name='Encoder-Decoder'):
        super().__init__(name=name)
        # Sub-layers are assigned in the order encoder, decoder, dense;
        # predict() addresses them positionally through model.layers.
        self.encoder = Encoder(
            in_vocab_size=in_vocab_size+1,
            embedding_dim=embedding_dim,
            enc_units=enc_units,
            input_length=encoder_inputs_length,
            embed=embed,
        )
        self.decoder = Decoder(
            out_vocab_size=out_vocab_size+1,
            embedding_dim=embedding_dim,
            dec_units=dec_units,
            input_length=decoder_inputs_length,
            embed=embed,
        )
        self.dense = Dense(out_vocab_size, activation='softmax', name='Dense')

    def call(self, data):
        """Forward pass.

        Args:
            data: indexable pair; ``data[0]`` is the encoder input and
                ``data[1]`` is the decoder input.

        Returns:
            Softmax output of the Dense layer over the decoder output sequence.
        """
        source_tokens, target_tokens = data[0], data[1]

        _, state_h = self.encoder(source_tokens)
        decoder_sequence, _ = self.decoder(target_tokens, state_h)
        return self.dense(decoder_sequence)

#-------------------------------------------------------------------------------------------------------------------------------------
def build_model_gru(embed, name):
  """Create the Encoder_Decoder model for the GRU experiments.

  Args:
      embed: embedding option forwarded to the encoder and decoder
          ('scratch', 'glove' or 'fast').
      name: Keras model name.

  Returns:
      An Encoder_Decoder built with sequence length 25 on both sides,
      300-dimensional embeddings and 100 encoder/decoder units, using the
      notebook-level ``vocab_size_input`` / ``vocab_size_output``.
  """
  return Encoder_Decoder(
      encoder_inputs_length=25,
      decoder_inputs_length=25,
      in_vocab_size=vocab_size_input,
      out_vocab_size=vocab_size_output,
      embedding_dim=300,
      enc_units=100,
      dec_units=100,
      embed=embed,
      name=name,
  )

In [None]:
def predict(input_sentence, model):
    """Greedy-decode a prediction for ``input_sentence`` with a GRU Encoder_Decoder.

    The sentence is tokenized with the notebook-level ``tokenizer_i``, padded to
    25 tokens and encoded once. The decoder is then stepped one token at a time
    for at most 25 steps, feeding back the arg-max token and the GRU state.

    Args:
        input_sentence: raw input text.
        model: model whose ``layers`` are, in order, the encoder, the decoder
            (exposing ``embedding`` and ``gru``) and the output Dense layer.

    Returns:
        The predicted words, each preceded by one space. The string ends with
        '<end>' when the model emitted that token; it does not when decoding
        stopped at the step limit or on a token id missing from
        ``tokenizer_o.index_word``.
    """
    DECODER_SEQ_LEN = 25
    end_idx = tokenizer_o.word_index['<end>']

    input_sequence = tokenizer_i.texts_to_sequences([input_sentence])
    inputs = pad_sequences(input_sequence, maxlen=25, padding='post')
    inputs = tf.convert_to_tensor(inputs)

    encoder, decoder, dense = model.layers[0], model.layers[1], model.layers[2]
    _, states_values = encoder(inputs)

    # NOTE(review): id 1 is assumed to be the decoder start token -- confirm against tokenizer_o.
    predict_word_idx = np.ones((1, 1))
    predicted_words = []

    for _ in range(DECODER_SEQ_LEN):
        predict_emb = decoder.embedding(predict_word_idx)
        dec_output, states_values = decoder.gru(predict_emb, initial_state=states_values)
        dec_output = dense(dec_output)

        # Take a plain Python int: int() on a (1, 1) ndarray is deprecated in NumPy >= 1.25.
        word_idx = int(np.argmax(dec_output))
        if word_idx not in tokenizer_o.index_word:
            # e.g. the padding id 0, which has no word; the original raised KeyError here.
            break
        predicted_words.append(tokenizer_o.index_word[word_idx])

        if word_idx == end_idx:
            break
        predict_word_idx = np.reshape(word_idx, (1, 1))

    return ''.join(' ' + word for word in predicted_words)

#------------------------------------------------------------------------------------------------------------------
def predict_result(data, model):
    """Print input, reference and predicted text for the first three rows of ``data``.

    Args:
        data: frame with 'input_text' and 'output_text_out' columns.
        model: model passed through to ``predict``.

    The last whitespace-separated token of both the reference and the
    prediction is dropped before printing.
    """
    sample_count = len(data[:3])
    separator = '='*100
    for row in range(sample_count):
        print("Input Text:", data['input_text'].iloc[row])

        reference_tokens = data['output_text_out'].iloc[row].split()
        print("Output Text:", ' '.join(reference_tokens[:-1]))

        predicted_tokens = predict(data['input_text'].iloc[row], model).split()
        print("Predicted Text:", ' '.join(predicted_tokens[:-1]))

        print(separator)

#------------------------------------------------------------------------------------------------------------------
def _avg_bleu(data, model, input_range):
    """Mean sentence-level BLEU of ``model`` over the first ``input_range`` rows of ``data``."""
    # Clamp so that a split with fewer rows than requested does not raise IndexError.
    n_rows = min(input_range, len(data))
    if n_rows <= 0:
        return 0.0

    total_bleu = 0
    for i in range(n_rows):
        # Drop the trailing token of both sides (presumably '<end>' -- confirm for output_text_out).
        reference = data['output_text_out'].iloc[i].split()[:-1]
        hypothesis = predict(data['input_text'].iloc[i], model).split()[:-1]
        total_bleu += bleu.sentence_bleu([reference], hypothesis)
    return total_bleu / n_rows


def get_BLEU(train, test, model, input_range=100):
    """Print the average sentence-level BLEU of ``model`` on the train and test splits.

    Args:
        train: frame with 'input_text' and 'output_text_out' columns.
        test: frame with the same columns.
        model: model passed through to ``predict``.
        input_range: number of leading rows of each split to score
            (default 100; clamped to the split size).

    Returns:
        None; the two averages are printed.
    """
    train_avg_bleu = _avg_bleu(train, model, input_range)
    test_avg_bleu = _avg_bleu(test, model, input_range)

    print('='*50)
    print('Avg. Train BLEU Score:', train_avg_bleu)
    print('Avg. Test BLEU Score:', test_avg_bleu)
    print('='*50)

## Encoder-Decoder: Scratch Embed

### Model Training

In [None]:
# GRU encoder-decoder with embeddings learned from scratch; train_model is defined earlier in the notebook.
model_G1 = build_model_gru(embed='scratch', name='GRU_Encoder-Decoder_Scratch')
train_model(model_G1, 'GRU_Encoder-Decoder_Scratch')

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Model: "GRU_Encoder-Decoder_Scratch"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
Encoder (Encoder)            multiple                  10773900  
_________________________________________________________________
Decoder (Decoder)            multiple                  8925900   
_________________________________________________________________
Dense (Dense)                multiple                  2964350   
Total params: 22,664,150
Trainable params: 22,664,150
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Save the trained weights under model_path as a TensorFlow-format checkpoint.
model_G1.save_weights(model_path + '3_1_GRU_Scratch/' + '3_1_GRU_Scratch', save_format='tf') 

### Model Prediction

In [None]:
# Spot-check: print input, reference and predicted text for the first three training rows.
predict_result(train, model_G1)

Input Text: At the party , the member has variety job .
Output Text: At the party , the members have a variety of jobs .
Predicted Text: At the school , we went to the library .
Input Text: Thank you NY .
Output Text: Thank you , NY !
Predicted Text: Thank you for your corrections .
Input Text: Few last days have been strange .
Output Text: The last few days have been strange .
Predicted Text: A few days was that my best .


In [None]:
# Same spot-check on the first three test rows.
predict_result(test, model_G1)

Input Text: Have you ever over the wall of school ?
Output Text: Have you ever climbed over the wall at school ?
Predicted Text: Have you ever seen in the U ?
Input Text: It is so narrow that I have to keep my body very fit everytime .
Output Text: It is so narrow that I have to keep my body very fit all the time .
Predicted Text: It was so so much , I must have to put it on the time .
Input Text: You can check the maintenance is finished or not at twitter .
Output Text: You can check whether the maintenance is finished or not on twitter .
Predicted Text: You can not use the computer or not the time I should not get up .


In [None]:
# Average sentence-level BLEU over the first 100 rows of each split.
get_BLEU(train, test, model_G1)

Avg. Train BLEU Score: 0.4440263747995784
Avg. Test BLEU Score: 0.4109874622853129


## Encoder-Decoder: Glove Embed

### Model Training

In [None]:
# GRU encoder-decoder with frozen pretrained GloVe embeddings; train_model is defined earlier in the notebook.
model_G2 = build_model_gru(embed='glove', name='GRU_Encoder-Decoder_Glove')
train_model(model_G2, 'GRU_Encoder-Decoder_Glove')

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Model: "GRU_Encoder-Decoder_Glove"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
Encoder (Encoder)            multiple                  10773900  
_________________________________________________________________
Decoder (Decoder)            multiple                  8925900   
_________________________________________________________________
Dense (Dense)                multiple                  2964350   
Total params: 22,664,150
Trainable params: 3,205,550
Non-trainable params: 19,458,600
_________________________________________________________________


In [None]:
# Save the trained weights under model_path as a TensorFlow-format checkpoint.
model_G2.save_weights(model_path + '3_2_GRU_Glove/' + '3_2_GRU_Glove', save_format='tf') 

### Model Prediction

In [None]:
# Spot-check: print input, reference and predicted text for the first three training rows.
predict_result(train, model_G2)

Input Text: At the party , the member has variety job .
Output Text: At the party , the members have a variety of jobs .
Predicted Text: In the party , a part of a party .
Input Text: Thank you NY .
Output Text: Thank you , NY !
Predicted Text: Thank you recommend .
Input Text: Few last days have been strange .
Output Text: The last few days have been strange .
Predicted Text: These days ago .


In [None]:
# Same spot-check on the first three test rows.
predict_result(test, model_G2)

Input Text: Have you ever over the wall of school ?
Output Text: Have you ever climbed over the wall at school ?
Predicted Text: Have you have a different person in the school ?
Input Text: It is so narrow that I have to keep my body very fit everytime .
Output Text: It is so narrow that I have to keep my body very fit all the time .
Predicted Text: It is so so much that I have been so excited to my heart .
Input Text: You can check the maintenance is finished or not at twitter .
Output Text: You can check whether the maintenance is finished or not on twitter .
Predicted Text: I can check the computer or not check .


In [None]:
# Average sentence-level BLEU over the first 100 rows of each split.
get_BLEU(train, test, model_G2)

Avg. Train BLEU Score: 0.4585618940447448
Avg. Test BLEU Score: 0.4324036178248305


## Encoder-Decoder: FastText Embed

### Model Training

In [None]:
# GRU encoder-decoder with frozen pretrained FastText embeddings; train_model is defined earlier in the notebook.
model_G3 = build_model_gru(embed='fast', name='GRU_Encoder-Decoder_Fast')
train_model(model_G3, 'GRU_Encoder-Decoder_Fast')

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Model: "GRU_Encoder-Decoder_Fast"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
Encoder (Encoder)            multiple                  10773900  
_________________________________________________________________
Decoder (Decoder)            multiple                  8925900   
_________________________________________________________________
Dense (Dense)                multiple                  2964350   
Total params: 22,664,150
Trainable params: 3,205,550
Non-trainable params: 19,458,600
_________________________________________________________________


In [None]:
# Save the trained weights under model_path as a TensorFlow-format checkpoint.
model_G3.save_weights(model_path + '3_3_GRU_Fast/' + '3_3_GRU_Fast', save_format='tf') 

### Model Prediction

In [None]:
# Spot-check: print input, reference and predicted text for the first three training rows.
predict_result(train, model_G3)

Input Text: At the party , the member has variety job .
Output Text: At the party , the members have a variety of jobs .
Predicted Text: At first time , the company has a long time .
Input Text: Thank you NY .
Output Text: Thank you , NY !
Predicted Text: Thank you guys .
Input Text: Few last days have been strange .
Output Text: The last few days have been strange .
Predicted Text: The most years has been been a long .


In [None]:
# Same spot-check on the first three test rows.
predict_result(test, model_G3)

Input Text: Have you ever over the wall of school ?
Output Text: Have you ever climbed over the wall at school ?
Predicted Text: Have you ever seen in the company ?
Input Text: It is so narrow that I have to keep my body very fit everytime .
Output Text: It is so narrow that I have to keep my body very fit all the time .
Predicted Text: It is so I feel like my mind to do my best .
Input Text: You can check the maintenance is finished or not at twitter .
Output Text: You can check whether the maintenance is finished or not on twitter .
Predicted Text: You can not know the best time to get up .


In [None]:
# Average sentence-level BLEU over the first 100 rows of each split.
get_BLEU(train, test, model_G3)

Avg. Train BLEU Score: 0.4604231460584209
Avg. Test BLEU Score: 0.4460802502826131


# Summary

In [None]:
from prettytable import PrettyTable

# Column headers of the summary table.
summary_columns = ["Model", "Train Loss", "Val Loss", "Avg Train BLEU Score", "Avg Test BLEU Score"]

# Hard-coded results, one row per model; an all-blank row separates the architectures.
# The GRU and BiLSTM BLEU figures are the get_BLEU outputs above truncated to two decimals.
# NOTE(review): the loss figures are presumably copied from the training logs -- confirm.
summary_rows = [
    ["GRU Model - Scratch Embed", "1.38", "1.69", "0.44", "0.41"],
    ["GRU Model - Glove Embed", "1.59", "1.67", "0.45", "0.43"],
    ["GRU Model - Fasttext Embed", "1.64", "1.69", "0.46", "0.44"],
    [" ", " ", " ", " ", " "],
    ["LSTM Model - Scratch Embed", "1.38", "1.67", "0.44", "0.44"],
    ["LSTM Model - Glove Embed", "1.63", "1.68", "0.44", "0.43"],
    ["LSTM Model - Fasttext Embed", "1.71", "1.72", "0.43", "0.46"],
    [" ", " ", " ", " ", " "],
    ["BiLSTM Model - Scratch Embed", "1.18", "1.54", "0.43", "0.42"],
    ["BiLSTM Model - Glove Embed", "1.26", "1.44", "0.42", "0.42"],
    ["BiLSTM Model - Fasttext Embed", "1.38", "1.46", "0.42", "0.41"],
]

print('---------------------------------------SUMMARY OF BASELINE MODEL--------------------------------------')
myTable = PrettyTable(summary_columns)
for summary_row in summary_rows:
    myTable.add_row(summary_row)

print(myTable)

---------------------------------------SUMMARY OF BASELINE MODEL--------------------------------------
+-------------------------------+------------+----------+----------------------+---------------------+
|             Model             | Train Loss | Val Loss | Avg Train BLEU Score | Avg Test BLEU Score |
+-------------------------------+------------+----------+----------------------+---------------------+
|   GRU Model - Scratch Embed   |    1.38    |   1.69   |         0.44         |         0.41        |
|    GRU Model - Glove Embed    |    1.59    |   1.67   |         0.45         |         0.43        |
|   GRU Model - Fasttext Embed  |    1.64    |   1.69   |         0.46         |         0.44        |
|                               |            |          |                      |                     |
|   LSTM Model - Scratch Embed  |    1.38    |   1.67   |         0.44         |         0.44        |
|    LSTM Model - Glove Embed   |    1.63    |   1.68   |         0.44   