In [1]:
import gc
import os
import sys
from multiprocessing import Process

import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf

import nltk.translate.bleu_score as bleu

from tensorflow.keras import backend as K

from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split

from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

sys.path.insert(0, r"../utilities/")
sys.path.insert(0, r"../Seq2Seq/")
sys.path.insert(0, r"../Seq2SeqAttention/")
sys.path.insert(0, r"../Transformer/")

from utils import *

import warnings
warnings.filterwarnings('ignore')

In [2]:
from Seq2SeqTrainer import Seq2SeqTrainer
from Seq2SeqAttentionTrainer import Seq2SeqAttentionTrainer
from TransformerTrainer import TransformerTrainer

In [3]:
# Per-model loss series that the final comparison cell plots on a single figure.
# Each name gets its own, initially empty, list.
seq2seqloss, Seq2SeqAttentionloss, Transformerloss = [], [], []

def _plot_train_test_curves(train_values, test_values, metric, legend_loc, filename):
    """Draw one train/test pair of curves on a new figure, save it and show it.

    Args:
        train_values: sequence plotted under the label ``"train_<metric>"``.
        test_values: sequence plotted under the label ``"test_<metric>"``.
        metric: metric name; used for the legend labels and the y-axis label.
        legend_loc: matplotlib legend location string.
        filename: path the figure is written to via ``Figure.savefig``.
    """
    fig = plt.figure()
    fig_plot = fig.add_subplot()
    fig_plot.plot(train_values, label="train_" + metric)
    fig_plot.plot(test_values, label="test_" + metric)
    fig_plot.legend(loc=legend_loc)
    fig_plot.set_xlabel("epoch")
    fig_plot.set_ylabel(metric)
    fig_plot.grid(linestyle="--")
    fig.savefig(filename)
    fig.show()


def makePlots(losses, accuracy, name):
    """Plot and save the loss and accuracy curves of one training run.

    Two figures are produced: ``losses_plot_<name>.png`` and
    ``accuracy_plot_<name>.png``. Both file names embed ``name`` so that
    plots of different models do not overwrite each other (the accuracy
    plot used to be written to a fixed ``accuracy_plot.png``).

    Args:
        losses: pair ``(train_losses, test_losses)``.
        accuracy: pair ``(train_accuracy, test_accuracy)``.
        name: model identifier embedded in both output file names.
    """
    # Unpack both pairs up front so a malformed argument fails before anything is drawn.
    train_losses, test_losses = losses
    train_accuracyVec, test_accuracyVec = accuracy

    _plot_train_test_curves(train_losses, test_losses, "loss", "upper right",
                            "losses_plot_" + name + ".png")
    _plot_train_test_curves(train_accuracyVec, test_accuracyVec, "accuracy", "lower right",
                            "accuracy_plot_" + name + ".png")

In [4]:
data_dir = "../data/"
# Fixed seed so that the 40000-pair subsample and the train/test split are the
# same on every Restart & Run All (both calls were previously unseeded).
SEED = 42

# Alternative corpus, currently disabled:
#en_lines, fr_lines = read_data_files(data_dir, ("small_vocab_en", "small_vocab_fr"))

# read_data comes from utils; its items are unpacked below as (English, French) pairs.
data = read_data(os.path.join(data_dir, "fra-eng"), "fra.txt")
en_lines, fr_lines = list(zip(*data))
en_lines_raw, fr_lines_raw = shuffle(en_lines, fr_lines, random_state=SEED)

# Only the first 40000 shuffled pairs feed training/testing; the remainder of
# *_raw stays untouched and is sampled from in the next cell.
en_lines = en_lines_raw[:40000]
fr_lines = fr_lines_raw[:40000]

en_lines = [normalize(line) for line in en_lines]
fr_lines = [normalize(line) for line in fr_lines]

en_train, en_test, fr_train, fr_test = train_test_split(
    en_lines, fr_lines, shuffle=True, test_size=0.1, random_state=SEED)

# Keep the normalized test sentences as plain text under en_lines / fr_lines:
# en_test / fr_test are rebound to the outputs of tokenizeInput further down.
en_lines = en_test
fr_lines = fr_test

# Decoder input is prefixed with <start>, decoder target is suffixed with <end>.
fr_train_in = ['<start> ' + line for line in fr_train]
fr_train_out = [line + ' <end>' for line in fr_train]

fr_test_in = ['<start> ' + line for line in fr_test]
fr_test_out = [line + ' <end>' for line in fr_test]

# filters='' disables the Tokenizer's default character filtering.
fr_tokenizer = Tokenizer(filters='')
en_tokenizer = Tokenizer(filters='')

# tokenizeInput comes from utils; presumably it fits the tokenizer and converts
# every list to id sequences -- TODO confirm against utils.
input_data = [fr_train_in, fr_train_out, fr_test_in, fr_test_out, fr_test, fr_train]
fr_train_in, fr_train_out, fr_test_in, fr_test_out, fr_test, fr_train = tokenizeInput(input_data,
                                                                                      fr_tokenizer)
input_data = [en_train, en_test]
en_train, en_test = tokenizeInput(input_data, en_tokenizer)

# +1 on top of the number of distinct words in word_index.
en_vocab_size = len(en_tokenizer.word_index)+1
fr_vocab_size = len(fr_tokenizer.word_index)+1
print("en_vocab {}\nfr_vocab {}" .format(en_vocab_size, fr_vocab_size))
print(len(en_lines))

reading data from  ../data/fra-eng/fra.txt
en_vocab 8331
fr_vocab 13576
4000


In [5]:
# Draw 10 random indices at or beyond position 40000 of the shuffled corpus,
# i.e. outside the [:40000] slice taken in the data-preparation cell, and pair
# up the raw (un-normalized) English/French sentences at those positions.
prediction_idx = np.random.randint(low=40000, high=len(en_lines_raw), size=10)
test_text = list(zip(
    (en_lines_raw[idx] for idx in prediction_idx),
    (fr_lines_raw[idx] for idx in prediction_idx),
))

print("TEST_TEXTS")
for en, fr in test_text:
    print(en, " - ", fr)

TEST_TEXTS
That was my idea.  -  C'était mon idée.
I was watching TV when the telephone rang.  -  Je regardais la télé lorsque le téléphone a sonné.
I want to trust you.  -  Je veux vous faire confiance.
I don't know what could've happened.  -  J'ignore ce qui aurait pu arriver.
We need to find a new babysitter.  -  Nous devons trouver une nouvelle baby-sitter.
I'm still waiting for my breakfast. Bring it to me now, please.  -  J'attends toujours mon petit déjeuner, veuillez me l'apporter maintenant.
I know Tom is fast.  -  Je sais que Tom est rapide.
I thought they'd heard us.  -  J'ai pensé qu'elles nous avaient entendus.
We need to concentrate on coming up with a new plan.  -  Nous devons nous concentrer pour trouver un nouveau plan.
It's kind of complicated.  -  C'est plutôt compliqué.


In [6]:
def calculate_bleu(reference, predicted):
    """Print smoothed corpus-level BLEU scores for three n-gram weightings.

    The scores are computed with ``bleu.corpus_bleu`` using ``method1`` of
    ``bleu.SmoothingFunction``: once with the library's default weights
    (reported as "4grams"), once with three equal weights ("3grams") and once
    with two equal weights ("2grams"). Nothing is returned.

    Args:
        reference: forwarded as the first argument of ``corpus_bleu``.
        predicted: forwarded as the second argument of ``corpus_bleu``.
    """
    smooth = bleu.SmoothingFunction().method1

    # Default weights first, then the explicit 3- and 2-gram weightings, in that order.
    score_4 = bleu.corpus_bleu(reference, predicted, smoothing_function=smooth)
    score_3, score_2 = (
        bleu.corpus_bleu(reference, predicted, weights=weights, smoothing_function=smooth)
        for weights in ((1./3.,) * 3, (1./2.,) * 2)
    )

    report = "    4grams {}\n    3grams {}\n    2grams {}\n\n".format(score_4, score_3, score_2)
    print(report)

# Toy check of calculate_bleu: each hypothesis has exactly one reference
# sentence, hence the extra list level around every tokenized reference.
reference = [
    [sentence.split()]
    for sentence in (
        'this is a ship',
        'it is ship',
        'ship it is',
        'a ship, it is',  # master Yoda
    )
]
print(reference)

# The last two hypotheses are shorter than their references.
pred = [
    sentence.split()
    for sentence in (
        'this is a ship',
        'it is ship',
        'ship  is',
        'a ship, it',  # master Yoda
    )
]
print(pred)
calculate_bleu(reference, pred)

[[['this', 'is', 'a', 'ship']], [['it', 'is', 'ship']], [['ship', 'it', 'is']], [['a', 'ship,', 'it', 'is']]]
[['this', 'is', 'a', 'ship'], ['it', 'is', 'ship'], ['ship', 'is'], ['a', 'ship,', 'it']]
    4grams 0.5474911439088136
    3grams 0.7515945109323554
    2grams 0.7918111496765283




In [7]:
# Training-loop settings.
EPOCHS = 40        # passed to every trainer's train() call
BATCH_SIZE = 64    # passed to the two Seq2Seq trainers

# Model dimensions passed to the two Seq2Seq trainers.
EMBEDDING_SIZE = 256
LSTM_SIZE = 512

In [8]:
def _train_and_evaluate(trainer, plot_name, extra_train_args=(), predict_kwargs=None):
    """Train ``trainer``, plot its curves, print sample predictions and BLEU.

    Shared body of the three ``*Predictions`` entry points, which used to be
    copy-pasted.

    Args:
        trainer: object exposing ``train``, ``predict`` and ``translate``.
        plot_name: model name forwarded to ``makePlots``.
        extra_train_args: extra positional arguments appended after ``EPOCHS``
            in the ``trainer.train`` call.
        predict_kwargs: optional keyword arguments for ``trainer.predict``.

    Returns:
        The second element of the ``losses`` pair returned by ``trainer.train``
        (the test losses).
    """
    losses, accuracy = trainer.train(
        [en_train, fr_train_in, fr_train_out],
        [en_test, fr_test_in, fr_test_out],
        [en_lines, fr_lines],
        [en_tokenizer, fr_tokenizer],
        EPOCHS,
        *extra_train_args
    )
    makePlots(losses, accuracy, plot_name)

    predict_kwargs = predict_kwargs or {}
    for (en_text, fr_text) in test_text:
        trainer.predict(en_text, fr_text, **predict_kwargs)
    _, test_losses = losses

    print("starting translation...")
    ref = []
    pred = []
    # BLEU is scored on the whole held-out split (en_lines / fr_lines), not on
    # the handful of sentences in test_text.
    for en_text, fr_text in zip(en_lines, fr_lines):
        # One reference per hypothesis, hence the extra list level.
        ref.append([[word.lower() for word in fr_text.split()]])
        pred.append([word.lower() for word in trainer.translate(en_text)])
    print("starting BLEU calculation...")
    calculate_bleu(ref, pred)
    return test_losses


def Seq2SeqPredictions():
    """Train and evaluate the plain Seq2Seq model.

    The test losses are stored in the module-level ``seq2seqloss`` and also
    returned. (The previous ``_, seq2seqloss = losses`` only bound a local
    name, so the module-level list was never updated.)

    Note: when this function is the target of a ``multiprocessing.Process``,
    the assignment happens in the child process and is not visible in the
    notebook kernel.
    """
    global seq2seqloss
    trainer = Seq2SeqTrainer(BATCH_SIZE, LSTM_SIZE, EMBEDDING_SIZE, predict_every=20)
    seq2seqloss = _train_and_evaluate(trainer, "Seq2Seq")
    return seq2seqloss


def Seq2SeqAttentionPredictions():
    """Train and evaluate the Seq2Seq model with attention.

    ``"concat"`` is passed to ``trainer.train`` after ``EPOCHS`` and
    ``print_prediction=True`` to every ``trainer.predict`` call. The test
    losses are stored in the module-level ``Seq2SeqAttentionloss`` and also
    returned; the same child-process caveat as for ``Seq2SeqPredictions``
    applies.
    """
    global Seq2SeqAttentionloss
    trainer = Seq2SeqAttentionTrainer(BATCH_SIZE, LSTM_SIZE, EMBEDDING_SIZE, predict_every=20)
    Seq2SeqAttentionloss = _train_and_evaluate(
        trainer,
        "Seq2SeqAttention",
        extra_train_args=("concat",),
        predict_kwargs={"print_prediction": True},
    )
    return Seq2SeqAttentionloss


def TransformerPredictions():
    """Train and evaluate the Transformer model.

    The test losses are stored in the module-level ``Transformerloss`` and
    also returned; the same child-process caveat as for ``Seq2SeqPredictions``
    applies.
    """
    global Transformerloss
    # Kept local (not the module-level BATCH_SIZE) as in the original code.
    batch_size = 64
    # Trailing values are presumably the full-size reference configuration -- TODO confirm.
    num_layers = 6 # 6
    d_model = 256 # 512
    dff = 512  # 2048
    num_heads = 8
    trainer = TransformerTrainer(batch_size, num_layers, d_model, dff, num_heads, predict_every=20)
    Transformerloss = _train_and_evaluate(trainer, "Transformer")
    return Transformerloss

In [None]:
# Train/evaluate in a child process (presumably so its resources are released
# when the process exits -- TODO confirm the intent).
p = Process(target=TransformerPredictions, args=())
p.start()
p.join()
# Without this check a crashed child would leave the cell looking successful.
if p.exitcode != 0:
    raise RuntimeError("TransformerPredictions exited with code {}".format(p.exitcode))

en_vocab 8331
fr_vocab 13576
Number of devices: 4
creating dataset...
input :  Tom has lived in Boston for three years .
output:  Tom vit a Boston depuis trois ans .
training from scratch
INFO:tensorflow:batch_all_reduce: 254 all-reduces with algorithm = nccl, num_packs = 1, agg_small_grads_max_bytes = 0 and agg_small_grads_max_group = 10
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:GPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:GPU:0', '/job:localhost/replica:0/task:0/device:GPU:1', '/job:localhost/replica:0/task:0/device:GPU:2', '/job:localhost/replica:0/task:0/device:GPU:3').
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:GPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:GPU:0', '/job:localhost/replica:0/task:0/device:GPU:1', '/job:localhost/replica:0/task:0/device:GPU:2', '/job:localhost/replica:0/task:0/device:GPU:3').
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/j

           Predicted : montre-moi cela que je l attends sur moi .
           Correct   : J'attends toujours mon petit déjeuner, veuillez me l'apporter maintenant.
--------------------------END PREDICTION--------------------------
----------------------------PREDICTION----------------------------
           English   : I know Tom is fast.
           Predicted : je sais que tom est avec lui .
           Correct   : Je sais que Tom est rapide.
--------------------------END PREDICTION--------------------------
----------------------------PREDICTION----------------------------
           English   : I thought they'd heard us.
           Predicted : je pensais que j etais assoiffee .
           Correct   : J'ai pensé qu'elles nous avaient entendus.
--------------------------END PREDICTION--------------------------
----------------------------PREDICTION----------------------------
           English   : We need to concentrate on coming up with a new plan.
           Predicted : nous devons no

In [None]:
# Train/evaluate in a child process (presumably so its resources are released
# when the process exits -- TODO confirm the intent).
p = Process(target=Seq2SeqPredictions, args=())
p.start()
p.join()
# Without this check a crashed child would leave the cell looking successful.
if p.exitcode != 0:
    raise RuntimeError("Seq2SeqPredictions exited with code {}".format(p.exitcode))

In [None]:
# Train/evaluate in a child process (presumably so its resources are released
# when the process exits -- TODO confirm the intent).
p = Process(target=Seq2SeqAttentionPredictions, args=())
p.start()
p.join()
# Without this check a crashed child would leave the cell looking successful.
if p.exitcode != 0:
    raise RuntimeError("Seq2SeqAttentionPredictions exited with code {}".format(p.exitcode))

In [None]:
    # Overlay the per-model loss series on one figure and save it.
    # NOTE(review): the three lists are created empty at the top of the notebook
    # and the *Predictions functions are launched in child processes -- verify
    # the lists are actually populated in this kernel before trusting the figure.
    fig = plt.figure()
    fig_plot = fig.add_subplot()
    for series, label in (
        (seq2seqloss, "seq2seq"),
        (Seq2SeqAttentionloss, "seq2seqAttention"),
        (Transformerloss, "Transformer"),
    ):
        fig_plot.plot(series, label=label)
    fig_plot.set_xlabel("epoch")
    fig_plot.set_ylabel("loss")
    fig_plot.grid(linestyle="--")
    fig_plot.legend(loc="upper right")
    fig.savefig("test_losses_plot.png")
    fig.show()