# French to English Machine Translation  
## Seq2Seq with GloVe & frWac pre-trained embeddings




Set the path to these files. They should be in the shared folder. Copy to your drive, or upload to colab instance.

In [None]:
# dataset
filtered_dataset_path = '/content/drive/My Drive/MT/pairs.pkl'

# glove 
glove_vectors_path = '/content/drive/My Drive/Embedding Models/6B.100.dat'
glove_words_path = '/content/drive/My Drive/Embedding Models/6B.100_words.pkl'
glove_word2id_path = '/content/drive/My Drive/Embedding Models/6B.100_idx.pkl'

#frWac
frWac200d_path = '/content/drive/My Drive/Embedding Models/frWac_non_lem_no_postag_no_phrase_200_skip_cut100.bin'

# model weights
encoder_saved_model_weights = '/content/drive/My Drive/Embedding Models/model_states/simple_encoder_75E.pth'
decoder_saved_model_weights ='/content/drive/My Drive/Embedding Models/model_states/simple_decoder_75E.pth'

In [None]:
!pip install bcolz

In [None]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

import os, re, pickle, collections, bcolz, string
import numpy as np
import math

import torch
import torch.nn as nn
from torch.autograd import Variable
from torch import optim
import torch.nn.functional as F
from torch.utils.data import DataLoader
from tqdm import tqdm_notebook
from gensim.models import KeyedVectors
from keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


Load and separate sentences

In [None]:
# Read the pickled sentence pairs. The context manager closes the file handle
# as soon as unpickling finishes (the bare open() call left it dangling).
# NOTE: unpickling executes arbitrary code -- only load a dataset file you trust.
with open(filtered_dataset_path, 'rb') as f:
    sentences = pickle.load(f)

In [None]:
en_questions, fr_questions = zip(*sentences)

Define tokenizer.

In [None]:
# Tokenizer patterns; tokenize() applies them in the order they are listed here.
re_apos = re.compile(r"(\w)'s\b")         # trailing 's is split off as its own token
re_mw_punc = re.compile(r"(\w[’'])(\w)")  # any other in-word apostrophe splits the word in two
re_punc = re.compile("([\"().,;:/_?!—])") # punctuation marks that become standalone tokens
re_mult_space = re.compile(r"  *")        # a run of spaces, collapsed into one

def tokenize(sent):
    """Split a raw sentence into a list of lowercase tokens.

    Steps: detach a trailing 's, split words at any remaining apostrophe
    (the apostrophe stays attached to the left part), surround punctuation
    with spaces, turn hyphens into spaces, collapse repeated spaces, then
    lowercase and split on whitespace.

    Args:
        sent: sentence as a string.

    Returns:
        List of lowercase token strings.
    """
    apostrophes_split = re_mw_punc.sub(r"\1 \2", re_apos.sub(r"\1 's", sent))
    punctuation_spaced = re_punc.sub(r" \1 ", apostrophes_split)
    single_spaced = re_mult_space.sub(' ', punctuation_spaced.replace('-', ' '))
    return single_spaced.lower().split()

Tokenize english questions.

In [None]:
en_tokens = list(map(tokenize, en_questions))

In [None]:
en_tokens[:4]

[['i', 'm', '.'],
 ['i', 'm', 'ok', '.'],
 ['i', 'm', 'ok', '.'],
 ['i', 'm', 'fat', '.']]

Tokenize french questions.

In [None]:
fr_tokens = list(map(tokenize, fr_questions))

In [None]:
fr_tokens[:4]

[['j', 'ai', 'ans', '.'],
 ['je', 'vais', 'bien', '.'],
 ['ca', 'va', '.'],
 ['je', 'suis', 'gras', '.']]

For each language: 
<br/>- Get vocabulary counter.
<br/>- Get vocabulary.
<br/>- Get dictionary that maps each word to an index.
<br/>- Transform tokens to their corresponding ids.

In [None]:
# Reserved vocabulary indices: padding and start-of-sentence.
PAD = 0; SOS = 1

def tokens2ids(sentences):
    """Build a vocabulary from tokenized sentences and map every token to an id.

    The vocabulary is ordered by descending token frequency, with the two
    special tokens inserted at the reserved indices PAD (0) and SOS (1), so
    the most frequent real token gets id 2.

    Args:
        sentences: iterable of token lists.

    Returns:
        Tuple (vocab_counter, vocab, w2id, ids):
          vocab_counter -- collections.Counter of token frequencies,
          vocab         -- list of tokens, position == id,
          w2id          -- dict mapping token -> id,
          ids           -- the input sentences with each token replaced by its id.
    """
    vocab_counter = collections.Counter(word for sent in sentences for word in sent)
    vocab = sorted(vocab_counter, key=vocab_counter.get, reverse=True)
    vocab.insert(PAD, '<PAD>')
    # Fixed: the marker used to be spelled '<SOS' (missing the closing '>').
    vocab.insert(SOS, '<SOS>')
    w2id = {word:i for i, word in enumerate(vocab)}
    ids = [[w2id[word] for word in sent] for sent in sentences]
    return vocab_counter, vocab, w2id, ids

In [None]:
en_vocab_counter, en_vocab, en_w2id, en_ids = tokens2ids(en_tokens)
fr_vocab_counter, fr_vocab, fr_w2id, fr_ids = tokens2ids(fr_tokens)

In [None]:
len(en_vocab), len(fr_vocab)

(2803, 4345)

## Word vectors

We will not train word vectors from scratch, we will use <a href='http://nlp.stanford.edu/data/glove.6B.zip'>GloVe</a> for english words and <a href='http://fauconnier.github.io/index.html#wordembeddingmodels'>FrWac2Vec</a> for french words.

#### GloVe preprocessing

We will:
<br/>- Load words.
<br/>- Assign an index to each word.
<br/>- Create a dictionary that maps each word to their ids.
<br/>- Create an array with numeric vectors.

#### Load english words vectors

In [None]:
gl_dim = 100
# list of vectors of len(vocab)
glove_vectors = bcolz.open(glove_vectors_path)[:]
# list of words of len(vocab)
# (context managers close the pickle files once they have been read;
#  unpickling runs arbitrary code, so only load files you trust)
with open(glove_words_path, 'rb') as f:
    glove_words = pickle.load(f)
# dict. of words:index of len(vocab)
with open(glove_word2id_path, 'rb') as f:
    glove_word2id = pickle.load(f)

We will create a dictionary that maps each GloVe English word to its corresponding numeric vector.

In [None]:
glove_word2vec = {word: glove_vectors[glove_word2id[word]] for word in glove_words}
n_glove_vectors, dim_glove_vectors = glove_vectors.shape

In [None]:
n_glove_vectors, dim_glove_vectors

(400000, 100)

We have 400000 glove english words vectors with dimension equal to 100.

In [None]:
glove_word2vec['phone']

array([ 3.1764e-02, -6.0768e-01,  5.2233e-01, -1.1533e-02,  3.6009e-01,
        3.6460e-01, -4.9728e-03, -3.3769e-04,  6.6011e-01, -1.2602e-01,
        2.3832e-01,  5.6113e-02, -1.1328e-01,  3.5199e-01,  2.4070e-01,
       -2.9588e-01, -3.1811e-01,  7.9509e-03,  3.2996e-01, -1.0383e-01,
       -4.0230e-01, -3.7351e-03,  4.7088e-01,  2.2141e-01,  3.3043e-01,
       -4.5048e-01,  3.5376e-01,  5.5943e-01,  2.3509e-01,  5.0190e-02,
        5.7384e-01,  9.1137e-01,  8.9360e-01,  1.3000e-01,  6.7807e-01,
        4.1787e-01, -6.9812e-01, -6.0581e-01,  1.1147e+00, -4.3455e-03,
        4.6439e-01, -3.8663e-01,  9.2078e-02, -3.5278e-01, -9.2302e-01,
        3.7423e-02, -4.9481e-01, -2.0403e-01,  8.0609e-01, -6.7063e-01,
        1.9324e-01,  6.9329e-01,  8.1743e-01,  3.7762e-01,  2.6951e-01,
       -1.7669e+00, -7.0825e-01,  2.7024e-01,  1.9455e+00,  7.2376e-01,
        1.7558e-01, -1.7475e-01,  1.7004e-01, -6.7982e-01, -2.3057e-01,
        8.5733e-02,  7.8184e-01,  3.4410e-01,  8.3690e-01,  2.77

#### French word vectors

In [None]:
fr_w2v = KeyedVectors.load_word2vec_format(frWac200d_path, binary=True)

In [None]:
fr_w2v.vector_size

200

Now we need to create embeddings matrices for english and french words of training corpus. If a word appears on GloVe or frWac then we load its pre-trained vector, otherwise we create a random vector.

In [None]:
def create_embedding(w2v, target_vocab, emb_dim):
    """Build an embedding matrix for `target_vocab` from pre-trained vectors.

    Row i holds the vector of target_vocab[i]. Words for which `w2v[word]`
    raises KeyError get a random vector drawn from a normal distribution
    with standard deviation 0.6.

    Args:
        w2v: mapping-like object; w2v[word] returns the word's vector or
            raises KeyError.
        target_vocab: sequence of words, position == row index.
        emb_dim: dimensionality of the vectors.

    Returns:
        Tuple (embedding, words_found): the (len(target_vocab), emb_dim)
        numpy array and the number of words that had a pre-trained vector.
    """
    matrix = np.zeros((len(target_vocab), emb_dim))
    hits = 0

    for row, word in enumerate(target_vocab):
        try:
            vector = w2v[word]
        except KeyError:
            # No pre-trained vector: fall back to random initialisation.
            matrix[row] = np.random.normal(scale=0.6, size=(emb_dim, ))
        else:
            matrix[row] = vector
            hits += 1

    return matrix, hits

In [None]:
en_emb, words_found = create_embedding(glove_word2vec, en_vocab, 100)

In [None]:
en_emb.shape, words_found

((2803, 100), 2797)

6 vocabulary entries (2803 − 2797) were not found in the GloVe embeddings.

In [None]:
fr_emb, words_found = create_embedding(fr_w2v, fr_vocab, 200)

In [None]:
fr_emb.shape, words_found

((4345, 200), 3737)

608 words were not in the frWac embeddings -- not great, might affect results. 

## Data preparation

Min, max and mean length of english sentences.

In [None]:
len_en_ids = [len(sentence) for sentence in en_ids]
min(len_en_ids), max(len_en_ids), np.mean(len_en_ids)

(3, 9, 6.035569393338994)

Min, max and mean length of french sentences.

In [None]:
len_fr_ids = [len(sentence) for sentence in fr_ids]
min(len_fr_ids), max(len_fr_ids), np.mean(len_fr_ids)

(2, 9, 6.196056231719973)

We set 30 as max length. In this example, we could use 10 since the max length of every sentence is 9. 30 allows us more flexibility in the future for other datasets.

In [None]:
maxlen = 30

In [None]:
en_train = pad_sequences(en_ids, maxlen, 'int64', 'post', 'post')
fr_train = pad_sequences(fr_ids, maxlen, 'int64', 'post', 'post')

In [None]:
fr_train.shape, en_train.shape, en_emb.shape, fr_emb.shape

((10599, 30), (10599, 30), (2803, 100), (4345, 200))

In [None]:
en_train[0]

array([3, 6, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0])

In [None]:
fr_train[0]

array([ 27,  30, 115,   2,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0])

In [None]:
torch.cuda.is_available()

True

In [None]:
fr_emb_t = torch.FloatTensor(fr_emb).cuda()
en_emb_t = torch.FloatTensor(en_emb).cuda()

In [None]:
def long_t(arr):
    """Wrap `arr` in a LongTensor Variable and move it to the GPU.

    Args:
        arr: array-like of integers (list or numpy array).

    Returns:
        The CUDA tensor holding the same values.
    """
    cpu_tensor = torch.LongTensor(arr)
    return Variable(cpu_tensor).cuda()

Load pre-trained vectors into an embedding layer.

In [None]:
def create_emb(emb_matrix, non_trainable=False):
    """Create an nn.Embedding layer initialised from a pre-trained matrix.

    Args:
        emb_matrix: 2-D tensor of shape (num_embeddings, embedding_dim).
        non_trainable: when True the layer's weight is frozen
            (requires_grad is set to False).

    Returns:
        Tuple (layer, num_embeddings, embedding_dim).
    """
    vocab_size, vector_dim = emb_matrix.size()
    layer = nn.Embedding(vocab_size, vector_dim)
    layer.load_state_dict({'weight': emb_matrix})
    if non_trainable:
        # The weight matrix is the layer's only parameter.
        layer.weight.requires_grad = False
    return layer, vocab_size, vector_dim

Encoding layer

In [None]:
class EncoderRNN(nn.Module):
    """GRU encoder that reads source-language token ids.

    The embedding layer is initialised from `emb_matrix` and frozen
    (created through create_emb with non_trainable=True).

    Args:
        emb_matrix: pre-trained embedding tensor, (vocab size, embedding dim).
        hidden_size: size of the GRU hidden state.
        num_layers: number of stacked GRU layers.
    """

    def __init__(self, emb_matrix, hidden_size, num_layers=2):
        super().__init__()
        # Frozen pre-trained embeddings; the vocabulary size is not needed here.
        self.embedding, _vocab_size, emb_dim = create_emb(emb_matrix, True)
        # Kept on the instance because init_hidden() needs both values.
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.gru = nn.GRU(emb_dim, hidden_size, num_layers, batch_first=True)

    def forward(self, inp, hidden):
        """Embed `inp` and run it through the GRU; returns the GRU's (output, hidden)."""
        embedded = self.embedding(inp)
        return self.gru(embedded, hidden)

    def init_hidden(self, batch_size):
        """Return an all-zero initial hidden state of shape (num_layers, batch_size, hidden_size)."""
        shape = (self.num_layers, batch_size, self.hidden_size)
        return Variable(torch.zeros(*shape))

Decoding layer

In [None]:
class DecoderRNN(nn.Module):
    """GRU decoder that predicts one target-language token per call.

    The embedding layer is initialised from `emb_matrix` and left trainable
    (create_emb is called with its default non_trainable=False).

    Args:
        emb_matrix: pre-trained embedding tensor, (vocab size, embedding dim).
        hidden_size: size of the GRU hidden state.
        num_layers: number of stacked GRU layers.
    """

    def __init__(self, emb_matrix, hidden_size, num_layers=2):
        super().__init__()
        self.emb, vocab_size, emb_dim = create_emb(emb_matrix)
        self.gru = nn.GRU(emb_dim, hidden_size, num_layers, batch_first=True, bidirectional=False)
        # Projects the GRU output onto one score per vocabulary entry.
        self.out = nn.Linear(hidden_size, vocab_size)

    def forward(self, inp, hidden):
        """Run a single decoding step.

        Args:
            inp: 1-D tensor with one token id per batch element.
            hidden: current GRU hidden state.

        Returns:
            Tuple (log_probs, hidden): log-softmax scores over the vocabulary
            for each batch element, and the updated hidden state.
        """
        # Insert a length-1 time axis so the GRU sees a one-step sequence.
        step_input = self.emb(inp).unsqueeze(1)
        gru_out, hidden = self.gru(step_input, hidden)
        scores = self.out(gru_out[:, 0])
        return F.log_softmax(scores, dim=1), hidden

In [None]:
def encode(inp, encoder):
    """Run the encoder over a batch and prepare the first decoder input.

    Args:
        inp: 2-D CUDA tensor of source token ids, one row per sentence.
        encoder: module exposing init_hidden(batch_size) and accepting
            (inp, hidden) when called.

    Returns:
        Tuple (decoder_input, enc_outputs, hidden): a CUDA tensor holding one
        SOS id per batch element, the encoder outputs, and the final encoder
        hidden state.
    """
    # Unpacking two values keeps the original requirement that inp is 2-D.
    batch_size, _ = inp.size()
    hidden = encoder.init_hidden(batch_size).cuda()
    # Call the module itself rather than .forward() so nn.Module.__call__
    # (and any registered hooks) is not bypassed.
    enc_outputs, hidden = encoder(inp, hidden)
    return long_t([SOS]*batch_size), enc_outputs, hidden

Training

We use teacher forcing as the training approach: rather than feeding the decoder its own previously predicted word, we feed it the real target word.

In [None]:
def fit(encoder, decoder, train_dl, n_epochs, enc_optim, dec_optim, criterion):
    """Train the encoder/decoder pair with teacher forcing.

    Each batch from `train_dl` is a 2-D array whose first `maxlen` columns are
    the source ids and whose remaining columns are the target ids. For every
    target position the decoder is fed the true previous token and
    `criterion` is applied to its output; the per-step losses are summed and
    back-propagated once per batch.

    Args:
        encoder: encoder module (see encode()).
        decoder: decoder module called as decoder(decoder_input, hidden).
        train_dl: DataLoader yielding the concatenated source/target batches.
        n_epochs: number of passes over train_dl.
        enc_optim: optimizer for the encoder parameters.
        dec_optim: optimizer for the decoder parameters.
        criterion: loss applied to (decoder_output, target ids).

    Returns:
        List with one numpy array [epoch, debiased smoothed loss] per epoch,
        rounded to 6 decimals. The loss is NaN if no batch has been seen.
    """
    bar = tqdm_notebook(total=n_epochs)
    loss_tracker = []
    avg_mom = 0.98
    avg_loss = 0.
    batch_num = 0
    # Defined up front so an empty loader reports NaN instead of raising NameError.
    debias_loss = float('nan')

    for epoch in range(n_epochs):
        # len(train_dl) is the true (integer) number of batches; the previous
        # dataset_size / batch_size division gave a fractional total.
        bar2 = tqdm_notebook(total=len(train_dl), desc=f'Epoch {epoch}', leave=False)
        for batch in train_dl:
            batch_num += 1
            loss = 0

            inp = long_t(batch[:, :maxlen])
            targ = long_t(batch[:, maxlen:])

            # Encoder creates a vector representation of input french sentence.
            decoder_input, encoder_output, hidden = encode(inp, encoder)

            # Zero the gradients before running the backward pass.
            enc_optim.zero_grad()
            dec_optim.zero_grad()

            targ_length = targ.size()[1]

            for di in range(targ_length):
                decoder_output, hidden = decoder(decoder_input, hidden)
                # Teacher forcing: the next decoder input is the real target
                # token, which is also the label for the current step.
                decoder_input = targ[:, di]

                # Compute loss.
                loss += criterion(decoder_output, decoder_input)

            # Backward pass: compute gradient of the loss with respect to all the learnable parameters of the model.
            loss.backward()

            # Calling the step function on an Optimizer makes an update to its parameters.
            enc_optim.step()
            dec_optim.step()

            # Exponentially weighted moving average, to make the reported loss more stable.
            avg_loss = avg_loss * avg_mom + (loss.item() / targ_length) * (1-avg_mom)

            # Compute bias-corrected loss estimate.
            debias_loss = avg_loss / (1 - avg_mom**batch_num)

            bar2.update()

        # Release the per-epoch bar; leaving one open per epoch piles up
        # tqdm instances for the monitor thread to track.
        bar2.close()

        loss_tracker.append(np.round([epoch, debias_loss], 6))
        print(np.round([epoch, debias_loss], 6))
        bar.update()

    bar.close()
    return loss_tracker

In [None]:
def req_grad_params(o):
    """Lazily yield the parameters of `o` whose requires_grad flag is set.

    Args:
        o: object exposing parameters() (e.g. an nn.Module).

    Returns:
        A one-shot iterator over the trainable parameters.
    """
    return filter(lambda param: param.requires_grad, o.parameters())

Initialize models and set parameters

In [None]:
hidden_size = 64 #128
encoder = EncoderRNN(fr_emb_t, hidden_size).cuda()
decoder = DecoderRNN(en_emb_t, hidden_size).cuda()

In [None]:
lr = 1e-2

In [None]:
enc_opt = optim.Adam(req_grad_params(encoder), lr=lr)
dec_opt = optim.Adam(decoder.parameters(), lr=lr)
criterion = nn.NLLLoss().cuda()

In [None]:
batch_size = 64

Create a dataloader

In [None]:
train_dl = DataLoader(np.concatenate([fr_train, en_train], 1), batch_size, shuffle=True, num_workers=1)

Train the model

In [None]:
loss_tracker = fit(encoder, decoder, train_dl, 100, enc_opt, dec_opt, criterion)

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  if __name__ == '__main__':


HBox(children=(FloatProgress(value=0.0, description='Epoch 0', max=165.609375, style=ProgressStyle(description…

[0.      0.70844]


HBox(children=(FloatProgress(value=0.0, description='Epoch 1', max=165.609375, style=ProgressStyle(description…

[1.       0.547011]


HBox(children=(FloatProgress(value=0.0, description='Epoch 2', max=165.609375, style=ProgressStyle(description…

[2.       0.471759]


HBox(children=(FloatProgress(value=0.0, description='Epoch 3', max=165.609375, style=ProgressStyle(description…

[3.       0.404933]


HBox(children=(FloatProgress(value=0.0, description='Epoch 4', max=165.609375, style=ProgressStyle(description…

[4.       0.363045]


HBox(children=(FloatProgress(value=0.0, description='Epoch 5', max=165.609375, style=ProgressStyle(description…

[5.       0.327765]


HBox(children=(FloatProgress(value=0.0, description='Epoch 6', max=165.609375, style=ProgressStyle(description…

[6.       0.296934]


HBox(children=(FloatProgress(value=0.0, description='Epoch 7', max=165.609375, style=ProgressStyle(description…

[7.       0.272984]


HBox(children=(FloatProgress(value=0.0, description='Epoch 8', max=165.609375, style=ProgressStyle(description…

[8.      0.25791]


HBox(children=(FloatProgress(value=0.0, description='Epoch 9', max=165.609375, style=ProgressStyle(description…

[9.      0.24032]


HBox(children=(FloatProgress(value=0.0, description='Epoch 10', max=165.609375, style=ProgressStyle(descriptio…

[10.        0.227721]


HBox(children=(FloatProgress(value=0.0, description='Epoch 11', max=165.609375, style=ProgressStyle(descriptio…

[11.        0.216466]


HBox(children=(FloatProgress(value=0.0, description='Epoch 12', max=165.609375, style=ProgressStyle(descriptio…

[12.        0.205761]


HBox(children=(FloatProgress(value=0.0, description='Epoch 13', max=165.609375, style=ProgressStyle(descriptio…

[13.        0.198846]


HBox(children=(FloatProgress(value=0.0, description='Epoch 14', max=165.609375, style=ProgressStyle(descriptio…

[14.        0.190996]


HBox(children=(FloatProgress(value=0.0, description='Epoch 15', max=165.609375, style=ProgressStyle(descriptio…

[15.       0.18421]


HBox(children=(FloatProgress(value=0.0, description='Epoch 16', max=165.609375, style=ProgressStyle(descriptio…

[16.        0.176644]


HBox(children=(FloatProgress(value=0.0, description='Epoch 17', max=165.609375, style=ProgressStyle(descriptio…

[17.        0.171591]


HBox(children=(FloatProgress(value=0.0, description='Epoch 18', max=165.609375, style=ProgressStyle(descriptio…

[18.       0.16627]


HBox(children=(FloatProgress(value=0.0, description='Epoch 19', max=165.609375, style=ProgressStyle(descriptio…

[19.       0.16404]


HBox(children=(FloatProgress(value=0.0, description='Epoch 20', max=165.609375, style=ProgressStyle(descriptio…

[20.        0.158922]


HBox(children=(FloatProgress(value=0.0, description='Epoch 21', max=165.609375, style=ProgressStyle(descriptio…

[21.        0.155038]


HBox(children=(FloatProgress(value=0.0, description='Epoch 22', max=165.609375, style=ProgressStyle(descriptio…

[22.        0.151239]


HBox(children=(FloatProgress(value=0.0, description='Epoch 23', max=165.609375, style=ProgressStyle(descriptio…

[23.       0.14891]


HBox(children=(FloatProgress(value=0.0, description='Epoch 24', max=165.609375, style=ProgressStyle(descriptio…

[24.        0.146559]


HBox(children=(FloatProgress(value=0.0, description='Epoch 25', max=165.609375, style=ProgressStyle(descriptio…

[25.        0.142542]


HBox(children=(FloatProgress(value=0.0, description='Epoch 26', max=165.609375, style=ProgressStyle(descriptio…

[26.        0.138555]


HBox(children=(FloatProgress(value=0.0, description='Epoch 27', max=165.609375, style=ProgressStyle(descriptio…

[27.        0.137112]


HBox(children=(FloatProgress(value=0.0, description='Epoch 28', max=165.609375, style=ProgressStyle(descriptio…

[28.        0.135598]


HBox(children=(FloatProgress(value=0.0, description='Epoch 29', max=165.609375, style=ProgressStyle(descriptio…

[29.        0.130075]


HBox(children=(FloatProgress(value=0.0, description='Epoch 30', max=165.609375, style=ProgressStyle(descriptio…

[30.        0.128251]


HBox(children=(FloatProgress(value=0.0, description='Epoch 31', max=165.609375, style=ProgressStyle(descriptio…

[31.        0.132815]


HBox(children=(FloatProgress(value=0.0, description='Epoch 32', max=165.609375, style=ProgressStyle(descriptio…

[32.       0.13426]


HBox(children=(FloatProgress(value=0.0, description='Epoch 33', max=165.609375, style=ProgressStyle(descriptio…

[33.        0.131446]


HBox(children=(FloatProgress(value=0.0, description='Epoch 34', max=165.609375, style=ProgressStyle(descriptio…

[34.        0.130581]


HBox(children=(FloatProgress(value=0.0, description='Epoch 35', max=165.609375, style=ProgressStyle(descriptio…

[35.        0.123785]


HBox(children=(FloatProgress(value=0.0, description='Epoch 36', max=165.609375, style=ProgressStyle(descriptio…

[36.        0.120801]


HBox(children=(FloatProgress(value=0.0, description='Epoch 37', max=165.609375, style=ProgressStyle(descriptio…

[37.        0.119396]


HBox(children=(FloatProgress(value=0.0, description='Epoch 38', max=165.609375, style=ProgressStyle(descriptio…

[38.        0.116563]


HBox(children=(FloatProgress(value=0.0, description='Epoch 39', max=165.609375, style=ProgressStyle(descriptio…

[39.        0.117333]


HBox(children=(FloatProgress(value=0.0, description='Epoch 40', max=165.609375, style=ProgressStyle(descriptio…

[40.        0.114658]


HBox(children=(FloatProgress(value=0.0, description='Epoch 41', max=165.609375, style=ProgressStyle(descriptio…

[41.        0.118584]


HBox(children=(FloatProgress(value=0.0, description='Epoch 42', max=165.609375, style=ProgressStyle(descriptio…

[42.        0.115583]


HBox(children=(FloatProgress(value=0.0, description='Epoch 43', max=165.609375, style=ProgressStyle(descriptio…

[43.        0.117002]


HBox(children=(FloatProgress(value=0.0, description='Epoch 44', max=165.609375, style=ProgressStyle(descriptio…

[44.        0.119654]


HBox(children=(FloatProgress(value=0.0, description='Epoch 45', max=165.609375, style=ProgressStyle(descriptio…

[45.       0.11753]


HBox(children=(FloatProgress(value=0.0, description='Epoch 46', max=165.609375, style=ProgressStyle(descriptio…

[46.       0.11918]


HBox(children=(FloatProgress(value=0.0, description='Epoch 47', max=165.609375, style=ProgressStyle(descriptio…

[47.        0.117074]


HBox(children=(FloatProgress(value=0.0, description='Epoch 48', max=165.609375, style=ProgressStyle(descriptio…

[48.        0.111911]


HBox(children=(FloatProgress(value=0.0, description='Epoch 49', max=165.609375, style=ProgressStyle(descriptio…

[49.        0.105048]


HBox(children=(FloatProgress(value=0.0, description='Epoch 50', max=165.609375, style=ProgressStyle(descriptio…

[50.        0.105949]


HBox(children=(FloatProgress(value=0.0, description='Epoch 51', max=165.609375, style=ProgressStyle(descriptio…

[51.        0.105943]


HBox(children=(FloatProgress(value=0.0, description='Epoch 52', max=165.609375, style=ProgressStyle(descriptio…

[52.        0.106905]


HBox(children=(FloatProgress(value=0.0, description='Epoch 53', max=165.609375, style=ProgressStyle(descriptio…

[53.        0.108084]


HBox(children=(FloatProgress(value=0.0, description='Epoch 54', max=165.609375, style=ProgressStyle(descriptio…

[54.        0.112031]


HBox(children=(FloatProgress(value=0.0, description='Epoch 55', max=165.609375, style=ProgressStyle(descriptio…

[55.        0.110391]


HBox(children=(FloatProgress(value=0.0, description='Epoch 56', max=165.609375, style=ProgressStyle(descriptio…

[56.        0.110334]


HBox(children=(FloatProgress(value=0.0, description='Epoch 57', max=165.609375, style=ProgressStyle(descriptio…

[57.        0.108132]


HBox(children=(FloatProgress(value=0.0, description='Epoch 58', max=165.609375, style=ProgressStyle(descriptio…

[58.        0.107119]


HBox(children=(FloatProgress(value=0.0, description='Epoch 59', max=165.609375, style=ProgressStyle(descriptio…

[59.        0.108573]


HBox(children=(FloatProgress(value=0.0, description='Epoch 60', max=165.609375, style=ProgressStyle(descriptio…

[60.        0.107675]


HBox(children=(FloatProgress(value=0.0, description='Epoch 61', max=165.609375, style=ProgressStyle(descriptio…

[61.        0.106986]


HBox(children=(FloatProgress(value=0.0, description='Epoch 62', max=165.609375, style=ProgressStyle(descriptio…

[62.        0.105929]


HBox(children=(FloatProgress(value=0.0, description='Epoch 63', max=165.609375, style=ProgressStyle(descriptio…

[63.        0.101189]


HBox(children=(FloatProgress(value=0.0, description='Epoch 64', max=165.609375, style=ProgressStyle(descriptio…

[64.        0.103191]


HBox(children=(FloatProgress(value=0.0, description='Epoch 65', max=165.609375, style=ProgressStyle(descriptio…

[65.        0.104381]


HBox(children=(FloatProgress(value=0.0, description='Epoch 66', max=165.609375, style=ProgressStyle(descriptio…

[66.        0.105839]


HBox(children=(FloatProgress(value=0.0, description='Epoch 67', max=165.609375, style=ProgressStyle(descriptio…

[67.        0.106027]


HBox(children=(FloatProgress(value=0.0, description='Epoch 68', max=165.609375, style=ProgressStyle(descriptio…

[68.        0.108365]


HBox(children=(FloatProgress(value=0.0, description='Epoch 69', max=165.609375, style=ProgressStyle(descriptio…

[69.       0.10249]


HBox(children=(FloatProgress(value=0.0, description='Epoch 70', max=165.609375, style=ProgressStyle(descriptio…

[70.       0.09962]


HBox(children=(FloatProgress(value=0.0, description='Epoch 71', max=165.609375, style=ProgressStyle(descriptio…

[71.        0.103341]


HBox(children=(FloatProgress(value=0.0, description='Epoch 72', max=165.609375, style=ProgressStyle(descriptio…

[72.        0.100593]


HBox(children=(FloatProgress(value=0.0, description='Epoch 73', max=165.609375, style=ProgressStyle(descriptio…

[73.        0.102869]


HBox(children=(FloatProgress(value=0.0, description='Epoch 74', max=165.609375, style=ProgressStyle(descriptio…

[74.        0.097152]


HBox(children=(FloatProgress(value=0.0, description='Epoch 75', max=165.609375, style=ProgressStyle(descriptio…

[75.        0.093604]


HBox(children=(FloatProgress(value=0.0, description='Epoch 76', max=165.609375, style=ProgressStyle(descriptio…

[76.        0.095107]


HBox(children=(FloatProgress(value=0.0, description='Epoch 77', max=165.609375, style=ProgressStyle(descriptio…

[77.        0.101236]


HBox(children=(FloatProgress(value=0.0, description='Epoch 78', max=165.609375, style=ProgressStyle(descriptio…

[78.        0.101372]


HBox(children=(FloatProgress(value=0.0, description='Epoch 79', max=165.609375, style=ProgressStyle(descriptio…

[79.        0.106768]


HBox(children=(FloatProgress(value=0.0, description='Epoch 80', max=165.609375, style=ProgressStyle(descriptio…

[80.        0.107595]


HBox(children=(FloatProgress(value=0.0, description='Epoch 81', max=165.609375, style=ProgressStyle(descriptio…

[81.        0.101892]


HBox(children=(FloatProgress(value=0.0, description='Epoch 82', max=165.609375, style=ProgressStyle(descriptio…

[82.        0.107898]


HBox(children=(FloatProgress(value=0.0, description='Epoch 83', max=165.609375, style=ProgressStyle(descriptio…

Exception in thread Thread-5:
Traceback (most recent call last):
  File "/usr/lib/python3.6/threading.py", line 916, in _bootstrap_inner
    self.run()
  File "/usr/local/lib/python3.6/dist-packages/tqdm/_monitor.py", line 78, in run
    instances = self.get_instances()
  File "/usr/local/lib/python3.6/dist-packages/tqdm/_monitor.py", line 58, in get_instances
    return [i for i in self.tqdm_cls._instances.copy()
  File "/usr/lib/python3.6/_weakrefset.py", line 92, in copy
    return self.__class__(self)
  File "/usr/lib/python3.6/_weakrefset.py", line 50, in __init__
    self.update(data)
  File "/usr/lib/python3.6/_weakrefset.py", line 119, in update
    for element in other:
  File "/usr/lib/python3.6/_weakrefset.py", line 60, in __iter__
    for itemref in self.data:
RuntimeError: Set changed size during iteration



[83.        0.103836]


HBox(children=(FloatProgress(value=0.0, description='Epoch 84', max=165.609375, style=ProgressStyle(descriptio…

[84.        0.099611]


HBox(children=(FloatProgress(value=0.0, description='Epoch 85', max=165.609375, style=ProgressStyle(descriptio…

[85.       0.09503]


HBox(children=(FloatProgress(value=0.0, description='Epoch 86', max=165.609375, style=ProgressStyle(descriptio…

[86.       0.10205]


HBox(children=(FloatProgress(value=0.0, description='Epoch 87', max=165.609375, style=ProgressStyle(descriptio…

[87.       0.10449]


HBox(children=(FloatProgress(value=0.0, description='Epoch 88', max=165.609375, style=ProgressStyle(descriptio…

[88.        0.103836]


HBox(children=(FloatProgress(value=0.0, description='Epoch 89', max=165.609375, style=ProgressStyle(descriptio…

[89.        0.113255]


HBox(children=(FloatProgress(value=0.0, description='Epoch 90', max=165.609375, style=ProgressStyle(descriptio…

[90.        0.107883]


HBox(children=(FloatProgress(value=0.0, description='Epoch 91', max=165.609375, style=ProgressStyle(descriptio…

[91.        0.101228]


HBox(children=(FloatProgress(value=0.0, description='Epoch 92', max=165.609375, style=ProgressStyle(descriptio…

[92.        0.101301]


HBox(children=(FloatProgress(value=0.0, description='Epoch 93', max=165.609375, style=ProgressStyle(descriptio…

[93.        0.096082]


HBox(children=(FloatProgress(value=0.0, description='Epoch 94', max=165.609375, style=ProgressStyle(descriptio…

[94.        0.097008]


HBox(children=(FloatProgress(value=0.0, description='Epoch 95', max=165.609375, style=ProgressStyle(descriptio…

[95.        0.095682]


HBox(children=(FloatProgress(value=0.0, description='Epoch 96', max=165.609375, style=ProgressStyle(descriptio…

[96.        0.097961]


HBox(children=(FloatProgress(value=0.0, description='Epoch 97', max=165.609375, style=ProgressStyle(descriptio…

[97.        0.104964]


HBox(children=(FloatProgress(value=0.0, description='Epoch 98', max=165.609375, style=ProgressStyle(descriptio…

[98.        0.104201]


HBox(children=(FloatProgress(value=0.0, description='Epoch 99', max=165.609375, style=ProgressStyle(descriptio…

[99.        0.106214]


In [None]:
with open('/content/gloveloss.pkl', 'wb') as f:
  pickle.dump(loss_tracker, f)

In [None]:
from google.colab import files
files.download('/content/gloveloss.pkl') 

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Result trackers:   
  
Using prefix dataset of size 10599:  
Best loss: 0.156 @ 34th epoch.

Using question dataset of size 21769:  
Best loss: 0.2979 @ 30th epoch



In [None]:
torch.save(encoder.state_dict(), f'/content/glove_encoder_0.106.pth')
torch.save(decoder.state_dict(), f'/content/glove_decoder_0.106.pth')

Load model weights

In [None]:
encoder.load_state_dict(torch.load(encoder_saved_model_weights))

<All keys matched successfully>

In [None]:
decoder.load_state_dict(torch.load(decoder_saved_model_weights))

<All keys matched successfully>

In order to generate predictions of a french sentence:
<br/>1- Tokenize.
<br/>2- Transform words to their ids.
<br/>3- Pad the sentence to length 30.
<br/>4- Encode.
<br/>5- Decode the next translated word until the decoder generates the special token that marks the end of the sentence, or until the max length (30) is reached.

In [None]:
def sent2ids(sent):
    """Convert a French sentence into a padded row of vocabulary ids.

    Args:
        sent: raw sentence string.

    Returns:
        int64 array of shape (1, maxlen), padded/truncated at the end.

    Raises:
        KeyError: if a token is not present in fr_w2id.
    """
    token_ids = []
    for token in tokenize(sent):
        token_ids.append(fr_w2id[token])
    return pad_sequences([token_ids], maxlen=maxlen, dtype='int64',
                         padding='post', truncating='post')
  

In [None]:
def evaluate(inp):
    """Greedily decode a translation for a single encoded source sentence.

    Uses the notebook-level `encoder` and `decoder`. Decoding stops when the
    decoder predicts PAD or after `maxlen` steps. Only the first batch
    element is decoded, so `inp` should hold exactly one sentence.

    Args:
        inp: CUDA tensor of source token ids with shape (1, maxlen).

    Returns:
        List of predicted English words.
    """
    decoded_words = []
    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        decoder_input, _, hidden = encode(inp, encoder)
        for _step in range(maxlen):
            decoder_output, hidden = decoder(decoder_input, hidden)
            _, topi = decoder_output.topk(1)
            # Plain Python int: safe for comparison, list indexing and
            # re-wrapping with long_t.
            ni = topi[0][0].item()
            if ni == PAD:
                break
            decoded_words.append(en_vocab[ni])
            decoder_input = long_t([ni])

    return decoded_words

In [None]:
def fr2en(sent):
    """Translate a French sentence and return the English words joined by spaces."""
    return ' '.join(evaluate(long_t(sent2ids(sent))))

Bleu

In [None]:
from nltk.translate.bleu_score import sentence_bleu
# returns the one gram bleu score 

def bleu(reference,candidate):
  """Unigram BLEU of `candidate` against a single `reference`.

  Args:
    reference: list of reference tokens.
    candidate: list of predicted tokens.

  Returns:
    The sentence_bleu score with all weight on 1-grams.
  """
  unigram_weights = (1, 0, 0, 0)
  return sentence_bleu([reference], candidate, weights=unigram_weights)

In [None]:
from nltk.translate.gleu_score import sentence_gleu

def gleu(reference, candidate):
  """GLEU score of `candidate` against `reference`.

  Args:
    reference: a single reference as a list of tokens (as accepted by
      bleu()), or an already wrapped list of token lists.
    candidate: list of predicted tokens.

  Returns:
    The sentence_gleu score.
  """
  # sentence_gleu expects a LIST of references. A flat token list would be
  # read as many one-word references made of characters, so wrap it the same
  # way bleu() does. Input that is already a list of token lists is passed
  # through unchanged.
  if reference and isinstance(reference[0], str):
    references = [reference]
  else:
    references = reference
  return sentence_gleu(references, candidate)

Evaluate n random pairs with bleu

In [None]:
# [french, english] sentence pairs, in dataset order.
pairs = [[fr_sentence, en_sentence] for fr_sentence, en_sentence in zip(fr_questions, en_questions)]

In [None]:
import random
import warnings
warnings.filterwarnings("ignore")

# evaluate n random sentence pairs using one gram bleu

def evaluateRandomly(encoder, decoder, n=25):
    """Translate `n` random sentence pairs and print their unigram BLEU scores.

    For each sampled pair the source, reference and prediction are printed
    with the pair's score; the average score is printed at the end
    (NaN when n is 0, instead of raising ZeroDivisionError).

    Args:
        encoder: currently unused -- fr2en() relies on the notebook-level encoder.
        decoder: currently unused -- fr2en() relies on the notebook-level decoder.
        n: number of random pairs to evaluate.

    Returns:
        None; results are printed.
    """
    score_tracker = []
    for i in range(n):
        pair = random.choice(pairs)
        print(i+1)

        # fr2en already returns one space-joined string.
        output_sentence = fr2en(pair[0])

        #bleu
        # The last whitespace-separated token of each side is dropped before scoring.
        ref = pair[1].split()[:-1]
        pred = output_sentence.split()[:-1]
        # NOTE(review): fix_contractions is not defined in the visible part of
        # this notebook (only fix_punctuation is) -- confirm it exists before
        # this cell is run.
        ref, pred = fix_contractions(ref, pred)

        print('>', pair[0])
        print('=', ref)
        print('<', pred,'\n')
        one_gram = bleu(ref,pred)
        score_tracker.append(one_gram)
        print(f'Bleu Score: {one_gram}')
        print('')

    average = sum(score_tracker)/len(score_tracker) if score_tracker else float('nan')
    print('Avg Bleu Score (based on one-gram): ',average)

In [None]:
# sometimes ending punctuation is filtered off prediction when no EOS token is predicted
# adding it back in to not trigger missed prediction
# input: two lists of words
# output: two lists of words

def fix_punctuation(ref, pred):
  """Make `pred` end with the same final token as `ref`.

  If the last token of `pred` differs from the last token of `ref` (or
  `pred` is empty), the reference's last token is appended to `pred`.
  `pred` is modified in place. An empty `ref` leaves both lists untouched.

  Args:
    ref: list of reference words.
    pred: list of predicted words.

  Returns:
    Tuple (ref, pred) -- the same list objects that were passed in.
  """
  if not ref:
    # Nothing to copy the ending from.
    return ref, pred
  final_token = ref[-1]
  # Empty predictions used to raise IndexError on pred[-1].
  if not pred or pred[-1] != final_token:
    pred.append(final_token)
  return ref, pred

In [None]:
# create a custom score by averaging bs and gs scores with weights
# adding a bonus if different words have a shared semantic meaning (cs score > 0.3)
# subtracting points if predicted sequence has duplicated words
# inputs: bleu score, gleu score, list of semantic-similarity (cs) scores, # of double words in pred
# outputs: custom score

def custom_score(bs,gs,cs,double_word_penalty, verbose=True):
    """Combine BLEU and GLEU into one score, with a similarity bonus or a repeat penalty.

    The base score is `0.75 * bs + 0.25 * gs`.

    * If `bs == 1` the result is always 1.0.
    * If `cs` yields a positive bonus, the bonus (sum of `cs` times a multiplier)
      is added. The multiplier starts at .4 and is lowered to .3, .2 and .1 until
      the total drops below 1; if it never does, the score is capped at 1.0.
    * Otherwise `0.1 * double_word_penalty` is subtracted from the base score.

    NOTE(review): when a bonus applies, the double-word penalty is printed but
    not subtracted. That is the existing behaviour and is kept here — confirm
    whether it is intended.

    Args:
        bs: BLEU score.
        gs: GLEU score.
        cs: iterable of cosine-similarity values, or a falsy value (e.g. 0) for none.
        double_word_penalty: number of penalised repeated words.
        verbose: when True, print the bonus and penalty that were considered.

    Returns:
        The custom score as a number; never None.
    """
    def get_bonus(scores, multiplier=.4):
        # Each similarity contributes `multiplier` times its value.
        return sum(value * multiplier for value in scores)

    def report(bonus):
        if verbose:
            print('\nSemantic similarity bonus : +', float(bonus))
            print('Double word penalty:        -', double_word_penalty * .1,'\n')

    total = (bs * .75) + (gs * .25)  # weighted avg
    cs_bonus = get_bonus(cs) if cs else 0

    # A perfect BLEU short-circuits everything else.
    if bs == 1:
        report(cs_bonus)
        return 1.00

    # No usable similarity bonus: only the repeat penalty applies.
    if not cs_bonus:
        report(cs_bonus)
        return total - (double_word_penalty * .1)

    grand_total = total + cs_bonus
    if grand_total < 1:
        report(cs_bonus)
        return grand_total

    # The bonus pushed the score to 1 or above: retry with weaker multipliers.
    fallback_multipliers = (.3, .2, .1)
    if verbose:
        print('\nTotal score > 1, adjusting weights...')
    report(get_bonus(cs, multiplier=fallback_multipliers[0]))
    for multiplier in fallback_multipliers:
        grand_total = total + get_bonus(cs, multiplier=multiplier)
        if grand_total < 1:
            return grand_total

    # Even the weakest multiplier overshoots; cap instead of falling through to None.
    return 1.0

In [None]:
import gensim
# Word vectors in binary word2vec format; `similarities` queries this model
# for word-to-word cosine similarity.
w2v_model = gensim.models.KeyedVectors.load_word2vec_format(
    '/content/drive/My Drive/Embedding Models/word2vec.bin',
    binary=True,
)




Compare Ref to Pred

In [None]:
# calculates penalties for double words, filters sentences to relevant words to compare,
# calculates cos. sim., and a score for the strength of the cos. sims.
# inputs: two lists of words
# outputs: relevant word cosine sim. scores over 0.3, number of double words in the prediction

def similarities(A,B,verbose=True):
    """Compare reference tokens `A` with predicted tokens `B`.

    Returns a two-element list `[cs_score, double_word_penalty]`:

    * `double_word_penalty` is how many more repeated tokens `B` has than `A`,
      clipped to the range 0..3.
    * `cs_score` is `[0]` followed by every w2v similarity above 0.3 between a
      word found only in `A` and a word found only in `B` (punctuation and a
      short stop-word list are ignored), highest first. When exactly one such
      unmatched word exists in total, `cs_score` is the plain integer 0 instead.

    With `verbose=True` all computed similarities are printed, and those above
    0.3 are highlighted. A `KeyError` raised by the embedding lookup propagates
    to the caller.
    """
    # Every occurrence after a token's first one counts as a "double".
    repeats_in_ref = len(A) - len(set(A))
    repeats_in_pred = len(B) - len(set(B))

    # calc penalty, keep only positive values
    double_word_penalty = np.clip(repeats_in_pred - repeats_in_ref, 0,3)

    # Tokens that never take part in the comparison.
    ignored = ['.','?','!',','] + ['a','an','of','the','to','on','t','in','as']
    extraA = [word for word in A if word not in ignored and word not in B]
    extraB = [word for word in B if word not in ignored and word not in A]

    # if off by one word, exit
    if len(extraA) + len(extraB) == 1:
        return [0, double_word_penalty]

    # Similarity for every (ref-only, pred-only) word combination, best first.
    sorted_sim_w2v = sorted(
        ([a, b, w2v_model.similarity(a, b)] for a, b in itertools.product(extraA, extraB)),
        key=lambda triple: triple[2],
        reverse=True,
    )

    if verbose and sorted_sim_w2v:
        print('\nSemantic similarities using w2v:')
        for triple in sorted_sim_w2v:
            if triple[2] > 0.3:
                print(bold, end="")
                print('  ',triple,reset)
            else:
                print('  ',triple)

    # The leading 0 keeps the list non-empty; the rest are similarities over 0.3.
    cs_score = [0] + [triple[2] for triple in sorted_sim_w2v if triple[2] > 0.3]

    return [cs_score, double_word_penalty]

In [None]:
# evaluate n random pairs from dataset
# input: models, n
# output: none

def evaluateRandomly(encoder, decoder, n=10, all = False):
    """Translate dataset pairs with `fr2en`, score them, and print a report.

    Args:
        encoder, decoder: accepted for interface compatibility; not used inside
            this function (translation goes through the notebook-level `fr2en`).
        n: number of randomly drawn pairs to evaluate when `all` is False.
        all: when True, evaluate every entry of `pairs` once, in order, without
            per-example output. (The name shadows the builtin but is part of
            the existing call interface.)

    Returns:
        A list of `[i, ref, pred]` records, one per evaluated pair, where `ref`
        and `pred` are the cleaned token lists and `i` is the loop index.

    Behaviour worth knowing:
        * In random mode, an example whose BLEU is 0 is neither printed nor
          included in any average.
        * When a word is missing from the embedding vocabulary (`KeyError`),
          the example gets no custom score.
        * An empty prediction is replaced by the reference's final token rather
          than raising IndexError.
        * An average over zero collected scores is printed as `nan` rather than
          raising ZeroDivisionError.
    """
    print(bold+'Evaluation of Machine Translation Model'+reset)

    bleu_score_tracker = []
    gleu_score_tracker = []
    custom_tracker = []
    ending_punc = ['.','?','!']
    glove_frwac_ref_pred = []

    def mean_or_nan(scores):
        # Mean of the collected scores, or NaN when nothing was collected.
        return sum(scores) / len(scores) if scores else float('nan')

    def prepare(pair):
        # Translate pair[0] and return the cleaned (ref, pred) token lists.
        output_words = fr2en(pair[0])
        ref = pair[1].split()
        pred = output_words.split()[:-1]  # drop the prediction's final token

        # fix contractions
        ref, pred = fix_contractions(ref, pred)

        if not pred:
            # Nothing was predicted: keep only the reference's ending token so the
            # `[:-1]` slices used for scoring see an empty prediction.
            pred = ref[-1:]
        elif pred[-1] not in ending_punc:
            # missing ending punctuation
            ref, pred = fix_punctuation(ref, pred)
        return ref, pred

    def print_averages():
        print('\n')
        print(f'{bold_blue_font_tag}Avg Bleu Score  :{reset} {mean_or_nan(bleu_score_tracker):.3f}')
        print(f'{bold_blue_font_tag}Avg Gleu Score  :{reset} {mean_or_nan(gleu_score_tracker):.3f}')
        print(f'{bold_blue_font_tag}Avg Custom Score:{reset} {mean_or_nan(custom_tracker):.3f}')

    if not all: # run on n randomly chosen pairs
        print(bold+'Evaluating ',n,' examples...'+reset)
        for i in range(n):
            pair = random.choice(pairs)
            ref, pred = prepare(pair)
            glove_frwac_ref_pred.append([i,ref,pred])

            # Scores are computed without the final (ending punctuation) token.
            print('Before Bleu: ',ref[:-1],pred[:-1])
            bleu_one_gram = bleu(ref[:-1],pred[:-1])

            # Examples with a BLEU of 0 are skipped: not displayed, not averaged.
            if not bleu_one_gram:
                continue

            print('\n')
            print(bold+'Input:\t'+reset, pair[0])
            print(bold+'Target:\t'+reset, ' '.join(ref))
            print(bold+'Pred:\t'+reset, ' '.join(pred),'\n')

            bleu_score_tracker.append(bleu_one_gram)
            print(f'Bleu Score: {bleu_one_gram:.3f}')

            # gleu() forwards its first argument as-is, hence the list wrapper.
            gleu_one_gram = gleu([ref[:-1]],pred[:-1])
            gleu_score_tracker.append(gleu_one_gram)
            print(f'Gleu Score: {gleu_one_gram:.3f}')
            print(f'Avg Score:  {(gleu_one_gram*.25+bleu_one_gram*.75):.3f}') #weighted

            # if not perfect score: calc. bonuses and penalties
            if bleu_one_gram < 1:
                try:
                    cs_score, double_word_penalty = similarities(ref,pred)
                    cust_score = custom_score(bleu_one_gram,gleu_one_gram,cs_score,double_word_penalty)
                    print(f'{bold_red_font_tag}Custom Score: {cust_score:.3f}{reset}')

                # if word not in WE: no custom score for this example
                except KeyError:
                    print('Cosine similarities: Word not found in embedding vocabulary')
                    continue
            else:
                cust_score = custom_score(bleu_one_gram,gleu_one_gram,0,0)
                print(f'{bold_red_font_tag}Custom Score: {cust_score:.3f}{reset}')

            custom_tracker.append(cust_score)

    else: # run on entire dataset
        print(bold+'Evaluating entire dataset...'+reset)
        for i, pair in enumerate(pairs):
            ref, pred = prepare(pair)
            glove_frwac_ref_pred.append([i,ref,pred])

            bleu_one_gram = bleu(ref[:-1],pred[:-1])
            bleu_score_tracker.append(bleu_one_gram)
            gleu_one_gram = gleu([ref[:-1]],pred[:-1])
            gleu_score_tracker.append(gleu_one_gram)

            # if not perfect score: calc. bonuses and penalties
            if bleu_one_gram < 1:
                try:
                    cs_score, double_word_penalty = similarities(ref,pred, verbose=False)
                    cust_score = custom_score(bleu_one_gram,gleu_one_gram,cs_score,double_word_penalty, verbose=False)

                # if word not in WE: BLEU/GLEU stay recorded, custom score is skipped
                except KeyError:
                    continue
            else:
                cust_score = custom_score(bleu_one_gram,gleu_one_gram,0,0, verbose=False)

            custom_tracker.append(cust_score)

    print_averages()

    return glove_frwac_ref_pred

Evaluate the BLEU score on all of the training data

In [None]:
# get bleu score for entire dataset

def bleuScore(encoder, decoder):
    """Print the average one-gram BLEU score over every entry of `pairs`.

    `encoder` and `decoder` are accepted but not used here; each source
    sentence is translated through the notebook-level `fr2en` helper. The final
    token of both the reference and the prediction is dropped before scoring.
    """
    def score_pair(pair):
        translation = fr2en(pair[0])
        ref, pred = fix_contractions(pair[1].split()[:-1], translation.split()[:-1])
        return bleu(ref, pred)

    scores = [score_pair(pair) for pair in pairs]

    print('Avg Bleu Score: ',sum(scores)/len(scores))

    

In [None]:
# fix issues with contractions when displaying results.
# issues: 's' could represent possession and not 'is.' Small fraction of the time though.
# inputs: two lists of words
# outputs: two lists of words

def _expand_contractions(words):
    """Expand split-off contraction fragments in `words`, in place.

    Stand-alone fragments are replaced directly ('re' -> 'are', 'm' -> 'am',
    's' -> 'is', 'ok' -> 'okay'). The stems 'aren', 'isn' and 'don' are
    rewritten only when the next token is 't', in which case the pair becomes
    '<verb> not'. A stem at the very end of the list is left untouched.
    """
    standalone = {'re': 'are', 'm': 'am', 's': 'is', 'ok': 'okay'}
    negated_stems = {'aren': 'are', 'isn': 'is', 'don': 'do'}
    last_idx = len(words) - 1

    for idx, word in enumerate(words):
        if word in standalone:
            words[idx] = standalone[word]
        # The bounds check keeps a trailing stem from raising IndexError.
        elif word in negated_stems and idx < last_idx and words[idx + 1] == 't':
            words[idx] = negated_stems[word]
            words[idx + 1] = 'not'


def fix_contractions(ref,pred):
    """Expand contraction fragments in both token lists so they compare equal.

    Both lists are modified in place and the same objects are returned.
    Caveat: a lone 's' is always read as 'is', although it can also mark
    possession.

    Args:
        ref: list of reference tokens.
        pred: list of predicted tokens.

    Returns:
        The `(ref, pred)` tuple.
    """
    _expand_contractions(pred)
    _expand_contractions(ref)
    return ref, pred

In [None]:
# ANSI escape sequences used to style the printed evaluation report.
bold_blue_font_tag = '\x1b[1m\x1b[34m'  # bold + blue foreground
bold_red_font_tag = '\x1b[1m\x1b[31m'  # bold + red foreground
# NOTE(review): "gree" looks like a typo for "green"; the name is left as-is
# because other cells may reference it.
bold_gree_font_tag = '\x1b[1m\x1b[32m'  # bold + green foreground
magenta = '\033[35m'  # magenta foreground
bold = '\033[1m'  # bold on
reset = '\033[0m'  # clear all styling

Evaluate

In [None]:
import itertools 

In [None]:
# Score every pair in the dataset and keep the [index, ref, pred] records.
glove_frwac_ref_pred = evaluateRandomly(encoder, decoder, all=True)

[1mEvaluation of Machine Translation Model[0m
[1mEvaluating entire dataset...[0m


[1m[34mAvg Bleu Score  :[0m 0.764
[1m[34mAvg Gleu Score  :[0m 0.676
[1m[34mAvg Custom Score:[0m 0.815


Save results to pickle file for scratch notebook

In [None]:
# Persist the [index, ref, pred] records returned by evaluateRandomly so they
# can be loaded from another notebook.
with open('/content/glove_frwac_results.pkl', 'wb') as f:
  pickle.dump(glove_frwac_ref_pred, f)

Evaluate the entire dataset

In [None]:
# Read back the results file written above. pickle.load executes arbitrary code
# from the file, so only use it on files this notebook produced.
# NOTE(review): `mynewlist` is not used by any later cell shown here.
with open('/content/glove_frwac_results.pkl', 'rb') as f:
  mynewlist = pickle.load(f)

In [None]:
# Re-translates every pair and prints the average one-gram BLEU score.
bleuScore(encoder, decoder)

Avg Bleu Score:  0.7636377341602689


In [None]:
# Hand-written example for a one-off check: [English reference, French source].
sent = ['i am happy to see you','je suis content de te voir']

In [None]:


# Translate the French sentence (second element of `sent`).
output_words = fr2en(sent[1])

# NOTE(review): other cells call .split() directly on fr2en's result, so it is
# presumably a str, in which case this join leaves it unchanged — confirm.
output_sentence = ''.join(output_words)

ref = sent[0].split()
# Drop the last two predicted tokens before scoring.
# NOTE(review): presumably the ending punctuation and an end-of-sentence marker — confirm.
pred = output_sentence.split()[:-2]
ref, pred = fix_contractions(ref, pred)
# One-gram BLEU of the prediction against the hand-written English reference.
one_gram = bleu(ref,pred)
print('Input:\t\t', sent[1])
print('Target:\t\t', ' '.join(ref))
print('Prediction:\t', ' '.join(pred),'\n')
print(f'Bleu Score:\t {one_gram:.2f}')

Input:		 je suis content de te voir
Target:		 i am happy to see you
Prediction:	 i am happy with you 

Bleu Score:	 0.65


# References  

M. (2018). martinpella/lang-translator. GitHub. https://github.com/martinpella/lang-translator  

Robertson, S. (2020). NLP From Scratch: Translation with a Sequence to Sequence Network and Attention — PyTorch Tutorials 1.7.0 documentation. PyTorch. https://pytorch.org/tutorials/intermediate/seq2seq_translation_tutorial.html  

Embeddings:  
- https://wacky.sslmit.unibo.it/doku.php?id=corpora     
- http://fauconnier.github.io/index.html#wordembeddingmodels  
  