# Neural Machine Translation

Translation of sentences from source to target language

In [1]:
# run this code when running the code on Google Colab
from google.colab import drive
drive.mount('/content/drive')
import sys
sys.path.insert(0,'/content/drive/MyDrive/Colab Notebooks/NMT')

Mounted at /content/drive


## 1. Importing Libraries

In [2]:
import pandas as pd
import numpy as np
import unicodedata
import re
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import torch
import random
import os
rnn_encoder, rnn_encoder, transformer_encoder, transformer_decoder = None, None, None, None
import math

# libraries for model training and evalutation
import torch.nn as nn
import torch.nn.functional as F
import time
from tqdm.notebook import tqdm
import nltk
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction, corpus_bleu

## 2. Downloaing Data

Downloading the dataset and arranging the source and target sentences in a dataframe

In [3]:
# downloading the data
if __name__ == '__main__':
    os.system("wget http://www.manythings.org/anki/fra-eng.zip")
    os.system("unzip -o fra-eng.zip")

# arranding the data in a dataframe
if __name__ == '__main__':
    lines = open('fra.txt', encoding='UTF-8').read().strip().split('\n')
    total_num_examples = 50000 
    original_word_pairs = [[w for w in l.split('\t')][:2] for l in lines[:total_num_examples]]
    random.shuffle(original_word_pairs)

    dat = pd.DataFrame(original_word_pairs, columns=['eng', 'fra'])
    print(dat) # Visualize the data

                         eng                        fra
0               Don't leave!           Ne quittez pas !
1             They're awake.         Ils sont éveillés.
2        Everybody panicked.     Tout le monde paniqua.
3            I broke my arm.  Je me suis cassé le bras.
4      I am no longer tired.   Je ne suis plus fatigué.
...                      ...                        ...
49995       I saw her again.             Je l'ai revue.
49996              Back off.               Cassez-vous.
49997    Tom talks too fast.       Tom parle trop vite.
49998    I've seen all that.         J'ai vu tout cela.
49999           Is that all?               C'est tout ?

[50000 rows x 2 columns]


## 3. Preprocessing the data


In [4]:
# Converts the unicode file to ascii
def unicode_to_ascii(s):
    """Normalizes latin chars with accent to their canonical decomposition"""
    return ''.join(c for c in unicodedata.normalize('NFD', s) if unicodedata.category(c) != 'Mn')


def preprocess_sentence(w):
    '''
    Preprocess the sentence to add the start, end tokens and make them lower-case
    '''
    w = unicode_to_ascii(w.lower().strip())
    w = re.sub(r'([?.!,¿])', r' \1 ', w)
    w = re.sub(r'[" "]+', ' ', w)

    w = re.sub(r'[^a-zA-Z?.!,¿]+', ' ', w)
    
    w = w.rstrip().strip()
    w = '<start> ' + w + ' <end>'
    return w

if __name__ == '__main__':
    data = dat.copy()
    data['eng'] = dat.eng.apply(lambda w: preprocess_sentence(w))
    data['fra'] = dat.fra.apply(lambda w: preprocess_sentence(w))
    print(data) # Visualizing the data

                                        eng  \
0               <start> don t leave ! <end>   
1             <start> they re awake . <end>   
2        <start> everybody panicked . <end>   
3            <start> i broke my arm . <end>   
4      <start> i am no longer tired . <end>   
...                                     ...   
49995       <start> i saw her again . <end>   
49996              <start> back off . <end>   
49997    <start> tom talks too fast . <end>   
49998    <start> i ve seen all that . <end>   
49999           <start> is that all ? <end>   

                                            fra  
0                <start> ne quittez pas ! <end>  
1             <start> ils sont eveilles . <end>  
2         <start> tout le monde paniqua . <end>  
3      <start> je me suis casse le bras . <end>  
4       <start> je ne suis plus fatigue . <end>  
...                                         ...  
49995             <start> je l ai revue . <end>  
49996               <start> cassez 

## 4. Building Vocabulary

Arranging the vocabulary of words from the source and target languages in a list

In [5]:
def build_vocabulary(pd_dataframe):
    '''
    Creating a list to store words forming the vocabulary of a chosen language
    '''
    sentences = [sen.split() for sen in pd_dataframe]
    vocabulary = {}
    for sent in sentences:
        for word in sent:
            if word not in vocabulary:
                vocabulary[word] = 1
    return list(vocabulary.keys())

if __name__ == '__main__':
    src_vocab_list = build_vocabulary(data['eng'])
    trg_vocab_list = build_vocabulary(data['fra'])

print("The source vocabulary is: ", src_vocab_list)
print("The target vocabulary is: ", trg_vocab_list)

The target vocabulary is:  ['<start>', 'ne', 'quittez', 'pas', '!', '<end>', 'ils', 'sont', 'eveilles', '.', 'tout', 'le', 'monde', 'paniqua', 'je', 'me', 'suis', 'casse', 'bras', 'plus', 'fatigue', 'elle', 'etait', 'naive', 'apte', 'au', 'service', '?', 'avez', 'vous', 'un', 'animal', 'de', 'compagnie', 'nous', 'devons', 'partir', 'quiconque', 'est', 'il', 'en', 'train', 'regarder', 'peux', 'courir', 'n', 'ai', 'regarde', 'tom', 'allez', ',', 'tentons', 'impatiente', 'vos', 'blagues', 'manquent', 'telephone', 'quelle', 'cette', 'odeur', 't', 'es', 'sympa', 'leur', 'canoe', 'a', 'chavire', 'perdu', 'les', 'pedales', 'ce', 'loin', 'aide', 'moi', 'dresser', 'une', 'liste', 'sais', 'que', 'aimez', 'appele', 'l', 'rencontre', 'fois', 'laissez', 'entendre', 'du', 'manque', 'quelque', 'chose', 's', 'presque', 'noye', 'c', 'idiot', 'j', 'aime', 'cela', 'd', 'accord', 'y', 'ou', 'vit', 'vu', 'la', 'bagarre', 'laisse', 'note', 'fait', 'controler', 'mon', 'audition', 'gardez', 'votre', 'calme', 

## 5. Instantiating the training and target data set

1. **Vocabulary Class** -  A separate class has been created for vocabulary. With this, the vocabulary list for each language can be instantiated into a data structure which stores the words from the vocabulary accompanied with a mapping of these words with numbers serving as their indices. This numbers can be used in the training process by the model.

2. **DataLoader Class** - Each sentence is stored as a list of words. The DataLoader will instantiate this into a long tensor.



In [6]:
# vocabulary class
class Vocab_Lang():
    def __init__(self, vocab):
        self.word2idx = {'<pad>': 0, '<unk>': 1}
        self.idx2word = {0: '<pad>', 1: '<unk>'}
        self.vocab = vocab
        
        for index, word in enumerate(vocab):
            self.word2idx[word] = index + 2 # +2 because of <pad> and <unk> token
            self.idx2word[index + 2] = word
    
    def __len__(self):
        return len(self.word2idx)

# data loader class
class MyData(Dataset):
    def __init__(self, X, y):
        self.length = torch.LongTensor([np.sum(1 - np.equal(x, 0)) for x in X])
        self.data = torch.LongTensor(X)
        self.target = torch.LongTensor(y)
    
    def __getitem__(self, index):
        x = self.data[index]
        y = self.target[index]
        return x, y

    def __len__(self):
        return len(self.data)

In [7]:
def pad_sequences(x, max_len):
    """ 
    Adding padding to sentences of length smaller than the maximum sentence length
    """
    padded = np.zeros((max_len), dtype=np.int64)
    if len(x) > max_len:
        padded[:] = x[:max_len]
    else:
        padded[:len(x)] = x
    return padded


def preprocess_data_to_tensor(dataframe, src_vocab, trg_vocab):
    # Vectorize the input and target languages
    src_tensor = [[src_vocab.word2idx[s if s in src_vocab.vocab else '<unk>'] for s in eng.split(' ')] for eng in dataframe['eng'].values.tolist()]
    trg_tensor = [[trg_vocab.word2idx[s if s in trg_vocab.vocab else '<unk>'] for s in fra.split(' ')] for fra in dataframe['fra'].values.tolist()]

    # Calculate the max_length of input and output tensor for padding
    max_length_src, max_length_trg = max(len(t) for t in src_tensor), max(len(t) for t in trg_tensor)
    print('max_length_src: {}, max_length_trg: {}'.format(max_length_src, max_length_trg))

    # Pad all the sentences in the dataset with the max_length
    src_tensor = [pad_sequences(x, max_length_src) for x in src_tensor]
    trg_tensor = [pad_sequences(x, max_length_trg) for x in trg_tensor]

    return src_tensor, trg_tensor, max_length_src, max_length_trg


def train_test_split(src_tensor, trg_tensor):
    '''
    Create training and test sets.
    '''
    total_num_examples = len(src_tensor) - int(0.2*len(src_tensor))
    src_tensor_train, src_tensor_test = src_tensor[:int(0.75*total_num_examples)], src_tensor[int(0.75*total_num_examples):total_num_examples]
    trg_tensor_train, trg_tensor_test = trg_tensor[:int(0.75*total_num_examples)], trg_tensor[int(0.75*total_num_examples):total_num_examples]

    return src_tensor_train, src_tensor_test, trg_tensor_train, trg_tensor_test

The sentences from the source and target language are stored in form of tensors. The data is split into training and testing data set separately. By setting the appropriate hyperparamters of embedding and batch size the tensor data in smapled into batched using the DataLoader module from Pytorch

In [8]:
if __name__ == '__main__':

    # HYPERPARAMETERS 
    BATCH_SIZE = 64
    EMBEDDING_DIM = 256

    src_vocab = Vocab_Lang(src_vocab_list)
    trg_vocab = Vocab_Lang(trg_vocab_list)

    src_tensor, trg_tensor, max_length_src, max_length_trg = preprocess_data_to_tensor(data, src_vocab, trg_vocab)
    src_tensor_train, src_tensor_val, trg_tensor_train, trg_tensor_val = train_test_split(src_tensor, trg_tensor)
    # Create train and val datasets
    train_dataset = MyData(src_tensor_train, trg_tensor_train)
    train_dataset = DataLoader(train_dataset, batch_size=BATCH_SIZE, drop_last=True, shuffle=True)
    
    test_dataset = MyData(src_tensor_val, trg_tensor_val)
    test_dataset = DataLoader(test_dataset, batch_size=BATCH_SIZE, drop_last=True, shuffle=False)

max_length_src: 11, max_length_trg: 19


  self.data = torch.LongTensor(X)


In [9]:
if __name__ == '__main__':
    idxes = random.choices(range(len(train_dataset.dataset)), k=5)
    src, trg =  train_dataset.dataset[idxes]
    print('Source:', src)
    print('Source Dimensions: ', src.size())
    print('Target:', trg)
    print('Target Dimensions: ', trg.size())

Source: tensor([[   2,   47,   38,  390, 3940,   11,    7,    0,    0,    0,    0],
        [   2,   38,   54,   16,  327,   28,    7,    0,    0,    0,    0],
        [   2,   34,    9, 2408,   11,    7,    0,    0,    0,    0,    0],
        [   2,   96,  152,   54,   11,    7,    0,    0,    0,    0,    0],
        [   2,   14,  319,   80,   32,  964,   11,    7,    0,    0,    0]])
Source Dimensions:  torch.Size([5, 11])
Target: tensor([[   2,   50,   40, 5958,   11,    7,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0],
        [   2,   95,   40,  112, 1183,   29,    7,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0],
        [   2,   36,  272,   42,   43,   34,   36, 2494,   11,    7,    0,    0,
            0,    0,    0,    0,    0,    0,    0],
        [   2,   41,  106, 6365,   11,    7,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0],
        [   2,   16,  295,   48

## 6. Model training
Now we will train a Transformer-based Encoder and Decoder model for learning the translation from the source to target language. We will import the encoder and decoder models and use them in the training process

### RNN Model Training

In [10]:
import rnnencoder
import rnndecoder

### DO NOT EDIT ###

def loss_function(real, pred):
    mask = real.ge(1).float() # Only consider non-zero inputs in the loss
    
    loss_ = F.cross_entropy(pred, real) * mask 
    return torch.mean(loss_)

def train_rnn_model(encoder, decoder, dataset, optimizer, trg_vocab, device, n_epochs):
    batch_size = dataset.batch_size
    for epoch in range(n_epochs):
        start = time.time()
        n_batch = 0
        total_loss = 0
        
        encoder.train()
        decoder.train()
        
        for src, trg in tqdm(dataset):
            n_batch += 1
            loss = 0
            
            enc_output, enc_hidden = encoder(src.transpose(0,1).to(device))
            dec_hidden = enc_hidden
            
            # use teacher forcing - feeding the target as the next input (via dec_input)
            dec_input = torch.tensor([[trg_vocab.word2idx['<start>']]] * batch_size)
        
            # run code below for every timestep in the ys batch
            for t in range(1, trg.size(1)):
                predictions, dec_hidden, _ = decoder(dec_input.to(device), dec_hidden.to(device), enc_output.to(device))
                assert len(predictions.shape) == 2 and predictions.shape[0] == dec_input.shape[0] and predictions.shape[1] == len(trg_vocab.word2idx), "First output of decoder must have shape [batch_size, vocab_size], you returned shape " + str(predictions.shape)
                loss += loss_function(trg[:, t].to(device), predictions.to(device))
                dec_input = trg[:, t].unsqueeze(1)
        
            batch_loss = (loss / int(trg.size(1)))
            total_loss += batch_loss
            
            optimizer.zero_grad()
            
            batch_loss.backward()

            ### update model parameters
            optimizer.step()
        
        print('Epoch:{:2d}/{}\t Loss: {:.4f} \t({:.2f}s)'.format(epoch + 1, n_epochs, total_loss / n_batch, time.time() - start))

    print('Model trained!')

In [11]:
if __name__ == '__main__':
    # HYPERPARAMETERS
    LEARNING_RATE = 0.001
    HIDDEN_UNITS=256
    N_EPOCHS=10

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
  
    rnn_encoder = rnnencoder.RnnEncoder(src_vocab, EMBEDDING_DIM, HIDDEN_UNITS).to(device)
    rnn_decoder = rnndecoder.RnnDecoder(trg_vocab, EMBEDDING_DIM, HIDDEN_UNITS).to(device)

    rnn_model_params = list(rnn_encoder.parameters()) + list(rnn_decoder.parameters())
    optimizer = torch.optim.Adam(rnn_model_params, lr=LEARNING_RATE)

    print('Encoder and Decoder models initialized!')

Encoder and Decoder models initialized!


In [12]:
if __name__ == '__main__':
    train_rnn_model(rnn_encoder, rnn_decoder, train_dataset, optimizer, trg_vocab, device, N_EPOCHS)

  0%|          | 0/468 [00:00<?, ?it/s]

Epoch: 1/10	 Loss: 1.1896 	(21.45s)


  0%|          | 0/468 [00:00<?, ?it/s]

Epoch: 2/10	 Loss: 0.7892 	(20.38s)


  0%|          | 0/468 [00:00<?, ?it/s]

Epoch: 3/10	 Loss: 0.5990 	(21.07s)


  0%|          | 0/468 [00:00<?, ?it/s]

Epoch: 4/10	 Loss: 0.4606 	(20.38s)


  0%|          | 0/468 [00:00<?, ?it/s]

Epoch: 5/10	 Loss: 0.3535 	(20.30s)


  0%|          | 0/468 [00:00<?, ?it/s]

Epoch: 6/10	 Loss: 0.2731 	(20.21s)


  0%|          | 0/468 [00:00<?, ?it/s]

Epoch: 7/10	 Loss: 0.2151 	(20.16s)


  0%|          | 0/468 [00:00<?, ?it/s]

Epoch: 8/10	 Loss: 0.1743 	(21.75s)


  0%|          | 0/468 [00:00<?, ?it/s]

Epoch: 9/10	 Loss: 0.1459 	(21.29s)


  0%|          | 0/468 [00:00<?, ?it/s]

Epoch:10/10	 Loss: 0.1267 	(20.67s)
Model trained!


### Transformer Model Training

In [27]:
loss = nn.CrossEntropyLoss()
input = torch.randn(3, 5, requires_grad=True)
target = torch.empty(3, dtype=torch.long).random_(5)
output = loss(input, target)
output.backward()
output

tensor(1.7741, grad_fn=<NllLossBackward0>)

In [13]:
import transformerencoder
import transformerdecoder

def train_transformer_model(encoder, decoder, dataset, optimizer, device, n_epochs):
    """ Model training for machine translation
    """
    encoder.train()
    decoder.train()
    criterion = nn.CrossEntropyLoss(ignore_index=0)
    for epoch in range(n_epochs):
        start = time.time()
        losses = []

        for src, trg in tqdm(train_dataset):
            
            src = src.to(device).transpose(0,1) # [max_src_length, batch_size]
            trg = trg.to(device).transpose(0,1) # [max_trg_length, batch_size]

            enc_out = encoder(src)
            output = decoder(trg[:-1, :], enc_out)

            output = output.reshape(-1, output.shape[2])
            trg = trg[1:].reshape(-1)

            optimizer.zero_grad()

            loss = criterion(output, trg)
            losses.append(loss.item())

            loss.backward()

            # Clip to avoid exploding grading issues
            torch.nn.utils.clip_grad_norm_(encoder.parameters(), max_norm=1)
            torch.nn.utils.clip_grad_norm_(decoder.parameters(), max_norm=1)

            optimizer.step()

        mean_loss = sum(losses) / len(losses)
        print('Epoch:{:2d}/{}\t Loss:{:.4f} ({:.2f}s)'.format(epoch + 1, n_epochs, mean_loss, time.time() - start))


In [14]:
if __name__ == '__main__':
    # HYPERPARAMETERS
    LEARNING_RATE = 0.001
    DIM_FEEDFORWARD=512
    N_EPOCHS=10
    N_HEADS=2
    N_LAYERS=2

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # transformer encoder model
    transformer_encoder = transformerencoder.TransformerEncoder(src_vocab, EMBEDDING_DIM, N_HEADS, 
                                 N_LAYERS,DIM_FEEDFORWARD,
                                 max_length_src, device).to(device)
    # transformer decoder model
    transformer_decoder = transformerdecoder.TransformerDecoder(trg_vocab, EMBEDDING_DIM, N_HEADS, 
                              N_LAYERS,DIM_FEEDFORWARD,
                              max_length_trg, device).to(device)

    transformer_model_params = list(transformer_encoder.parameters()) + list(transformer_decoder.parameters())
    optimizer = torch.optim.Adam(transformer_model_params, lr=LEARNING_RATE)

    print('Encoder and Decoder models have been initialized!')

Encoder and Decoder models have been initialized!


In [15]:
if __name__ == '__main__':
    train_transformer_model(transformer_encoder, transformer_decoder, train_dataset, optimizer, device, N_EPOCHS)

  0%|          | 0/468 [00:00<?, ?it/s]

Epoch: 1/10	 Loss:3.1920 (11.30s)


  0%|          | 0/468 [00:00<?, ?it/s]

Epoch: 2/10	 Loss:2.1631 (11.16s)


  0%|          | 0/468 [00:00<?, ?it/s]

Epoch: 3/10	 Loss:1.7098 (11.06s)


  0%|          | 0/468 [00:00<?, ?it/s]

Epoch: 4/10	 Loss:1.3978 (11.15s)


  0%|          | 0/468 [00:00<?, ?it/s]

Epoch: 5/10	 Loss:1.1881 (11.17s)


  0%|          | 0/468 [00:00<?, ?it/s]

Epoch: 6/10	 Loss:1.0466 (11.03s)


  0%|          | 0/468 [00:00<?, ?it/s]

Epoch: 7/10	 Loss:0.9450 (11.17s)


  0%|          | 0/468 [00:00<?, ?it/s]

Epoch: 8/10	 Loss:0.8733 (11.09s)


  0%|          | 0/468 [00:00<?, ?it/s]

Epoch: 9/10	 Loss:0.8167 (11.10s)


  0%|          | 0/468 [00:00<?, ?it/s]

Epoch:10/10	 Loss:0.7700 (11.30s)


# 7. Decoding output

Decoding the output to predict the sentences for unseen source sentences

### RNN Model decoding

In [16]:
import decodingalgorithm
if __name__ == '__main__':
    rnn_encoder.eval()
    rnn_decoder.eval()
    idxes = random.choices(range(len(test_dataset.dataset)), k=5)
    src, trg =  train_dataset.dataset[idxes]
    curr_output, _ = decodingalgorithm.decode_rnn_model(rnn_encoder, rnn_decoder, src.transpose(0,1).to(device), trg.size(1), device)
    for i in range(len(src)):
        print("Source sentence:", ' '.join([x for x in [src_vocab.idx2word[j.item()] for j in src[i]] if x != '<pad>']))
        print("Target sentence:", ' '.join([x for x in [trg_vocab.idx2word[j.item()] for j in trg[i]] if x != '<pad>']))
        print("Predicted sentence:", ' '.join([x for x in [trg_vocab.idx2word[j.item()] for j in curr_output[i]] if x != '<pad>']))
        print("----------------")

Source sentence: <start> go fly a kite . <end>
Target sentence: <start> degage ! <end>
Predicted sentence: <start> degage ! <end>
----------------
Source sentence: <start> he was patient . <end>
Target sentence: <start> il etait patient . <end>
Predicted sentence: <start> il etait patient . <end>
----------------
Source sentence: <start> i want to go out . <end>
Target sentence: <start> je veux sortir . <end>
Predicted sentence: <start> je veux sortir . <end>
----------------
Source sentence: <start> i love hamburgers . <end>
Target sentence: <start> j adore les hamburgers . <end>
Predicted sentence: <start> j adore les hamburgers . <end>
----------------
Source sentence: <start> this isn t enough . <end>
Target sentence: <start> ce n est pas assez . <end>
Predicted sentence: <start> ce n est pas . <end>
----------------


### Transformer Model Decoding

In [17]:
if __name__ == '__main__':
    transformer_encoder.eval()
    transformer_decoder.eval()
    idxes = random.choices(range(len(test_dataset.dataset)), k=5)
    src, trg =  train_dataset.dataset[idxes]
    curr_output, _, _ = decodingalgorithm.decode_transformer_model(transformer_encoder, transformer_decoder, src.transpose(0,1).to(device), trg.size(1), device)
    for i in range(len(src)):
        print("Source sentence:", ' '.join([x for x in [src_vocab.idx2word[j.item()] for j in src[i]] if x != '<pad>']))
        print("Target sentence:", ' '.join([x for x in [trg_vocab.idx2word[j.item()] for j in trg[i]] if x != '<pad>']))
        print("Predicted sentence:", ' '.join([x for x in [trg_vocab.idx2word[j.item()] for j in curr_output[i]] if x != '<pad>']))
        print("----------------")

Source sentence: <start> he s after me . <end>
Target sentence: <start> il est apres moi . <end>
Predicted sentence: <start> il m est apres . <end> . <end> . <end> . <end> . <end> . <end> . <end>
----------------
Source sentence: <start> i have no control . <end>
Target sentence: <start> je n exerce aucun controle . <end>
Predicted sentence: <start> je n ai pas de reponse . <end> . <end> . <end> . <end> . <end> . <end>
----------------
Source sentence: <start> i envy you . <end>
Target sentence: <start> je vous envie . <end>
Predicted sentence: <start> je vous envie de toi . <end> . <end> . <end> . <end> . <end> . <end> .
----------------
Source sentence: <start> we re clean . <end>
Target sentence: <start> nous sommes propres . <end>
Predicted sentence: <start> nous sommes propres . <end> . <end> . <end> . <end> . <end> . <end> . <end> .
----------------
Source sentence: <start> what are you hiding ? <end>
Target sentence: <start> qu es tu en train de cacher ? <end>
Predicted sentence

## Model Evaluation

Evaluation of the model is based on the blue score

### RNN Model Evaluation

In [20]:
def get_reference_candidate(target, pred, trg_vocab):
    def _to_token(sentence):
        lis = []
        for s in sentence[1:]:
            x = trg_vocab.idx2word[s]
            if x == "<end>": break
            lis.append(x)
        return lis
    reference = _to_token(list(target.numpy()))
    candidate = _to_token(list(pred.numpy()))
    return reference, candidate

def compute_bleu_scores(target_tensor_val, target_output, final_output, trg_vocab):
    bleu_1 = 0.0
    bleu_2 = 0.0
    bleu_3 = 0.0
    bleu_4 = 0.0

    smoother = SmoothingFunction()
    save_reference = []
    save_candidate = []
    for i in range(len(target_tensor_val)):
        reference, candidate = get_reference_candidate(target_output[i], final_output[i], trg_vocab)
    
        bleu_1 += sentence_bleu(reference, candidate, weights=(1,), smoothing_function=smoother.method1)
        bleu_2 += sentence_bleu(reference, candidate, weights=(1/2, 1/2), smoothing_function=smoother.method1)
        bleu_3 += sentence_bleu(reference, candidate, weights=(1/3, 1/3, 1/3), smoothing_function=smoother.method1)
        bleu_4 += sentence_bleu(reference, candidate, weights=(1/4, 1/4, 1/4, 1/4), smoothing_function=smoother.method1)

        save_reference.append(reference)
        save_candidate.append(candidate)
    
    bleu_1 = bleu_1/len(target_tensor_val)
    bleu_2 = bleu_2/len(target_tensor_val)
    bleu_3 = bleu_3/len(target_tensor_val)
    bleu_4 = bleu_4/len(target_tensor_val)

    scores = {"bleu_1": bleu_1, "bleu_2": bleu_2, "bleu_3": bleu_3, "bleu_4": bleu_4}
    print('BLEU 1-gram: %f' % (bleu_1))
    print('BLEU 2-gram: %f' % (bleu_2))
    print('BLEU 3-gram: %f' % (bleu_3))
    print('BLEU 4-gram: %f' % (bleu_4))

    return save_candidate, scores

def evaluate_rnn_model(encoder, decoder, test_dataset, target_tensor_val, device):
    trg_vocab = decoder.trg_vocab
    batch_size = test_dataset.batch_size
    n_batch = 0
    total_loss = 0

    encoder.eval()
    decoder.eval()
    
    final_output, target_output = None, None

    with torch.no_grad():
        for batch, (src, trg) in enumerate(test_dataset):
            n_batch += 1
            loss = 0
            curr_output, curr_predictions = decodingalgorithm.decode_rnn_model(encoder, decoder, src.transpose(0,1).to(device), trg.size(1), device)
            for t in range(1, trg.size(1)):
                loss += loss_function(trg[:, t].to(device), curr_predictions[:,t,:].to(device))

            if final_output is None:
                final_output = torch.zeros((len(target_tensor_val), trg.size(1)))
                target_output = torch.zeros((len(target_tensor_val), trg.size(1)))
            final_output[batch*batch_size:(batch+1)*batch_size] = curr_output
            target_output[batch*batch_size:(batch+1)*batch_size] = trg
            batch_loss = (loss / int(trg.size(1)))
            total_loss += batch_loss

        print('Loss {:.4f}'.format(total_loss / n_batch))
    
    # Compute BLEU scores
    return compute_bleu_scores(target_tensor_val, target_output, final_output, trg_vocab)

In [21]:
if __name__ == '__main__':
    rnn_save_candidate, rnn_scores = evaluate_rnn_model(rnn_encoder, rnn_decoder, test_dataset, trg_tensor_val, device)

Loss 1.6758
BLEU 1-gram: 0.235489
BLEU 2-gram: 0.073647
BLEU 3-gram: 0.056280
BLEU 4-gram: 0.054306


### Transformer Model Evaluation

In [22]:
def get_reference_candidate(target, pred, trg_vocab):
    def _to_token(sentence):
        lis = []
        for s in sentence[1:]:
            x = trg_vocab.idx2word[s]
            if x == "<end>": break
            lis.append(x)
        return lis
    reference = _to_token(list(target.numpy()))
    candidate = _to_token(list(pred.numpy()))
    return reference, candidate
    
def compute_bleu_scores(target_tensor_val, target_output, final_output, trg_vocab):
    bleu_1 = 0.0
    bleu_2 = 0.0
    bleu_3 = 0.0
    bleu_4 = 0.0

    smoother = SmoothingFunction()
    save_reference = []
    save_candidate = []
    for i in range(len(target_tensor_val)):
        reference, candidate = get_reference_candidate(target_output[i], final_output[i], trg_vocab)
    
        bleu_1 += sentence_bleu(reference, candidate, weights=(1,), smoothing_function=smoother.method1)
        bleu_2 += sentence_bleu(reference, candidate, weights=(1/2, 1/2), smoothing_function=smoother.method1)
        bleu_3 += sentence_bleu(reference, candidate, weights=(1/3, 1/3, 1/3), smoothing_function=smoother.method1)
        bleu_4 += sentence_bleu(reference, candidate, weights=(1/4, 1/4, 1/4, 1/4), smoothing_function=smoother.method1)

        save_reference.append(reference)
        save_candidate.append(candidate)
    
    bleu_1 = bleu_1/len(target_tensor_val)
    bleu_2 = bleu_2/len(target_tensor_val)
    bleu_3 = bleu_3/len(target_tensor_val)
    bleu_4 = bleu_4/len(target_tensor_val)

    scores = {"bleu_1": bleu_1, "bleu_2": bleu_2, "bleu_3": bleu_3, "bleu_4": bleu_4}
    print('BLEU 1-gram: %f' % (bleu_1))
    print('BLEU 2-gram: %f' % (bleu_2))
    print('BLEU 3-gram: %f' % (bleu_3))
    print('BLEU 4-gram: %f' % (bleu_4))

    return save_candidate, scores

def evaluate_model(encoder, decoder, test_dataset, target_tensor_val, device):
    trg_vocab = decoder.trg_vocab
    batch_size = test_dataset.batch_size
    n_batch = 0
    total_loss = 0

    encoder.eval()
    decoder.eval()
    criterion = nn.CrossEntropyLoss(ignore_index=0)

    losses=[]
    final_output, target_output = None, None

    with torch.no_grad():
        for batch, (src, trg) in enumerate(test_dataset):
            n_batch += 1
            loss = 0
            
            src, trg = src.transpose(0,1).to(device), trg.transpose(0,1).to(device)
            curr_output, curr_predictions, enc_out = decodingalgorithm.decode_transformer_model(encoder, decoder, src, trg.size(0), device)

            for t in range(1, trg.size(0)):
                output = decoder(trg[:-1, :], enc_out)
                output = output.reshape(-1, output.shape[2])
                loss_trg = trg[1:].reshape(-1)
                loss += criterion(output, loss_trg)
                # loss += criterion(curr_predictions[:,t,:].to(device), trg[t,:].reshape(-1).to(device))

            if final_output is None:
                final_output = torch.zeros((len(target_tensor_val), trg.size(0)))
                target_output = torch.zeros((len(target_tensor_val), trg.size(0)))

            final_output[batch*batch_size:(batch+1)*batch_size] = curr_output
            target_output[batch*batch_size:(batch+1)*batch_size] = trg.transpose(0,1)
            losses.append(loss.item() / (trg.size(0)-1))

        mean_loss = sum(losses) / len(losses)
        print('Loss {:.4f}'.format(mean_loss))
    
    # Compute Bleu scores
    return compute_bleu_scores(target_tensor_val, target_output, final_output, trg_vocab)

In [23]:
if __name__ == '__main__':
    transformer_save_candidate, transformer_scores = evaluate_model(transformer_encoder, transformer_decoder, test_dataset, trg_tensor_val, device)

Loss 1.6724
BLEU 1-gram: 0.246901
BLEU 2-gram: 0.075155
BLEU 3-gram: 0.056434
BLEU 4-gram: 0.054406


**Saving Transformer Encoder and Decoder Model**

In [24]:
if __name__=='__main__':
    from google.colab import drive
    drive.mount('/content/drive')
    if rnn_encoder is not None and rnn_encoder is not None:
        print("Saving RNN model....") 
        torch.save(rnn_encoder, 'drive/MyDrive/Colab Notebooks/NMT/rnn_encoder.pt')
        torch.save(rnn_decoder, 'drive/MyDrive/Colab Notebooks/NMT//rnn_decoder.pt')
    if transformer_encoder is not None and transformer_decoder is not None:
        print("Saving Transformer model....") 
        torch.save(transformer_encoder, 'drive/MyDrive/Colab Notebooks/NMT/transformer_encoder.pt')
        torch.save(transformer_decoder, 'drive/MyDrive/Colab Notebooks/NMT/transformer_decoder.pt')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Saving RNN model....
Saving Transformer model....
