# Reference: https://pytorch.org/tutorials/intermediate/seq2seq_translation_tutorial.html#exercises

In [1]:
from __future__ import unicode_literals, print_function, division
from io import open
import unicodedata
import re
import random
import os
import time

import pandas as pd
import numpy as np

import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F
from torch.autograd import Variable

import numpy as np
from torch.utils.data import TensorDataset, DataLoader, RandomSampler

import gc
from tqdm import tqdm
import math

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
#device = torch.device('cpu')
device

device(type='cuda')

## Load Data

In [2]:
start = time.time()

if os.path.isfile('./data/valid_subset.csv'):
    # cleaned, filtered by length, 10% dataset
    df = pd.read_csv('./data/valid_subset.csv', index_col=False)
    
elif os.path.isfile('./data/valid_cleaned_data.csv'):
    # cleaned, filtered by length dataset
    df = pd.read_csv('./data/valid_cleaned_data.csv', index_col=False)
    
elif os.path.isfile('./data/cleaned_data.csv'):
    # cleaned dataset
    df = pd.read_csv('./data/cleaned_data.csv', index_col=False)
else:

    df = pd.read_csv('./data/en-fr.csv')

end = time.time()
display(end - start)

df.head()

0.3690521717071533

Unnamed: 0,en,fr
0,another easily recognizable form of ar technol...,une autre forme connue de ra est l ecran de vi...
1,however since sao paulo is a big and scattered...,cependant comme sao paulo est une grande ville...
2,this provision provides among other things tha...,cette provision prevoit notamment qu une deduc...
3,where would you expect to find a document that...,ou crois tu que tu trouveras un document qui t...
4,at the request of agency staff additional comm...,a la demande du personnel de l office royal a ...


In [3]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 146993 entries, 0 to 146992
Data columns (total 2 columns):
 #   Column  Non-Null Count   Dtype 
---  ------  --------------   ----- 
 0   en      146993 non-null  object
 1   fr      146993 non-null  object
dtypes: object(2)
memory usage: 2.2+ MB


## Clean the data

In [4]:
# Turn a Unicode string to plain ASCII, thanks to
# https://stackoverflow.com/a/518232/2809427
def unicodeToAscii(s):
    return ''.join(
        c for c in unicodedata.normalize('NFD', s)
        if unicodedata.category(c) != 'Mn'
    )

# Lowercase, trim, and remove non-letter characters
def normalizeString(s):
    s = unicodeToAscii(s.lower().strip())
    s = re.sub(r"([.!?])", r" \1", s)
    s = re.sub(r"[^a-zA-Z!?]+", r" ", s)
    return s.strip()

In [5]:
# Clean data only if not available    

start = time.time()
df.dropna(inplace=True)

if not os.path.isfile('./data/cleaned_data.csv'):
    df['en'] = df['en'].apply(lambda x: normalizeString(str(x)))
    df['fr'] = df['fr'].apply(lambda x: normalizeString(str(x)))
    df.to_csv('./data/cleaned_data.csv', index=False)
    
end = time.time()
display(end-start)

0.02051687240600586

In [6]:
### Filter dataset by length
MAX_LENGTH = 35

if not os.path.isfile('./data/valid_cleaned_data.csv'):
    df['en_len'] = df['en'].apply(lambda sent: len(sent.split(" ")))
    df['fr_len'] = df['fr'].apply(lambda sent: len(sent.split(" ")))

    df = df[df['en_len'] < MAX_LENGTH]
    df = df[df['fr_len'] < MAX_LENGTH]
    
    df = df[['en', 'fr']]
    
    df.to_csv('./data/valid_cleaned_data.csv', index=False)


df

Unnamed: 0,en,fr
0,another easily recognizable form of ar technol...,une autre forme connue de ra est l ecran de vi...
1,however since sao paulo is a big and scattered...,cependant comme sao paulo est une grande ville...
2,this provision provides among other things tha...,cette provision prevoit notamment qu une deduc...
3,where would you expect to find a document that...,ou crois tu que tu trouveras un document qui t...
4,at the request of agency staff additional comm...,a la demande du personnel de l office royal a ...
...,...,...
146988,it would be most beneficial and effective for ...,il serait tres avantageux et efficace pour le ...
146989,cost reductions particular importance shall be...,reductions des couts une importance particulie...
146990,this inconsistent approach to marking had sign...,cette methode de cotation non uniforme a eu de...
146991,testimony of lgen gervais transcripts vol,temoignage du lgne gervais transcriptions vol


In [7]:
### Prepare only subset of data
frac = 0.1

if not os.path.isfile('./data/valid_subset.csv'):
    df_subset = df.sample(frac=frac)
    df_subset.to_csv('./data/valid_subset.csv', index=False)
else:
    df_subset = df

df_subset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 146993 entries, 0 to 146992
Data columns (total 2 columns):
 #   Column  Non-Null Count   Dtype 
---  ------  --------------   ----- 
 0   en      146993 non-null  object
 1   fr      146993 non-null  object
dtypes: object(2)
memory usage: 2.2+ MB


In [8]:
df_subset[df_subset.isna().any(axis=1)]

Unnamed: 0,en,fr


## Create helpers to construct vocabulary

In [9]:
SOS_token = 1
EOS_token = 2

class Lang:
    def __init__(self, name):
        self.name = name
        self.word2index = {}
        self.word2count = {}
        self.index2word = {0: "PAD", 1: "SOS", 2: "EOS"}
        self.n_words = 2  # Count SOS and EOS

    def addSentence(self, sentence):
        for word in sentence.split(' '):
            self.addWord(word)

    def addWord(self, word):
        if word not in self.word2index:
            self.word2index[word] = self.n_words
            self.word2count[word] = 1
            self.index2word[self.n_words] = word
            self.n_words += 1
        else:
            self.word2count[word] += 1

In [10]:
len(df_subset)

146993

## Preparing Data for training

In [26]:
gc.collect()
df_subset = df_subset.sample(frac=0.1)


def prepareData(df):
    en_lang = Lang('en')
    fr_lang = Lang('fr')
    
    en_vocab = []
    fr_vocab = []
    
    for index, row in tqdm(df.iterrows(), total=df.shape[0], position=0, leave=True):
        en_sent = row['en']
        fr_sent = row['fr']
        
        en_vocab += en_sent.split(" ")
        fr_vocab += fr_sent.split(" ")
        
    
    # Construct word2index and index2word dicts for the two languages
    en_vocab = set(en_vocab)
    fr_vocab = set(fr_vocab)
    
    en_word2index = dict([(word, i+3) for i, word in enumerate(en_vocab)])
    fr_word2index = dict([(word, i+3) for i, word in enumerate(fr_vocab)])
    
    en_index2word = {v: k for k, v in en_word2index.items()}
    fr_index2word = {v: k for k, v in fr_word2index.items()}
    
    en_lang.word2index = en_word2index
    fr_lang.word2index = fr_word2index
    
    en_lang.index2word.update(en_index2word)
    fr_lang.index2word.update(fr_index2word)
    
    en_lang.n_words = len(en_lang.index2word.keys())
    fr_lang.n_words = len(fr_lang.index2word.keys())
    
    return en_lang, fr_lang
        

start = time.time()
en_lang, fr_lang = prepareData(df_subset)
end = time.time() 
display(end - start)



100%|██████████| 14699/14699 [00:00<00:00, 26310.53it/s]


0.6491856575012207

In [27]:
df_train = df_subset.sample(frac=0.75)

df_test = pd.concat([df_subset, df_train])
df_test.drop_duplicates(keep=False, inplace=True)
df_train.reset_index(inplace=True, drop=True)
df_test.reset_index(inplace=True, drop=True)

df_test

Unnamed: 0,en,fr
0,a provisional time schedule for the consultati...,on trouvera au tableau ci apres un calendrier ...
1,date of compliance specified in notice of defa...,date du delai specifiee dans l avis de defaut ...
2,elizabeth leboe return to intermediate lesson ...,retour a la page des plans de lecon niveau int...
3,the parks canada agency does not have the auth...,or l agence parcs canada n a pas le pouvoir de...
4,within the context of the government s securit...,assurer la securite des canadiens dans le cadr...
...,...,...
3670,today in the city of winnipeg and all across m...,aujourd hui a winnipeg et partout dans ma prov...
3671,this hearing was convened by teleconference,l audience a eu lieu par teleconference
3672,normally he would have needed only hours to qu...,normalement il aurait eu besoin de heures seul...
3673,transport canada has the lead role for policy ...,transports canada joue un role de premier plan...


In [28]:
def indexesFromSentence(lang, sentence):
    return [lang.word2index[word] for word in sentence.split(' ')]

def get_dataloader(batch_size, en_lang, fr_lang, df):
    
    n = len(df)
    input_ids = np.zeros((n, MAX_LENGTH), dtype=np.int32)
    target_ids = np.zeros((n, MAX_LENGTH), dtype=np.int32)
    
    
    for idx, row in tqdm(df.iterrows(), total=n, position=0, leave=True):
        en_sent = row[en_lang.name]
        fr_sent = row[fr_lang.name]
        
        en_ids = indexesFromSentence(en_lang, en_sent)
        fr_ids = indexesFromSentence(fr_lang, fr_sent)
                
        en_ids.append(EOS_token)
        fr_ids.append(EOS_token)
        
        input_ids[idx, :len(en_ids)] = en_ids
        target_ids[idx, :len(fr_ids)] = fr_ids
        
    train_data = TensorDataset(torch.LongTensor(input_ids).to(device),
                               torch.LongTensor(target_ids).to(device))
    train_sampler = RandomSampler(train_data)
    train_dataloader = DataLoader(train_data, sampler=train_sampler, batch_size=batch_size) #, num_workers=8)
    return train_dataloader

## Building LSTM Model

In [29]:
class EncoderRNN(nn.Module):
    def __init__(self, input_size, hidden_size, dropout_p=0.1):
        super(EncoderRNN, self).__init__()
        self.hidden_size = hidden_size

        self.embedding = nn.Embedding(input_size, hidden_size)
        self.LSTM = nn.LSTM(hidden_size, hidden_size, batch_first=True)
        self.dropout = nn.Dropout(dropout_p)

    def forward(self, input, hidden):
        embedded = self.dropout(self.embedding(input).view(1,1,-1))
        embedded = self.embedding(input).view(1,1,-1)
        output, hidden = self.LSTM(embedded, hidden)
        return output, hidden
    
    def initHidden(self, batch_size):
        # shape = [num_lstm_layers, batch_size, hidden_size]
        encoder_state = [torch.zeros(1, batch_size, self.hidden_size, device=device),
                              torch.zeros(1, batch_size, self.hidden_size, device=device)]
        
        return encoder_state


In [30]:
class DecoderRNN(nn.Module):
    def __init__(self, hidden_size, output_size):
        super(DecoderRNN, self).__init__()
        self.embedding = nn.Embedding(output_size, hidden_size)
        self.LSTM = nn.LSTM(hidden_size, hidden_size, batch_first=True)
        self.out = nn.Linear(hidden_size, output_size)
        
    def forward(self, input, hidden):
        output = self.embedding(input).view(1, 1, -1)
        output, (h_n, c_n) = self.LSTM(output, hidden)
        output = self.out(output[0])
        return output, (h_n, c_n)
    
    def initHidden(self):
        """
        The spesific type of the hidden layer for the RNN type that is used (LSTM).
        :return: All zero hidden state.
        """
        return [torch.zeros(1, 1, self.hidden_size, device=device),
                torch.zeros(1, 1, self.hidden_size, device=device)]

    def forward_old(self, encoder_outputs, encoder_hidden, target_tensor=None):
        batch_size = encoder_outputs.size(0)
        decoder_input = torch.empty(batch_size, 1, dtype=torch.long, device=device).fill_(SOS_token)
        decoder_hidden = encoder_hidden
        decoder_outputs = []

        for i in range(MAX_LENGTH):
            decoder_output, decoder_hidden  = self.forward_step(decoder_input, decoder_hidden)
            decoder_outputs.append(decoder_output)

            if target_tensor is not None:
                # Teacher forcing: Feed the target as the next input
                decoder_input = target_tensor[:, i].unsqueeze(1) # Teacher forcing
            else:
                # Without teacher forcing: use its own predictions as the next input
                _, topi = decoder_output.topk(1)
                decoder_input = topi.squeeze(-1).detach()  # detach from history as input

        decoder_outputs = torch.cat(decoder_outputs, dim=1)
        decoder_outputs = F.log_softmax(decoder_outputs, dim=-1)
        return decoder_outputs, decoder_hidden, None # We return `None` for consistency in the training loop

    def forward_step(self, input, hidden):
        output = self.embedding(input)
        output = F.relu(output)
        output, hidden = self.LSTM(output, hidden)
        output = self.out(output)
        return output, hidden

## Training the model

In [50]:
def train_batch(input_tensor, target_tensor, encoder, decoder, 
                encoder_optimizer, decoder_optimizer, criterion, is_training=True):
    


    batch = input_tensor.shape[0]

    encoder_hiddens_last = []
    loss = 0

    for step_idx in range(batch):
        encoder_hidden = encoder.initHidden(1)
        input_tensor_step = input_tensor[step_idx, :][input_tensor[step_idx, :] != 0]
        input_length = input_tensor_step.size(0)

        encoder_outputs = torch.zeros(batch, MAX_LENGTH, encoder.hidden_size, device=device)

        for ei in range(input_length):
            encoder_output, encoder_hidden = encoder(
                input_tensor_step[ei], encoder_hidden)
            encoder_outputs[step_idx, ei, :] = encoder_output[0, 0]

        # only return the hidden and cell states for the last layer and pass it to the decoder
        hn, cn = encoder_hidden
        encoder_hn_last_layer = hn[-1].view(1,1,-1)
        encoder_cn_last_layer = cn[-1].view(1,1,-1)
        encoder_hidden = [encoder_hn_last_layer, encoder_cn_last_layer]

        if hn.shape[-1] != hidden_size:
            print(hn.size)
        if cn.shape[-1] != hidden_size:
            print(cn.size)
            break
        encoder_hiddens_last.append(encoder_hidden)

    if hn.shape[-1] != hidden_size:
        print(f"hn size {hn.shape[-1]} != hidden_size {hidden_size} ")
        return 
    if cn.shape[-1] != hidden_size:
        print(f"hn size {cn.shape[-1]} != hidden_size {hidden_size} ")
        return 
        

    decoder_input = torch.tensor([SOS_token], device=device)
    decoder_hiddens = encoder_hiddens_last
    
    if is_training:
        # Training: target tensor available

        for step_idx in range(batch):
            # reset the LSTM hidden state. Must be done before you run a new sequence. Otherwise the LSTM will treat
            # the new input sequence as a continuation of the previous sequence

            target_tensor_step = target_tensor[step_idx, :][target_tensor[step_idx, :] != 0]
            target_length = target_tensor_step.size(0)
            decoder_hidden = decoder_hiddens[step_idx]

            # Teacher forcing: Feed the target as the next input
            for di in range(target_length):
                decoder_output, decoder_hidden = decoder(
                    decoder_input, decoder_hidden)
                # decoder_output, decoder_hidden, decoder_attention = decoder(
                #     decoder_input, decoder_hidden, encoder_outputs)

                loss += criterion(decoder_output, target_tensor_step[di].view(1)) / target_length
                decoder_input = target_tensor_step[di]  # Teacher forcing
    
    else:
        # Testing: target tensor not available 
        for step_idx in range(batch):
            # reset the LSTM hidden state. Must be done before you run a new sequence. Otherwise the LSTM will treat
            # the new input sequence as a continuation of the previous sequence

            target_tensor_step = target_tensor[step_idx, :]
            target_length = target_tensor_step[target_tensor_step != 0].size(0)
            decoder_hidden = decoder_hiddens[step_idx]

            for di in range(MAX_LENGTH):
                decoder_output, decoder_hidden = decoder(
                    decoder_input, decoder_hidden)

                topv, topi = decoder_output.data.topk(1)
                decoder_input = topi.squeeze().detach()
                loss += criterion(decoder_output, target_tensor_step[di].view(1)) / target_length

                if decoder_input.item() == EOS_token:
                    break


    loss = loss / batch
    
    return loss 


In [51]:
def train_epoch(dataloader, encoder, decoder, encoder_optimizer,
          decoder_optimizer, criterion):
    
    encoder.train()
    decoder.train()

    total_loss = 0
    batch_bar   = tqdm(total=len(dataloader), dynamic_ncols=True, leave=True, position=0, desc='Train')
    
    for i, data in enumerate(dataloader):
        encoder_optimizer.zero_grad()
        decoder_optimizer.zero_grad()
        
        input_tensor, target_tensor = data
        
        if device == torch.device("cuda"):
                input_tensor = input_tensor.cuda()
                target_tensor = target_tensor.cuda()
        
        loss = train_batch(input_tensor, target_tensor, encoder, decoder, 
                encoder_optimizer, decoder_optimizer, criterion)
        
        loss.backward()

        encoder_optimizer.step()
        decoder_optimizer.step()


        total_loss += loss.item()

        batch_bar.set_postfix(train_loss="{:.04f}".format(float(total_loss / (i + 1))))
        batch_bar.update()

    return total_loss / len(dataloader)

In [None]:
hidden_size = 128
learning_rate = 0.01

gc.collect()
torch.cuda.empty_cache()

encoder = EncoderRNN(en_lang.n_words, hidden_size).to(device)
decoder = DecoderRNN(hidden_size, fr_lang.n_words).to(device)

encoder_optimizer = optim.AdamW(encoder.parameters(), lr=learning_rate)
decoder_optimizer = optim.AdamW(decoder.parameters(), lr=learning_rate)

encoder_scheduler = optim.lr_scheduler.StepLR(encoder_optimizer, step_size=1, gamma=0.95)
decoder_scheduler = optim.lr_scheduler.StepLR(decoder_optimizer, step_size=1, gamma=0.95)

criterion = nn.CrossEntropyLoss()

train_epoch(train_loader, encoder, decoder, encoder_optimizer,
          decoder_optimizer, criterion)

In [52]:
def test_epoch(dataloader, encoder, decoder, criterion):
    encoder.eval()
    decoder.eval()
    total_loss = 0
    batch_bar = tqdm(total=len(dataloader), dynamic_ncols=True, leave=True, position=0, desc='Test')
    
    for i, data in enumerate(dataloader):
        with torch.no_grad():
            input_tensor, target_tensor = data
                    
            if device == torch.device("cuda"):
                    input_tensor = input_tensor.cuda()
                    target_tensor = target_tensor.cuda()

            loss = train_batch(input_tensor, target_tensor, encoder, decoder, 
                    encoder_optimizer, decoder_optimizer, criterion, is_training=False)

        total_loss += loss.item()
        
        batch_bar.set_postfix(test_loss="{:.04f}".format(float(total_loss / (i + 1))))
        batch_bar.update()

    return total_loss / len(dataloader)

In [48]:
gc.collect()
test_epoch(test_loader, encoder, decoder, criterion)

Test: 100%|██████████| 230/230 [01:07<00:00,  3.39it/s, test_loss=13.4483]


13.448340113266655

In [53]:
import matplotlib.pyplot as plt
plt.switch_backend('agg')
import matplotlib.ticker as ticker

def showPlot(points):
    plt.figure()
    fig, ax = plt.subplots()
    # this locator puts ticks at regular intervals
    loc = ticker.MultipleLocator(base=0.2)
    ax.yaxis.set_major_locator(loc)
    plt.plot(points)
    
def asMinutes(s):
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)

def timeSince(since, percent):
    now = time.time()
    s = now - since
    es = s / (percent)
    rs = es - s
    return '%s (- %s)' % (asMinutes(s), asMinutes(rs))

In [54]:
def train(train_dataloader, test_dataloader, encoder, decoder, n_epochs,
          encoder_optimizer, decoder_optimizer, encoder_scheduler, decoder_scheduler,
          criterion):
    
    start = time.time()
    train_losses = []
    test_losses = []


    for epoch in range(1, n_epochs + 1):
        print(f"Epoch {epoch} / {n_epochs}")
        
        loss = train_epoch(train_dataloader, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion)
        test_loss = test_epoch(test_dataloader, encoder, decoder, criterion)
        
        train_losses.append(loss)
        test_losses.append(test_loss)
        

        encoder_scheduler.step()
        decoder_scheduler.step()

        if epoch % 5 == 0:
            print(f"encoder lr = {encoder_scheduler.get_last_lr()}, decoder lr = {decoder_scheduler.get_last_lr()}")
            print('%s (%d %d%%)' % (timeSince(start, epoch / n_epochs),
                                        epoch, epoch / n_epochs * 100))

    showPlot(train_losses)
    showPlot(test_losses)
    
    return train_losses, test_losses

In [55]:
device

device(type='cuda')

In [56]:
batch_size = 16

train_loader = get_dataloader(batch_size, en_lang, fr_lang, df_train)
test_loader = get_dataloader(batch_size, en_lang, fr_lang, df_test)




100%|██████████| 11024/11024 [00:00<00:00, 18840.98it/s]
100%|██████████| 3675/3675 [00:00<00:00, 15214.81it/s]


In [57]:
next(iter(train_loader))[0][0]

tensor([20007,  6394, 13133, 16539,  2625,     2,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0], device='cuda:0')

In [58]:
hidden_size = 128
learning_rate = 0.01

gc.collect()
torch.cuda.empty_cache()

encoder = EncoderRNN(en_lang.n_words, hidden_size).to(device)
decoder = DecoderRNN(hidden_size, fr_lang.n_words).to(device)

encoder_optimizer = optim.AdamW(encoder.parameters(), lr=learning_rate)
decoder_optimizer = optim.AdamW(decoder.parameters(), lr=learning_rate)

encoder_scheduler = optim.lr_scheduler.StepLR(encoder_optimizer, step_size=1, gamma=0.95)
decoder_scheduler = optim.lr_scheduler.StepLR(decoder_optimizer, step_size=1, gamma=0.95)

criterion = nn.CrossEntropyLoss()




In [59]:
epochs = 5

train_losses, test_losses = train(train_loader, test_loader, encoder, decoder, epochs,
                                 encoder_optimizer, decoder_optimizer,
                                 encoder_scheduler, decoder_scheduler,
                                 criterion)

Epoch 1 / 5


Train: 100%|██████████| 689/689 [08:23<00:00,  1.37it/s, train_loss=6.4966]
Test: 100%|██████████| 230/230 [01:07<00:00,  3.39it/s, test_loss=10.2519]


Epoch 2 / 5


Train: 100%|██████████| 689/689 [08:25<00:00,  1.36it/s, train_loss=5.3924]
Test: 100%|██████████| 230/230 [01:02<00:00,  3.66it/s, test_loss=8.8360]


Epoch 3 / 5


Train: 100%|██████████| 689/689 [08:25<00:00,  1.36it/s, train_loss=4.6834]
Test: 100%|██████████| 230/230 [01:00<00:00,  3.81it/s, test_loss=9.2087]


Epoch 4 / 5


Train: 100%|██████████| 689/689 [08:23<00:00,  1.37it/s, train_loss=4.1278]
Test: 100%|██████████| 230/230 [01:03<00:00,  3.63it/s, test_loss=11.1408]


Epoch 5 / 5


Train: 100%|██████████| 689/689 [08:22<00:00,  1.37it/s, train_loss=3.7004]
Test: 100%|██████████| 230/230 [01:05<00:00,  3.53it/s, test_loss=11.9405]

encoder lr = [0.007737809374999999], decoder lr = [0.007737809374999999]
47m 19s (- 0m 0s) (5 100%)





In [60]:
test_losses

[10.25186823554661,
 8.835960566479226,
 9.20870773066645,
 11.140752732235452,
 11.940462767559548]

In [61]:
test_loss = sum(test_losses) / len(test_losses)

torch.save({
            'epoch': epochs,
            'encoder_state_dict': encoder.state_dict(),
            'decoder_state_dict': decoder.state_dict(),
            'encoder_optimizer_state_dict': encoder_optimizer.state_dict(),
            'decoder_optimizer_state_dict': decoder_optimizer.state_dict(),
            'criterion': criterion
            }, f'./checkpoints/checkpoint_epoch{epochs}_testloss{test_loss:.4f}.pth')

In [None]:
import datetime


x = datetime.datetime.now()
print(f"Finished at {x}")

## Evaluate the model

In [None]:
### Optionally load trained model
encoder_trained = EncoderRNN(en_lang.n_words, hidden_size).to(device)
decoder_trained = DecoderRNN(hidden_size, fr_lang.n_words).to(device)

checkpoint = torch.load('./checkpoints/checkpoint_epoch50_testloss2.9368')

encoder_trained.load_state_dict(checkpoint['encoder_state_dict'])
decoder_trained.load_state_dict(checkpoint['decoder_state_dict'])


In [None]:
def evaluate_old(encoder, decoder, sentence, input_lang, output_lang):
    with torch.no_grad():
        input_tensor = tensorFromSentence(input_lang, sentence)
        print(input_tensor)
        
        # encoder_hidden = encoder.initHidden(input_tensor.shape[0])
        #encoder_hidden = encoder.initHidden(1)

        encoder_outputs, encoder_hidden = encoder(input_tensor, encoder_hidden)
        decoder_outputs, decoder_hidden, _ = decoder(encoder_outputs, encoder_hidden)

        _, topi = decoder_outputs.topk(1)
        decoded_ids = topi.squeeze()

        decoded_words = []
        for idx in decoded_ids:
            if idx.item() == EOS_token:
                decoded_words.append('<EOS>')
                break
            decoded_words.append(output_lang.index2word[idx.item()])
    return decoded_words

In [62]:
def evaluate(encoder, decoder, input_tensor, input_lang, output_lang):

    # Required for tensor matching.
    # Remove to see the results for educational purposes.
    max_length=MAX_LENGTH
    
    with torch.no_grad():

        # Initialize the encoder hidden.
        #input_tensor = tensorFromSentence(input_lang, sentence)
        input_length = input_tensor.size(0)
        encoder_hidden = encoder.initHidden(1)

        for ei in range(input_length):
            encoder_output, encoder_hidden = encoder(
                input_tensor[ei], encoder_hidden)

        # only return the hidden and cell states for the last layer and pass it to the decoder
        hn, cn = encoder_hidden
        encoder_hn_last_layer = hn[-1].view(1,1,-1)
        encoder_cn_last_layer = cn[-1].view(1,1,-1)
        encoder_hidden_last = [encoder_hn_last_layer, encoder_cn_last_layer]

        decoder_input = torch.tensor([SOS_token], device=device)  # SOS
        #encoder_hidden_last = [bridge(item) for item in encoder_hidden_last]
        decoder_hidden = encoder_hidden_last

        decoded_words = []
        # decoder_attentions = torch.zeros(max_length, max_length)

        for di in range(max_length):
            decoder_output, decoder_hidden = decoder(
                decoder_input, decoder_hidden)
            # decoder_attentions[di] = decoder_attention.data
            topv, topi = decoder_output.data.topk(1)
            if topi.item() == EOS_token:
                decoded_words.append('<EOS>')
                break
            else:
                decoded_words.append(output_lang.index2word[topi.item()])

            decoder_input = topi.squeeze().detach()

        # return decoded_words, decoder_attentions[:di + 1]
        return decoded_words

In [None]:
def evaluateRandomly_old(encoder, decoder, n=10):
    for i in range(n):
        print(f"Testing {i+1} / {n}")
        pair = df_test.sample(1).values.tolist()[0]
        print('>', pair[0])
        print('=', pair[1])
        output_words = evaluate(encoder, decoder, pair[0], en_lang, fr_lang)
        output_sentence = ' '.join(output_words)
        print('<', output_sentence)
        print('')

In [63]:
def tensorFromSentence(lang, sentence):
    indexes = indexesFromSentence(lang, sentence)
    indexes.append(EOS_token)
    return torch.tensor(indexes, dtype=torch.long, device=device).view(1, -1)

def SentenceFromTensor(lang, tensor):
    indexes = tensor.squeeze()
    indexes = indexes.tolist()
    return [lang.index2word[index] for index in indexes]

def reformat_tensor_mask(tensor):
    tensor = tensor.squeeze(dim=1)
    tensor = tensor.transpose(1,0)
    mask = tensor != 0
    return tensor, mask

In [66]:
def evaluateRandomly(encoder, decoder, input_lang, output_lang, n=batch_size):
    example = next(iter(test_loader))
    for i in range(n):
        #pair = testset[i]['sentence']
        #pair = [example[0][i], example[1][i]]
        #input_tensor, mask_input = reformat_tensor_mask(pair[:,0,:].view(1,1,-1))
        pair = df_test.sample(1).values.tolist()[0]
        input_tensor = example[0][i]
        input_tensor = tensorFromSentence(input_lang, pair[0])
        input_tensor = input_tensor[input_tensor != 0]
        
        #output_tensor, mask_output = reformat_tensor_mask(pair[:,1,:].view(1,1,-1))
        output_tensor = example[1][i]
        output_tensor = tensorFromSentence(output_lang, pair[1])
        output_tensor = output_tensor[output_tensor != 0]
        
        if device == torch.device("cuda"):
            input_tensor = input_tensor.cuda()
            output_tensor = output_tensor.cuda()

        input_sentence = ' '.join(SentenceFromTensor(input_lang, input_tensor))
        output_sentence = ' '.join(SentenceFromTensor(output_lang, output_tensor))
        print(f"Test {i+1}/{n}")
        print('> ', input_sentence)
        print('= ', output_sentence)
        output_words = evaluate(encoder, decoder, input_tensor, en_lang, fr_lang)
        output_sentence = ' '.join(output_words)
        print('< ', output_sentence)
        print('')

In [67]:
encoder.eval()
decoder.eval()
evaluateRandomly(encoder, decoder, en_lang, fr_lang)

Test 1/16
>  ms courchesne is currently director general of the montreal symphony orchestra and from to was deputy minister of the department of culture and communications with the government of quebec EOS
=  mme courchesne est directrice generale de l orchestre symphonique de montreal et de a a ete sous ministre au ministere de la culture et des communications du gouvernement du quebec EOS
<  les activites de recherche et de la facon dont on a la fois les activites de la loi sur les activites de la commission du travail et de la loi sur les langues officielles <EOS>

Test 2/16
>  the end result is that selection according to merit is in doubt in the outcome of this competition EOS
=  le resultat final est que la selection au merite est mise en doute dans le resultat de ce concours EOS
<  le present rapport visera a une echelle exacte de la loi sur les jeunes qui sont souvent <EOS>

Test 3/16
>  in his opinion a process that should take weeks can take months EOS
=  l acia s attend a ce