In [20]:
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt

In [21]:
from __future__ import unicode_literals, print_function, division
from io import open
import unicodedata
import string
import re
import random

import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

Loading data files
==================

In [22]:
SOS_token = 0
EOS_token = 1


class Lang:
    def __init__(self, name):
        self.name = name
        self.word2index = {}
        self.word2count = {}
        self.index2word = {0: "SOS", 1: "EOS"}
        self.n_words = 2  # Count SOS and EOS

    def addSentence(self, sentence):
        for word in sentence.split(' '):
            self.addWord(word)

    def addWord(self, word):
        if word not in self.word2index:
            self.word2index[word] = self.n_words
            self.word2count[word] = 1
            self.index2word[self.n_words] = word
            self.n_words += 1
        else:
            self.word2count[word] += 1

In [23]:
# Turn a Unicode string to plain ASCII, thanks to
# https://stackoverflow.com/a/518232/2809427
def unicodeToAscii(s):
    return ''.join(
        c for c in unicodedata.normalize('NFD', s)
        if unicodedata.category(c) != 'Mn'
    )

# Lowercase, trim, and remove non-letter characters


def normalizeString(s):
    s = unicodeToAscii(s.lower().strip())
    s = re.sub(r"([.!?])", r" \1", s)
    s = re.sub(r"[^a-zA-Z.!?]+", r" ", s)
    return s

In [24]:
def readLangs(lang1, lang2, reverse=False):
    print("Reading lines...")

    # Read the file and split into lines
    file1 = open('training/news-commentary-v9.%s-%s.%s' % (lang1, lang2, lang1), mode = 'rb')
    file2 = open('training/news-commentary-v9.%s-%s.%s' % (lang1, lang2, lang2), mode = 'rb')

    # Split every line into pairs and normalize
    
    line1 = file1.read().split(b'\n')
    line2 = file2.read().split(b'\n')
    #pairs = [[normalizeString(s) for s in l.split('\t')] for l in lines]
    pairs = []
    
    #print(len(line1))
    #print(len(line2))
    for i in range(len(line1)):
        pairs.append([line1[i].decode('UTF8'),line2[i].decode('UTF8')])
    
    # Reverse pairs, make Lang instance    
    
    if reverse:
        #pairs = [list(reversed(p)) for p in pairs]
        input_lang = Lang(lang2)
        output_lang = Lang(lang1)
    else:
        input_lang = Lang(lang1)
        output_lang = Lang(lang2)

    return input_lang, output_lang, pairs

In [25]:
_,_,a=readLangs('fr','en')

Reading lines...


In [26]:
a[30000]

['Tandis que le commerce bilatéral a augmenté, et que les relations diplomatiques se sont consolidées au cours des dernières années, une certaine ignorance persiste entre les deux régions et dans certains cas, les tensions s’intensifient.',
 'While bilateral trade has increased and diplomatic relations have strengthened in the past few years, a lack of knowledge persists between the two regions, and in some cases tensions are growing.']

In [27]:
MAX_LENGTH = 40


def filterPair(p):
    return len(p[0].split(' ')) < MAX_LENGTH and \
        len(p[1].split(' ')) < MAX_LENGTH 

def filterPairs(pairs):
    return [pair for pair in pairs if filterPair(pair)]

In [28]:
def prepareData(lang1,lang2, part):
    input_lang, output_lang, pairs = readLangs(lang1,lang2)
    print("Read %s sentence pairs" % len(pairs))
    pairs = filterPairs(pairs)
    #print("Trimmed to %s sentence pairs" % len(pairs))
    
    # collect test pairs
    num_test = int(len(pairs)*0.2)
    print("Number of test pairs:", num_test)
    random.shuffle(pairs)
    test_pairs = pairs[num_test*(part-1):num_test*part]
    set_test_eng = set([sent_eng for sent_eng, _ in test_pairs])
    
    test_pair_dict = {}
    for sent_eng, sent_fre in pairs:
        if sent_eng not in set_test_eng:
            continue 
        elif sent_eng not in test_pair_dict:
            test_pair_dict[sent_eng] = set([sent_fre])
        else:
            test_pair_dict[sent_eng].add(sent_fre)
    test_pairs = [(sent_eng, list(test_pair_dict[sent_eng])) for sent_eng in test_pair_dict]
    print("Number of test cases (sent + list):", len(test_pairs))
    
    # collect train pairs
    train_pairs = [(sent_eng, sent_fre) for sent_eng, sent_fre in (pairs[0:num_test*(part-1)]+pairs[num_test*part:]) if sent_eng not in set_test_eng]
    print("Number of train pairs:", len(train_pairs))
    
    print("Counting words...")
    for pair in train_pairs:
        input_lang.addSentence(pair[0])
        output_lang.addSentence(pair[1])

    return input_lang, output_lang, train_pairs, test_pairs



In [29]:
input_lang_fr_en1, output_lang_fr_en1,train_pairs_fr_en1, test_pairs_fr_en1 = prepareData('fr','en',1)
input_lang_fr_en2, output_lang_fr_en2,train_pairs_fr_en2, test_pairs_fr_en2 = prepareData('fr','en',2)
input_lang_fr_en3, output_lang_fr_en3,train_pairs_fr_en3, test_pairs_fr_en3 = prepareData('fr','en',3)
input_lang_fr_en4, output_lang_fr_en4,train_pairs_fr_en4, test_pairs_fr_en4 = prepareData('fr','en',4)
input_lang_fr_en5, output_lang_fr_en5,train_pairs_fr_en5, test_pairs_fr_en5 = prepareData('fr','en',5)

Reading lines...
Read 183252 sentence pairs
Number of test pairs: 31081
Number of test cases (sent + list): 31024
Number of train pairs: 124001
Counting words...
Reading lines...
Read 183252 sentence pairs
Number of test pairs: 31081
Number of test cases (sent + list): 31020
Number of train pairs: 124008
Counting words...
Reading lines...
Read 183252 sentence pairs
Number of test pairs: 31081
Number of test cases (sent + list): 31028
Number of train pairs: 123992
Counting words...
Reading lines...
Read 183252 sentence pairs
Number of test pairs: 31081
Number of test cases (sent + list): 31003
Number of train pairs: 124019
Counting words...
Reading lines...
Read 183252 sentence pairs
Number of test pairs: 31081
Number of test cases (sent + list): 31024
Number of train pairs: 124010
Counting words...


In [30]:
print(test_pairs_fr_en1[20000])

('Ils reviendront en Russie (attirés aussi par des salaires élevés) pour se rendre célèbre et faire la fierté de leur mère patrie.', ['They will come back to Russia (also drawn by high salaries) to make themselves famous and their motherland proud.'])


In [31]:

input_lang_ru_en1, output_lang_ru_en1,train_pairs_ru_en1, test_pairs_ru_en1 = prepareData('ru','en',1)
#input_lang_ru_en2, output_lang_ru_en2,train_pairs_ru_en2, test_pairs_ru_en2 = prepareData('ru','en',2)
#input_lang_ru_en3, output_lang_ru_en3,train_pairs_ru_en3, test_pairs_ru_en3 = prepareData('ru','en',3)
#input_lang_ru_en4, output_lang_ru_en4,train_pairs_ru_en4, test_pairs_ru_en4 = prepareData('ru','en',4)
#input_lang_ru_en5, output_lang_ru_en5,train_pairs_ru_en5, test_pairs_ru_en5 = prepareData('ru','en',5)


Reading lines...
Read 165603 sentence pairs
Number of test pairs: 27805
Number of test cases (sent + list): 27773
Number of train pairs: 110985
Counting words...


In [32]:
print(random.choice(train_pairs_ru_en1))

('Но формула роста, на которую долгое время опирался экономический успех Южной Кореи – разновидность государственного капитализма, основанного на экспортно-ориентированном производстве – больше не работает для многих корейцев.', 'But the growth formula that long underpinned South Korea’s success – a form of state-guided capitalism that focuses on export-led manufacturing – is no longer working for many South Koreans.')


In [33]:

input_lang_cs_en1, output_lang_cs_en1,train_pairs_cs_en1, test_pairs_cs_en1 = prepareData('cs','en',1)
#input_lang_cs_en2, output_lang_cs_en2,train_pairs_cs_en2, test_pairs_cs_en2 = prepareData('cs','en',2)
#input_lang_cs_en3, output_lang_cs_en3,train_pairs_cs_en3, test_pairs_cs_en3 = prepareData('cs','en',3)
#input_lang_cs_en4, output_lang_cs_en4,train_pairs_cs_en4, test_pairs_cs_en4 = prepareData('cs','en',4)
#input_lang_cs_en5, output_lang_cs_en5,train_pairs_cs_en5, test_pairs_cs_en5 = prepareData('cs','en',5)
print(random.choice(train_pairs_cs_en1))


Reading lines...
Read 146550 sentence pairs
Number of test pairs: 27269
Number of test cases (sent + list): 27217
Number of train pairs: 108767
Counting words...
('V tomto směru je výkon Evropy systematicky nižší než výkon USA: v průměru o 30 %, podle jednotlivých zemí pak například v Británii o 43 % a v Německu o 56 %.', "Here Europe's performance is consistently below that of America: 30% on average, with the U.K. 43% and Germany 56% below the US.")


In [34]:

input_lang_de_en1, output_lang_de_en1,train_pairs_de_en1, test_pairs_de_en1 = prepareData('de','en',1)
#input_lang_de_en2, output_lang_de_en2,train_pairs_de_en2, test_pairs_de_en2 = prepareData('de','en',2)
#input_lang_de_en3, output_lang_de_en3,train_pairs_de_en3, test_pairs_de_en3 = prepareData('de','en',3)
#input_lang_de_en4, output_lang_de_en4,train_pairs_de_en4, test_pairs_de_en4 = prepareData('de','en',4)
#input_lang_de_en5, output_lang_de_en5,train_pairs_de_en5, test_pairs_de_en5 = prepareData('de','en',5)
print(random.choice(train_pairs_de_en1))


Reading lines...
Read 201289 sentence pairs
Number of test pairs: 36365
Number of test cases (sent + list): 36258
Number of train pairs: 145033
Counting words...
('Mehr noch: Die meisten entdecken, dass, wenn sich die Versammlung tatsächlich einmal mit konkreten Vorschlägen befasst – was selten vorkommt –, diese Vorschläge nicht nach ihrem Geschmack sind.', 'Moreover, most find that when the Assembly gets to deal with specific proposals, which is rare, the proposals are disagreeable.')


In [35]:
#print(random.choice(train_pairs_de_en2))

The Seq2Seq Model
=================

The Encoder
-----------

In [36]:
class EncoderRNN(nn.Module):
    def __init__(self, input_size, hidden_size):
        super(EncoderRNN, self).__init__()
        self.hidden_size = hidden_size

        self.embedding = nn.Embedding(input_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size)

    def forward(self, input, hidden):
        embedded = self.embedding(input).view(1, 1, -1)
        output = embedded
        output, hidden = self.gru(output, hidden)
        return output, hidden

    def initHidden(self):
        return torch.zeros(1, 1, self.hidden_size, device=device)

The Decoder
-----------

In [37]:
class DecoderRNN(nn.Module):
    def __init__(self, hidden_size, output_size):
        super(DecoderRNN, self).__init__()
        self.hidden_size = hidden_size

        self.embedding = nn.Embedding(output_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size)
        self.out = nn.Linear(hidden_size, output_size)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, input, hidden):
        output = self.embedding(input).view(1, 1, -1)
        output = F.relu(output)
        output, hidden = self.gru(output, hidden)
        output = self.softmax(self.out(output[0]))
        return output, hidden

    def initHidden(self):
        return torch.zeros(1, 1, self.hidden_size, device=device)

## Attention in Turorial

In [38]:
class AttnDecoderRNN(nn.Module):
    def __init__(self, hidden_size, output_size, dropout_p=0.1, max_length=MAX_LENGTH):
        super(AttnDecoderRNN, self).__init__()
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.dropout_p = dropout_p
        self.max_length = max_length

        self.embedding = nn.Embedding(self.output_size, self.hidden_size)
        self.attn = nn.Linear(self.hidden_size * 2, self.max_length)
        self.attn_combine = nn.Linear(self.hidden_size * 2, self.hidden_size)
        self.dropout = nn.Dropout(self.dropout_p)
        self.gru = nn.GRU(self.hidden_size, self.hidden_size)
        self.out = nn.Linear(self.hidden_size, self.output_size)

    def forward(self, input, hidden, encoder_outputs):
        embedded = self.embedding(input).view(1, 1, -1)
        embedded = self.dropout(embedded)

        attn_weights = F.softmax(
            self.attn(torch.cat((embedded[0], hidden[0]), 1)), dim=1)
        attn_applied = torch.bmm(attn_weights.unsqueeze(0),
                                 encoder_outputs.unsqueeze(0))

        output = torch.cat((embedded[0], attn_applied[0]), 1)
        output = self.attn_combine(output).unsqueeze(0)

        output = F.relu(output)
        output, hidden = self.gru(output, hidden)

        output = F.log_softmax(self.out(output[0]), dim=1)
        return output, hidden, attn_weights

    def initHidden(self):
        return torch.zeros(1, 1, self.hidden_size, device=device)

## Attention in the lecture

In [39]:
class AttnDecoderRNN_lecture(nn.Module):
    def __init__(self, hidden_size, output_size, dropout_p=0.1, max_length=MAX_LENGTH):
        super(AttnDecoderRNN_lecture, self).__init__()
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.dropout_p = dropout_p
        self.max_length = max_length

        self.embedding = nn.Embedding(self.output_size, self.hidden_size)
        self.attn = nn.Linear(self.hidden_size * 2, self.max_length)
        self.attn_combine = nn.Linear(self.hidden_size * 2, self.hidden_size)
        self.dropout = nn.Dropout(self.dropout_p)
        self.gru = nn.GRU(self.hidden_size, self.hidden_size)
        self.out = nn.Linear(self.hidden_size, self.output_size)

    def forward(self, input, hidden, encoder_outputs):
        embedded = self.embedding(input).view(1, 1, -1)
        embedded = self.dropout(embedded)

        output = embedded
        output, hidden2 = self.gru(output, hidden)
        
        attn_weights = F.softmax(self.attn(torch.cat((output[0], hidden[0]),1)), dim=1)

        attn_applied = torch.bmm(attn_weights.unsqueeze(0),encoder_outputs.unsqueeze(0))
        
        output = torch.cat((output[0], attn_applied[0]), 1)
        output = self.attn_combine(output).unsqueeze(0)
        
        output = F.relu(output)
        output, hidden = self.gru(output, hidden)

        output = F.log_softmax(self.out(output[0]), dim=1)
        return output, hidden, attn_weights

    def initHidden(self):
        return torch.zeros(1, 1, self.hidden_size, device=device)

## Multiplicative attention

In [40]:
class AttnDecoderRNN_mul(nn.Module):
    def __init__(self, hidden_size, output_size, dropout_p=0.1, max_length=MAX_LENGTH):
        super(AttnDecoderRNN_mul, self).__init__()
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.dropout_p = dropout_p
        self.max_length = max_length
        
        self.mul = nn.Linear(self.hidden_size,self.hidden_size)

        self.embedding = nn.Embedding(self.output_size, self.hidden_size)
        self.attn = nn.Linear(self.hidden_size * 2, self.max_length)
        self.attn_combine = nn.Linear(self.hidden_size * 2, self.hidden_size)
        self.dropout = nn.Dropout(self.dropout_p)
        self.gru = nn.GRU(self.hidden_size, self.hidden_size)
        self.out = nn.Linear(self.hidden_size, self.output_size)

    def forward(self, input, hidden, encoder_outputs):
        embedded = self.embedding(input).view(1, 1, -1)
        embedded = self.dropout(embedded)
        
        output = embedded
        output, hidden2 = self.gru(embedded, hidden)
        
        attn_weights = F.softmax(self.attn(torch.cat((self.mul(output[0]), hidden[0]), 1)), dim=1)

        attn_applied = torch.bmm(attn_weights.unsqueeze(0),encoder_outputs.unsqueeze(0))
        
        output = torch.cat((output[0], attn_applied[0]), 1)
        output = self.attn_combine(output).unsqueeze(0)
        
        output = F.relu(output)
        output, hidden = self.gru(output, hidden)

        output = F.log_softmax(self.out(output[0]), dim=1)
        return output, hidden, attn_weights

    def initHidden(self):
        return torch.zeros(1, 1, self.hidden_size, device=device)

## Additive attention

In [41]:
class AttnDecoderRNN_add(nn.Module):
    def __init__(self, hidden_size, output_size, dropout_p=0.1, max_length=MAX_LENGTH):
        super(AttnDecoderRNN_add, self).__init__()
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.dropout_p = dropout_p
        self.max_length = max_length
        
        self.w1 = nn.Linear(self.hidden_size,self.hidden_size)
        self.w2 = nn.Linear(self.hidden_size,self.hidden_size)
        self.w3 = nn.Linear(self.hidden_size,self.max_length)

        self.embedding = nn.Embedding(self.output_size, self.hidden_size)
        self.attn = nn.Linear(self.hidden_size * 2, self.max_length)
        self.attn_combine = nn.Linear(self.hidden_size * 2, self.hidden_size)
        self.dropout = nn.Dropout(self.dropout_p)
        self.gru = nn.GRU(self.hidden_size, self.hidden_size)
        self.out = nn.Linear(self.hidden_size, self.output_size)

    def forward(self, input, hidden, encoder_outputs):
        embedded = self.embedding(input).view(1, 1, -1)
        embedded = self.dropout(embedded)
        
        output = embedded
        output, hidden2 = self.gru(embedded, hidden)
        
        attn_weights = F.softmax(self.w3(torch.tanh(self.w1(output[0])+ self.w2(hidden[0]))), dim=1)

        attn_applied = torch.bmm(attn_weights.unsqueeze(0),encoder_outputs.unsqueeze(0))
        
        output = torch.cat((output[0], attn_applied[0]), 1)
        output = self.attn_combine(output).unsqueeze(0)
        
        output = F.relu(output)
        output, hidden = self.gru(output, hidden)

        output = F.log_softmax(self.out(output[0]), dim=1)
        return output, hidden, attn_weights

    def initHidden(self):
        return torch.zeros(1, 1, self.hidden_size, device=device)



Training
========

Preparing Training Data
-----------------------

In [42]:
def indexesFromSentence(lang, sentence):
    return [lang.word2index[word] for word in sentence.split(' ') if word in lang.word2index]


def tensorFromSentence(lang, sentence):
    indexes = indexesFromSentence(lang, sentence)
    indexes.append(EOS_token)
    return torch.tensor(indexes, dtype=torch.long, device=device).view(-1, 1)


def tensorsFromPair(input_lang,output_lang,pair):
    input_tensor = tensorFromSentence(input_lang, pair[0])
    target_tensor = tensorFromSentence(output_lang, pair[1])
    return (input_tensor, target_tensor)

Training the Model
------------------

In [43]:
teacher_forcing_ratio = 0.5


def train(input_tensor, target_tensor, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion, max_length=MAX_LENGTH):
    encoder_hidden = encoder.initHidden()

    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    input_length = input_tensor.size(0)
    target_length = target_tensor.size(0)

    encoder_outputs = torch.zeros(max_length, encoder.hidden_size, device=device)

    loss = 0

    for ei in range(input_length):
        encoder_output, encoder_hidden = encoder(
            input_tensor[ei], encoder_hidden)
        encoder_outputs[ei] = encoder_output[0, 0]

    decoder_input = torch.tensor([[SOS_token]], device=device)

    decoder_hidden = encoder_hidden

    use_teacher_forcing = True if random.random() < teacher_forcing_ratio else False

    if use_teacher_forcing:
        # Teacher forcing: Feed the target as the next input
        for di in range(target_length):
            decoder_output, decoder_hidden, decoder_attention = decoder(
                decoder_input, decoder_hidden, encoder_outputs)
            loss += criterion(decoder_output, target_tensor[di])
            decoder_input = target_tensor[di]  # Teacher forcing

    else:
        # Without teacher forcing: use its own predictions as the next input
        for di in range(target_length):
            decoder_output, decoder_hidden, decoder_attention = decoder(
                decoder_input, decoder_hidden, encoder_outputs)
            topv, topi = decoder_output.topk(1)
            decoder_input = topi.squeeze().detach()  # detach from history as input

            loss += criterion(decoder_output, target_tensor[di])
            if decoder_input.item() == EOS_token:
                break

    loss.backward()

    encoder_optimizer.step()
    decoder_optimizer.step()

    return loss.item() / target_length

In [44]:
import time
import math


def asMinutes(s):
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)


def timeSince(since, percent):
    now = time.time()
    s = now - since
    es = s / (percent)
    rs = es - s
    return '%s (- %s)' % (asMinutes(s), asMinutes(rs))

In [53]:
def trainIters(input_lang,output_lang,train_pairs, encoder, decoder, n_iters, print_every=1000, plot_every=100, learning_rate=0.02):
    start = time.time()
    plot_losses = []
    print_loss_total = 0  # Reset every print_every
    plot_loss_total = 0  # Reset every plot_every

    encoder_optimizer = optim.SGD(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.SGD(decoder.parameters(), lr=learning_rate)
    training_pairs = [tensorsFromPair(input_lang,output_lang,random.choice(train_pairs))
                      for i in range(n_iters)]
    criterion = nn.NLLLoss()

    for iter in range(1, n_iters + 1):
        training_pair = training_pairs[iter - 1]
        input_tensor = training_pair[0]
        target_tensor = training_pair[1]

        loss = train(input_tensor, target_tensor, encoder,
                     decoder, encoder_optimizer, decoder_optimizer, criterion)
        print_loss_total += loss
        plot_loss_total += loss

        if iter % print_every == 0:
            print_loss_avg = print_loss_total / print_every
            print_loss_total = 0
            print('%s (%d %d%%) %.4f' % (timeSince(start, iter / n_iters),
                                         iter, iter / n_iters * 100, print_loss_avg))

        if iter % plot_every == 0:
            plot_loss_avg = plot_loss_total / plot_every
            plot_losses.append(plot_loss_avg)
            plot_loss_total = 0

    #showPlot(plot_losses)

In [46]:
import matplotlib.pyplot as plt
plt.switch_backend('agg')
import matplotlib.ticker as ticker
import numpy as np


def showPlot(points):
    plt.figure()
    fig, ax = plt.subplots()
    # this locator puts ticks at regular intervals
    loc = ticker.MultipleLocator(base=0.2)
    ax.yaxis.set_major_locator(loc)
    plt.plot(points)

Evaluation
==========

In [47]:
def evaluate(input_lang,output_lang,encoder, decoder, sentence, max_length=MAX_LENGTH):
    with torch.no_grad():
        input_tensor = tensorFromSentence(input_lang, sentence)
        input_length = input_tensor.size()[0]
        encoder_hidden = encoder.initHidden()

        encoder_outputs = torch.zeros(max_length, encoder.hidden_size, device=device)

        for ei in range(input_length):
            encoder_output, encoder_hidden = encoder(input_tensor[ei],
                                                     encoder_hidden)
            encoder_outputs[ei] += encoder_output[0, 0]

        decoder_input = torch.tensor([[SOS_token]], device=device)  # SOS

        decoder_hidden = encoder_hidden

        decoded_words = []
        decoder_attentions = torch.zeros(max_length, max_length)

        for di in range(max_length):
            decoder_output, decoder_hidden, decoder_attention = decoder(
                decoder_input, decoder_hidden, encoder_outputs)
            decoder_attentions[di] = decoder_attention.data
            topv, topi = decoder_output.data.topk(1)
            if topi.item() == EOS_token:
                decoded_words.append('<EOS>')
                break
            else:
                decoded_words.append(output_lang.index2word[topi.item()])

            decoder_input = topi.squeeze().detach()

        return decoded_words, decoder_attentions[:di + 1]

In [48]:
class BeamSearchNode(object):
    def __init__(self, hiddenstate, previousNode, wordId, logProb, length):
        '''
        :param hiddenstate:
        :param previousNode:
        :param wordId:
        :param logProb:
        :param length:
        '''
        self.h = hiddenstate
        self.prevNode = previousNode
        self.wordid = wordId
        self.logp = logProb
        self.leng = length

    def eval(self, alpha=1.0):
        reward = 0
        # Add here a function for shaping a reward

        return self.logp / float(self.leng - 1 + 1e-6) + alpha * reward
    
    def eval_changenorm(self, alpha=1.0):
        reward = 0

        return self.logp / float(np.sqrt(self.leng) - 1 + 1e-6) + alpha * reward
    
    def __lt__(self, other):
        return self.eval() < other.eval()


from queue import PriorityQueue

def evaluate_beam_search(input_lang,output_lang,encoder, decoder, sentence, max_length=MAX_LENGTH, beam_size=2):
    with torch.no_grad():
        input_tensor = tensorFromSentence(input_lang, sentence)
        input_length = input_tensor.size()[0]
        encoder_hidden = encoder.initHidden()

        encoder_outputs = torch.zeros(max_length, encoder.hidden_size, device=device)

        for ei in range(input_length):
            encoder_output, encoder_hidden = encoder(input_tensor[ei],
                                                     encoder_hidden)
            encoder_outputs[ei] += encoder_output[0, 0]

        decoder_input = torch.tensor([[SOS_token]], device=device)  # SOS

        decoder_hidden = encoder_hidden

        # Number of sentence to generate
        endnodes = []
        number_required = 1
        
        # starting node -  hidden vector, previous node, word id, logp, length
        node = BeamSearchNode(decoder_hidden, None, decoder_input, 0, 1)
        nodes = PriorityQueue()

        # start the queue
        nodes.put((-node.eval(), node))
        qsize = 1
        
        # start beam search
        while True:
            # give up when decoding takes too long
            if qsize > 2000: break

            # fetch the best node
            score, n = nodes.get()
            decoder_input = n.wordid
            decoder_hidden = n.h

            if n.wordid.item() == EOS_token and n.prevNode != None:
                endnodes.append((score, n))
                # if we reached maximum # of sentences required
                if len(endnodes) >= number_required:
                    break
                else:
                    continue
            #elif n.leng > max_length:
            #    continue

            # decode for one step using decoder
            decoder_output, decoder_hidden, _ = decoder(decoder_input, decoder_hidden, encoder_outputs)

            # PUT HERE REAL BEAM SEARCH OF TOP
            log_prob, indexes = torch.topk(decoder_output, beam_size)
            nextnodes = []

            for new_k in range(beam_size):
                decoded_t = indexes[0][new_k].view(1, -1)
                log_p = log_prob[0][new_k].item()

                node = BeamSearchNode(decoder_hidden, n, decoded_t, n.logp + log_p, n.leng + 1)
                score = -node.eval()
                nextnodes.append((score, node))

            # put them into queue
            for i in range(len(nextnodes)):
                score, nn = nextnodes[i]
                nodes.put((score, nn))
                
            # increase qsize
            qsize += len(nextnodes) - 1
            
        # choose nbest paths, back trace them
        if len(endnodes) == 0:
            endnodes = [nodes.get() for _ in range(number_required)]

        _, n = endnodes[0]
        utterance = []
        utterance.append(output_lang.index2word[n.wordid.item()])
        
        # back trace
        while n.prevNode != None:
            n = n.prevNode
            utterance.append(output_lang.index2word[n.wordid.item()])

        utterance = utterance[::-1]
            
    return utterance, None

def evaluate_beam_search_changenorm(input_lang,output_lang,encoder, decoder, sentence, max_length=MAX_LENGTH, beam_size=2):
    with torch.no_grad():
        input_tensor = tensorFromSentence(input_lang, sentence)
        input_length = input_tensor.size()[0]
        encoder_hidden = encoder.initHidden()

        encoder_outputs = torch.zeros(max_length, encoder.hidden_size, device=device)

        for ei in range(input_length):
            encoder_output, encoder_hidden = encoder(input_tensor[ei],
                                                     encoder_hidden)
            encoder_outputs[ei] += encoder_output[0, 0]

        decoder_input = torch.tensor([[SOS_token]], device=device)  # SOS

        decoder_hidden = encoder_hidden

        # Number of sentence to generate
        endnodes = []
        number_required = 1
        
        # starting node -  hidden vector, previous node, word id, logp, length
        node = BeamSearchNode(decoder_hidden, None, decoder_input, 0, 1)
        nodes = PriorityQueue()

        # start the queue
        nodes.put((-node.eval_changenorm(), node))
        qsize = 1
        
        # start beam search
        while True:
            # give up when decoding takes too long
            if qsize > 2000: break

            # fetch the best node
            score, n = nodes.get()
            decoder_input = n.wordid
            decoder_hidden = n.h

            if n.wordid.item() == EOS_token and n.prevNode != None:
                endnodes.append((score, n))
                # if we reached maximum # of sentences required
                if len(endnodes) >= number_required:
                    break
                else:
                    continue
            #elif n.leng > max_length:
            #    continue

            # decode for one step using decoder
            decoder_output, decoder_hidden, _ = decoder(decoder_input, decoder_hidden, encoder_outputs)

            # PUT HERE REAL BEAM SEARCH OF TOP
            log_prob, indexes = torch.topk(decoder_output, beam_size)
            nextnodes = []

            for new_k in range(beam_size):
                decoded_t = indexes[0][new_k].view(1, -1)
                log_p = log_prob[0][new_k].item()

                node = BeamSearchNode(decoder_hidden, n, decoded_t, n.logp + log_p, n.leng + 1)
                score = -node.eval_changenorm()
                nextnodes.append((score, node))

            # put them into queue
            for i in range(len(nextnodes)):
                score, nn = nextnodes[i]
                nodes.put((score, nn))
                
            # increase qsize
            qsize += len(nextnodes) - 1
            
        # choose nbest paths, back trace them
        if len(endnodes) == 0:
            endnodes = [nodes.get() for _ in range(number_required)]

        _, n = endnodes[0]
        utterance = []
        utterance.append(output_lang.index2word[n.wordid.item()])
        
        # back trace
        while n.prevNode != None:
            n = n.prevNode
            utterance.append(output_lang.index2word[n.wordid.item()])

        utterance = utterance[::-1]
            
    return utterance, None

def evaluate_beam_search_changelength(input_lang,output_lang,encoder, decoder, sentence, max_length=MAX_LENGTH, beam_size=2):
    with torch.no_grad():
        input_tensor = tensorFromSentence(input_lang, sentence)
        input_length = input_tensor.size()[0]
        encoder_hidden = encoder.initHidden()

        encoder_outputs = torch.zeros(max_length, encoder.hidden_size, device=device)

        for ei in range(input_length):
            encoder_output, encoder_hidden = encoder(input_tensor[ei],
                                                     encoder_hidden)
            encoder_outputs[ei] += encoder_output[0, 0]

        decoder_input = torch.tensor([[SOS_token]], device=device)  # SOS

        decoder_hidden = encoder_hidden

        # Number of sentence to generate
        endnodes = []
        number_required = 1
        
        # starting node -  hidden vector, previous node, word id, logp, length
        node = BeamSearchNode(decoder_hidden, None, decoder_input, 0, 1)
        nodes = PriorityQueue()

        # start the queue
        nodes.put((-node.eval(), node))
        qsize = 1
        
        # start beam search
        while True:
            # give up when decoding takes too long
            if qsize > 3000: break

            # fetch the best node
            score, n = nodes.get()
            decoder_input = n.wordid
            decoder_hidden = n.h

            if n.wordid.item() == EOS_token and n.prevNode != None:
                endnodes.append((score, n))
                # if we reached maximum # of sentences required
                if len(endnodes) >= number_required:
                    break
                else:
                    continue
            #elif n.leng > max_length:
            #    continue

            # decode for one step using decoder
            decoder_output, decoder_hidden, _ = decoder(decoder_input, decoder_hidden, encoder_outputs)

            # PUT HERE REAL BEAM SEARCH OF TOP
            log_prob, indexes = torch.topk(decoder_output, beam_size)
            nextnodes = []

            for new_k in range(beam_size):
                decoded_t = indexes[0][new_k].view(1, -1)
                log_p = log_prob[0][new_k].item()

                node = BeamSearchNode(decoder_hidden, n, decoded_t, n.logp + log_p, n.leng + 1)
                score = -node.eval()
                nextnodes.append((score, node))

            # put them into queue
            for i in range(len(nextnodes)):
                score, nn = nextnodes[i]
                nodes.put((score, nn))
                
            # increase qsize
            qsize += len(nextnodes) - 1
            
        # choose nbest paths, back trace them
        if len(endnodes) == 0:
            endnodes = [nodes.get() for _ in range(number_required)]

        _, n = endnodes[0]
        utterance = []
        utterance.append(output_lang.index2word[n.wordid.item()])
        
        # back trace
        while n.prevNode != None:
            n = n.prevNode
            utterance.append(output_lang.index2word[n.wordid.item()])

        utterance = utterance[::-1]
            
    return utterance, None

In [49]:
def evaluateRandomly(input_lang, output_lang,encoder, decoder, n=10):
    for i in range(n):
        pair = random.choice(test_pairs)
        print('>', pair[0])
        print('=', pair[1])
        output_words, attentions = evaluate(input_lang, output_lang,encoder, decoder, pair[0])
        output_sentence = ' '.join(output_words)
        print('<', output_sentence)
        print('')

In [50]:
from nltk.translate.bleu_score import corpus_bleu

def evaluateBleu(input_lang, output_lang,encoder, decoder, pairs):
    references, candidates = [], []
    for sent_eng, sents_fre in pairs:
        sents_fre = [sent_fre.split(' ') for sent_fre in sents_fre]
        output_words, _ = evaluate(input_lang, output_lang,encoder, decoder, sent_eng)
        references.append(sents_fre)
        candidates.append(output_words)
    score = corpus_bleu(references, candidates)
    return score

In [51]:
from nltk.translate.bleu_score import corpus_bleu

def evaluateBleu_beam_search(input_lang, output_lang, encoder, decoder, beam_size, pairs):
    references, candidates = [], []
    for sent_eng, sents_fre in pairs:
        sents_fre = [sent_fre.split(' ') for sent_fre in sents_fre]
        output_words, _ = evaluate_beam_search(input_lang, output_lang,encoder, decoder, sent_eng, beam_size=beam_size)
        references.append(sents_fre)
        candidates.append(output_words)
    score = corpus_bleu(references, candidates)
    return score

In [None]:

def evaluateBleu_beam_search_changenorm(input_lang, output_lang, encoder, decoder, beam_size, pairs):
    references, candidates = [], []
    for sent_eng, sents_fre in pairs:
        sents_fre = [sent_fre.split(' ') for sent_fre in sents_fre]
        output_words, _ = evaluate_beam_search_changenorm(input_lang, output_lang,encoder, decoder, sent_eng, beam_size=beam_size)
        references.append(sents_fre)
        candidates.append(output_words)
    score = corpus_bleu(references, candidates)
    return score

In [None]:

def evaluateBleu_beam_search_changelength(input_lang, output_lang, encoder, decoder, beam_size, pairs):
    references, candidates = [], []
    for sent_eng, sents_fre in pairs:
        sents_fre = [sent_fre.split(' ') for sent_fre in sents_fre]
        output_words, _ = evaluate_beam_search_changelength(input_lang, output_lang,encoder, decoder, sent_eng, beam_size=beam_size)
        references.append(sents_fre)
        candidates.append(output_words)
    score = corpus_bleu(references, candidates)
    return score

Training and Evaluating
=======================

## french_english evaluation

In [54]:
hidden_size = 256
encoder_fr_en1 = EncoderRNN(input_lang_fr_en1.n_words, hidden_size).to(device)
attn_decoder_fr_en1 = AttnDecoderRNN(hidden_size, output_lang_fr_en1.n_words, dropout_p=0.1).to(device)

encoder_fr_en2 = EncoderRNN(input_lang_fr_en2.n_words, hidden_size).to(device)
attn_decoder_fr_en2 = AttnDecoderRNN(hidden_size, output_lang_fr_en2.n_words, dropout_p=0.1).to(device)

encoder_fr_en3 = EncoderRNN(input_lang_fr_en3.n_words, hidden_size).to(device)
attn_decoder_fr_en3 = AttnDecoderRNN(hidden_size, output_lang_fr_en3.n_words, dropout_p=0.1).to(device)

encoder_fr_en4 = EncoderRNN(input_lang_fr_en4.n_words, hidden_size).to(device)
attn_decoder_fr_en4 = AttnDecoderRNN(hidden_size, output_lang_fr_en4.n_words, dropout_p=0.1).to(device)

encoder_fr_en5 = EncoderRNN(input_lang_fr_en5.n_words, hidden_size).to(device)
attn_decoder_fr_en5 = AttnDecoderRNN(hidden_size, output_lang_fr_en5.n_words, dropout_p=0.1).to(device)

trainIters(input_lang_fr_en1,output_lang_fr_en1, train_pairs_fr_en1, encoder_fr_en1, attn_decoder_fr_en1, 15000, print_every=5000)

score11 = evaluateBleu(input_lang_fr_en1,output_lang_fr_en1,encoder_fr_en1, attn_decoder_fr_en1, test_pairs_fr_en1)
print(score11)
#score11 = evaluateBleu_beam_search(input_lang_fr_en1,output_lang_fr_en1,encoder_fr_en1, attn_decoder_fr_en1,10, test_pairs_fr_en1)
print(score11)

8m 53s (- 17m 47s) (5000 33%) 6.5990
18m 6s (- 9m 3s) (10000 66%) 6.3757
27m 14s (- 0m 0s) (15000 100%) 6.2638
0.001984157908722032
0.001984157908722032


In [55]:
trainIters(input_lang_fr_en2,output_lang_fr_en2,train_pairs_fr_en2, encoder_fr_en2, attn_decoder_fr_en2, 15000, print_every=5000)
trainIters(input_lang_fr_en3,output_lang_fr_en3,train_pairs_fr_en3, encoder_fr_en3, attn_decoder_fr_en3, 15000, print_every=5000)
trainIters(input_lang_fr_en4,output_lang_fr_en4,train_pairs_fr_en4, encoder_fr_en4, attn_decoder_fr_en4, 15000, print_every=5000)
trainIters(input_lang_fr_en5,output_lang_fr_en5,train_pairs_fr_en5, encoder_fr_en5, attn_decoder_fr_en5, 15000, print_every=5000)

#score11 = evaluateBleu(input_lang_fr_en1,output_lang_fr_en1,encoder_fr_en1, attn_decoder_fr_en1, test_pairs_fr_en1)
score12 = evaluateBleu(input_lang_fr_en2,output_lang_fr_en2,encoder_fr_en2, attn_decoder_fr_en2, test_pairs_fr_en2)
score13 = evaluateBleu(input_lang_fr_en3,output_lang_fr_en3,encoder_fr_en3, attn_decoder_fr_en3, test_pairs_fr_en3)
score14 = evaluateBleu(input_lang_fr_en4,output_lang_fr_en4,encoder_fr_en4, attn_decoder_fr_en4, test_pairs_fr_en4)
score15 = evaluateBleu(input_lang_fr_en5,output_lang_fr_en5,encoder_fr_en5, attn_decoder_fr_en5, test_pairs_fr_en5)

score_fr_en = score11+score12+score13+score14+score15
print('avg BLEU score(fr-en) avg'+ str(score_fr_en))

score12 = evaluateBleu_beam_search(input_lang_fr_en1,output_lang_fr_en1,encoder_fr_en1, attn_decoder_fr_en1,2, test_pairs_fr_en1)
score_fr_en_beam = score12
print('avg BLEU score with beam search(fr-en)'+ str(score_fr_en_beam))


8m 49s (- 17m 39s) (5000 33%) 6.6270
18m 11s (- 9m 5s) (10000 66%) 6.5067
27m 27s (- 0m 0s) (15000 100%) 6.3153
8m 55s (- 17m 51s) (5000 33%) 6.7682
17m 59s (- 8m 59s) (10000 66%) 6.4457
27m 11s (- 0m 0s) (15000 100%) 6.2879
9m 3s (- 18m 7s) (5000 33%) 6.6314
18m 10s (- 9m 5s) (10000 66%) 6.4738
27m 30s (- 0m 0s) (15000 100%) 6.3483
8m 52s (- 17m 44s) (5000 33%) 6.5984
18m 0s (- 9m 0s) (10000 66%) 6.4906
27m 16s (- 0m 0s) (15000 100%) 6.3664
avg BLEU score(fr-en) avg0.009865307610083805
avg BLEU score with beam search(fr-en)0.0021557481649198073


NameError: name 'evaluateBleu_beam_search_changenorm' is not defined

In [None]:
score12 = evaluateBleu_beam_search_changenorm(input_lang_fr_en1,output_lang_fr_en1,encoder_fr_en1, attn_decoder_fr_en1,2, test_pairs_fr_en1)
score_fr_en_beam = score12
print('avg BLEU score with beam search change norm(fr-en)'+ str(score_fr_en_beam))

score13 = evaluateBleu_beam_search_changelength(input_lang_fr_en1,output_lang_fr_en1,encoder_fr_en1, attn_decoder_fr_en1,2, test_pairs_fr_en1)
score_fr_en_beam = score13
print('avg BLEU score with beam search change length(fr-en)'+ str(score_fr_en_beam))

## Czech-English Evaluation

In [56]:
encoder_cs_en1 = EncoderRNN(input_lang_cs_en1.n_words, hidden_size).to(device)
attn_decoder_cs_en1 = AttnDecoderRNN(hidden_size, output_lang_cs_en1.n_words, dropout_p=0.1).to(device)

trainIters(input_lang_cs_en1,output_lang_cs_en1,train_pairs_cs_en1, encoder_cs_en1, attn_decoder_cs_en1, 15000, print_every=5000)

score14 = evaluateBleu(input_lang_cs_en1,output_lang_cs_en1,encoder_cs_en1, attn_decoder_cs_en1, test_pairs_cs_en1)
score_cs_en = score14
print('avg BLEU score(cs-en)'+ str(score_cs_en))

score15 = evaluateBleu_beam_search(input_lang_cs_en1,output_lang_cs_en1,encoder_cs_en1, attn_decoder_cs_en1,2, test_pairs_cs_en1)
score_cs_en_beam = score15
print('avg BLEU score with beam search(cs-en)'+ str(score_cs_en_beam))

9m 22s (- 18m 45s) (5000 33%) 6.5626
19m 11s (- 9m 35s) (10000 66%) 6.5008
28m 58s (- 0m 0s) (15000 100%) 6.3724
avg BLEU score(cs-en)0.0015344943690661673
avg BLEU score with beam search(cs-en)0.002016930280540274


## Russian-English Evaluation

In [57]:
encoder_ru_en1 = EncoderRNN(input_lang_ru_en1.n_words, hidden_size).to(device)
attn_decoder_ru_en1 = AttnDecoderRNN(hidden_size, output_lang_ru_en1.n_words, dropout_p=0.1).to(device)

trainIters(input_lang_ru_en1,output_lang_ru_en1,train_pairs_ru_en1, encoder_ru_en1, attn_decoder_ru_en1, 15000, print_every=5000)

score16 = evaluateBleu(input_lang_ru_en1,output_lang_ru_en1,encoder_ru_en1, attn_decoder_ru_en1, test_pairs_ru_en1)
score_ru_en = score16
print('avg BLEU score(ru-en)'+ str(score_ru_en))

score17 = evaluateBleu_beam_search(input_lang_ru_en1,output_lang_ru_en1,encoder_ru_en1, attn_decoder_ru_en1,2, test_pairs_ru_en1)
score_ru_en_beam = score17
print('avg BLEU score with beam search(ru-en)'+ str(score_ru_en_beam))

21m 29s (- 42m 58s) (5000 33%) 6.4440
43m 58s (- 21m 59s) (10000 66%) 6.4212
66m 34s (- 0m 0s) (15000 100%) 6.2962
avg BLEU score(ru-en)0.001300824132319994
avg BLEU score with beam search(ru-en)0.0017648643208576394


## German-English Evaluation

In [None]:
encoder_de_en1 = EncoderRNN(input_lang_de_en1.n_words, hidden_size).to(device)
attn_decoder_de_en1 = AttnDecoderRNN(hidden_size, output_lang_de_en1.n_words, dropout_p=0.1).to(device)

trainIters(input_lang_de_en1,output_lang_de_en1,train_pairs_de_en1, encoder_de_en1, attn_decoder_de_en1, 15000, print_every=5000)

score18 = evaluateBleu(input_lang_de_en1,output_lang_de_en1,encoder_de_en1, attn_decoder_de_en1, test_pairs_de_en1)
score_de_en = score18
print('avg BLEU score(de-en)'+ str(score_de_en))

score19 = evaluateBleu_beam_search(input_lang_de_en1,output_lang_de_en1,encoder_de_en1, attn_decoder_de_en1,2, test_pairs_de_en1)
score_de_en_beam = score19
print('avg BLEU score with beam search(de-en)'+ str(score_de_en_beam))

# Different attentions

## attention in tutorial

In [62]:
torch.cuda.empty_cache()

In [None]:
hidden_size = 256
encoder_fr_en1 = EncoderRNN(input_lang_fr_en1.n_words, hidden_size).to(device)
attn_decoder_fr_en1 = AttnDecoderRNN(hidden_size, output_lang_fr_en1.n_words, dropout_p=0.1).to(device)

trainIters(input_lang_fr_en1,output_lang_fr_en1,train_pairs_fr_en1, encoder_fr_en1, attn_decoder_fr_en1, 15000, print_every=5000)

score11 = evaluateBleu(input_lang_fr_en1,output_lang_fr_en1,encoder_fr_en1, attn_decoder_fr_en1, test_pairs_fr_en1)

print(' BLEU score for attention in tutorial(fr-en)')
print(score11)


## attention in lecture

In [None]:
encoder_fr_en1 = EncoderRNN(input_lang_fr_en1.n_words, hidden_size).to(device)
attn_decoder_fr_en1 = AttnDecoderRNN_lecture(hidden_size, output_lang_fr_en1.n_words, dropout_p=0.1).to(device)

trainIters(input_lang_fr_en1,output_lang_fr_en1,train_pairs_fr_en1, encoder_fr_en1, attn_decoder_fr_en1, 15000, print_every=5000)

score11 = evaluateBleu(input_lang_fr_en1,output_lang_fr_en1,encoder_fr_en1, attn_decoder_fr_en1, test_pairs_fr_en1)

print(' BLEU score for attention in lecture(fr-en)')
print(score11)


In [None]:
encoder_cs_en1 = EncoderRNN(input_lang_cs_en1.n_words, hidden_size).to(device)
attn_decoder_cs_en1 = AttnDecoderRNN_lecture(hidden_size, output_lang_cs_en1.n_words, dropout_p=0.1).to(device)

trainIters(input_lang_cs_en1,output_lang_cs_en1,train_pairs_cs_en1, encoder_cs_en1, attn_decoder_cs_en1, 15000, print_every=5000)

score11 = evaluateBleu(input_lang_cs_en1,output_lang_cs_en1,encoder_cs_en1, attn_decoder_cs_en1, test_pairs_cs_en1)

print(' BLEU score for attention in lecture(cs-en)')
print(score11)


In [None]:
encoder_ru_en1 = EncoderRNN(input_lang_ru_en1.n_words, hidden_size).to(device)
attn_decoder_ru_en1 = AttnDecoderRNN_lecture(hidden_size, output_lang_ru_en1.n_words, dropout_p=0.1).to(device)

trainIters(input_lang_ru_en1,output_lang_ru_en1,train_pairs_ru_en1, encoder_ru_en1, attn_decoder_ru_en1, 15000, print_every=5000)

score11 = evaluateBleu(input_lang_ru_en1,output_lang_ru_en1,encoder_ru_en1, attn_decoder_ru_en1, test_pairs_ru_en1)

print(' BLEU score for attention in lecture(ru-en)')
print(score11)


In [None]:
encoder_de_en1 = EncoderRNN(input_lang_de_en1.n_words, hidden_size).to(device)
attn_decoder_de_en1 = AttnDecoderRNN_lecture(hidden_size, output_lang_de_en1.n_words, dropout_p=0.1).to(device)

trainIters(input_lang_de_en1,output_lang_de_en1,train_pairs_de_en1, encoder_de_en1, attn_decoder_de_en1, 15000, print_every=5000)

score11 = evaluateBleu(input_lang_de_en1,output_lang_de_en1,encoder_de_en1, attn_decoder_de_en1, test_pairs_de_en1)

print(' BLEU score for attention in lecture(de-en)')
print(score11)


## Multiplicative attention 

In [None]:
encoder_fr_en1 = EncoderRNN(input_lang_fr_en1.n_words, hidden_size).to(device)
attn_decoder_fr_en1 = AttnDecoderRNN_mul(hidden_size, output_lang_fr_en1.n_words, dropout_p=0.1).to(device)

trainIters(input_lang_fr_en1,output_lang_fr_en1,train_pairs_fr_en1, encoder_fr_en1, attn_decoder_fr_en1, 15000, print_every=5000)

score11 = evaluateBleu(input_lang_fr_en1,output_lang_fr_en1,encoder_fr_en1, attn_decoder_fr_en1, test_pairs_fr_en1)

print(' BLEU score for Multiplicative attention(fr-en)')
print(score11)


## Additive attention 

In [None]:
encoder_fr_en1 = EncoderRNN(input_lang_fr_en1.n_words, hidden_size).to(device)
attn_decoder_fr_en1 = AttnDecoderRNN_add(hidden_size, output_lang_fr_en1.n_words, dropout_p=0.1).to(device)

trainIters(input_lang_fr_en1,output_lang_fr_en1,train_pairs_fr_en1, encoder_fr_en1, attn_decoder_fr_en1, 15000, print_every=5000)

score11 = evaluateBleu(input_lang_fr_en1,output_lang_fr_en1,encoder_fr_en1, attn_decoder_fr_en1, test_pairs_fr_en1)

print(' BLEU score for Additive attention attention(fr-en)')
print(score11)
