In [1]:
import torch
import torch.nn as nn
from torch import optim
from tqdm import tqdm
import torch.nn.functional as F

from process_data import *
from my_decoder import *
from my_encoder import *
from utils import *
from __future__ import unicode_literals, print_function, division
from io import open
import unicodedata
import string
import re
import random

%matplotlib inline
#device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [2]:
# Load the sentence-pair corpus from the Tatoeba eng-fra file.
# `input_seq` / `output_seq` are the two vocabulary objects (their `n_words` is read
# when the models are built) and `pairs` is the collection of sentence pairs that is
# later split into train/test sets.
# NOTE(review): the second positional argument `True` is presumably a "reverse the
# language direction" flag -- confirm against process_data.prepareData.
input_seq, output_seq, pairs = prepareData('../data/tatoeba/eng-fra.txt', True)

Reading lines...
Read 135842 sentence pairs
Trimmed to 135842 sentence pairs
Counting words...
Counted words:
input 21334
output 13043


In [3]:
# Probability that a training step feeds the ground-truth token (instead of the
# decoder's own prediction) as the next decoder input.
teacher_forcing_ratio = 0.5
# Default value for the `max_length` argument of train() and of the evaluation helpers.
MAX_LENGTH = 50


def train(input_tensor, target_tensor, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion, max_length=MAX_LENGTH):
    """Run one optimisation step on a single (input, target) sequence pair.

    The input is fed to ``encoder`` one position at a time, the final encoder
    hidden state seeds ``decoder``, and the decoder is unrolled over the target.
    With probability ``teacher_forcing_ratio`` (module-level, read at call time)
    the ground-truth token is fed back as the next decoder input; otherwise the
    decoder's own top-1 prediction is fed back and decoding stops as soon as it
    predicts ``EOS_token``.

    Args:
        input_tensor: source sequence; iterated along dimension 0, one element
            per encoder step.
        target_tensor: target sequence; iterated along dimension 0, each element
            is passed to ``criterion`` as the target for that step.
        encoder: module called as ``encoder(token, hidden) -> (output, hidden)``
            and providing ``initHidden()``.
        decoder: module called as ``decoder(token, hidden) -> (output, hidden)``.
        encoder_optimizer: optimizer over the encoder parameters.
        decoder_optimizer: optimizer over the decoder parameters.
        criterion: loss called as ``criterion(decoder_output, target_token)``.
        max_length: kept for interface compatibility. It is not used while the
            decoder takes no encoder outputs, so inputs longer than this value
            no longer fail.

    Returns:
        float: summed step losses divided by the number of decoder steps that
        were actually executed.

    Raises:
        ValueError: if ``target_tensor`` has no elements along dimension 0.
    """
    target_length = target_tensor.size(0)
    if target_length == 0:
        # Without at least one decoder step there is no loss tensor to backpropagate.
        raise ValueError("target_tensor must contain at least one token")

    encoder_hidden = encoder.initHidden()

    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    # Only the final hidden state is consumed by the (attention-free) decoder, so the
    # per-step encoder outputs are not stored. Storing them in a fixed
    # (max_length, hidden) buffer raised IndexError for inputs longer than max_length.
    for ei in range(input_tensor.size(0)):
        _, encoder_hidden = encoder(input_tensor[ei], encoder_hidden)

    # Tensors are created on the default device; no explicit device placement is done.
    decoder_input = torch.tensor([[SOS_token]])
    decoder_hidden = encoder_hidden

    use_teacher_forcing = random.random() < teacher_forcing_ratio

    loss = 0
    steps_decoded = 0
    for di in range(target_length):
        decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden)
        loss += criterion(decoder_output, target_tensor[di])
        steps_decoded += 1

        if use_teacher_forcing:
            # Teacher forcing: feed the ground-truth token as the next input.
            decoder_input = target_tensor[di]
        else:
            # Free running: feed the model's own best guess, detached so that no
            # gradient flows through the choice of the input token.
            _, topi = decoder_output.topk(1)
            decoder_input = topi.squeeze().detach()
            if decoder_input.item() == EOS_token:
                break

    loss.backward()

    encoder_optimizer.step()
    decoder_optimizer.step()

    # Normalise by the steps that contributed to the loss. Dividing by target_length
    # under-reported the average whenever free-running decoding stopped early on EOS.
    return loss.item() / steps_decoded

In [4]:
def trainIters(train_pairs, encoder, decoder, n_iters, print_every=5000, plot_every=1000, learning_rate=0.01):
    """Train ``encoder``/``decoder`` with SGD for ``n_iters`` single-pair steps.

    Each step draws one pair uniformly at random (with replacement) from
    ``train_pairs``, converts it with ``tensorsFromPair`` and calls ``train``.
    The pair is sampled and converted lazily inside the loop, so memory use and
    start-up time do not grow with ``n_iters``.

    Reads the notebook-level globals ``input_seq`` and ``output_seq`` (the two
    vocabulary objects handed to ``tensorsFromPair``).

    Args:
        train_pairs: non-empty sequence of training pairs to sample from.
        encoder: encoder module; its parameters get their own SGD optimizer.
        decoder: decoder module; its parameters get their own SGD optimizer.
        n_iters: number of training steps to run.
        print_every: print the running average loss every this many steps.
        plot_every: record the running average loss every this many steps.
        learning_rate: learning rate for both SGD optimizers.

    Returns:
        None. The averages recorded every ``plot_every`` steps are passed to
        ``showPlot`` once training finishes. Steps after the last full
        ``print_every`` / ``plot_every`` window are not reported.
    """
    # NOTE(review): `time` is not imported explicitly in this notebook; presumably it
    # arrives through one of the star imports -- confirm.
    start = time.time()
    plot_losses = []
    print_loss_total = 0  # Reset every print_every
    plot_loss_total = 0  # Reset every plot_every

    encoder_optimizer = optim.SGD(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.SGD(decoder.parameters(), lr=learning_rate)
    criterion = nn.NLLLoss()

    # `step` rather than `iter`, so the builtin is not shadowed.
    for step in tqdm(range(1, n_iters + 1)):
        training_pair = tensorsFromPair(input_seq, output_seq, random.choice(train_pairs))
        input_tensor = training_pair[0]
        target_tensor = training_pair[1]

        loss = train(input_tensor, target_tensor, encoder,
                     decoder, encoder_optimizer, decoder_optimizer, criterion)
        print_loss_total += loss
        plot_loss_total += loss

        if step % print_every == 0:
            print_loss_avg = print_loss_total / print_every
            print_loss_total = 0
            print('%s (%d %d%%) %.4f' % (timeSince(start, step / n_iters),
                                         step, step / n_iters * 100, print_loss_avg))

        if step % plot_every == 0:
            plot_loss_avg = plot_loss_total / plot_every
            plot_losses.append(plot_loss_avg)
            plot_loss_total = 0

    showPlot(plot_losses)

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the sentence pairs for evaluation. The split is shuffled with a
# fixed random_state so that it is the same on every run.
train_pairs, test_pairs = train_test_split(
    pairs,
    test_size=0.2,
    shuffle=True,
    random_state=44,
)

In [None]:
# Hidden width shared by the encoder and the decoder.
hidden_size = 256

# No explicit device placement is done for either model.
encoder1 = EncoderRNN(input_seq.n_words, hidden_size)
# NOTE: despite the "attn_" prefix this object is built from DecoderRNN; the name is
# kept because the evaluation cells below refer to it.
attn_decoder1 = DecoderRNN(hidden_size, output_seq.n_words)

# Long-running cell: the recorded progress bar estimates roughly 53 hours for the
# full run at about 5 iterations per second.
trainIters(
    train_pairs,
    encoder1,
    attn_decoder1,
    n_iters=1000000,
    print_every=5000,
)

  0%|          | 1024/1000000 [03:16<53:23:26,  5.20it/s]

In [None]:
from predict import *

# Spot-check the trained models on the held-out pairs.
# NOTE(review): evaluateRandomly presumably samples a few test pairs and shows the
# model's output next to the reference -- confirm in predict.py.
evaluateRandomly(
    input_seq,
    output_seq,
    test_pairs,
    encoder1,
    attn_decoder1,
    max_length=MAX_LENGTH,
)

In [None]:
# Score the trained encoder/decoder on the held-out test pairs.
# NOTE(review): compute_test_accuracy is not defined in this notebook; presumably it
# comes from the `from predict import *` in the previous cell, so that cell must be
# run first -- confirm. The exact accuracy metric is defined there as well.
compute_test_accuracy(input_seq, output_seq, test_pairs, encoder1, attn_decoder1, max_length=MAX_LENGTH)