In [5]:
# Adapted from:
# https://pytorch.org/tutorials/intermediate/seq2seq_translation_tutorial.html

# Translation with a sequence to sequence network and attention
from __future__ import unicode_literals, print_function, division
from io import open
import unicodedata
import string
import re
import random
import time
import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F

from utils import timeSince, filterPairs, tensorsFromPair, tensorFromSentence, showPlot, normalizeString

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
# Later cells pass this to torch.zeros / torch.tensor and to Module.to().
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [2]:
from urllib.request import urlretrieve
import zipfile

# Download the translation corpus archive next to the notebook.
url = 'https://download.pytorch.org/tutorial/data.zip'
data = urlretrieve(url, './translation_data.zip')

# Unpack into the current working directory. The context manager closes the
# archive handle once extraction finishes, and the name `archive` keeps the
# builtin `zip` usable in later cells.
with zipfile.ZipFile('./translation_data.zip') as archive:
    archive.extractall()

In [3]:
# Reserved vocabulary indices. Lang.index2word pre-populates 0 -> "SOS" and
# 1 -> "EOS"; SOS_token seeds the decoder input and EOS_token ends decoding.
SOS_token = 0
EOS_token = 1

class Lang:
    """Vocabulary for one language.

    Keeps three dictionaries in step with each other:

    * ``word2index`` -- word to integer index
    * ``index2word`` -- integer index back to word
    * ``word2count`` -- how many times each word has been added

    Indices 0 and 1 are pre-assigned to the "SOS" and "EOS" markers, so the
    first real word receives index 2.
    """

    def __init__(self, name):
        self.name = name
        self.word2index = {}
        self.word2count = {}
        self.index2word = {0: "SOS", 1: "EOS"}
        # Starts at 2 because SOS and EOS already occupy two slots.
        self.n_words = 2

    def addSentence(self, sentence):
        """Register every single-space-separated token of ``sentence``."""
        for token in sentence.split(' '):
            self.addWord(token)

    def addWord(self, word):
        """Count ``word``, assigning it the next free index on first sight."""
        if word in self.word2index:
            self.word2count[word] += 1
            return

        new_index = self.n_words
        self.word2index[word] = new_index
        self.word2count[word] = 1
        self.index2word[new_index] = word
        self.n_words = new_index + 1

In [5]:
# To read the data file we will split the file into
# lines, and then split lines into pairs.
def readLangs(lang1, lang2, reverse=False):
    """Load the sentence pairs for a language pair from disk.

    Reads ``data/<lang1>-<lang2>.txt`` as UTF-8, splits it into lines, splits
    each line on tab characters and passes every field through
    ``normalizeString``.

    Args:
        lang1: Name of the first language; used in the file name.
        lang2: Name of the second language; used in the file name.
        reverse: When true, the fields of every pair are reversed and the
            input/output language roles are swapped.

    Returns:
        A tuple ``(input_lang, output_lang, pairs)``. The two ``Lang`` objects
        are newly created and contain no words yet; ``pairs`` is a list of
        lists of normalized strings.
    """
    print("Reading lines...")

    # Read inside a context manager so the file handle is closed as soon as
    # the contents are in memory, instead of being left to the garbage collector.
    with open('data/%s-%s.txt' % (lang1, lang2), encoding='utf-8') as corpus:
        lines = corpus.read().strip().split('\n')

    # Split every line into its tab-separated fields and normalize each one.
    pairs = [[normalizeString(field) for field in line.split('\t')] for line in lines]

    # Reverse pairs if requested, and create the Lang instances accordingly.
    if reverse:
        pairs = [list(reversed(pair)) for pair in pairs]
        input_lang = Lang(lang2)
        output_lang = Lang(lang1)
    else:
        input_lang = Lang(lang1)
        output_lang = Lang(lang2)
    return input_lang, output_lang, pairs


In [7]:
# Read text file and split into lines, split lines into pairs
# Normalize text, filter by length and content
# Make word lists from sentences in pairs
def prepareData(lang1, lang2, reverse=False):
    """Load a language pair, filter it, and build both vocabularies.

    Loading is delegated to ``readLangs`` and filtering to ``filterPairs``.
    Each surviving pair then has its first sentence added to the input
    language and its second sentence added to the output language. Progress
    is reported with ``print``.

    Returns:
        A tuple ``(input_lang, output_lang, pairs)`` where ``pairs`` is the
        filtered list.
    """
    input_lang, output_lang, all_pairs = readLangs(lang1, lang2, reverse)
    print("Read %s sentence pairs" % len(all_pairs))

    kept_pairs = filterPairs(all_pairs)
    print("Trimmed to %s sentence pairs" % len(kept_pairs))

    print("Counting words...")
    for sentence_pair in kept_pairs:
        input_lang.addSentence(sentence_pair[0])
        output_lang.addSentence(sentence_pair[1])

    print("Counted words:")
    for lang in (input_lang, output_lang):
        print(lang.name, lang.n_words)
    return input_lang, output_lang, kept_pairs

# reverse=True swaps every pair, so 'fra' becomes the input language and
# 'eng' the output language.
input_lang, output_lang, pairs = prepareData('eng', 'fra', reverse=True)
# Show one randomly chosen pair as a sanity check.
print(random.choice(pairs))

Reading lines...
Read 135842 sentence pairs
Trimmed to 10599 sentence pairs
Counting words...
Counted words:
fra 4345
eng 2803
['ils discutent du probleme .', 'they are discussing the problem .']


In [28]:
# “Teacher forcing” means feeding the real target token as each next decoder input,
# instead of feeding back the decoder’s own guess.
# Teacher forcing makes training converge faster, but a network trained this way
# may be unstable at inference time, when it has to rely on its own predictions.
# Probability that a given train() call uses teacher forcing.
teacher_forcing_ratio = 0.5
# Number of rows allocated for the encoder outputs in train() and evaluate(),
# and the maximum number of decoding steps in evaluate().
MAX_LENGTH = 10

def train(input_tensor, target_tensor, encoder, decoder, encoder_optimizer,
          decoder_optimizer, criterion, max_length=MAX_LENGTH):
    """Run one optimisation step on a single input/target pair.

    The input is fed to the encoder one element at a time (indexing dim 0),
    and each step's output is stored in a ``(max_length, hidden_size)``
    buffer. The decoder then starts from ``SOS_token`` with the encoder's
    final hidden state and is stepped once per target element. With
    probability ``teacher_forcing_ratio`` the true target is fed back as the
    next decoder input; otherwise the decoder's own top-1 prediction is fed
    back and decoding stops early once that prediction is ``EOS_token``.

    Args:
        input_tensor: Source sequence, indexed along dim 0.
        target_tensor: Target sequence, indexed along dim 0.
        encoder: Module providing ``initHidden()``, ``hidden_size`` and a
            forward call returning ``(output, hidden)``.
        decoder: Module whose forward call takes
            ``(input, hidden, encoder_outputs)`` and returns
            ``(output, hidden, attention)``.
        encoder_optimizer: Optimizer for the encoder parameters.
        decoder_optimizer: Optimizer for the decoder parameters.
        criterion: Loss applied to each decoder output and target element.
        max_length: Number of rows in the encoder output buffer.

    Returns:
        The summed loss divided by the target length. The divisor is the full
        target length even when decoding stopped early.
    """
    encoder_hidden = encoder.initHidden()

    # Clear gradients left over from the previous call.
    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    input_length = input_tensor.size(0)
    target_length = target_tensor.size(0)

    # Rows beyond input_length are never written and stay zero.
    encoder_outputs = torch.zeros(max_length, encoder.hidden_size, device=device)

    loss = 0

    # Encoder pass: one call per source element.
    for src_pos in range(input_length):
        encoder_output, encoder_hidden = encoder(input_tensor[src_pos], encoder_hidden)
        encoder_outputs[src_pos] = encoder_output[0, 0]

    decoder_input = torch.tensor([[SOS_token]], device=device)
    decoder_hidden = encoder_hidden
    use_teacher_forcing = random.random() < teacher_forcing_ratio

    # Decoder pass: both feeding strategies share the forward call and the
    # loss accumulation; they differ only in what becomes the next input.
    for tgt_pos in range(target_length):
        decoder_output, decoder_hidden, _attention = decoder(
            decoder_input, decoder_hidden, encoder_outputs)
        loss += criterion(decoder_output, target_tensor[tgt_pos])

        if use_teacher_forcing:
            # Feed the ground-truth target as the next input.
            decoder_input = target_tensor[tgt_pos]
            continue

        # Feed the decoder's own best guess, detached from the graph.
        _top_value, top_index = decoder_output.topk(1)
        decoder_input = top_index.squeeze().detach()
        if decoder_input.item() == EOS_token:
            break

    loss.backward()
    encoder_optimizer.step()
    decoder_optimizer.step()
    return loss.item() / target_length

In [13]:
# Start a timer
# Initialize optimizers and criterion
# Create set of training pairs
# Start empty losses array for plotting

def trainIters(encoder, decoder, n_iters, print_every=1000, plot_every=100, learning_rate=0.01):
    """Train on ``n_iters`` randomly sampled pairs and plot the loss curve.

    Creates one SGD optimizer per model and an ``NLLLoss`` criterion,
    pre-samples ``n_iters`` training pairs with ``random.choice`` from the
    notebook-level ``pairs`` list, and calls ``train`` once per sample.

    Args:
        encoder: Encoder module to optimise.
        decoder: Decoder module to optimise.
        n_iters: Number of training samples and steps.
        print_every: Print the running average loss every this many steps.
        plot_every: Record an averaged loss point every this many steps.
        learning_rate: Learning rate used by both optimizers.
    """
    start = time.time()
    plot_losses = []
    print_loss_total = 0  # Accumulates between progress prints
    plot_loss_total = 0  # Accumulates between plot points

    encoder_optimizer = optim.SGD(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.SGD(decoder.parameters(), lr=learning_rate)
    training_pairs = [tensorsFromPair(random.choice(pairs)) for _ in range(n_iters)]
    criterion = nn.NLLLoss()

    # Steps are numbered from 1 so the modulo checks fire on multiples of
    # print_every / plot_every.
    for step, training_pair in enumerate(training_pairs, start=1):
        input_tensor = training_pair[0]   # input sentence
        target_tensor = training_pair[1]  # output sentence

        loss = train(input_tensor, target_tensor, encoder,
                     decoder, encoder_optimizer, decoder_optimizer, criterion)
        print_loss_total += loss
        plot_loss_total += loss

        if step % print_every == 0:
            print_loss_avg = print_loss_total / print_every
            print_loss_total = 0
            progress = step / n_iters
            print('%s (%d %d%%) %.4f' % (timeSince(start, progress),
                                         step, progress * 100, print_loss_avg))

        if step % plot_every == 0:
            plot_losses.append(plot_loss_total / plot_every)
            plot_loss_total = 0

    showPlot(plot_losses)

In [16]:
def evaluate(encoder, decoder, sentence, max_length=MAX_LENGTH):
    """Decode ``sentence`` greedily with the given encoder/decoder pair.

    Everything runs under ``torch.no_grad()``. The sentence is converted with
    ``tensorFromSentence`` using the notebook-level ``input_lang``, encoded
    element by element, and decoded for at most ``max_length`` steps starting
    from ``SOS_token``. Each step feeds the top-1 prediction back as the next
    input.

    Returns:
        A tuple ``(decoded_words, attentions)``. ``decoded_words`` holds the
        predicted words, ending with ``'<EOS>'`` when the decoder produced
        ``EOS_token`` before hitting ``max_length``. ``attentions`` contains
        one row per decoding step that was executed.
    """
    with torch.no_grad():
        input_tensor = tensorFromSentence(input_lang, sentence)
        input_length = input_tensor.size(0)
        encoder_hidden = encoder.initHidden()

        encoder_outputs = torch.zeros(max_length, encoder.hidden_size, device=device)

        for src_pos in range(input_length):
            encoder_output, encoder_hidden = encoder(input_tensor[src_pos], encoder_hidden)
            encoder_outputs[src_pos] += encoder_output[0, 0]

        decoder_input = torch.tensor([[SOS_token]], device=device)  # SOS
        decoder_hidden = encoder_hidden
        decoded_words = []
        decoder_attentions = torch.zeros(max_length, max_length)

        for step in range(max_length):
            decoder_output, decoder_hidden, decoder_attention = decoder(
                decoder_input, decoder_hidden, encoder_outputs)
            decoder_attentions[step] = decoder_attention.data
            _top_value, top_index = decoder_output.data.topk(1)

            token_id = top_index.item()
            if token_id == EOS_token:
                decoded_words.append('<EOS>')
                break

            decoded_words.append(output_lang.index2word[token_id])
            decoder_input = top_index.squeeze().detach()

        # `step` is the last decoding step that ran (the EOS step on early exit).
        return decoded_words, decoder_attentions[:step + 1]

In [17]:
def evaluateRandomly(encoder, decoder, n=10):
    """Print ``n`` random pairs together with the model's output.

    For each sample drawn from the notebook-level ``pairs`` list, prints the
    first sentence prefixed with ``>``, the second sentence prefixed with
    ``=``, and the output of ``evaluate`` on the first sentence prefixed with
    ``<``, followed by an empty line.
    """
    for _ in range(n):
        sample = random.choice(pairs)
        print('>', sample[0])
        print('=', sample[1])
        output_words, _attentions = evaluate(encoder, decoder, sample[0])
        print('<', ' '.join(output_words))
        print('')

In [18]:
# Number of entries in the input vocabulary, including the SOS and EOS slots.
input_lang.n_words
#output_lang.n_words

4345

In [30]:
from models import EncoderRNN, DecoderRNN, AttnDecoderRNN
# Hidden size passed to both the encoder and the attention decoder.
hidden_size = 256
# The encoder is sized from the input-language word count, the decoder from
# the output-language word count; both are moved to `device`.
encoder1 = EncoderRNN(input_lang.n_words, hidden_size).to(device)
attn_decoder1 = AttnDecoderRNN(hidden_size, output_lang.n_words, dropout_p=0.1).to(device)

# NOTE(review): the training call is commented out, so after a kernel restart
# the models evaluated in the next cell are untrained. The translations shown
# there presumably come from an earlier training run -- confirm before sharing.
#trainIters(encoder1, attn_decoder1, 75000, print_every=5000)

In [20]:
# Print random source / reference / model-output triples (n defaults to 10).
evaluateRandomly(encoder1, attn_decoder1)


> il est fort comme un cheval .
= he s as strong as a horse .
< he is as strong as a horse . <EOS>

> elles cherchent tom .
= they re looking for tom .
< they re looking for tom . <EOS>

> vous etes puissants .
= you re powerful .
< you re powerful . <EOS>

> vous n y allez pas si ?
= you re not going are you ?
< you re not going are you ? <EOS>

> c est toi qui t es emballe .
= you re the one that went crazy .
< you re the one that went crazy . <EOS>

> vous etes fort brave .
= you re very brave .
< you re very brave . <EOS>

> nous ne sommes pas ici pour t arreter .
= we are not here to arrest you .
< we are not here to arrest you . <EOS>

> je suis ouvert aux propositions .
= i m open to suggestions .
< i m open to suggestions . <EOS>

> tu vas rire .
= you re going to laugh .
< you re going to . <EOS>

> il est assis sur la chaise .
= he is sitting on the chair .
< he is sitting on the chair . <EOS>



In [51]:
# Scratch experiment: feed a 2048-element vector to a fresh EncoderRNN.
# NOTE(review): this cell raises the RuntimeError shown in its output.
# torch.rand draws from [0, 1), so the int64 cast turns every element into 0,
# and `feat` holds 2048 values; 524288 in the error message equals 2048 * 256.
# Presumably the encoder maps each element to hidden_size features before its
# recurrent layer -- confirm against models.py.
enc = EncoderRNN(2048, hidden_size)
enc_hidden = enc.initHidden()
feat = torch.rand(2048).to(torch.int64)
enc(feat, enc_hidden)

RuntimeError: input.size(-1) must be equal to input_size. Expected 256, got 524288

In [50]:
# Shape of the test vector built in the scratch cell.
feat.shape

torch.Size([2048])

In [41]:
# Number of entries in the input vocabulary, including the SOS and EOS slots.
input_lang.n_words

4345

In [48]:
#training_pairs = [tensorsFromPair(random.choice(pairs)) for i in range(n_iters)]

# Shape of element [1] of one random tensor pair; trainIters uses that
# element as the target tensor.
tensorsFromPair(random.choice(pairs))[1].shape

torch.Size([9, 1])

In [3]:
import torch
from models import EncoderRNN
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# NOTE(review): EncoderRNN is used differently here than in train()/evaluate():
# it is built with an `input_size` keyword, called without a hidden state, and
# its second return value is unpacked as a 2-tuple. Presumably models.py was
# changed between cell executions -- confirm that both usages still work.
enc = EncoderRNN(input_size=2048, hidden_size=256)
enc = enc.to(device)
#enc_hidden = enc.initHidden()
feat = torch.rand(2048).to(device)
# The flat 2048-element vector is reshaped to (1, 1, 2048) for the forward call.
out, (h_c, c_f) = enc(feat.view(1,1,-1)) #, enc_hidden)
#lstm = nn.LSTM(input_size=2048, hidden_size=256)
#lstm(feat.unsqueeze(0))

In [8]:
# Shape of the second element of the state tuple returned by the encoder call.
c_f.shape

torch.Size([1, 1, 256])