In [None]:
import os
import sys
import argparse
import torch
import torch.nn as nn
from torch.autograd import Variable
from torch import optim
import torch.nn.functional as F

use_cuda = torch.cuda.is_available()

In [64]:
class EncoderRNN(nn.Module):
    def __init__(self, input_size, hidden_size, n_layers=1):
        super(EncoderRNN, self).__init__()
        self.n_layers = n_layers
        self.hidden_size = hidden_size

        self.embedding = nn.Embedding(input_size, hidden_size)
        self.grus = []
        for i in range(self.n_layers):
            self.grus.append(nn.GRU(hidden_size, hidden_size))
        
        self.hiddens = self.initAllHiddens()
              
    def forward(self, input):
        embedded = self.embedding(input).view(1, 1, -1)
        output = embedded
        #hidden = self.hiddens[0]
        for i in range(self.n_layers):
            output, self.hiddens[i] = self.grus[i](output, self.hiddens[i])
            hidden = self.hiddens[i]
        return output, hidden

    def initHidden(self):
        result = Variable(torch.zeros(1, 1, self.hidden_size))
        if use_cuda:
            return result.cuda()
        else:
            return result

    def initAllHiddens(self):
        self.hiddens = []
        for i in range(self.n_layers):
            self.hiddens.append(self.initHidden())
            
            
            
class DecoderRNN(nn.Module):
    def __init__(self, hidden_size, output_size, n_layers=1):
        super(DecoderRNN, self).__init__()
        self.n_layers = n_layers
        self.hidden_size = hidden_size
        self.output_size = output_size

        self.embedding = nn.Embedding(output_size, hidden_size)
        #self.gru = nn.GRU(hidden_size, hidden_size)
        self.grus = []
        for i in range(self.n_layers):
            self.grus.append(nn.GRU(hidden_size, hidden_size))        
        self.hiddens = self.initAllHiddens()

        self.out = nn.Linear(hidden_size, output_size)
        self.softmax = nn.LogSoftmax()

    def forward(self, input, hidden):
        output = self.embedding(input).view(1, 1, -1)
        self.hiddens[0] = hidden
        for i in range(self.n_layers):
            output, self.hiddens[i] = self.grus[i](output, self.hiddens[i])
            output = F.relu(output)
            hidden = self.hiddens[i]
        output = self.softmax(self.out(output[0]))
        return output, hidden

    def initHidden(self):
        result = Variable(torch.zeros(1, 1, self.hidden_size))
        if use_cuda:
            return result.cuda()
        else:
            return result
        
    def initAllHiddens(self):
        self.hiddens = []
        for i in range(self.n_layers):
            self.hiddens.append(self.initHidden())
            
 
class EncoderDecoder(nn.Module):
    def __init__(self, input_size, output_size, hidden_size, SOS_token, n_layers=1, tied_embedding=False):
        super(EncoderDecoder, self).__init__()
        self.encoder = EncoderRNN(input_size, hidden_size, n_layers) 
        self.decoder = DecoderRNN(hidden_size, output_size, n_layers)
        if tied_embedding and input_size == output_size:
            self.decoder.embedding = self.encoder.embedding

        self.SOS_token = Variable(torch.LongTensor([[SOS_token]]))
        self.SOS_token = self.SOS_token.cuda() if use_cuda else self.SOS_token

    def resetHiddenStates(self):
        self.encoder.initAllHiddens()
        self.decoder.initAllHiddens()

    def forward(self, input_sequence, target_sequence):            
        input_length=input_sequence.size(0)
        target_length=target_sequence.size(0)
        
        self.resetHiddenStates()


        for ei in range(input_length):
            encoder_output, encoder_hidden = self.encoder( input_sequence[ei])

        decoder_input = self.SOS_token
        decoder_hidden = encoder_hidden

        decoder_outputs = Variable(torch.zeros(target_length + 1, self.decoder.output_size))
        decoder_outputs = decoder_outputs.cuda() if use_cuda else decoder_outputs

        for  di in range(target_length):
            decoder_output, decoder_hidden = self.decoder(
                decoder_input, decoder_hidden)
            #loss += criterion(decoder_output[0], target_variable[di])
            decoder_outputs[di] = decoder_output[0]
            decoder_input = target_variable[di]  # Teacher forcing


        return decoder_outputs

class KKcoder(nn.Module):
    def __init__(self):
        super(KKcoder, self).__init__()
            
    def forward(self, input_sequence, target_sequence):
        print("hola")
        return input_sequence, target_sequence

In [77]:
class EncoderRNN(nn.Module):
    def __init__(self, input_size, hidden_size, n_layers=1):
        super(EncoderRNN, self).__init__()
        self.n_layers = n_layers
        self.hidden_size = hidden_size

        self.embedding = nn.Embedding(input_size, hidden_size)
        self.gru = nn.GRU(input_size = hidden_size, hidden_size = hidden_size, 
                          num_layers = n_layers, batch_first = True) #dropout = 0
        
        self.hidden = self.initHidden()
              
    def forward(self, input):
        embedded = self.embedding(input).view(1, 1, -1)
        
        output, self.hidden = self.gru(embedded, self.hidden)
        
        return output, self.hidden

    def initHidden(self):
        result = Variable(torch.zeros(self.n_layers, 1, self.hidden_size))
        if use_cuda:
            return result.cuda()
        else:
            return result
    def initAllHiddens(self):
        self.hidden = self.initHidden()
            
            
            
class DecoderRNN(nn.Module):
    def __init__(self, hidden_size, output_size, n_layers=1):
        super(DecoderRNN, self).__init__()
        self.n_layers = n_layers
        self.hidden_size = hidden_size
        self.output_size = output_size

        self.embedding = nn.Embedding(output_size, hidden_size)
        
        self.gru = nn.GRU(input_size = hidden_size, hidden_size = hidden_size, 
                          num_layers = n_layers, batch_first = True) #dropout = 0
        
        self.hidden = self.initHidden()
        
#         #self.gru = nn.GRU(hidden_size, hidden_size)
#         self.grus = []
#         for i in range(self.n_layers):
#             self.grus.append(nn.GRU(hidden_size, hidden_size))        
#         self.hiddens = self.initAllHiddens()

        self.out = nn.Linear(hidden_size, output_size)
        self.softmax = nn.LogSoftmax()

#     def forward(self, input, hidden):
#         output = self.embedding(input).view(1, 1, -1)
#         for i in range(self.n_layers):
#             output, self.hiddens[i] = self.grus[i](output, self.hiddens[i])
#             output = F.relu(output)
#             hidden = self.hiddens[i]
#         output = self.softmax(self.out(output[0]))
#         return output, hidden

    def forward(self, input, hidden):
        embedded = self.embedding(input).view(1, 1, -1)
        
        output, self.hidden = self.gru(embedded, hidden)
        output = self.softmax(self.out(output[0]))
        return output, self.hidden
    
    
    def initHidden(self):
        result = Variable(torch.zeros(self.n_layers, 1, self.hidden_size))
        if use_cuda:
            return result.cuda()
        else:
            return result
    def initAllHiddens(self):
        self.hidden = self.initHidden()
            
 
class EncoderDecoder(nn.Module):
    def __init__(self, input_size, output_size, hidden_size, SOS_token, n_layers=1, tied_embedding=False):
        super(EncoderDecoder, self).__init__()
        self.encoder = EncoderRNN(input_size, hidden_size, n_layers) 
        self.decoder = DecoderRNN(hidden_size, output_size, n_layers)
        if tied_embedding and input_size == output_size:
            self.decoder.embedding = self.encoder.embedding

        self.SOS_token = Variable(torch.LongTensor([[SOS_token]]))
        self.SOS_token = self.SOS_token.cuda() if use_cuda else self.SOS_token

    def resetHiddenStates(self):
        self.encoder.initAllHiddens()
        self.decoder.initAllHiddens()

    def forward(self, input_sequence, target_sequence):            
        input_length=input_sequence.size(0)
        target_length=target_sequence.size(0)
        
        self.resetHiddenStates()


        for ei in range(input_length):
            encoder_output, encoder_hidden = self.encoder( input_sequence[ei])

        decoder_input = self.SOS_token
        decoder_hidden = encoder_hidden

        decoder_outputs = Variable(torch.zeros(target_length + 1, self.decoder.output_size))
        decoder_outputs = decoder_outputs.cuda() if use_cuda else decoder_outputs

        for  di in range(target_length):
            decoder_output, decoder_hidden = self.decoder(
                decoder_input, decoder_hidden)
            #loss += criterion(decoder_output[0], target_variable[di])
            decoder_outputs[di] = decoder_output[-1]
            decoder_input = target_variable[di]  # Teacher forcing


        return decoder_outputs


In [2]:
from __future__ import unicode_literals, print_function, division
from io import open
import unicodedata
import string
import re
import random

In [3]:
SOS_token = 0
EOS_token = 1


class Lang:
    def __init__(self, name):
        self.name = name
        self.word2index = {}
        self.word2count = {}
        self.index2word = {0: "SOS", 1: "EOS"}
        self.n_words = 2  # Count SOS and EOS

    def addSentence(self, sentence):
        for word in sentence.split(' '):
            self.addWord(word)

    def addWord(self, word):
        if word not in self.word2index:
            self.word2index[word] = self.n_words
            self.word2count[word] = 1
            self.index2word[self.n_words] = word
            self.n_words += 1
        else:
            self.word2count[word] += 1
            
# Turn a Unicode string to plain ASCII, thanks to
# http://stackoverflow.com/a/518232/2809427
def unicodeToAscii(s):
    return ''.join(
        c for c in unicodedata.normalize('NFD', s)
        if unicodedata.category(c) != 'Mn'
    )

# Lowercase, trim, and remove non-letter characters


def normalizeString(s):
    s = unicodeToAscii(s.lower().strip())
    s = re.sub(r"([.!?])", r" \1", s)
    s = re.sub(r"[^a-zA-Z.!?]+", r" ", s)
    return s

def readLangs(lang1, lang2, reverse=False):
    print("Reading lines...")

    # Read the file and split into lines
    lines = open('data/%s-%s.txt' % (lang1, lang2), encoding='utf-8').\
        read().strip().split('\n')

    # Split every line into pairs and normalize
    pairs = [[normalizeString(s) for s in l.split('\t')] for l in lines]

    # Reverse pairs, make Lang instances
    if reverse:
        pairs = [list(reversed(p)) for p in pairs]
        input_lang = Lang(lang2)
        output_lang = Lang(lang1)
    else:
        input_lang = Lang(lang1)
        output_lang = Lang(lang2)

    return input_lang, output_lang, pairs


MAX_LENGTH = 10

eng_prefixes = (
    "i am ", "i m ",
    "he is", "he s ",
    "she is", "she s",
    "you are", "you re ",
    "we are", "we re ",
    "they are", "they re "
)


def filterPair(p):
    return len(p[0].split(' ')) < MAX_LENGTH and \
        len(p[1].split(' ')) < MAX_LENGTH and \
        p[1].startswith(eng_prefixes)


def filterPairs(pairs):
    return [pair for pair in pairs if filterPair(pair)]



In [4]:
def prepareData(lang1, lang2, reverse=False):
    input_lang, output_lang, pairs = readLangs(lang1, lang2, reverse)
    print("Read %s sentence pairs" % len(pairs))
    pairs = filterPairs(pairs)
    print("Trimmed to %s sentence pairs" % len(pairs))
    print("Counting words...")
    for pair in pairs:
        input_lang.addSentence(pair[0])
        output_lang.addSentence(pair[1])
    print("Counted words:")
    print(input_lang.name, input_lang.n_words)
    print(output_lang.name, output_lang.n_words)
    return input_lang, output_lang, pairs


input_lang, output_lang, pairs = prepareData('eng', 'fra', True)
print(random.choice(pairs))

Reading lines...
Read 141382 sentence pairs
Trimmed to 11132 sentence pairs
Counting words...
Counted words:
fra 4540
eng 2953
['il est fatigue de lire .', 'he is tired of reading .']


In [5]:
def indexesFromSentence(lang, sentence):
    return [lang.word2index[word] for word in sentence.split(' ')]


def variableFromSentence(lang, sentence):
    indexes = indexesFromSentence(lang, sentence)
    indexes.append(EOS_token)
    result = Variable(torch.LongTensor(indexes).view(-1, 1))
    if use_cuda:
        return result.cuda()
    else:
        return result


def variablesFromPair(pair):
    input_variable = variableFromSentence(input_lang, pair[0])
    target_variable = variableFromSentence(output_lang, pair[1])
    return (input_variable, target_variable)

In [78]:
hidden_size = 256
learning_rate = 0.001
n_epochs = 1000
enc = EncoderRNN(input_size=input_lang.n_words, hidden_size=hidden_size)

encdec = EncoderDecoder(input_size=input_lang.n_words, hidden_size=hidden_size, 
                        output_size=output_lang.n_words,n_layers=2,SOS_token=SOS_token)





encdecoder_optimizer = optim.SGD(encdec.parameters(), lr=learning_rate)
training_pairs = [variablesFromPair(random.choice(pairs))
                  for i in range(n_epochs)]
criterion = nn.NLLLoss()



In [79]:
print(encdec.parameters())
# params = list(encdec.parameters())
print(len(params))
for param in params:
    print(param.size())

<generator object Module.parameters at 0x7facbf5625c8>
4
torch.Size([4540, 256])
torch.Size([2953, 256])
torch.Size([2953, 256])
torch.Size([2953])


In [83]:
from IPython.core.debugger import Tracer
encdecoder_optimizer.zero_grad()
loss = 0

for epoch in range(1, n_epochs + 1):
    training_pair = training_pairs[epoch - 1]
    input_variable = training_pair[0]
    target_variable = training_pair[1]
    #Tracer()()
    #enc.initAllHiddens()
    #enc.forward(input=input_variable)
    encdec.resetHiddenStates()
    decoder_output = encdec(input_variable, target_variable)
    for di in range(target_variable.size()[0]):
        loss += criterion(decoder_output[0], target_variable[di])
    
    if (epoch+1) % 50 == 0:
        loss.backward()
        encdecoder_optimizer.step()
        print("loss: ", loss)
        encdecoder_optimizer.zero_grad()
        loss = 0



loss:  Variable containing:
 1617.7274
[torch.FloatTensor of size 1]

loss:  Variable containing:
 1722.6593
[torch.FloatTensor of size 1]

loss:  Variable containing:
 1705.7395
[torch.FloatTensor of size 1]

loss:  Variable containing:
 1710.3857
[torch.FloatTensor of size 1]

loss:  Variable containing:
 1669.3408
[torch.FloatTensor of size 1]

loss:  Variable containing:
 1594.0073
[torch.FloatTensor of size 1]

loss:  Variable containing:
 1591.6758
[torch.FloatTensor of size 1]

loss:  Variable containing:
 1753.5499
[torch.FloatTensor of size 1]

loss:  Variable containing:
 1684.4242
[torch.FloatTensor of size 1]

loss:  Variable containing:
 1746.9481
[torch.FloatTensor of size 1]

loss:  Variable containing:
 1664.7195
[torch.FloatTensor of size 1]

loss:  Variable containing:
 1675.6012
[torch.FloatTensor of size 1]

loss:  Variable containing:
 1664.0565
[torch.FloatTensor of size 1]

loss:  Variable containing:
 1814.8243
[torch.FloatTensor of size 1]

loss:  Variable cont

In [66]:
kkcoder = KKcoder()

    
kkcoder.forward(input_variable,target_variable)



hola


(Variable containing:
    24
    25
  2824
   120
   197
  3919
   106
   360
     5
     1
 [torch.LongTensor of size 10x1], Variable containing:
    14
    42
  2408
   540
  2461
   525
  2462
     4
     1
 [torch.LongTensor of size 9x1])