In [1]:
%matplotlib inline

In [2]:
from __future__ import unicode_literals, print_function, division
from io import open
import unicodedata
import string
import re
import random

import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [3]:
# loading data files

SOS_token = 0 # Start Of String
EOS_token = 1 # End of String

class Lang(object):
    def __init__(self, name):
        self.name = name
        self.word2idx = {}
        self.word2count = {}
        self.idx2word = {0: 'SOS', 1: 'EOS'}
        self.n_words = 2
    
    def add_sentence(self, sentence):
        for word in sentence.split():
            self.add_word(word)
        
    def add_word(self, word):
        # word2idx는 SOS, EOS token 없이 단어만 포함 --> idx 2부터 시작(0, 1은 token 자리로 남겨둠)
        # idx2word는 SOS, EOS token 포함한 채로 시작 --> idx 0부터 시작
        if word not in self.word2idx:
            self.word2idx[word] = self.n_words
            self.word2count[word] = 1
            self.idx2word[self.n_words] = word
            self.n_words += 1
        else:
            self.word2count[word] += 1
    
    # test code to check some informations
    def get_infos(self):
        print('name:', self.name)
        print('word2idx:', self.word2idx)
        print('word2count:', self.word2count)
        print('idx2word:', self.idx2word)
        print('n_words:', self.n_words)

In [4]:
# unicode string to ASCII code string
def unicodeToAscii(s):
    return ''.join(
        c for c in unicodedata.normalize('NFD', s)
        if unicodedata.category(c) != 'Mn'
    )

# Lowercase, trim, and remove non-letter characters
def normalizeString(s):
    s = unicodeToAscii(s.lower().strip()) # lowercase
    s = re.sub(r"([.!?])", r" \1", s)     # trim
    s = re.sub(r"[^a-zA-Z.!?]+", r" ", s)
    return s

In [5]:
def read_langs(lang1, lang2, reverse=False):
    print("reading lines...")
    
    # read txt file and split into lines
    lines = open('data/%s-%s.txt' % (lang1, lang2), encoding='utf-8').\
        read().strip().split('\n')
        
    print(lines)
    
    # lines --> (lang1, lang2) pairs
    # and normalize
    pairs = [[normalizeString(s) for s in l.split('\t')] for l in lines]
    
    print(pairs)
    
    # reverse pairs
    # 논문에서 소개된 input sentence의 단어의 순서를 거꾸로 넣는 것과는 별개, 단순히 input-output의 순서쌍을 반대로 하는 것
    if reverse:
        pairs = [list(reversed(p)) for p in pairs]
        input_lang = Lang(lang2)
        output_lang = Lang(lang1)
    else:
        input_lang = Lang(lang1)
        output_lang = Lang(lang2)
    
    return input_lang, output_lang, pairs

In [10]:
MAX_LENGTH = 10

eng_prefixes = (
    "i am ", "i m ",
    "he is", "he s ",
    "she is", "she s",
    "you are", "you re ",
    "we are", "we re ",
    "they are", "they re "
)

def filter_pair(p):
    return len(p[0].split()) < MAX_LENGTH and len(p[1].split()) < MAX_LENGTH

def filter_pairs(pairs):
    return [pair for pair in pairs if filter_pair(pair)]

In [21]:
def prepare_data(lang1, lang2, reverse=False):
    input_lang, output_lang, pairs = read_langs(lang1, lang2, reverse)
    print("read %d sentence pairs" % len(pairs))
    pairs = filter_pairs(pairs)
    print("trimmed to %d sentence pairs" % len(pairs))
    print("counting words...")
    for pair in pairs:
        input_lang.add_sentence(pair[0])
        output_lang.add_sentence(pair[1])
    print("counted words:")
    print(input_lang.name, input_lang.n_words)
    print(output_lang.name, output_lang.n_words)
    
    return input_lang, output_lang, pairs

input_lang, output_lang, pairs = prepare_data('question', 'answer', True)

print("\ninput_lang:")
input_lang.get_infos()
print("\noutput_lang:")
output_lang.get_infos()

print(random.choice(pairs))        

reading lines...
['where is a toliet?\tgo straight and turn left.', 'what is your name?\tmy name is youngin song.', 'how much is it?\tit is 2 dollars.']
[['where is a toliet ?', 'go straight and turn left .'], ['what is your name ?', 'my name is youngin song .'], ['how much is it ?', 'it is dollars .']]
read 3 sentence pairs
trimmed to 3 sentence pairs
counting words...
counted words:
answer 15
question 13

input_lang:
name: answer
word2idx: {'go': 2, 'straight': 3, 'and': 4, 'turn': 5, 'left': 6, '.': 7, 'my': 8, 'name': 9, 'is': 10, 'youngin': 11, 'song': 12, 'it': 13, 'dollars': 14}
word2count: {'go': 1, 'straight': 1, 'and': 1, 'turn': 1, 'left': 1, '.': 3, 'my': 1, 'name': 1, 'is': 2, 'youngin': 1, 'song': 1, 'it': 1, 'dollars': 1}
idx2word: {0: 'SOS', 1: 'EOS', 2: 'go', 3: 'straight', 4: 'and', 5: 'turn', 6: 'left', 7: '.', 8: 'my', 9: 'name', 10: 'is', 11: 'youngin', 12: 'song', 13: 'it', 14: 'dollars'}
n_words: 15

output_lang:
name: question
word2idx: {'where': 2, 'is': 3, 'a'

In [22]:
# define models

# Encoder model
class Encoder(nn.Module):
    def __init__(self, input_size, hidden_size):
        super(Encoder, self).__init__()
        
        self.hidden_size = hidden_size
        
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size)
    
    def forward(self, x, hidden):
        embedded = self.embedding(x).view(1, 1, -1)
        output = embedded
        output, hidden = self.gru(output, hidden)
        return output, hidden
    
    def init_hidden(self):
        return torch.zeros(1, 1, self.hidden_size, device=device)

# Decoder model
class Decoder(nn.Module):
    def __init__(self, hidden_size, output_size):
        super(Decoder, self).__init__()
        self.hidden_size = hidden_size
        
        self.embedding = nn.Embedding(output_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size)
        self.fc = nn.Linear(hidden_size, output_size)
        self.softmax = nn.LogSoftmax(dim=1)
    
    def forward(self, x, hidden):
        embedded = self.embedding(x).view(1, 1, -1)
        output = F.relu(embedded)
        output, hidden = self.gru(output, hidden)
        output = self.softmax(self.fc(output[0]))
        
        return output, hidden
    
    def init_hidden(self):
        return torch.zeros(1, 1, self.hidden_size, device=device)

    
# Attention decoder model
class AttnDecoder(nn.Module):
    def __init__(self, hidden_size, output_size, dropout_prob=0.1, max_length=MAX_LENGTH):
        super(AttnDecoder, self).__init__()
        
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.dropout_prob = dropout_prob
        self.max_length = max_length
        
        self.embedding = nn.Embedding(output_size, hidden_size)
        self.attn = nn.Linear(hidden_size*2, self.max_length)
        self.attn_combine = nn.Linear(hidden_size*2, hidden_size)
        
        self.dropout = nn.Dropout(dropout_prob)
        self.gru = nn.GRU(hidden_size, hidden_size)
        self.fc = nn.Linear(hidden_size, output_size)
    
    def forward(self, x, hidden, encoder_outputs):
        embedded = self.embedding(x).view(1, 1, -1)
        embedded = self.dropout(embedded)
        
        attn_weight = F.softmax(
                        self.attn(torch.cat((embedded[0], hidden[0]), 1)), dim=1)
        attn_applied = torch.bmm(attn_weight.unsqueeze(0), encoder_outputs.unsqueeze(0))
        
        output = torch.cat((embedded[0], attn_applied[0]), 1)
        output = self.attn_combine(output).unsqueeze(0)
        
        output = F.relu(output)
        output, hidden = self.gru(output, hidden)
        output = self.fc(output[0])
        
        output = F.log_softmax(output, dim=1)
        
        return output, hidden, attn_weight

    def init_hidden(self):
        return torch.zeros(1, 1, self.hidden_size, device=device)

In [23]:
def idxs_from_sentence(lang, sentence):
    return [lang.word2idx[word] for word in sentence.split()]

def tensor_from_sentence(lang, sentence):
    idxs = idxs_from_sentence(lang, sentence)
    idxs.append(EOS_token)
    return torch.tensor(idxs, dtype=torch.long, device=device).view(-1, 1)

def tensors_from_pair(pair):
    input_tensor = tensor_from_sentence(input_lang, pair[0])
    target_tensor = tensor_from_sentence(output_lang, pair[1])
    return (input_tensor, target_tensor)

In [34]:
teacher_forcing_ratio = 0.5

# unit forward-backward function
# return unit loss
def unit_trainer(input_tensor, target_tensor, 
         encoder, decoder,
         encoder_optimizer, decoder_optimizer,
         criterion, max_length=MAX_LENGTH):
    
    encoder_hidden = encoder.init_hidden()
    
    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()
    
    input_length = input_tensor.size(0)
    target_length = target_tensor.size(0)
    
    encoder_outputs = torch.zeros(max_length, encoder.hidden_size, device=device)
    
    loss = 0.
    
    print("input tensor:", input_tensor)
    print("target tensor:", target_tensor)
    
    # make encoder output list
    for idx in range(input_length):
        encoder_output, encoder_hidden = encoder(input_tensor[idx], encoder_hidden)
#         print("encoder output:", encoder_output)
        
        encoder_outputs[idx] = encoder_output[0, 0]
#         print("encoder outputs:", encoder_outputs)
    print("encoder outputs:", encoder_outputs)
    print("\n")
    
    decoder_input = torch.tensor([[SOS_token]], device=device)
    print("decoder input:", decoder_input)
    
    decoder_hidden = encoder_hidden
    
    use_teacher_forcing = True if random.random() < teacher_forcing_ratio else False
    
    if use_teacher_forcing:
        # Teacher forcing: Feed the target as the next input
        for idx in range(target_length):
            decoder_output, decoder_hidden, decoder_attention = decoder(
                decoder_input, decoder_hidden, encoder_outputs)
#             print("decoder output:", decoder_output)
            loss += criterion(decoder_output, target_tensor[idx])
            decoder_input = target_tensor[idx]
#             print("decoder input:", decoder_input)
        print("decoder output:", decoder_output)    
        print("\n")

            
    else:
        for idx in range(target_length):
            decoder_output, decoder_hidden, decodr_attention = decoder(
                decoder_input, decoder_hidden, encoder_outputs)
#             print("decoder output:", decoder_output)

            top_value, top_idx = decoder_output.topk(1)
            decoder_input = top_idx.squeeze().detach()
#             print("decoder input:", decoder_input)
            
            loss += criterion(decoder_output, target_tensor[idx])
            
            if decoder_input.item() == EOS_token:
                break
        print("decoder output:", decoder_output)
        print("\n")

                
    loss.backward()
    encoder_optimizer.step()
    decoder_optimizer.step()
    
    return loss.item() / target_length

In [35]:
import time
import math


def asMinutes(s):
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)


def timeSince(since, percent):
    now = time.time()
    s = now - since
    es = s / (percent)
    rs = es - s
    return '%s (- %s)' % (asMinutes(s), asMinutes(rs))

In [36]:
import numpy as np

def trainer(encoder, decoder, num_epochs,
           log_interval, lr):
    
    start_time = time.time()
    
    encoder_optimizer = optim.SGD(encoder.parameters(), lr=lr)
    decoder_optimizer = optim.SGD(decoder.parameters(), lr=lr)
    
    training_pairs = [tensors_from_pair(random.choice(pairs)) for i in range(num_epochs)]
    criterion = nn.NLLLoss()
    
    print("Learning started!!!")
    
    for epoch in range(num_epochs):
        losses = []
        training_pair = training_pairs[epoch]
        input_tensor = training_pair[0]
        target_tensor = training_pair[1]
        
        loss = unit_trainer(input_tensor, target_tensor,
                           encoder, decoder,
                           encoder_optimizer, decoder_optimizer, criterion)
        losses.append(loss)
        
        if (epoch+1) % log_interval == 0:
            avg_loss = np.mean(losses)
            print("%s (%d %.3f%%) %.3f"
                 % (timeSince(start_time, (epoch+1) / num_epochs),
                   epoch+1, float(epoch+1) / num_epochs * 100, avg_loss))
            
    print("Learning finished!!!")
    
    torch.save(encoder.state_dict(), "question-answer/encoder.pth")
    torch.save(decoder.state_dict(), "question-answer/decoder.pth")

In [37]:
hidden_size = 256

encoder = Encoder(input_lang.n_words, hidden_size).to(device)
attn_decoder = AttnDecoder(hidden_size, output_lang.n_words, dropout_prob=0.1).to(device)

trainer(encoder, attn_decoder, num_epochs=100, log_interval=25, lr=0.01)

Learning started!!!
input tensor: tensor([[ 2],
        [ 3],
        [ 4],
        [ 5],
        [ 6],
        [ 7],
        [ 1]])
target tensor: tensor([[ 2],
        [ 3],
        [ 4],
        [ 5],
        [ 6],
        [ 1]])
encoder outputs: tensor([[-0.3362,  0.3656, -0.2953,  ..., -0.1277, -0.0241,  0.3890],
        [-0.1145, -0.0253,  0.0996,  ..., -0.0032,  0.1761,  0.4350],
        [-0.3287, -0.1285, -0.0654,  ..., -0.0286,  0.4117,  0.1502],
        ...,
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000]])


decoder input: tensor([[ 0]])
decoder output: tensor([[-2.4497, -2.5639, -2.5796, -2.4772, -2.5983, -2.7614, -2.4992,
         -2.6051, -2.6257, -2.4740, -2.5112, -2.6163, -2.6259]])


input tensor: tensor([[ 13],
        [ 10],
        [ 14],
        [  7],
        [  1]])
target tensor: tensor([[ 10],
        [

decoder output: tensor([[-2.8769, -2.0770, -2.9612, -2.2565, -2.7767, -2.8304, -2.1755,
         -2.7273, -2.3482, -2.4585, -3.0244, -2.7473, -2.6889]])


input tensor: tensor([[ 13],
        [ 10],
        [ 14],
        [  7],
        [  1]])
target tensor: tensor([[ 10],
        [ 11],
        [  3],
        [ 12],
        [  6],
        [  1]])
encoder outputs: tensor([[-0.1487,  0.0069, -0.2060,  ...,  0.1853, -0.0380, -0.0913],
        [-0.3493, -0.0416, -0.1967,  ..., -0.1298,  0.1996, -0.3383],
        [-0.1276,  0.1145,  0.0279,  ..., -0.1670,  0.3725, -0.0572],
        ...,
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000]])


decoder input: tensor([[ 0]])
decoder output: tensor([[-2.7902, -2.0074, -2.8937, -2.2660, -2.6559, -2.6783, -2.1938,
         -2.6914, -2.5491, -2.5807, -2.9681, -2.7774, -2.8220]])


input tenso

input tensor: tensor([[ 13],
        [ 10],
        [ 14],
        [  7],
        [  1]])
target tensor: tensor([[ 10],
        [ 11],
        [  3],
        [ 12],
        [  6],
        [  1]])
encoder outputs: tensor([[-0.1514,  0.0087, -0.2147,  ...,  0.1777, -0.0457, -0.0929],
        [-0.3524, -0.0460, -0.1971,  ..., -0.1048,  0.1957, -0.3376],
        [-0.1323,  0.1172, -0.0024,  ..., -0.1491,  0.3701, -0.0613],
        ...,
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000]])


decoder input: tensor([[ 0]])
decoder output: tensor([[-3.6208, -1.5572, -3.5913, -2.2905, -3.0514, -2.9452, -1.5230,
         -3.3447, -2.5494, -2.3575, -3.7708, -3.1806, -3.1431]])


input tensor: tensor([[ 13],
        [ 10],
        [ 14],
        [  7],
        [  1]])
target tensor: tensor([[ 10],
        [ 11],
        [  3],
        [ 12],
 

decoder output: tensor([[-4.4286, -1.0246, -4.4605, -2.8825, -3.6810, -3.5609, -1.2957,
         -4.2664, -2.7196, -2.5814, -4.6208, -3.8596, -3.0417]])


input tensor: tensor([[ 13],
        [ 10],
        [ 14],
        [  7],
        [  1]])
target tensor: tensor([[ 10],
        [ 11],
        [  3],
        [ 12],
        [  6],
        [  1]])
encoder outputs: tensor([[-0.1549,  0.0090, -0.2243,  ...,  0.1671, -0.0499, -0.0942],
        [-0.3573, -0.0478, -0.2001,  ..., -0.1012,  0.1941, -0.3375],
        [-0.1369,  0.1273, -0.0389,  ..., -0.1488,  0.3687, -0.0655],
        ...,
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000]])


decoder input: tensor([[ 0]])
decoder output: tensor([[-4.3121, -1.0206, -4.3785, -3.0281, -3.6006, -3.3543, -1.1253,
         -4.1252, -3.1376, -2.9204, -4.4840, -3.8384, -3.4364]])


input tenso

decoder output: tensor([[-5.0930, -0.7557, -5.2605, -3.7988, -4.3136, -3.9028, -1.0160,
         -5.1107, -3.7970, -3.4510, -5.5600, -4.6632, -3.5921]])


input tensor: tensor([[ 2],
        [ 3],
        [ 4],
        [ 5],
        [ 6],
        [ 7],
        [ 1]])
target tensor: tensor([[ 2],
        [ 3],
        [ 4],
        [ 5],
        [ 6],
        [ 1]])
encoder outputs: tensor([[-0.3355,  0.3670, -0.2956,  ..., -0.1468, -0.0338,  0.3858],
        [-0.1048, -0.0255,  0.0978,  ..., -0.0052,  0.1921,  0.4354],
        [-0.3151, -0.1386, -0.0671,  ..., -0.0154,  0.4313,  0.1648],
        ...,
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000]])


decoder input: tensor([[ 0]])
decoder output: tensor([[-4.9110, -0.5126, -5.0485, -3.8972, -4.2717, -3.8117, -1.4165,
         -4.8149, -4.0137, -3.6052, -5.2443, -4.5190, -3.9717

input tensor: tensor([[ 2],
        [ 3],
        [ 4],
        [ 5],
        [ 6],
        [ 7],
        [ 1]])
target tensor: tensor([[ 2],
        [ 3],
        [ 4],
        [ 5],
        [ 6],
        [ 1]])
encoder outputs: tensor([[-0.3353,  0.3662, -0.2962,  ..., -0.1477, -0.0367,  0.3889],
        [-0.1011, -0.0279,  0.0959,  ..., -0.0042,  0.1947,  0.4376],
        [-0.3094, -0.1433, -0.0687,  ..., -0.0160,  0.4370,  0.1677],
        ...,
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000]])


decoder input: tensor([[ 0]])
decoder output: tensor([[-5.3462, -0.5306, -5.9845, -4.4340, -3.8537, -3.1282, -1.4079,
         -5.5883, -3.9614, -3.4943, -6.0721, -5.2733, -3.8016]])


input tensor: tensor([[  8],
        [  9],
        [ 10],
        [ 11],
        [ 12],
        [  7],
        [  1]])
target tensor: tensor([[ 7],


input tensor: tensor([[ 2],
        [ 3],
        [ 4],
        [ 5],
        [ 6],
        [ 7],
        [ 1]])
target tensor: tensor([[ 2],
        [ 3],
        [ 4],
        [ 5],
        [ 6],
        [ 1]])
encoder outputs: tensor([[-0.3350,  0.3643, -0.2958,  ..., -0.1486, -0.0388,  0.3902],
        [-0.0977, -0.0328,  0.0965,  ..., -0.0029,  0.1939,  0.4388],
        [-0.3038, -0.1510, -0.0681,  ..., -0.0144,  0.4386,  0.1668],
        ...,
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000]])


decoder input: tensor([[ 0]])
decoder output: tensor([[-6.1254, -0.4151, -7.0478, -5.7468, -5.0967, -3.8528, -1.2638,
         -6.4683, -5.4112, -4.6163, -6.8455, -6.1974, -5.3984]])


input tensor: tensor([[ 13],
        [ 10],
        [ 14],
        [  7],
        [  1]])
target tensor: tensor([[ 10],
        [ 11],
        [  3],

decoder output: tensor([[-7.1400, -0.1521, -8.0602, -6.6117, -6.5223, -5.3453, -2.1034,
         -7.5251, -6.4227, -5.3455, -8.1468, -7.1961, -6.0267]])


0m 6s (- 0m 2s) (75 75.000%) 0.772
input tensor: tensor([[  8],
        [  9],
        [ 10],
        [ 11],
        [ 12],
        [  7],
        [  1]])
target tensor: tensor([[ 7],
        [ 3],
        [ 8],
        [ 9],
        [ 6],
        [ 1]])
encoder outputs: tensor([[-0.1420,  0.1396, -0.0114,  ...,  0.0938,  0.2290,  0.1740],
        [-0.3958,  0.4827,  0.0565,  ...,  0.1124, -0.0863,  0.2087],
        [-0.5111,  0.3040, -0.0661,  ..., -0.0924,  0.2113, -0.0845],
        ...,
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000]])


decoder input: tensor([[ 0]])
decoder output: tensor([[-7.0732, -0.1350, -8.0352, -6.6805, -6.5144, -5.4007, -2.2367,
         -7.5994, -

input tensor: tensor([[  8],
        [  9],
        [ 10],
        [ 11],
        [ 12],
        [  7],
        [  1]])
target tensor: tensor([[ 7],
        [ 3],
        [ 8],
        [ 9],
        [ 6],
        [ 1]])
encoder outputs: tensor([[-0.1448,  0.1395, -0.0111,  ...,  0.0976,  0.2307,  0.1785],
        [-0.3994,  0.4828,  0.0620,  ...,  0.1227, -0.0917,  0.2182],
        [-0.5143,  0.3027, -0.0578,  ..., -0.0724,  0.2102, -0.0746],
        ...,
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000]])


decoder input: tensor([[ 0]])
decoder output: tensor([[-7.6777, -0.0867, -9.0133, -7.7424, -7.3580, -5.7890, -2.6218,
         -8.2763, -7.3366, -5.7271, -8.7233, -8.0057, -6.8920]])


input tensor: tensor([[  8],
        [  9],
        [ 10],
        [ 11],
        [ 12],
        [  7],
        [  1]])
target tensor: tensor(

input tensor: tensor([[ 13],
        [ 10],
        [ 14],
        [  7],
        [  1]])
target tensor: tensor([[ 10],
        [ 11],
        [  3],
        [ 12],
        [  6],
        [  1]])
encoder outputs: tensor([[-0.1690,  0.0109, -0.2498,  ...,  0.1261, -0.0622, -0.0968],
        [-0.3897, -0.0395, -0.2264,  ..., -0.0929,  0.1850, -0.3342],
        [-0.1711,  0.2211, -0.2624,  ..., -0.1667,  0.3770, -0.0988],
        ...,
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000]])


decoder input: tensor([[ 0]])
decoder output: tensor([[-7.9132, -0.0583, -9.2434, -8.0263, -7.4612, -5.8520, -3.0357,
         -8.5893, -7.5251, -6.1207, -9.2136, -8.4829, -6.7814]])


input tensor: tensor([[  8],
        [  9],
        [ 10],
        [ 11],
        [ 12],
        [  7],
        [  1]])
target tensor: tensor([[ 7],
        [ 3],
   

In [38]:
def evaluate(encoder, decoder, sentence, max_length=MAX_LENGTH):
    # set model test mode
    with torch.no_grad():
        input_tensor = tensor_from_sentence(input_lang, sentence)
        input_length = input_tensor.size(0)
        encoder_hidden = encoder.init_hidden()
        
        encoder_outputs = torch.zeros(max_length, encoder.hidden_size, device=device)
        
        # forward to encoder network
        for idx in range(input_length):
            encoder_output, encoder_hidden = encoder(input_tensor[idx], encoder_hidden)
            encoder_outputs[idx] = encoder_output[0, 0]
            
        decoder_input = torch.tensor([[SOS_token]], device=device)
        decoder_hidden = encoder_hidden
        
        decoded_words = []
        decoder_attentions = torch.zeros(max_length, max_length)
        
        for idx in range(max_length):
            decoder_output, decoder_hidden, decoder_attention = decoder(
                        decoder_input, decoder_hidden, encoder_outputs)
            decoder_attentions[idx] = decoder_attention.data
            top_value, top_idx = decoder_output.data.topk(1)
            
            if top_idx.item() == EOS_token:
                decoded_words.append('<EOS>')
            else:
                decoded_words.append(output_lang.idx2word[top_idx.item()])
            
            decoder_input = top_idx.squeeze().detach()
            
        return decoded_words, decoder_attentions[:idx+1]

In [39]:
def random_evaluate(encoder, decoder, samples=10):
    for idx in range(samples):
        pair = random.choice(pairs)
        print("input:", pair[0])
        print("target:", pair[1])
        
        output_words, attentions = evaluate(encoder, decoder, pair[0])
        output_sentence = ' '.join(output_words)
        
        print("output:", output_sentence)
        print()

In [40]:
random_evaluate(encoder, attn_decoder)

input: my name is youngin song .
target: what is your name ?
output: what is your name ? <EOS> <EOS> <EOS> ? <EOS>

input: my name is youngin song .
target: what is your name ?
output: what is your name ? <EOS> <EOS> <EOS> ? <EOS>

input: my name is youngin song .
target: what is your name ?
output: what is your name ? <EOS> <EOS> <EOS> ? <EOS>

input: my name is youngin song .
target: what is your name ?
output: what is your name ? <EOS> <EOS> <EOS> ? <EOS>

input: go straight and turn left .
target: where is a toliet ?
output: what is your name ? <EOS> <EOS> <EOS> ? <EOS>

input: it is dollars .
target: how much is it ?
output: what is your name ? <EOS> <EOS> <EOS> ? <EOS>

input: it is dollars .
target: how much is it ?
output: what is your name ? <EOS> <EOS> ? <EOS> <EOS>

input: it is dollars .
target: how much is it ?
output: what is your name ? <EOS> <EOS> ? <EOS> <EOS>

input: go straight and turn left .
target: where is a toliet ?
output: what is your name ? <EOS> <EOS> <EOS> 