In [12]:
import torch
import torch.nn as nn
import pickle
import random

# Device configuration
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

MAX_LENGTH = 10

# Encoder model
class Encoder(nn.Module):
    def __init__(self, input_size, hidden_size):
        super(Encoder, self).__init__()
        
        self.hidden_size = hidden_size
        
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size)
    
    def forward(self, x, hidden):
        embedded = self.embedding(x).view(1, 1, -1)
        output = embedded
        output, hidden = self.gru(output, hidden)
        return output, hidden
    
    def init_hidden(self):
        return torch.zeros(1, 1, self.hidden_size, device=device)

# Decoder model
class Decoder(nn.Module):
    def __init__(self, hidden_size, output_size):
        super(Decoder, self).__init__()
        self.hidden_size = hidden_size
        
        self.embedding = nn.Embedding(output_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size)
        self.fc = nn.Linear(hidden_size, output_size)
        self.softmax = nn.LogSoftmax(dim=1)
    
    def forward(self, x, hidden):
        embedded = self.embedding(x).view(1, 1, -1)
        output = F.relu(embedded)
        output, hidden = self.gru(output, hidden)
        
        output = self.fc(output[0])
        output = self.softmax(output)
        return output, hidden
    
    def init_hidden(self):
        return torch.zeros(1, 1, self.hidden_size, device=device)

    
# Attention decoder model
class AttnDecoder(nn.Module):
    def __init__(self, hidden_size, output_size, dropout_prob=0.1, max_length=MAX_LENGTH):
        super(AttnDecoder, self).__init__()
        
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.dropout_prob = dropout_prob
        self.max_length = max_length
        
        self.embedding = nn.Embedding(output_size, hidden_size)
        self.attn = nn.Linear(hidden_size*2, self.max_length)
        self.attn_combine = nn.Linear(hidden_size*2, hidden_size)
        
        self.dropout = nn.Dropout(dropout_prob)
        self.gru = nn.GRU(hidden_size, hidden_size)
        self.fc = nn.Linear(hidden_size, output_size)
    
    def forward(self, x, hidden, encoder_outputs):
        embedded = self.embedding(x).view(1, 1, -1)
        embedded = self.dropout(embedded)
        
        attn_input = torch.cat((embedded[0], hidden[0]), dim=1)
        
        attn_weight = F.softmax(self.attn(attn_input), dim=1)
        attn_applied = torch.bmm(attn_weight.unsqueeze(0), encoder_outputs.unsqueeze(0))
        
        attn_combine_input = torch.cat((embedded[0], attn_applied[0]), 1)
    
        output = self.attn_combine(attn_combine_input).unsqueeze(0)
        output = F.relu(output)
        output, hidden = self.gru(output, hidden)
        
        output = self.fc(output[0])
        output = F.log_softmax(output, dim=1)
        
        return output, hidden, attn_weight

    def init_hidden(self):
        return torch.zeros(1, 1, self.hidden_size, device=device)

In [13]:
encoder = torch.load('eng-fra/encoder.pth', map_location=lambda storage, loc: storage)
decoder = torch.load('eng-fra/decoder.pth', map_location=lambda storage, loc: storage)

print(encoder)
print(decoder)

Encoder(
  (embedding): Embedding(2926, 256)
  (gru): GRU(256, 256)
)
AttnDecoder(
  (embedding): Embedding(4490, 256)
  (attn): Linear(in_features=512, out_features=10, bias=True)
  (attn_combine): Linear(in_features=512, out_features=256, bias=True)
  (dropout): Dropout(p=0.1)
  (gru): GRU(256, 256)
  (fc): Linear(in_features=256, out_features=4490, bias=True)
)


  "type " + container_type.__name__ + ". It won't be checked "
  "type " + container_type.__name__ + ". It won't be checked "


In [18]:
# unicode 문자열 --> ASCII 코드로 변환
def unicode_to_ascii(s):
    return ''.join(
        c for c in unicodedata.normalize('NFD', s)
        if unicodedata.category(c) != 'Mn'
    )

# 소문자 변환, trim, 알파벳 아닌 문자 제외
def normalize_string(s):
    s = unicode_to_ascii(s.lower().strip()) # lowercase
    s = re.sub(r"([.!?])", r" \1", s)     # trim
    s = re.sub(r"[^a-zA-Z.!?]+", r" ", s)
    return s

In [19]:
def read_langs(lang1, lang2, reverse=False):
    print("reading lines...")
    
    # txt 파일 읽어와서 line 단위로 잘라내기
    lines = open('data/%s-%s.txt' % (lang1, lang2), encoding='utf-8').        read().strip().split('\n')
        
#     print(lines)
    
    # line 단위를 lang 별로 끊어낸 다음에 normalize
    pairs = [[normalize_string(s) for s in l.split('\t')] for l in lines]
    
#     print(pairs)
    
    # reverse pairs
    # 논문에서 소개된 input sentence의 단어의 순서를 거꾸로 넣는 것과는 별개, 단순히 input-output의 순서쌍을 반대로 하는 것
    if reverse:
        pairs = [list(reversed(p)) for p in pairs]
        input_lang = Lang(lang2)
        output_lang = Lang(lang1)
    else:
        input_lang = Lang(lang1)
        output_lang = Lang(lang2)
    
    return input_lang, output_lang, pairs


# In[13]:


MAX_LENGTH = 10

eng_prefixes = (
    "i am ", "i m ",
    "he is", "he s ",
    "she is", "she s",
    "you are", "you re ",
    "we are", "we re ",
    "they are", "they re "
)

def filter_pair(p):
    return len(p[0].split()) < MAX_LENGTH and len(p[1].split()) < MAX_LENGTH and p[0].startswith(eng_prefixes)

def filter_pairs(pairs):
    return [pair for pair in pairs if filter_pair(pair)]


# In[14]:


def prepare_data(lang1, lang2, reverse=False):
    input_lang, output_lang, pairs = read_langs(lang1, lang2, reverse)
    print("read %d sentence pairs" % len(pairs))
    pairs = filter_pairs(pairs)
    print("trimmed to %d sentence pairs" % len(pairs))
    print("counting words...")
    
    for pair in pairs:
        input_lang.add_sentence(pair[0])
        output_lang.add_sentence(pair[1])
        
    print("counted words:")
    print(input_lang.name, input_lang.n_words)
    print(output_lang.name, output_lang.n_words)
    
    print("\nInput language object information:")
    input_lang.get_infos()
    
    print("\nTarget language object information:")
    output_lang.get_infos()
    
    return input_lang, output_lang, pairs

input_lang, output_lang, pairs = prepare_data(lang1='eng', lang2='fra', reverse=False)

reading lines...


NameError: name 'unicodedata' is not defined

In [14]:
def idxs_from_sentence(lang, sentence):
    out = []
    for word in sentence.split():
        if word in lang.word2idx:
            out.append(lang.word2idx[word])
        else:
            out.append(lang.word2idx['UNK'])
    return out

def tensor_from_sentence(lang, sentence):
    idxs = idxs_from_sentence(lang, sentence)
    idxs.append(EOS_token)
    return torch.tensor(idxs, dtype=torch.long, device=device).view(-1, 1)

def tensors_from_pair(pair):
    input_tensor = tensor_from_sentence(input_lang, pair[0])
    target_tensor = tensor_from_sentence(output_lang, pair[1])
    return (input_tensor, target_tensor)

In [15]:
def evaluate(encoder, decoder, sentence, max_length=MAX_LENGTH):
    # set model test mode
    with torch.no_grad():
        # seq2seq 모델의 한계: 
        # training vocabulary 안에 존재하는 word에 대해서만 forwarding이 가능.
        # 처음 보는 word에 대해서는 <UKN> token 처리함으로서 정보 손실.
        input_tensor = tensor_from_sentence(input_lang, sentence)
        input_length = input_tensor.size(0)
        encoder_hidden = encoder.init_hidden()
        
        encoder_outputs = torch.zeros(max_length, encoder.hidden_size, device=device)
        
        # forward to encoder network
        for idx in range(input_length):
            encoder_output, encoder_hidden = encoder(input_tensor[idx], encoder_hidden)
            encoder_outputs[idx] = encoder_output[0, 0]
            
        decoder_input = torch.tensor([[SOS_token]], device=device)
        decoder_hidden = encoder_hidden
        
        decoded_words = []
        decoder_attentions = torch.zeros(max_length, max_length)
        
        for idx in range(max_length):
            decoder_output, decoder_hidden, decoder_attention = decoder(
                        decoder_input, decoder_hidden, encoder_outputs)
            decoder_attentions[idx] = decoder_attention.data
            top_value, top_idx = decoder_output.data.topk(1)
            
            if top_idx.item() == EOS_token:
                decoded_words.append('<EOS>')
                break
            else:
                decoded_words.append(output_lang.idx2word[top_idx.item()])
            
            decoder_input = top_idx.squeeze().detach()
            
        return decoded_words, decoder_attentions[:idx+1]


# In[ ]:


def random_evaluate(encoder, decoder, samples=10):
    for idx in range(samples):
        pair = random.choice(pairs)
        print("input:", pair[0])
        print("target:", pair[1])
        
        output_words, attentions = evaluate(encoder, decoder, pair[0])
        output_sentence = ' '.join(output_words)
        
        print("output:", output_sentence)
        print()

In [16]:
random_evaluate(encoder, decoder)

NameError: name 'pairs' is not defined