In [1]:
import argparse
import numpy as np
import torch
import torch.nn as nn

利用seq2seq，寻找单词的反义词

S: decoding input starting

E: decoding output starting

P: fill blank

In [2]:
n_step = 5
n_hidden = 128
char_arr = [c for c in 'SEPabcdefghijklmnopqrstuvwxyz']
num_dic = {n:i for i,n in enumerate(char_arr)}
seq_data = [['man', 'women'], ['black', 'white'], ['king', 'queen'], ['girl', 'boy'], ['up', 'down'], ['high', 'low']]
n_class = len(num_dic) # 29
batch_size = len(seq_data) # 6

In [3]:
def make_batch():
    input_batch, output_batch, target_batch = [], [], []
    for seq in seq_data:
        for i in range(2):
            seq[i] = seq[i] + 'P' * (n_step - len(seq[i]))
        input = [num_dic[n] for n in seq[0]]
        output = [num_dic[n] for n in ('S' + seq[1])]
        target = [num_dic[n] for n in (seq[1] + 'E')]
        
        input_batch.append(np.eye(n_class)[input]) # 6,5,29
        output_batch.append(np.eye(n_class)[output]) # 6,6,29
        target_batch.append(target) # 6,6 
        # 利用input的encoding，和output的前一个字母，推断当前的字母，进行训练模型参数
    
    return torch.FloatTensor(input_batch), torch.FloatTensor(output_batch), torch.LongTensor(target_batch)

In [4]:
input_batch, output_batch, target_batch = make_batch()

In [6]:
class Seq2Seq(nn.Module):
    def __init__(self):
        super(Seq2Seq, self).__init__()
        self.enc_cell = nn.RNN(input_size=n_class, hidden_size=n_hidden, dropout=0.5)
        self.dec_cell = nn.RNN(input_size=n_class, hidden_size=n_hidden, dropout=0.5)
        self.fc = nn.Linear(n_hidden, n_class)
    
    def forward(self, enc_input, enc_hidden, dec_input):
        enc_input = enc_input.transpose(0, 1)  # enc_input: max_len=(n_step, time_step), batch_size, n_class
        dec_input = dec_input.transpose(0, 1)
        
        _, enc_states = self.enc_cell(enc_input, enc_hidden)
        # enc_states: [num_layers(=1) * num_directions(=1), batch_size, n_hidden]
        outputs, _ = self.dec_cell(dec_input, enc_states)
        # outputs: [max_len+1(=6), batch_size, num_directions(=1) * n_hidden(=128)]
        
        model = self.fc(outputs) # model: [max_len+1(=6), batch_size, n_class]
        return model

In [7]:
model = Seq2Seq()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

  "num_layers={}".format(dropout, num_layers))


In [8]:
for epoch in range(5000):
    hidden = torch.zeros(1, batch_size, n_hidden)
    optimizer.zero_grad()
    output = model(input_batch, hidden, output_batch)
    output = output.transpose(0, 1)
    loss = 0
    for i in range(0, len(target_batch)):
        loss += criterion(output[i], target_batch[i])
    if (epoch+1)%1000 == 0:
        print(epoch, loss)
    loss.backward()
    optimizer.step()

999 tensor(0.0033, grad_fn=<AddBackward0>)
1999 tensor(0.0009, grad_fn=<AddBackward0>)
2999 tensor(0.0004, grad_fn=<AddBackward0>)
3999 tensor(0.0002, grad_fn=<AddBackward0>)
4999 tensor(0.0001, grad_fn=<AddBackward0>)


In [20]:
hidden = torch.zeros(1, 1, 128)
for i in range(6):
    input_batch_one = input_batch[i].unsqueeze(0)
    #output_batch_one = output_batch[i].unsqueeze(0)
    test_batch = [np.eye(n_class)[[num_dic[n] for n in 'SPPPPP']]]
    test_batch = torch.FloatTensor(test_batch)
    output = model(input_batch_one, hidden, test_batch)
    # 利用已经训练好的模型进行预测时，就不需要output_batch,这里为了以示区别，用test_batch替代
    predict = output.data.max(2, keepdim=True)[1]
    decoded = [char_arr[i] for i in predict]
    end = decoded.index('E')
    translated = ''.join(decoded[:end])
    print(seq_data[i][0].replace('P',''),'--> ',translated.replace('P',''))

man -->  women
black -->  white
king -->  queen
girl -->  boy
up -->  down
high -->  low
