<a href="https://colab.research.google.com/github/sourcecode369/transformers-tutorials/blob/master/Seq2Seq/Seq2Seq_Change_Word.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim

In [2]:
n_step = 5
n_hidden = 128

char_arr = [c for c in 'SEPabcdefghijklmnopqrstuvwxyz']
num_dic = {n:i for i, n in enumerate(char_arr)}
seq_data = [['man', 'women'], ['black', 'white'], ['king', 'queen'], ['girl', 'boy'], ['up', 'down'], ['high', 'low']]
n_class = len(num_dic)
batch_size = len(seq_data)

In [5]:
class Seq2Seq(nn.Module):
    
    def __init__(self):
        super(Seq2Seq, self).__init__()
        self.enc_cell = nn.RNN(input_size=n_class, hidden_size=n_hidden, dropout=0.5)
        self.dec_cell = nn.RNN(input_size=n_class, hidden_size=n_hidden, dropout=0.5)
        self.fc = nn.Linear(n_hidden, n_class)
    
    def forward(self, enc_input, enc_hidden, dec_input):
        enc_input = enc_input.transpose(0, 1) # enc_input: [max_len(=n_step, time step), batch_size, n_class]
        dec_input = dec_input.transpose(0, 1) # dec_input: [max_len(=n_step, time step), batch_size, n_class]

        # enc_states : [num_layers(=1) * num_directions(=1), batch_size, n_hidden]
        _, enc_states = self.enc_cell(enc_input, enc_hidden)
        # outputs : [max_len+1(=6), batch_size, num_directions(=1) * n_hidden(=128)]
        outputs, _ = self.dec_cell(dec_input, enc_states)
        model = self.fc(outputs) # model : [max_len+1(=6), batch_size, n_class]
        return model

In [6]:
model = Seq2Seq()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

  "num_layers={}".format(dropout, num_layers))


In [7]:
def make_batch():
    input_batch, output_batch, target_batch = [], [], []
    for seq in seq_data:
        for i in range(2):
            seq[i] = seq[i] + 'P' * (n_step - len(seq[i]))
        input = [num_dic[n] for n in seq[0]]
        output = [num_dic[n] for n in ('S' + seq[1])]
        target = [num_dic[n] for n in (seq[1] + 'E')]
        input_batch.append(np.eye(n_class)[input])
        output_batch.append(np.eye(n_class)[output])
        target_batch.append(target) # not one-hot

    # make tensor
    return torch.FloatTensor(input_batch), torch.FloatTensor(output_batch), torch.LongTensor(target_batch)

In [8]:
input_batch, output_batch, target_batch = make_batch()

In [39]:
for epoch in range(10000):
    # make hidden shape [num_layers * num_directions, batch_size, n_hidden]
    hidden = torch.zeros(1, batch_size, n_hidden)

    optimizer.zero_grad()
    # input_batch : [batch_size, max_len(=n_step, time step), n_class]
    # output_batch : [batch_size, max_len+1(=n_step, time step) (becase of 'S' or 'E'), n_class]
    # target_batch : [batch_size, max_len+1(=n_step, time step)], not one-hot
    output = model(input_batch, hidden, output_batch)
    # output : [max_len+1, batch_size, n_class]
    output = output.transpose(0, 1) # [batch_size, max_len+1(=6), n_class]
    loss = 0
    for i in range(0, len(target_batch)):
        # output[i] : [max_len+1, n_class, target_batch[i] : max_len+1]
        loss += criterion(output[i], target_batch[i])
    if (epoch + 1) % 1000 == 0:
        print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))
    loss.backward()
    optimizer.step()

Epoch: 1000 cost = 0.000060
Epoch: 2000 cost = 0.000035
Epoch: 3000 cost = 0.000020
Epoch: 4000 cost = 0.000012
Epoch: 5000 cost = 0.000007
Epoch: 6000 cost = 0.000004
Epoch: 7000 cost = 0.000002
Epoch: 8000 cost = 0.000001
Epoch: 9000 cost = 0.000001
Epoch: 10000 cost = 0.000000


In [52]:
def translate():
    input_batch, output_batch, _ = make_batch()
    # make hidden shape [num_layers * num_directions, batch_size, n_hidden]
    hidden = torch.zeros(1, batch_size, n_hidden)
    output = model(input_batch, hidden, output_batch)
    # output : [max_len+1(=6), batch_size(=1), n_class]
    output = output.transpose(0, 1)
    translations = []
    predict = output.data.max(2, keepdim=True)[1] # select n_class dimension
    for pred in predict:
        decoded = [char_arr[i] for i in pred.squeeze()]
        end = decoded.index('E')
        translated = ''.join(decoded[:end])
        translated = translated.replace('P', '')
        translations.append(translated)
    return translations

In [53]:
translate()

['women', 'white', 'queen', 'boy', 'down', 'low']