In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as Data
import random

In [2]:
char_arr = [c for c in 'SEPabcdefghijklmnopqrstuvwxyz']
seq_data = [['man', 'women'], ['black', 'white'], ['king', 'queen'], ['girl', 'boy'], ['up', 'down'], ['high', 'low']]
char2idx = {c:i for i,c in enumerate(char_arr)}
idx2char = {i:c for i,c in enumerate(char_arr)}
n_class = len(char2idx)
max_seq = max([len(word) for word in np.array(seq_data).reshape(-1)])


In [3]:
def make_batch():
    inputs,outputs,targets = [],[],[]
    for seq in seq_data:
        for i in range(2):
            seq[i] = seq[i]+ 'P'*(max_seq-len(seq[i]))
        
        input = [char2idx[k] for k in seq[0]]
        output = [char2idx[k] for k in ('S'+seq[1])]
        target = [char2idx[k] for k in (seq[1]+'E')]
        
        inputs.append(np.eye(n_class)[input])
        outputs.append(np.eye(n_class)[output])
        targets.append(target)
    return torch.FloatTensor(inputs),torch.FloatTensor(outputs),torch.LongTensor(targets)

            

In [4]:
n_hidden = 128


In [5]:
class Encoder(nn.Module):
    def __init__(self,input_size,hidden_size,num_layers):
        super(Encoder,self).__init__()
        self.lstm = nn.LSTM(input_size,hidden_size,num_layers,batch_first=True)
    def forward(self,X):
        outputs,(h,c) = self.lstm(X)
        return h,c

In [6]:
class Decoder(nn.Module):
    def __init__(self,input_size,hidden_size,num_layers):
        super(Decoder,self).__init__()
        self.lstm = nn.LSTM(input_size,hidden_size,num_layers,batch_first=True)
        self.fc = nn.Linear(hidden_size,input_size)
    def forward(self,X,h,c):
        # X [batch_size,input_size]
        X = X.unsqueeze(1)   #X[batch_size,1,input_size]
        outputs,(h,c) = self.lstm(X,(h,c))   #outputs [batch_size,1,hidden_size]
        outputs  = outputs.squeeze(1) #outputs [batch_size,hidden_size]
        output = self.fc(outputs)
        return output,h,c
        

In [7]:
class Seq2seq(nn.Module):
    def  __init__(self,encoder,decoder):
        super(Seq2seq,self).__init__()
        self.encoder = encoder
        self.decoder = decoder
    def forward(self,src, target,teacher_forcing_ratio=0.5):
        #src [batch_size,seq,input_dim]
        #target [batch_size,seq,input_dim]
        batch_size = target.size()[0]
        max_seq = target.size()[1]
        input_dim = target.size()[2]
        
        h,c = self.encoder(src)
        outputs = torch.zeros(batch_size,max_seq,input_dim).cuda()
        input = target[:,0,:]
        for i in range(max_seq):
            output,h,c = self.decoder(input,h,c)
            outputs[:,i,:] = output
            teacher_force = random.random() < teacher_forcing_ratio
            input = target[:,i,:] if teacher_force else output
        return outputs
        
        
        

In [51]:
inputs,outputs,targets = make_batch()
dataset = Data.TensorDataset(inputs,outputs,targets)
loader = Data.DataLoader(dataset,3,True)

In [9]:
encoder = Encoder(n_class,n_hidden,1).cuda()
decoder = Decoder(n_class,n_hidden,1).cuda()
model = Seq2seq(encoder,decoder).cuda()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(),1.e-3)


In [10]:
for epoch in range(2000):
    for input,output,target in loader:
        pred = model(input.cuda(),output.cuda(),0.5)
        loss = criterion(pred.view(-1,pred.size(-1)),target.cuda().view(-1))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if (epoch+1) %10 ==0:
            print("epoch = %04d,loss = %.5f"%(epoch,loss),end="\r")

epoch = 1999,loss = 0.00044

In [40]:
model.eval()
for input,output,target in loader:
    pred = model(input.cuda(),output.cuda(),0)
    a=np.argmax(pred.cpu().data,axis=-1)
    for l in a:
        print([idx2char[int(i)] for i in l])


['d', 'o', 'w', 'n', 'P', 'E']
['l', 'o', 'w', 'P', 'P', 'E']
['q', 'u', 'e', 'e', 'n', 'E']
['w', 'h', 'i', 't', 'e', 'E']
['w', 'o', 'm', 'e', 'n', 'E']
['b', 'o', 'y', 'P', 'P', 'E']


In [47]:
model.eval()
for input,output,target in loader:
    h,c = model.encoder(input.cuda())
    batch_size = output.size()[0]
    max_seq = output.size()[1]
    input_dim = output.size()[2]
    outputs = torch.zeros(batch_size,max_seq,input_dim).cuda()
    inp = output[:,0,:].cuda()
    for i in range(max_seq):
        o,h,c =model.decoder(inp,h,c)
        outputs[:,i,:] = o
        inp = o
    a=np.argmax(outputs.cpu().data,axis=-1)
    for l in a:
        print([idx2char[int(i)] for i in l])
        

['q', 'u', 'e', 'e', 'n', 'E']
['w', 'h', 'i', 't', 'e', 'E']
['w', 'o', 'm', 'e', 'n', 'E']
['b', 'o', 'y', 'P', 'P', 'E']
['l', 'o', 'w', 'P', 'P', 'E']
['d', 'o', 'w', 'n', 'P', 'E']


In [52]:
model.eval()
h,c = model.encoder(inputs.cuda())
print(h.size())
batch_size = outputs.size()[0]
max_seq = outputs.size()[1]
input_dim = outputs.size()[2]
outputs = torch.zeros(batch_size,max_seq,input_dim).cuda()
inp = outputs[:,0,:].cuda()
print(inp.size())
for i in range(max_seq):
    o,h,c =model.decoder(inp,h,c)
    outputs[:,i,:] = o
    inp = o
a=np.argmax(outputs.cpu().data,axis=-1)
for l in a:
    print([idx2char[int(i)] for i in l])


torch.Size([1, 6, 128])
torch.Size([6, 29])
['w', 'o', 'm', 'e', 'n', 'E', 'E']
['w', 'h', 'i', 't', 'e', 'E', 'E']
['q', 'u', 'e', 'e', 'n', 'E', 'E']
['b', 'o', 'y', 'P', 'P', 'E', 'E']
['d', 'o', 'w', 'n', 'P', 'E', 'E']
['l', 'o', 'w', 'P', 'P', 'E', 'E']
