In [0]:
# 이번 코드는 Batch_First 모드로 구현했습니다.

import torch 
import torch.nn as nn
import torch.optim as optim
import numpy as np

In [2]:
# Preprocessing string data
# Vocabulary layout: a-z (0-25), space (26), punctuation "?!.,:;" (27-32),
# then "0" (33) and "1" (34) -> 35 symbols in total.
# string_to_onehot marks the sequence start at index -2 and the sequence end
# at index -1, i.e. on the "0" and "1" slots of this vocabulary.

string = "hello pytorch. how long can a rnn cell remember? show me your limit!"
chars = "abcdefghijklmnopqrstuvwxyz ?!.,:;01"
char_list = list(chars)
char_len = len(char_list)

char_len

35

In [0]:
# String to onehot vector
# a -> [1 0 0 ... 0 0]

def string_to_onehot(string, alphabet=None):
    """Encode ``string`` as one-hot rows framed by a start and an end marker.

    Row 0 is the start marker (a 1 in the second-to-last column), rows
    1..len(string) are the one-hot encodings of the characters in order, and
    the final row is the end marker (a 1 in the last column).

    Args:
        string: Text to encode. Every character must occur in the alphabet.
        alphabet: Optional sequence of symbols defining the column order.
            Defaults to the module-level ``char_list``.

    Returns:
        Integer ``numpy.ndarray`` of shape ``(len(string) + 2, len(alphabet))``.

    Raises:
        ValueError: If ``string`` contains a character missing from the alphabet.
    """
    symbols = char_list if alphabet is None else alphabet

    # Build the symbol -> column lookup once instead of running a linear
    # list.index() per character. setdefault keeps the first occurrence of a
    # duplicated symbol, which is what list.index() would return.
    column_of = {}
    for col, sym in enumerate(symbols):
        column_of.setdefault(sym, col)

    # Allocate the full matrix up front; stacking row by row would copy the
    # whole array again for every character.
    encoded = np.zeros(shape=(len(string) + 2, len(symbols)), dtype=int)
    encoded[0, -2] = 1    # start marker
    encoded[-1, -1] = 1   # end marker
    for row, ch in enumerate(string, start=1):
        try:
            col = column_of[ch]
        except KeyError:
            raise ValueError(f"{ch!r} is not in the alphabet") from None
        encoded[row, col] = 1
    return encoded

In [0]:
# Onehot vector to word
# [1 0 0 ... 0 0] -> a 

def onehot_to_word(onehot_1, alphabet=None):
    """Return the alphabet symbol at the position of the largest entry.

    Args:
        onehot_1: Tensor (or array-like) of scores. The argmax is taken over
            the flattened values.
        alphabet: Optional sequence of symbols to index into. Defaults to the
            module-level ``char_list``.

    Returns:
        The symbol whose index equals the argmax of ``onehot_1``.
    """
    symbols = char_list if alphabet is None else alphabet
    # Take the argmax in torch rather than converting through
    # Tensor.numpy(), which raises for tensors that require grad or that are
    # not on the CPU.
    best = torch.as_tensor(onehot_1).argmax().item()
    return symbols[best]

In [5]:
# Hyperparameters
# This notebook is written for batch sizes greater than 1.
batch_size = 5

# seq_len is kept at 1 to keep the evaluation cell simple (the original note
# says training also works with other values).
seq_len = 1

# num_layers can be changed freely.
num_layers = 3
input_size = char_len
# The LSTM output is compared directly with the one-hot labels in the
# training cell, so the hidden width has to match the one-hot width.
hidden_size = 35
lr = 0.01
num_epochs = 1000

# One-hot encode the whole string (plus start/end rows) as a float tensor.
one_hot = torch.from_numpy(string_to_onehot(string)).float()

print(one_hot.shape)

torch.Size([70, 35])


In [0]:
# Multi-layer LSTM wrapper (batch_first)

class RNN(nn.Module):
    """Stacked LSTM that consumes batch-first input.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of the hidden and cell state of every layer.
        num_layers: Number of stacked LSTM layers.
    """

    def __init__(self, input_size, hidden_size,num_layers):
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)

    def forward(self,input,hidden,cell):
        """Advance the LSTM over ``input`` starting from the given state.

        Args:
            input: Tensor of shape ``(batch, seq, input_size)``.
            hidden: Hidden state of shape ``(num_layers, batch, hidden_size)``.
            cell: Cell state of shape ``(num_layers, batch, hidden_size)``.

        Returns:
            ``(output, hidden, cell)`` where ``output`` has shape
            ``(batch, seq, hidden_size)`` and the states keep their shape.
        """
        output,(hidden,cell) = self.lstm(input,(hidden,cell))
        return output,hidden,cell

    def init_hidden_cell(self, n_batch=None):
        """Return zero-filled initial ``(hidden, cell)`` states.

        Args:
            n_batch: Batch dimension of the states. Defaults to the
                module-level ``batch_size``.

        Returns:
            Two tensors of shape ``(num_layers, n_batch, hidden_size)``.
        """
        if n_batch is None:
            n_batch = batch_size
        # Size the states from this instance's own configuration (not the
        # notebook globals) so a model built with other sizes still gets
        # matching states; new_zeros also inherits the weights' dtype/device.
        reference = self.lstm.weight_ih_l0
        shape = (self.num_layers, n_batch, self.hidden_size)
        hidden = reference.new_zeros(shape)
        cell = reference.new_zeros(shape)
        return hidden,cell
    
# Build the model from the hyperparameters defined above.
rnn = RNN(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers)

In [0]:
# Loss function & optimizer
# Mean-squared error between the LSTM outputs and the one-hot targets,
# minimized with Adam over all model parameters.

loss_func = nn.MSELoss()
optimizer = optim.Adam(params=rnn.parameters(), lr=lr)

In [8]:
# Shape check: push one batch through the freshly built model.
# The batch is assembled the same way as in the training loop (batch_size
# windows of seq_len rows, each shifted by one row). A plain slice of
# batch_size rows followed by view() would only have the right number of
# elements when seq_len == 1.
j=0
input_data = torch.stack([one_hot[j+k:j+k+seq_len] for k in range(batch_size)],dim=0)
print(input_data.size())

hidden,cell = rnn.init_hidden_cell()
print(hidden.size(),cell.size())

output,hidden,cell = rnn(input_data,hidden,cell)
print(output.size(),hidden.size(),cell.size())



torch.Size([5, 1, 35])
torch.Size([3, 5, 35]) torch.Size([3, 5, 35])
torch.Size([5, 1, 35]) torch.Size([3, 5, 35]) torch.Size([3, 5, 35])


In [9]:
# Number of seq_len-sized steps in the encoded string, minus one so that
# every input window still has a next-step label.
unroll_len = one_hot.size()[0]//seq_len -1

# The batches are identical in every epoch, so build them once up front.
# Batch j stacks batch_size windows of seq_len one-hot rows, each shifted by
# one row; the labels are the same windows moved one row ahead. For example,
# with batch size 3 the inputs could be "pyt" and the labels "yto".
batches = []
for j in range(unroll_len-batch_size+1):
    input_data = torch.stack([one_hot[j+k:j+k+seq_len] for k in range(batch_size)],dim=0)
    label = torch.stack([one_hot[j+k+1:j+k+seq_len+1] for k in range(batch_size)],dim=0)
    batches.append((input_data, label))

for i in range(num_epochs):
    optimizer.zero_grad()
    # Every epoch replays the string from the start with a fresh state.
    hidden,cell = rnn.init_hidden_cell()

    # Accumulate the loss over the whole unrolled pass and backpropagate
    # once per epoch.
    loss = 0
    for input_data, label in batches:
        output, hidden, cell = rnn(input_data,hidden,cell)
        # MSELoss (default mean reduction) averages over all elements, so
        # the tensors do not need to be flattened first.
        loss += loss_func(output, label)

    loss.backward()
    optimizer.step()

    if i % 10 == 0:
        print(loss)

tensor(2.2972, grad_fn=<AddBackward0>)
tensor(1.7071, grad_fn=<AddBackward0>)
tensor(1.6303, grad_fn=<AddBackward0>)
tensor(1.5253, grad_fn=<AddBackward0>)
tensor(1.3940, grad_fn=<AddBackward0>)
tensor(1.1777, grad_fn=<AddBackward0>)
tensor(0.8639, grad_fn=<AddBackward0>)
tensor(0.5385, grad_fn=<AddBackward0>)
tensor(0.2710, grad_fn=<AddBackward0>)
tensor(0.1326, grad_fn=<AddBackward0>)
tensor(0.0951, grad_fn=<AddBackward0>)
tensor(0.0659, grad_fn=<AddBackward0>)
tensor(0.0514, grad_fn=<AddBackward0>)
tensor(0.0434, grad_fn=<AddBackward0>)
tensor(0.0385, grad_fn=<AddBackward0>)
tensor(0.0351, grad_fn=<AddBackward0>)
tensor(0.0326, grad_fn=<AddBackward0>)
tensor(0.0302, grad_fn=<AddBackward0>)
tensor(0.0277, grad_fn=<AddBackward0>)
tensor(0.0257, grad_fn=<AddBackward0>)
tensor(0.0234, grad_fn=<AddBackward0>)
tensor(0.0210, grad_fn=<AddBackward0>)
tensor(0.0195, grad_fn=<AddBackward0>)
tensor(0.0182, grad_fn=<AddBackward0>)
tensor(0.0171, grad_fn=<AddBackward0>)
tensor(0.0177, grad_fn=<A

In [11]:
# Teacher-forced read-out: feed the true characters batch by batch and print
# the character the model predicts next.
# NOTE: onehot_to_word takes the argmax over the flattened output[k], so the
# printed characters are only meaningful for seq_len == 1.
hidden,cell = rnn.init_hidden_cell()
last_j = unroll_len-batch_size

# Inference only, so skip autograd bookkeeping.
with torch.no_grad():
    for j in range(last_j+1):
        input_data = torch.stack([one_hot[j+k:j+k+seq_len] for k in range(batch_size)],dim=0)
        output, hidden, cell = rnn(input_data,hidden,cell)

        # Consecutive batches overlap (each is shifted by one row), so only
        # the first prediction of a batch is printed; the final batch is
        # printed in full.
        shown = batch_size if j == last_j else 1
        for k in range(shown):
            print(onehot_to_word(output[k]),end="")


helllello llo plo pyo pyt pytopytorytorctorchorch.rch. ch. hh. ho. how how how low low lon longlong ong cng cag can can can aan a n a r a rna rnn rnn rnn cnn cen cel cellcell ell rll rel rem remeremememembmembeembermber?ber? er? sr? sh? sho showshow how mow mew me  me yme yoe you youryour our lur lir lim limilimitimit!mit!1