In [1]:
import torch
import numpy as np

from torch import nn
from torch import optim

In [2]:
# Training corpus: one sentence that the character-level model learns to reproduce.
sentence = ("if you want to build a ship, don't drum up people together to "
            "collect wood and don't assign them tasks and work, but rather "
            "teach them to long for the endless immensity of the sea.")

# Window length used to slice `sentence`; every sequence piece has this length.
sequence_length = 10

# Model and optimisation hyperparameters.
num_layers = 1
learning_rate = 0.1
epochs = 100

In [3]:
# Vocabulary: every distinct character of the sentence, in sorted order.
char_set = sorted(set(sentence))
# Lookup table from a character to its index in `char_set`.
char_dict = dict(zip(char_set, range(len(char_set))))

# Show the vocabulary size, the vocabulary itself and the lookup table.
for _shown in (len(char_dict), char_set, char_dict):
    print(_shown)

25
[' ', "'", ',', '.', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'k', 'l', 'm', 'n', 'o', 'p', 'r', 's', 't', 'u', 'w', 'y']
{' ': 0, "'": 1, ',': 2, '.': 3, 'a': 4, 'b': 5, 'c': 6, 'd': 7, 'e': 8, 'f': 9, 'g': 10, 'h': 11, 'i': 12, 'k': 13, 'l': 14, 'm': 15, 'n': 16, 'o': 17, 'p': 18, 'r': 19, 's': 20, 't': 21, 'u': 22, 'w': 23, 'y': 24}


In [4]:
# The one-hot input width and the RNN hidden size are both set to the
# vocabulary size.
input_dim = hidden_dim = len(char_dict)

print(input_dim)

25


In [5]:
## data setting
# Encode the whole sentence once, then cut overlapping windows out of it.
# Each target window is the input window shifted one character to the right
# (e.g. input "hell" -> target "ello").
encoded = [char_dict[ch] for ch in sentence]
n_windows = len(sentence) - sequence_length

x_data = [encoded[start : start + sequence_length]
          for start in range(n_windows)]
y_data = [encoded[start + 1 : start + sequence_length + 1]
          for start in range(n_windows)]

print(x_data)
print(y_data)


[[12, 9, 0, 24, 17, 22, 0, 23, 4, 16], [9, 0, 24, 17, 22, 0, 23, 4, 16, 21], [0, 24, 17, 22, 0, 23, 4, 16, 21, 0], [24, 17, 22, 0, 23, 4, 16, 21, 0, 21], [17, 22, 0, 23, 4, 16, 21, 0, 21, 17], [22, 0, 23, 4, 16, 21, 0, 21, 17, 0], [0, 23, 4, 16, 21, 0, 21, 17, 0, 5], [23, 4, 16, 21, 0, 21, 17, 0, 5, 22], [4, 16, 21, 0, 21, 17, 0, 5, 22, 12], [16, 21, 0, 21, 17, 0, 5, 22, 12, 14], [21, 0, 21, 17, 0, 5, 22, 12, 14, 7], [0, 21, 17, 0, 5, 22, 12, 14, 7, 0], [21, 17, 0, 5, 22, 12, 14, 7, 0, 4], [17, 0, 5, 22, 12, 14, 7, 0, 4, 0], [0, 5, 22, 12, 14, 7, 0, 4, 0, 20], [5, 22, 12, 14, 7, 0, 4, 0, 20, 11], [22, 12, 14, 7, 0, 4, 0, 20, 11, 12], [12, 14, 7, 0, 4, 0, 20, 11, 12, 18], [14, 7, 0, 4, 0, 20, 11, 12, 18, 2], [7, 0, 4, 0, 20, 11, 12, 18, 2, 0], [0, 4, 0, 20, 11, 12, 18, 2, 0, 7], [4, 0, 20, 11, 12, 18, 2, 0, 7, 17], [0, 20, 11, 12, 18, 2, 0, 7, 17, 16], [20, 11, 12, 18, 2, 0, 7, 17, 16, 1], [11, 12, 18, 2, 0, 7, 17, 16, 1, 21], [12, 18, 2, 0, 7, 17, 16, 1, 21, 0], [18, 2, 0, 7, 17, 16, 1

In [6]:
# Rows of the identity matrix are the one-hot vectors; indexing it with a
# window of character indices yields that window's one-hot encoding.
identity = np.eye(input_dim)
x_one_hot = np.array([identity[window] for window in x_data])
print(x_one_hot.shape)

(170, 10, 25)


In [7]:
# NOTE(review): h0 is not used by any cell visible here; its shape
# (1, 1, input_dim) implies a batch size of 1 - confirm before passing it
# to the RNN.
h0 = torch.zeros((1, 1, input_dim))

# One-hot inputs as float32, target character indices as int64.
Y = torch.tensor(y_data, dtype=torch.long)
X = torch.tensor(x_one_hot, dtype=torch.float)

print(X.shape, Y.shape)

torch.Size([170, 10, 25]) torch.Size([170, 10])


In [8]:
class RNN(nn.Module):
    """Recurrent network followed by a linear layer applied at every timestep.

    Args:
        input_dim: Size of each input feature vector.
        hidden_dim: Size of the recurrent hidden state.
        layers: Number of stacked recurrent layers.
        output_dim: Size of the per-timestep output (number of classes).
            Defaults to ``hidden_dim``, which keeps the behaviour of the
            three-argument constructor; pass it explicitly when the hidden
            size should differ from the number of classes.
    """

    def __init__(self, input_dim, hidden_dim, layers, output_dim=None):
        super().__init__()
        if output_dim is None:
            output_dim = hidden_dim
        # batch_first=True: inputs and outputs are (batch, seq, feature).
        self.rnn = nn.RNN(input_dim, hidden_dim, num_layers=layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim, bias=True)

    def forward(self, x):
        """Return the linear layer's output for every timestep of ``x``.

        Args:
            x: Input batch laid out as (batch, seq, input_dim).

        Returns:
            Tensor laid out as (batch, seq, output_dim).
        """
        # nn.RNN returns (per-step outputs, final hidden state); only the
        # per-step outputs are needed here.
        step_outputs, _ = self.rnn(x)
        return self.fc(step_outputs)

In [9]:
# Fix the RNG seed before the model is built so that weight initialisation
# (and therefore the training run) is reproducible across kernel restarts.
torch.manual_seed(0)

model = RNN(input_dim, hidden_dim, num_layers)
# CrossEntropyLoss takes raw logits and integer class indices.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [10]:
# Full-batch training: every epoch runs one optimisation step on all windows
# and prints the loss together with the sentence decoded from the predictions.
for i in range(epochs):
    optimizer.zero_grad()
    outputs = model(X)

    if i == 0:
        print(outputs.shape)

    # Flatten (batch, seq, classes) -> (batch * seq, classes). The class
    # dimension is read from the logits themselves rather than assumed to be
    # input_dim, so logits and targets stay aligned even when the model's
    # output size differs from the input size.
    loss = criterion(outputs.reshape(-1, outputs.size(-1)), Y.reshape(-1))
    loss.backward()
    optimizer.step()

    # Decode predictions: consecutive windows overlap by all but one
    # character, so take the whole first window and then only the last
    # character of every following window.
    predicted = outputs.argmax(dim=2).tolist()
    decoded = []
    for j, window in enumerate(predicted):
        decoded.extend(window if j == 0 else window[-1:])
    results_str = ''.join(char_set[idx] for idx in decoded)

    print(f"epoch{i} | loss : {loss.item()}")
    print(results_str + '\n')

torch.Size([170, 10, 25])
epoch0 | loss : 3.2208235263824463
 kkkkukkukkkkakkkkkkkukkikkkkkukkkkkkkkkkkkkkkkkkkkk kkkkkkkakkukkkkkkk .kkukkkkukkkkukkakkkkkkkkkukkkkukkkk..kkkkkkkkukkkkkkkukukkkkkkkakk.kkkkk.kk.kkkkkkkkkk kkkkkkkkk ukk.kkkku

epoch1 | loss : 2.9189743995666504
   t       a  at  a a a     a   t a t  a    g teg  g      t aat a g  a  a t   a   t a a    t   at t a a   a   t ag  a t a  t a t t a  t aat t ag tt  aat  ag aa aat     t      ata 

epoch2 | loss : 3.0344643592834473
                                                                                                                                                                                   

epoch3 | loss : 2.670729875564575
eooooototo oooooooeoootoeoootoooo ooeo oooloo loeotoeooeooooolooeotooaooolto  oonototo oto oooooootoootodooooltotoeoootoeo oooototoeo oo looeoooooooeoto oloo oooot oooooooooeooooo

epoch4 | loss : 2.471219301223755
em omdtood aod.td m aodhe r dom thaode am er  lmseofem em todlo leoohtoopltod