In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.init as init
import numpy as np

import random

In [2]:
torch.__version__

'1.7.1'

In [3]:
np.__version__

'1.19.2'

## 6. RNN

In [4]:
n_hidden = 35
lr = 0.01
epochs = 1000

string = 'hello pytorch. how long can a rnn cell remember'
chars = 'abcdefghijklmnopqrstuvwxyz ?!.,:;01'
char_list = [i for i in chars]
n_letters = len(char_list)

In [5]:
def string_to_onehot(string):
    """
    sentence to one-hot vector.
    
    """
    start = np.zeros(shape=len(char_list), dtype=int)
    end = np.zeros(shape=len(char_list), dtype=int)
    start[-2] = 1
    end[-1] = 1
    
    for i in string:
        idx = char_list.index(i)
        zero = np.zeros(shape=n_letters, dtype=int)
        zero[idx] = 1
        start = np.vstack([start, zero])
    output = np.vstack([start, zero])
    
    return output

In [6]:
def onehot_to_word(onehot_1):
    onehot = torch.Tensor.numpy(onehot_1)
    return char_list[onehot.argmax()]

In [7]:
class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(RNN, self).__init__()
        
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        
        self.i2h = nn.Linear(input_size, hidden_size)
        self.h2h = nn.Linear(hidden_size, hidden_size)
        self.i2o = nn.Linear(hidden_size, output_size)
        self.act_fn = nn.Tanh()
        
    def forward(self, input, hidden):
        hidden = self.act_fn(self.i2h(input) + self.h2h(hidden))
        output = self.i2o(hidden)
        return output, hidden
    
    def init_hidden(self):
        return torch.zeros(1, self.hidden_size)

In [8]:
rnn = RNN(n_letters, n_hidden, n_letters)
loss_func = nn.MSELoss()
optimizer = torch.optim.Adam(rnn.parameters(), lr=lr)

In [9]:
one_hot = torch.from_numpy(string_to_onehot(string)).type_as(torch.FloatTensor())

In [17]:
for i in range(epochs):
    rnn.zero_grad()
    total_loss = 0
    hidden = rnn.init_hidden()
    
    for j in range(one_hot.size()[0]-1):
        input_ = one_hot[j:j+1, :]
        target = one_hot[j+1]
        
        output, hidden = rnn.forward(input_, hidden)
        loss = loss_func(output.view(-1), target.view(-1))
        total_loss += loss
        input_ = output
        
    total_loss.backward()
    optimizer.step()
    
    if i % 10 == 0:
        print(total_loss)

tensor(2.7454, grad_fn=<AddBackward0>)
tensor(0.9822, grad_fn=<AddBackward0>)
tensor(0.6461, grad_fn=<AddBackward0>)
tensor(0.4083, grad_fn=<AddBackward0>)
tensor(0.2680, grad_fn=<AddBackward0>)
tensor(0.1757, grad_fn=<AddBackward0>)
tensor(0.1527, grad_fn=<AddBackward0>)
tensor(0.1101, grad_fn=<AddBackward0>)
tensor(0.0865, grad_fn=<AddBackward0>)
tensor(0.0717, grad_fn=<AddBackward0>)
tensor(0.0601, grad_fn=<AddBackward0>)
tensor(0.0596, grad_fn=<AddBackward0>)
tensor(0.0448, grad_fn=<AddBackward0>)
tensor(0.0406, grad_fn=<AddBackward0>)
tensor(0.0359, grad_fn=<AddBackward0>)
tensor(0.0303, grad_fn=<AddBackward0>)
tensor(0.0262, grad_fn=<AddBackward0>)
tensor(0.0251, grad_fn=<AddBackward0>)
tensor(0.0256, grad_fn=<AddBackward0>)
tensor(0.0214, grad_fn=<AddBackward0>)
tensor(0.0188, grad_fn=<AddBackward0>)
tensor(0.0533, grad_fn=<AddBackward0>)
tensor(0.0216, grad_fn=<AddBackward0>)
tensor(0.0158, grad_fn=<AddBackward0>)
tensor(0.0138, grad_fn=<AddBackward0>)
tensor(0.0125, grad_fn=<A

In [18]:
start = torch.zeros(1, len(char_list))
start[:, -2] = 1

with torch.no_grad():
    hidden = rnn.init_hidden()
    input_ = start
    output_string = ""
    for i in range(len(string)):
        output, hidden = rnn.forward(input_, hidden)
        output_string += onehot_to_word(output.data)
        input_ = output
        
print(output_string)

hello pytorch. e w n rcgel wrnocellcr ohrlngeag


https://arxiv.org/pdf/1406.1078.pdf

### 6.1 EMBEDDING, RNN, LSTM, GRU

In [10]:
num_epochs = 2000
print_every = 100
plot_every = 10

# chunk에 대한 설명은 아래 함수정의하면서 하겠습니다.
chunk_len = 200

hidden_size = 100
batch_size = 1
num_layers = 1
embedding_size = 70
lr = 0.002

In [11]:
## characters setting of english

import string

all_characters = string.printable
n_characters = len(all_characters)
print(all_characters)
print('num_chars = ', n_characters)

0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~ 	

num_chars =  100


In [12]:
## get text from shakespeare

import unidecode

file = unidecode.unidecode(open('./shakes.txt').read())
file_len = len(file)
print('file_len = ', file_len)

file_len =  1115393


In [13]:
# 이 함수는 텍스트 파일의 일부분을 랜덤하게 불러오는 코드입니다.
def random_chunk():
    # (시작지점 < 텍스트파일 전체길이 - 불러오는 텍스트의 길이)가 되도록 시작점과 끝점을 정합니다.
    start_index = random.randint(0, file_len - chunk_len)
    end_index = start_index + chunk_len + 1
    return file[start_index:end_index]

print(random_chunk())

of heavy mind
I see thy glory like a shooting star
Fall to the base earth from the firmament.
Thy sun sets weeping in the lowly west,
Witnessing storms to come, woe and unrest:
Thy friends are fled to 


In [14]:
## str to index from all_characters

def char_tensor(string):
    tensor = torch.zeros(len(string)).long()
    for c in range(len(string)):
        tensor[c] = all_characters.index(string[c])
    return tensor

print(char_tensor('ABCdef'))

tensor([36, 37, 38, 13, 14, 15])


In [15]:
## train test split

def random_training_set():
    chunk = random_chunk()
    inp = char_tensor(chunk[:-1])
    target = char_tensor(chunk[1:])
    return inp, target

In [16]:
class RNN(nn.Module):
    def __init__(self, input_size, embedding_size, hidden_size, output_size, num_layers=1):
        super(RNN, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.num_layers = num_layers
        self.embedding_size = embedding_size
        
        self.encoder = nn.Embedding(input_size, embedding_size)
        self.rnn = nn.RNN(embedding_size, hidden_size, num_layers)
        self.decoder = nn.Linear(hidden_size, output_size)
        
    def forward(self, input, hidden):
        out = self.encoder(input.view(1, -1))
        out, hidden = self.rnn(out, hidden)
        out = self.decoder(out.view(batch_size, -1))
        return out, hidden
    
    def init_hidden(self):
        hidden = torch.zeros(self.num_layers, batch_size, hidden_size)
        return hidden

In [17]:
model = RNN(input_size=n_characters,
           embedding_size=embedding_size,
           hidden_size=hidden_size,
           output_size=n_characters,
           num_layers=2)

In [18]:
# 모델 테스트 

inp = char_tensor("A")
print(inp)
hidden = model.init_hidden()
print(hidden.size())
out,hidden = model(inp,hidden)
print(out.size())

tensor([36])
torch.Size([2, 1, 100])
torch.Size([1, 100])


In [19]:
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
loss_func = nn.CrossEntropyLoss()

In [45]:
# 임의의 문자(start_str)로 시작하는 길이 200짜리 모방 글을 생성하는 코드입니다.
def test():
    start_str = "b"
    inp = char_tensor(start_str)
    hidden = model.init_hidden()
    x = inp

    print(start_str, end="")
    for i in range(200):
        output, hidden = model(x, hidden)

        # 여기서 max값을 사용하지 않고 multinomial을 사용하는 이유는 만약 max 값만 쓰는 경우에
        # 생성되는 텍스트가 다 the the the the the 이런식으로 나오기 때문입니다.
        # multinomial 함수를 통해 높은 값을 가지는 문자들중에 램덤하게 다음 글자를 뽑아내는 방식으로 자연스러운 텍스트를 생성해냅니다.
        output_dist = output.data.view(-1).div(0.8).exp()
        top_i = torch.multinomial(output_dist, 1)[0]
        predicted_char = all_characters[top_i]

        print(predicted_char, end="")

        x = char_tensor(predicted_char)

In [49]:
for i in range(num_epochs):
    total = char_tensor(random_chunk())
    inp = total[:-1]
    label = total[1:]
    hidden = model.init_hidden()
    
    loss = torch.tensor([0]).type(torch.FloatTensor)
    optimizer.zero_grad()
    for j in range(chunk_len-1):
        x = inp[j]
        y_ = label[j].unsqueeze(0).type(torch.LongTensor)
        y, hidden = model(x, hidden)
        loss += loss_func(y, y_)
        
    loss.backward()
    optimizer.step()
    
    if i % 100 == 0:
        print("\n",loss/chunk_len,"\n")
        test()
        print("\n","="*100)


 tensor([2.5300], grad_fn=<DivBackward0>) 

bat th vild satho ke as, hees
turl tauf mime the Iouse b torr beras I:

Iuchis tit anr witide.

MB:

OOSUS:t the at he chiit loll sous f
Seere wut an wose io th youthe tf.
Con bou hanghou the weme the 

 tensor([2.3105], grad_fn=<DivBackward0>) 

ben; grothin
The it th the foy thun thy kn fot and
Thind brest perir:

ORI:
The warr ther igir to seess mer bie acat the the the ben I loes thout mow pley, the thee he spe the semt ound ingen juth I cn

 tensor([2.2176], grad_fn=<DivBackward0>) 

bee ond fuath he, youndes sold besoich of sereis not nae marexst the amen pand hish ie his whow he couth thel
Purt and heand herd hear thou ceplome thimy's the shath if puvet fou yers, parpnoke y lort 

 tensor([2.0489], grad_fn=<DivBackward0>) 

bet
Thery. I'd siver, you ward ongelordnor dimet sthere, thar ly digh'd geipin, cand not wist dave the teilr arich, sonengy there seling, the resercunceld,
When that hing, shou base the shiscees.

DLME

 tensor([2.

In [20]:
class GRU(nn.Module):
    def __init__(self, input_size, embedding_size, hidden_size, output_size, num_layers=1):
        super(GRU, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.num_layers = num_layers
        self.embedding_size = embedding_size
        
        self.encoder = nn.Embedding(input_size, embedding_size)
        self.gru = nn.GRU(embedding_size, hidden_size, num_layers)
        self.decoder = nn.Linear(hidden_size, output_size)
        
    def forward(self, input, hidden):
        out = self.encoder(input.view(1, -1))
        out, hidden = self.gru(out, hidden)
        out = self.decoder(out.view(batch_size, -1))
        return out, hidden
    
    def init_hidden(self):
        hidden = torch.zeros(self.num_layers, batch_size, hidden_size)
        return hidden

In [21]:
model = GRU(input_size=n_characters,
           embedding_size=embedding_size,
           hidden_size=hidden_size,
           output_size=n_characters,
           num_layers=2)

In [71]:
for i in range(num_epochs):
    total = char_tensor(random_chunk())
    inp = total[:-1]
    label = total[1:]
    hidden = model.init_hidden()
    
    loss = torch.tensor([0]).type(torch.FloatTensor)
    optimizer.zero_grad()
    for j in range(chunk_len-1):
        x = inp[j]
        y_ = label[j].unsqueeze(0).type(torch.LongTensor)
        y, hidden = model(x, hidden)
        loss += loss_func(y, y_)
        
    loss.backward()
    optimizer.step()
    
    if i % 100 == 0:
        print("\n",loss/chunk_len,"\n")
        test()
        print("\n","="*100)


 tensor([4.6103], grad_fn=<DivBackward0>) 

bDn&w4Cf56F@'A_bLtwv >$(Zh-iEFOzoy&FYyHcNG`)SoJD	31I7
TU&V~N<-E?tX%:lN$H%iu{W?W1@VGgCVzwW#FJ%HpZ] <"_@,j	c ^;Q}&z%&9fb
m|r'J=B^:iD&vnq@a/8XYvG{/A	l?DE%f%W$_QD}B5sJpbxT!(Pjs;

 tensor([4.5971], grad_fn=<DivBackward0>) 

%C?Y-]I"on0;9	Bg<%w=sCS'pp^2Dw	DlTIXV]v h%9))yLJ&	'KEqGc~0NN9r)'F$jyp8mq=P3G40V$g7VvFp~aI
z.;nN@Jk$S~1%:*w51z

 tensor([4.5940], grad_fn=<DivBackward0>) 

G&uY0V}YHA|=CrJTg<{{
n4*_S;;)iMqo#)?F#mrV^jsM&fZ{[KT_x{u@]VFoiBOR'!i{:.hA.RzN|L6t?EP`7D#P#dmU/+;rvoC+8ga62hkZ>~U?<msEO_u|Bq6/AW$<XWr#2:^bG@d~!6B,OL*/


 tensor([4.5930], grad_fn=<DivBackward0>) 

bM>]+[C,'[f!E	X'|W'<= ~^3;
-DGQ<"@BnR`On7$smQ.!
<91X5NepR$C/O)DQ	d]<;};21C'o.|$o-W>%,3$E"N+S@:g&Szz+^u"_KL"~0>G)()Mw/%C }pC<'6"#ieF(#	!5llA1Ih 5;>Z,JRj-S86ehrp
(Lc'QI8:)=[dg>S30'^WdQQ~FOISrl-A7#Ita?

 tensor([4.5961], grad_fn=<DivBackward0>) 

k9}6Q(?P@/bQU,Z},;?sJUhIU8@btUR|U\ac7i'nQs4himbo:gS^;7%3mBp|cf:2/ab
E-E(JA&Qn	DjA\JD>! ||?tYmCntHgoW0jo;]1DChjo?J3"OoORTnO&x

KeyboardInterrupt: 

In [28]:
class LSTM(nn.Module):
    def __init__(self, input_size, embedding_size, hidden_size, output_size, num_layers=1):
        super(LSTM, self).__init__()
        
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.num_layers = num_layers
        self.embedding_size = embedding_size
        
        self.encoder = nn.Embedding(input_size, embedding_size)
        self.lstm = nn.LSTM(embedding_size, hidden_size, num_layers)
        self.decoder = nn.Linear(hidden_size, output_size)
        
    def forward(self, input, hidden, cell):
        out = self.encoder(input.view(batch_size, -1))
        out, (hidden, cell) = self.lstm(out, (hidden, cell))
        out = self.decoder(out.view(batch_size, -1))
        
        return out, hidden, cell
    
    def init_hidden(self):
        hidden = torch.zeros(num_layers, batch_size, hidden_size)
        cell = torch.zeros(num_layers, batch_size, hidden_size)
        return hidden, cell

In [29]:
model = LSTM(n_characters, embedding_size, hidden_size, n_characters, num_layers)

In [30]:
inp = char_tensor("A")
print(inp)
hidden,cell = model.init_hidden()
print(hidden.size())

out,hidden,cell = model(inp,hidden,cell)
print(out.size())

tensor([36])
torch.Size([1, 1, 100])
torch.Size([1, 100])


In [31]:
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
loss_func = nn.CrossEntropyLoss()

In [32]:
def test():
    start_str = "b"
    inp = char_tensor(start_str)
    hidden,cell = model.init_hidden()
    x = inp

    print(start_str,end="")
    for i in range(200):
        output,hidden,cell = model(x,hidden,cell)

        output_dist = output.data.view(-1).div(0.8).exp()
        top_i = torch.multinomial(output_dist, 1)[0]
        predicted_char = all_characters[top_i]

        print(predicted_char,end="")

        x = char_tensor(predicted_char)

In [33]:
for i in range(num_epochs):
    inp,label = random_training_set()
    hidden,cell = model.init_hidden()

    loss = torch.tensor([0]).type(torch.FloatTensor)
    optimizer.zero_grad()
    for j in range(chunk_len-1):
        x  = inp[j]
        y_ = label[j].unsqueeze(0).type(torch.LongTensor)
        y,hidden,cell = model(x,hidden,cell)
        loss += loss_func(y,y_)

    loss.backward()
    optimizer.step()
    
    if i % 100 == 0:
        print("\n",loss/chunk_len,"\n")
        test()
        print("\n\n")


 tensor([4.5988], grad_fn=<DivBackward0>) 

^vF;*\Y8?21 H3pB>i?+UTf3E?r)#PC4.Z.uP]
oQ"TiD2)5ZIqOS<z
KdpN,]Z@<!9;i)Bn=6@!Isryp3yBP]d7\eT?Y(pzK0I`dn,2	4)cqBuF`LKq;zVO`(k[ME$L:G"obZH



 tensor([2.6602], grad_fn=<DivBackward0>) 

bu8 tord theerme lope eree thareed kneee thathe: relek.

AhV
OIEV:B
,Di

NA.rn,
W bout sou ticolouriy ar there s bo beas aysore nase me faye tohave wabithe ln towelle buthe nd lneays hotast be ou tire 



 tensor([2.3727], grad_fn=<DivBackward0>) 

bd;y the fere th oe ong ou or eer bown fok ron wor sigh mntset th thas gor nonve thar the isend gin,
Aunte, Oou qath Snot ony Lou the lot thea ing be ast beros hacs gfoul davie do mave tOe
Bf Yowr, rou



 tensor([2.3677], grad_fn=<DivBackward0>) 

bnt-igan wen te tutend thy erot herisis ich hees musturer oun ind kave! werergente, thar'e th acg crram thit mand me he what thin hat wister on he vow her wouon my thim peent helt: the dilk seell wend 



 tensor([2.1339], grad_fn=<DivBackward0>) 

bsarlout an hen, ancee