#RNN
- Recurrent Neural Network, 순환신경망

In [3]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim

In [4]:
# Toy corpus of three-word sentences used by every model in this notebook.
sentences = [
    "I like dog",
    "I like coffee",
    "I hate milk",
    "you like cat",
    "you love milk",
    "you hate coffee",
]
# Floating-point dtype used for the hand-written output-layer parameters.
dtype = torch.float
sentences  # bare expression: displayed as the notebook cell's output

['I like dog',
 'I like coffee',
 'I hate milk',
 'you like cat',
 'you love milk',
 'you hate coffee']

In [6]:
# Build the vocabulary from every whitespace-separated token in the corpus.
# sorted() makes the word -> index mapping deterministic: iterating a bare
# set of strings can yield a different order on every interpreter run
# (string hash randomization), which would silently change the class indices.
word_list = sorted(set(" ".join(sentences).split()))
word_dict = {w: i for i, w in enumerate(word_list)}  # word  -> class index
number_dict = dict(enumerate(word_list))             # class index -> word
n_class = len(word_dict)                             # vocabulary size

In [8]:
n_hidden = 5  # size of the recurrent hidden state
n_step = 2  # number of input words per sample (sentence length minus 1)
batch_size = len(sentences)  # the whole corpus is fed as a single batch

In [9]:
def make_batch(sentences):
    """Turn sentences into one-hot input sequences and target word indices.

    Each sentence is split on whitespace. Every word except the last forms
    the input sequence, one-hot encoded against the module-level
    ``word_dict`` / ``n_class``; the index of the last word is the target.

    Args:
        sentences: Iterable of whitespace-separated strings whose words are
            all keys of ``word_dict``.

    Returns:
        A tuple ``(input_batch, target_batch)``: a list holding one
        ``(number_of_words - 1, n_class)`` float array per sentence, and a
        list holding one integer class index per sentence.

    Raises:
        KeyError: If a word is missing from ``word_dict``.
        IndexError: If a sentence contains no words.
    """
    # Built once instead of once per sentence; row i is the one-hot vector
    # for class i. Indexing with a list of ids returns a fresh copy.
    one_hot = np.eye(n_class)
    input_batch = []
    target_batch = []

    for sen in sentences:
        words = sen.split()
        context_ids = [word_dict[w] for w in words[:-1]]
        target = word_dict[words[-1]]

        input_batch.append(one_hot[context_ids])  # one-hot encoding
        target_batch.append(target)

    return input_batch, target_batch

input_batch, target_batch = make_batch(sentences)
# Stack the per-sentence arrays into a single ndarray first: building a
# tensor directly from a list of ndarrays takes PyTorch's slow path and
# emits a UserWarning.
# NOTE(review): requires_grad=True on the inputs is not needed to train the
# model parameters (the optimizer only receives model.parameters()); it is
# kept to preserve the original behavior.
input_batch = torch.tensor(np.array(input_batch), dtype=torch.float32, requires_grad=True)
target_batch = torch.tensor(target_batch, dtype=torch.int64)

  input_batch=torch.tensor(input_batch, dtype=torch.float32, requires_grad=True)


In [10]:
class TextRNN(nn.Module):
    """Single-layer RNN followed by a hand-written linear output layer.

    The sizes come from the module-level ``n_class`` and ``n_hidden``, and the
    output-layer parameters are created with the module-level ``dtype``; all
    three are read when the model is constructed.
    """

    def __init__(self):
        super().__init__()

        # No dropout argument: nn.RNN applies dropout only between stacked
        # layers, so with the default single layer a non-zero value has no
        # effect and merely triggers a UserWarning.
        self.rnn = nn.RNN(input_size=n_class, hidden_size=n_hidden)
        # Output layer parameters: scores = last_hidden @ W + b.
        self.W = nn.Parameter(torch.randn(n_hidden, n_class, dtype=dtype))
        self.b = nn.Parameter(torch.randn(n_class, dtype=dtype))
        # Not applied in forward(), which returns unnormalized scores; kept so
        # code that accesses ``model.Softmax`` keeps working.
        self.Softmax = nn.Softmax(dim=1)

    def forward(self, hidden, X):
        """Score every class from the RNN output at the last time step.

        Args:
            hidden: Initial hidden state, passed unchanged to ``nn.RNN``.
            X: Input tensor with the batch on dim 0 and time on dim 1; it is
                transposed to time-major before entering the RNN.

        Returns:
            Tensor of unnormalized class scores, one row per batch element.
        """
        X = X.transpose(0, 1)  # nn.RNN expects (time, batch, feature) by default
        outputs, _ = self.rnn(X, hidden)
        last_step = outputs[-1]  # hidden output of the final time step
        return torch.mm(last_step, self.W) + self.b  # final output layer

In [11]:
# Train TextRNN on the whole corpus as one batch: Adam + cross-entropy,
# 500 epochs, logging the loss every 100th epoch.
model = TextRNN()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

for epoch in range(500):
    optimizer.zero_grad()

    # A fresh all-zero initial hidden state is used for every epoch.
    hidden = torch.zeros(1, batch_size, n_hidden, requires_grad=True)
    output = model(hidden, input_batch)
    loss = criterion(output, target_batch)

    if epoch % 100 == 99:
        print('Epoch: ', f'{epoch + 1:04d}', 'cost=', f'{loss:6f}')

    loss.backward()
    optimizer.step()



Epoch:  0100 cost= 0.424602
Epoch:  0200 cost= 0.274578
Epoch:  0300 cost= 0.248933
Epoch:  0400 cost= 0.240989
Epoch:  0500 cost= 0.237512


In [14]:
# Predict the last word of every sentence from its first two words.
# (The unused module-level ``input`` list was dropped: it shadowed the builtin.)
hidden = torch.zeros(1, batch_size, n_hidden)
with torch.no_grad():  # inference only, no autograd graph needed
    predict = model(hidden, input_batch).argmax(dim=1, keepdim=True)
# squeeze(1) removes only the kept class dim, so a one-sentence batch stays iterable.
print([sen.split()[:2] for sen in sentences], '->', [number_dict[n.item()] for n in predict.squeeze(1)])

[['I', 'like'], ['I', 'like'], ['I', 'hate'], ['you', 'like'], ['you', 'love'], ['you', 'hate']] -> ['dog', 'dog', 'milk', 'cat', 'milk', 'coffee']


#LSTM
- Long Short Term Memory
- RNN처럼 sequential한, 축적되는 데이터에 강하다. ex)주식, 영상(사진의 축적) 등

In [16]:
class TextLSTM(nn.Module):
    """Single-layer LSTM followed by a hand-written linear output layer.

    The sizes come from the module-level ``n_class`` and ``n_hidden``, and the
    output-layer parameters are created with the module-level ``dtype``; all
    three are read when the model is constructed.
    """

    def __init__(self):
        super().__init__()

        # No dropout argument: nn.LSTM applies dropout only between stacked
        # layers, so with the default single layer a non-zero value has no
        # effect and merely triggers a UserWarning.
        self.lstm = nn.LSTM(input_size=n_class, hidden_size=n_hidden)
        # Output layer parameters: scores = last_hidden @ W + b.
        self.W = nn.Parameter(torch.randn(n_hidden, n_class, dtype=dtype))
        self.b = nn.Parameter(torch.randn(n_class, dtype=dtype))
        # Not applied in forward(), which returns unnormalized scores; kept so
        # code that accesses ``model.Softmax`` keeps working.
        self.Softmax = nn.Softmax(dim=1)

    def forward(self, hidden_and_cell, X):
        """Score every class from the LSTM output at the last time step.

        Args:
            hidden_and_cell: ``(hidden, cell)`` tuple of initial states,
                passed unchanged to ``nn.LSTM``.
            X: Input tensor with the batch on dim 0 and time on dim 1; it is
                transposed to time-major before entering the LSTM.

        Returns:
            Tensor of unnormalized class scores, one row per batch element.
        """
        X = X.transpose(0, 1)  # nn.LSTM expects (time, batch, feature) by default
        outputs, _ = self.lstm(X, hidden_and_cell)
        last_step = outputs[-1]  # hidden output of the final time step
        return torch.mm(last_step, self.W) + self.b  # final output layer

In [18]:
# Train TextLSTM on the whole corpus as one batch: Adam + cross-entropy,
# 500 epochs, logging the loss every 100th epoch.
model = TextLSTM()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

for epoch in range(500):
    optimizer.zero_grad()

    # Fresh all-zero initial hidden and cell states are used for every epoch.
    hidden = torch.zeros(1, batch_size, n_hidden, requires_grad=True)
    cell = torch.zeros(1, batch_size, n_hidden, requires_grad=True)
    output = model((hidden, cell), input_batch)
    loss = criterion(output, target_batch)

    if epoch % 100 == 99:
        print('Epoch: ', f'{epoch + 1:04d}', 'cost=', f'{loss:6f}')

    loss.backward()
    optimizer.step()



Epoch:  0100 cost= 0.630356
Epoch:  0200 cost= 0.285036
Epoch:  0300 cost= 0.242785
Epoch:  0400 cost= 0.237446
Epoch:  0500 cost= 0.235359


In [21]:
# Predict the last word of every sentence from its first two words.
# (The unused module-level ``input`` list was dropped: it shadowed the builtin.)
hidden = torch.zeros(1, batch_size, n_hidden)
cell = torch.zeros(1, batch_size, n_hidden)
with torch.no_grad():  # inference only, no autograd graph needed
    predict = model((hidden, cell), input_batch).argmax(dim=1, keepdim=True)
# squeeze(1) removes only the kept class dim, so a one-sentence batch stays iterable.
print([sen.split()[:2] for sen in sentences], '->', [number_dict[n.item()] for n in predict.squeeze(1)])

[['I', 'like'], ['I', 'like'], ['I', 'hate'], ['you', 'like'], ['you', 'love'], ['you', 'hate']] -> ['coffee', 'coffee', 'milk', 'cat', 'milk', 'coffee']


#GRU
- Gated Recurrent Unit
- LSTM도 RNN과 마찬가지로 시퀀스가 길어지면 앞부분의 정보를 잊어버리는 문제(장기 의존성 문제)를 완전히 해결하지는 못함. GRU는 LSTM의 게이트 구조를 단순화한 변형 모델.

In [24]:
class TextGRU(nn.Module):
    """Single-layer GRU followed by a hand-written linear output layer.

    The sizes come from the module-level ``n_class`` and ``n_hidden``, and the
    output-layer parameters are created with the module-level ``dtype``; all
    three are read when the model is constructed.
    """

    def __init__(self):
        super().__init__()

        # No dropout argument: nn.GRU applies dropout only between stacked
        # layers, so with the default single layer a non-zero value has no
        # effect and merely triggers a UserWarning.
        self.gru = nn.GRU(input_size=n_class, hidden_size=n_hidden)
        # Output layer parameters: scores = last_hidden @ W + b.
        self.W = nn.Parameter(torch.randn(n_hidden, n_class, dtype=dtype))
        self.b = nn.Parameter(torch.randn(n_class, dtype=dtype))
        # Not applied in forward(), which returns unnormalized scores; kept so
        # code that accesses ``model.Softmax`` keeps working.
        self.Softmax = nn.Softmax(dim=1)

    def forward(self, hidden_and_cell, X):
        """Score every class from the GRU output at the last time step.

        Args:
            hidden_and_cell: Initial hidden state, passed unchanged to
                ``nn.GRU``. Despite the name, a GRU takes no separate cell
                state; the parameter name is kept for compatibility.
            X: Input tensor with the batch on dim 0 and time on dim 1; it is
                transposed to time-major before entering the GRU.

        Returns:
            Tensor of unnormalized class scores, one row per batch element.
        """
        X = X.transpose(0, 1)  # nn.GRU expects (time, batch, feature) by default
        outputs, _ = self.gru(X, hidden_and_cell)
        last_step = outputs[-1]  # hidden output of the final time step
        return torch.mm(last_step, self.W) + self.b  # final output layer

In [26]:
# Train TextGRU on the whole corpus as one batch: Adam + cross-entropy,
# 500 epochs, logging the loss every 100th epoch.
model = TextGRU()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

for epoch in range(500):
    optimizer.zero_grad()

    # A fresh all-zero initial hidden state is used for every epoch.
    hidden = torch.zeros(1, batch_size, n_hidden, requires_grad=True)
    output = model(hidden, input_batch)
    loss = criterion(output, target_batch)

    if epoch % 100 == 99:
        print('Epoch: ', f'{epoch + 1:04d}', 'cost=', f'{loss:6f}')

    loss.backward()
    optimizer.step()



Epoch:  0100 cost= 0.305329
Epoch:  0200 cost= 0.245158
Epoch:  0300 cost= 0.237858
Epoch:  0400 cost= 0.235256
Epoch:  0500 cost= 0.233965


In [27]:
# Predict the last word of every sentence from its first two words.
# (The unused module-level ``input`` list was dropped: it shadowed the builtin.
# The ``cell`` tensor was dropped too: it was never passed to the GRU model.)
hidden = torch.zeros(1, batch_size, n_hidden)
with torch.no_grad():  # inference only, no autograd graph needed
    predict = model(hidden, input_batch).argmax(dim=1, keepdim=True)
# squeeze(1) removes only the kept class dim, so a one-sentence batch stays iterable.
print([sen.split()[:2] for sen in sentences], '->', [number_dict[n.item()] for n in predict.squeeze(1)])

[['I', 'like'], ['I', 'like'], ['I', 'hate'], ['you', 'like'], ['you', 'love'], ['you', 'hate']] -> ['coffee', 'coffee', 'milk', 'cat', 'milk', 'coffee']
