<a href="https://colab.research.google.com/github/submouse9903/uos-deepLearning/blob/main/CH05_RNN(Teacher_forcing).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
import torch.nn as nn
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

In [None]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
torch.manual_seed(1)
if device == 'cuda':
    torch.cuda.manual_seed_all(1)

In [None]:
# Load data
df = pd.read_csv('http://ranking.uos.ac.kr/class/RB/stock_data.csv')

In [None]:
x_0 = np.array(df['Close'][0:-1])
x_1 = np.array(df['Close'][1:])
x = np.log(1-(x_1-x_0)/x_0)

In [None]:
# Define hyperparameters
seq_len = 6
hidden_size = 5
num_layers = 1
learning_rate = 0.001
num_epochs = 100

### encoder와 학습용 decoder, 추론용 decoder 
- encoder: 입력열 데이터를 받아서 hidden feature 를 출력으로 주는 함수
   + 모형구조: rnn 으로 구성하고 h를 마지막 출력한다.  
- 학습용 decoder: (실제) 출력열 데이터를 받아 예측값을 만들어 내는 decoder 
  + 모형구조: rnn으로 구성하고 입력을 encoder 의 h와 response variable 인 출력열을 받는다. (Teacher forcing method)
- 추론용 decoder: 예측된 출력열 데이터를 받아 순차적으로 예측값을 만들어 내는 decoder 
  + 모형구조: for 문을 이용하한 재귀함수로 작성한다. 1) rnn 을 사용하며 첫번째 step 의 rnn cell은 h를 받고 $\hat y$ (출력값)과 $h$를 반환한다. 3) 반복문을 통해 다음 rnn cell의 계산에서는 직전에 계산된 $\hat y$ (출력값)과 $h$를 입력값으로 받아 출력값을 만든다. 
  (첫번째 추론용 decoder의 입력변수는 $0$을 사용하기로 하자.)
  




In [None]:
# Define the encoder
class myEncoder(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers):
        super(myEncoder, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, 1)
        
    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)
        out, _ = self.rnn(x, h0)
        out = out[:, -1, :]
        out = out.reshape(self.num_layers, x.size(0), self.hidden_size)
        return out

(참고) myEncoder 의 forward 마지막 줄의 
> out = out.reshape(self.num_layers, x.size(0), self.hidden_size) 
부분은 out 이 다음 Decoder의 h0로 들어가기 때문에 shape를 맞춰준 것임. 

> (num_layers, x.size(0), self.hidden_size)



In [None]:
x[0:12]

array([ 0.00430734,  0.00471075,  0.00626551, -0.00179657,  0.00147374,
        0.00677598,  0.00503846,  0.00145651,  0.00174275,  0.00695242,
       -0.00018716,  0.00368627])

In [None]:
# window size 설정
input_window_size = 6
output_window_size = 3
# 입력 시퀀스와 출력 시퀀스 정의
def create_inout_sequences(input_data, input_window_size, output_window_size):
    inout_seq = []
    L = len(input_data)
    s1 = input_window_size
    s2 = output_window_size
    for i in range(L - s1 - s2):
        train_seq = input_data[i:(i+s1)].reshape(s1,1)
        train_seq = train_seq.astype(np.float32)
        # teacher forcing
        train_seq2 = input_data[(i+s1-1):(i+s1+s2-1)].reshape(s2,1)
        train_seq2 = train_seq2.astype(np.float32)
        # label
        train_label = input_data[(i+s1):(i+s1+s2)].reshape(s2,1)
        train_label = train_label.astype(np.float32)
        inout_seq.append((train_seq ,train_seq2, train_label))
    return inout_seq

In [None]:
# 입력 시퀀스와 출력 시퀀스 생성
train_data = create_inout_sequences(x, input_window_size, output_window_size)
print('input:', train_data[0][0].shape)
print('input for teacher forcing:', train_data[0][1].shape)
print('output:', train_data[0][2].shape)
print('train_data:', len(train_data))

input: (6, 1)
input for teacher forcing: (3, 1)
output: (3, 1)
train_data: 722


In [None]:
# 데이터를 batch 단위로 나누기
batch_size = 2
train_loader = torch.utils.data.DataLoader(train_data, shuffle=False, batch_size=batch_size)

# batch 단위로 데이터 출력
for i, (inputs, inputs2, labels) in enumerate(train_loader):
    print(f'Batch {i}:')
    #print('Inputs: \n', inputs)
    print("input seq",inputs.shape)
    print("input seq for teacher forcing",inputs2.shape)
    #print('Labels: \n', labels)
    print("output",labels.shape)
    break

Batch 0:
input seq torch.Size([2, 6, 1])
input seq for teacher forcing torch.Size([2, 3, 1])
output torch.Size([2, 3, 1])


In [None]:
# Define the encoder
class myDecoder(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers):
        super(myDecoder, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True)
        
    def forward(self, h, x):
        out, _ = self.rnn(x, h)
        return out

### Encoder Test 해보기
- encoder가 출력값을 올바르게 주는지 확인하기

In [None]:
encoder = myEncoder(input_size=1, hidden_size=5, num_layers=1).to(device)
for inputs, inputs2, labels in train_loader:
  input = inputs.to(device)
  h = encoder(input)
  print(h)
  print(h.shape)
  break 

tensor([[[ 0.3743, -0.1064, -0.7296,  0.3904,  0.3075],
         [ 0.3746, -0.1070, -0.7291,  0.3899,  0.3068]]], device='cuda:0',
       grad_fn=<ReshapeAliasBackward0>)
torch.Size([1, 2, 5])


### Decoder Test 해 보기
- decoder 가 올바른 출력값을 주는지 확인해보기

In [None]:
decoder = myDecoder(input_size=1, hidden_size=5, num_layers=1).to(device)
for inputs, inputs2, labels in train_loader:
  input = inputs.to(device)
  input2 = inputs2.to(device)
  label = labels.to(device)
  h = encoder(input)
  print("========= Encoder =========")
  print(h)
  print(h.shape)
  print("========= Decoder =========")
  output = decoder(h, input2)
  print(output)
  print(output.shape)
  break 

tensor([[[ 0.3743, -0.1064, -0.7296,  0.3904,  0.3075],
         [ 0.3746, -0.1070, -0.7291,  0.3899,  0.3068]]], device='cuda:0',
       grad_fn=<ReshapeAliasBackward0>)
torch.Size([1, 2, 5])
tensor([[[-0.0730, -0.7599,  0.0499, -0.1443,  0.2752],
         [-0.5384, -0.7812,  0.3192, -0.0335,  0.4765],
         [-0.6578, -0.6925,  0.2733, -0.0528,  0.4829]],

        [[-0.0725, -0.7598,  0.0499, -0.1440,  0.2746],
         [-0.5372, -0.7808,  0.3185, -0.0331,  0.4751],
         [-0.6576, -0.6925,  0.2735, -0.0530,  0.4827]]], device='cuda:0',
       grad_fn=<CudnnRnnBackward0>)
torch.Size([2, 3, 5])


### Decoder 수정하기 (회귀모형의 출력값을 줄 수 있도록)

In [None]:
# Define the encoder (Teacher forcing)
class myDecoder(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers):
        super(myDecoder, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size,1)
        
    def forward(self, h, x):
        out, _ = self.rnn(x, h)
        predict_output = []
        for k in range(x.size(1)):
          predict_output.append(self.fc(out[:,k,:]))

        predict_output = torch.cat(predict_output, axis = 1)
        predict_output = predict_output.reshape(-1,x.size(1),1)
        return predict_output

In [None]:
decoder = myDecoder(input_size=1, hidden_size=5, num_layers=1).to(device)
for inputs, inputs2, labels in train_loader:
  input = inputs.to(device)
  input2 = inputs2.to(device)
  label = labels.to(device)
  h = encoder(input)
  print("========= Decoder =========")
  output = decoder(h, input2)
  print(output)
  print(output.shape)
  break 

tensor([[[ 0.5178],
         [ 0.1564],
         [-0.0297]],

        [[ 0.5177],
         [ 0.1564],
         [-0.0299]]], device='cuda:0', grad_fn=<ReshapeAliasBackward0>)
torch.Size([2, 3, 1])



### Seq2Seq로 전체 모형 연결하기

In [None]:
class Seq2Seq(nn.Module):
  def __init__(self, encoder, decoder):
    super().__init__()
    self.encoder = encoder
    self.decoder = decoder
  def forward(seff, input, input2):
    h = encoder(input)
    output = decoder(h, input2)
    return output

In [None]:
model = Seq2Seq(encoder, decoder).to(device)

In [None]:
for inputs, inputs2, labels in train_loader:
  input = inputs.to(device)
  input2 = inputs2.to(device)
  label = labels.to(device)
  print("========= Decoder =========")
  output = model(input, input2)
  print(output)
  print(output.shape)
  break 

tensor([[[ 0.5178],
         [ 0.1564],
         [-0.0297]],

        [[ 0.5177],
         [ 0.1564],
         [-0.0299]]], device='cuda:0', grad_fn=<ReshapeAliasBackward0>)
torch.Size([2, 3, 1])


In [None]:
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [None]:
optimizer.zero_grad()
for i, (inputs, inputs2, labels) in enumerate(train_loader):
  input = inputs.to(device)
  input2 = inputs2.to(device)
  label = labels.to(device)
  output = model(input, input2)
  loss = criterion(output, label)
  optimizer.zero_grad()
  loss.backward()
  optimizer.step()
print(loss)

tensor(0.0001, device='cuda:0', grad_fn=<MseLossBackward0>)


In [None]:
num_epochs = 10
for epoch in range(num_epochs):
  for i, (inputs, inputs2, labels) in enumerate(train_loader):
    input = inputs.to(device)
    input2 = inputs2.to(device)
    label = labels.to(device)
    output = model(input, input2)
    loss = criterion(output, label)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
  print(loss)

tensor(0.0002, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0002, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0002, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0002, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0002, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0002, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0002, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0002, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0002, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0002, device='cuda:0', grad_fn=<MseLossBackward0>)


In [None]:
print(input[0])
print(input2[0])
print(label[0])

tensor([[-0.0197],
        [-0.0438],
        [ 0.0162],
        [ 0.0297],
        [-0.0093],
        [-0.0502]], device='cuda:0')
tensor([[-0.0502],
        [ 0.0048],
        [-0.0003]], device='cuda:0')
tensor([[ 0.0048],
        [-0.0003],
        [ 0.0178]], device='cuda:0')


### - Decoder for Inferece 
1) encoder 의 마지막 출력값(output)을 받는다. 

2) 반복문을 통해서 구현한다. 


먼저 encoder 가 가지고 있는 parameter를 확인해보자.

In [None]:
for i in decoder.named_children():
  print(i)

('rnn', RNN(1, 5, batch_first=True))
('fc', Linear(in_features=5, out_features=1, bias=True))


In [None]:
for name, child in decoder.named_children():
  for param in child.parameters():
        print("#######", name, '#######')
        print(param)

####### rnn #######
Parameter containing:
tensor([[-0.0088],
        [ 0.2419],
        [-0.0851],
        [-0.0805],
        [ 0.1123]], device='cuda:0', requires_grad=True)
####### rnn #######
Parameter containing:
tensor([[ 0.0652, -0.1572, -0.0443,  0.2997, -0.1547],
        [ 0.1909,  0.2987, -0.2079, -0.4656,  0.3826],
        [-0.0635,  0.4770,  0.1646,  0.0788,  0.3076],
        [ 0.3685, -0.2228,  0.1145,  0.2716,  0.2860],
        [ 0.2492, -0.2832, -0.2326, -0.3760, -0.0085]], device='cuda:0',
       requires_grad=True)
####### rnn #######
Parameter containing:
tensor([ 0.4378, -0.3405,  0.1624, -0.2911, -0.4540], device='cuda:0',
       requires_grad=True)
####### rnn #######
Parameter containing:
tensor([-0.1030,  0.1996,  0.3393, -0.3213,  0.0635], device='cuda:0',
       requires_grad=True)
####### fc #######
Parameter containing:
tensor([[0.3251, 0.2477, 0.2367, 0.3446, 0.2930]], device='cuda:0',
       requires_grad=True)
####### fc #######
Parameter containing:
tensor

In [None]:
h = encoder(input)
print("",h)


 tensor([[[ 0.1062, -0.0638, -0.4990,  0.2276, -0.0470],
         [ 0.1036, -0.0455, -0.5152,  0.2342, -0.0350]]], device='cuda:0',
       grad_fn=<ReshapeAliasBackward0>)


In [None]:
decoder_inference = myDecoder(input_size=1, hidden_size=5, num_layers=1).to(device)
for name, child in decoder_inference.named_children():
  for param in child.parameters():
        print("#######", name, '#######')
        print(param)

####### rnn #######
Parameter containing:
tensor([[-0.2314],
        [ 0.0970],
        [-0.1628],
        [-0.1005],
        [-0.3564]], device='cuda:0', requires_grad=True)
####### rnn #######
Parameter containing:
tensor([[-0.2038, -0.1370,  0.1912,  0.0817,  0.1105],
        [ 0.4464,  0.4359,  0.3050,  0.0142, -0.3094],
        [ 0.3495, -0.1118, -0.0362, -0.3853, -0.0883],
        [-0.2884,  0.4109, -0.3866, -0.3486, -0.0152],
        [-0.2418,  0.1600, -0.1721, -0.2101,  0.0253]], device='cuda:0',
       requires_grad=True)
####### rnn #######
Parameter containing:
tensor([ 0.3237, -0.3146,  0.2100,  0.2873,  0.4375], device='cuda:0',
       requires_grad=True)
####### rnn #######
Parameter containing:
tensor([-0.3130,  0.1083, -0.3307,  0.3818, -0.1735], device='cuda:0',
       requires_grad=True)
####### fc #######
Parameter containing:
tensor([[ 0.2694,  0.0133, -0.0348, -0.0143,  0.0760]], device='cuda:0',
       requires_grad=True)
####### fc #######
Parameter containing:
t

In [None]:
decoder.rnn

RNN(1, 5, batch_first=True)

In [None]:
h.shape
yhat = input2[:,0:1,:]
yhat.shape
out, _  = decoder.rnn(yhat, h)
out.shape 
yhat = decoder.fc(out)
yhat.shape
# repeat 
h = out.permute(1,0,2)
h.shape
decoder.rnn(yhat, h)

(tensor([[[ 0.2614, -0.0715,  0.3094, -0.5394, -0.1292]],
 
         [[ 0.2563, -0.0625,  0.3197, -0.5440, -0.1349]]], device='cuda:0',
        grad_fn=<CudnnRnnBackward0>),
 tensor([[[ 0.2614, -0.0715,  0.3094, -0.5394, -0.1292],
          [ 0.2563, -0.0625,  0.3197, -0.5440, -0.1349]]], device='cuda:0',
        grad_fn=<CudnnRnnBackward0>))

In [None]:
# Define the encoder (Teacher forcing)
class myDecoder_inf(nn.Module):
    def __init__(self, decoder):
        super(myDecoder_inf, self).__init__()
        self.rnn = decoder.rnn
        self.fc = decoder.fc
        
    def forward(self, h, x, output_window_size):
        predict_output = []
        yhat = x.clone()
        for k in range(output_window_size):
          out, _ = self.rnn(yhat, h)
          yhat = self.fc(out)
          h = out.permute(1,0,2)
          predict_output.append(yhat)

        predict_output = torch.cat(predict_output, axis = 1)
        return predict_output

In [None]:
decoder_inf = myDecoder_inf(decoder)

In [None]:
out = decoder_inf(h,input2[:,0:1,:], output_window_size)
out.squeeze(2)

tensor([[-0.0107, -0.0071, -0.0299],
        [-0.0084, -0.0085, -0.0326]], device='cuda:0',
       grad_fn=<SqueezeBackward1>)