<a href="https://colab.research.google.com/github/songmac/2023-Sesac-Lecture-and-Project/blob/master/231012_RNN%2C_LSTM%2C_GRU_%EA%B8%B0%EC%B4%88(all_solved).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# RNN

## RNN에서 중요한 파라미터인 return_sequences와 return_state에 대해 알아보자. (default = False)

- return_sequences: 모든 시점의 hidden state를 반환할지 여부 (False이면 마지막 시점의 hidden state만 반환)
- return_state: 출력과 함께 마지막 시점의 state(last_state)도 반환할지 여부

In [2]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# Toy input: four rows of five integers each, kept as a plain nested list for now.
train_X = [
    [1, 2, 3, 4, 5],
    [7, 1, 3, 5, 9],
    [3, 2, 17, 12, 11],
    [20, 25, 34, 40, 51],
]

# Sanity-check the dimensions before converting to an array: expect (4, 5).
print(np.asarray(train_X).shape)

(4, 5)


In [3]:
# Turn the nested list into a float32 ndarray; the shape stays (4, 5).
train_X = np.array(train_X).astype(np.float32)
print(np.shape(train_X))

(4, 5)


In [4]:
# Add a leading batch axis so the data becomes a rank-3 tensor of shape
# (batch, timesteps, features) = (1, 4, 5), the layout the recurrent layers below consume.
# Only the last two axes are kept when reshaping, so re-running this cell is a no-op.
# The previous np.array([train_X]) stacked one more leading axis on every re-execution
# because it rebinds train_X from its own value.
_samples = np.asarray(train_X, dtype=np.float32)
train_X = _samples.reshape((1,) + _samples.shape[-2:])
print(train_X.shape)

(1, 4, 5)


### return_sequences = True, return_state = False

In [6]:
hidden_size = 3  # dimensionality of the hidden state

# A SimpleRNNCell handles one timestep; the RNN layer unrolls it over the time axis.
cell = layers.SimpleRNNCell(hidden_size)
rnn = layers.RNN(
    cell,
    return_sequences=True,  # emit the hidden state of every timestep
    return_state=False,     # do not additionally return the final state
)
hidden_state = rnn(train_X)

# With return_sequences=True the output holds the hidden state of every timestep,
# so its shape is (batch, timesteps, hidden_size) = (1, 4, 3), not (1, 3).
print('hidden_state : {} \t shape : {}'.format(hidden_state, hidden_state.shape))

hidden_state : [[[ 0.9757288  -0.770438   -0.99918896]
  [ 0.9040556  -0.9999865  -0.99999976]
  [ 0.99782825 -1.         -1.        ]
  [ 1.         -1.         -1.        ]]] 	 shape : (1, 4, 3)


### return_sequences = False, return_state = False

In [7]:
hidden_size = 3  # dimensionality of the hidden state

cell = layers.SimpleRNNCell(hidden_size)
rnn = layers.RNN(
    cell,
    return_sequences=False,  # keep only the hidden state of the last timestep
    return_state=False,
)
hidden_state = rnn(train_X)

# Output is a (1, 3) tensor: the hidden state of the final timestep only.
# The original author notes that the values differ every time the cell is evaluated.
print('hidden_state : {} \t shape : {}'.format(hidden_state, hidden_state.shape))

hidden_state : [[-1. -1.  1.]] 	 shape : (1, 3)


### return_sequences = True, return_state = True

In [12]:
hidden_size = 3  # dimensionality of the hidden state

cell = layers.SimpleRNNCell(hidden_size)
rnn = layers.RNN(cell, return_state=True, return_sequences=True)

# return_state=True makes the layer return the final state in addition to its output.
hidden_state, last_state = rnn(train_X)

# hidden_state: hidden states of all timesteps, shape (1, 4, 3).
# last_state:   state after the final timestep, shape (1, 3); it matches the last row of hidden_state.
print('hidden_state : {} \t shape : {}'.format(hidden_state, hidden_state.shape))
print('last_state : {} \t shape : {}'.format(last_state, last_state.shape))

hidden_state : [[[ 0.9972939   0.98473155 -0.14765674]
  [ 0.998511   -0.6208585   0.99894214]
  [ 0.99930507 -0.9995934  -0.9991765 ]
  [ 1.          1.         -0.99999905]]] 	 shape : (1, 4, 3)
last_state : [[ 1.          1.         -0.99999905]] 	 shape : (1, 3)


### return_sequences = False, return_state = True

In [11]:
hidden_size = 3  # dimensionality of the hidden state

cell = layers.SimpleRNNCell(hidden_size)
rnn = layers.RNN(cell, return_state=True, return_sequences=False)
hidden_state, last_state = rnn(train_X)

# With return_sequences=False the output is just the last timestep's hidden state,
# so hidden_state and last_state are both (1, 3) tensors holding the same values.
# The original author notes that the values differ every time the cell is evaluated.
print('hidden_state : {} \t shape : {}'.format(hidden_state, hidden_state.shape))
print('last_state : {} \t shape : {}'.format(last_state, last_state.shape))

hidden_state : [[ 1.         -0.9999983   0.97233385]] 	 shape : (1, 3)
last_state : [[ 1.         -0.9999983   0.97233385]] 	 shape : (1, 3)


## '토마토를 먹자'를 학습해보자!

In [13]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import SimpleRNNCell, Dense, TimeDistributed, RNN

# Vocabulary: the position in this list is the integer id of the character.
# The network works on numbers, so each character is replaced by its index before training.
idx2char = ['토', '마', '를', '먹', '자']

x_data = [[0, 0, 1, 2, 4, 3]]  # input ids:  토 토 마 를 자 먹
y_data = [[0, 1, 0, 2, 3, 4]]  # target ids: 토 마 토 를 먹 자

# Sizes are derived from the data instead of being hard-coded, so they cannot drift
# out of sync with the vocabulary or the sequences.
num_classes = len(idx2char)    # number of distinct tokens (5)
input_dim = num_classes        # width of a one-hot vector equals the vocabulary size (5)
sequence_len = len(x_data[0])  # timesteps per sequence (6); len(x_data) would be the batch size, 1
learning_rate = 0.1

### 데이터 변환 - 원핫인코딩

In [21]:
# One-hot encode the input and target id sequences with the same number of classes.
x_one_hot, y_one_hot = (
    tf.keras.utils.to_categorical(ids, num_classes=num_classes)
    for ids in (x_data, y_data)
)

In [22]:
x_one_hot  # one-hot rows of the input sequence, in order: 토 토 마 를 자 먹

array([[[1., 0., 0., 0., 0.],
        [1., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0.],
        [0., 0., 1., 0., 0.],
        [0., 0., 0., 0., 1.],
        [0., 0., 0., 1., 0.]]], dtype=float32)

In [23]:
y_one_hot  # one-hot rows of the target sequence, in order: 토 마 토 를 먹 자

array([[[1., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0.],
        [1., 0., 0., 0., 0.],
        [0., 0., 1., 0., 0.],
        [0., 0., 0., 1., 0.],
        [0., 0., 0., 0., 1.]]], dtype=float32)

In [19]:
x_one_hot.shape  # already rank 3: (number of sequences, sequence length, input_dim)

(1, 6, 5)

In [24]:
y_one_hot  # one-hot target tensor (same value as displayed earlier in this notebook)

array([[[1., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0.],
        [1., 0., 0., 0., 0.],
        [0., 0., 1., 0., 0.],
        [0., 0., 0., 1., 0.],
        [0., 0., 0., 0., 1.]]], dtype=float32)

In [27]:
# Character-level sequence model: a simple RNN unrolled over the sequence,
# followed by a softmax classifier applied independently at every timestep.
model = Sequential()

# A cell processes a single timestep, so it takes no sequence-level input_shape.
# The (timesteps, features) shape belongs on the RNN layer that unrolls the cell.
cell = SimpleRNNCell(units=num_classes)

model.add(RNN(cell=cell,
              return_sequences=True,   # one output per timestep, needed for per-character targets
              return_state=False,
              input_shape=(sequence_len, input_dim)))
model.add(TimeDistributed(Dense(units=num_classes, activation='softmax')))

# Targets are one-hot vectors, hence categorical cross-entropy.
model.compile(loss='categorical_crossentropy',
              optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
              metrics=['accuracy'])

model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rnn_10 (RNN)                (None, 6, 5)              55        
                                                                 
 time_distributed (TimeDist  (None, 6, 5)              30        
 ributed)                                                        
                                                                 
Total params: 85 (340.00 Byte)
Trainable params: 85 (340.00 Byte)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [28]:
# Train on the single one-hot encoded sequence pair for 10 epochs.
model.fit(x=x_one_hot, y=y_one_hot, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x7df5bcd6e320>

In [30]:
# Per-timestep softmax probabilities; the class with the highest probability
# at each timestep is the predicted character.
pred = model.predict(x=x_one_hot)
pred



array([[[0.66270363, 0.07281416, 0.00213777, 0.24711499, 0.0152295 ],
        [0.09171669, 0.52472925, 0.02222828, 0.00754518, 0.3537807 ],
        [0.73370415, 0.02855024, 0.01776904, 0.21339998, 0.00657666],
        [0.01268191, 0.01237652, 0.9255967 , 0.00560303, 0.04374183],
        [0.3319473 , 0.00435913, 0.0118718 , 0.6501052 , 0.00171658],
        [0.04326791, 0.41054457, 0.02310286, 0.00389941, 0.51918525]]],
      dtype=float32)

In [31]:
# Decode every predicted sequence back into characters.
# argmax over the class axis yields the INDEX of the most probable class at each
# timestep (not the probability itself); that index is looked up in idx2char.
# The enumerate() index was never used, so the sequences are iterated directly.
for char_ids in np.argmax(pred, axis=-1):
  print(" ".join(idx2char[c] for c in char_ids))

토 마 토 를 먹 자


# LSTM

### return_sequences=False, return_state=True

In [32]:
from keras.layers import LSTM

# The hidden size is chosen arbitrarily as 3.
hidden_size = 3
lstm = LSTM(hidden_size, return_state=True, return_sequences=False)

# With return_state=True an LSTM returns three tensors: output, final hidden state, final cell state.
hidden_state, last_state, last_cell_state = lstm(train_X)

# With return_sequences=False the output is the hidden state of the last timestep,
# so hidden_state and last_state hold the same values.
# Unlike the simple RNN, the LSTM additionally returns last_cell_state, which is a
# separate tensor and does not equal the hidden state.
print('hidden_state : {} \t shape : {}'.format(hidden_state, hidden_state.shape))
print('last_state : {} \t shape : {}'.format(last_state, last_state.shape))
print('last_cell_state : {} \t shape : {}'.format(last_cell_state, last_cell_state.shape))

hidden_state : [[-8.8054786e-04  1.0151130e-15  3.0866936e-01]] 	 shape : (1, 3)
last_state : [[-8.8054786e-04  1.0151130e-15  3.0866936e-01]] 	 shape : (1, 3)
last_cell_state : [[-1.          0.01850728  0.3239956 ]] 	 shape : (1, 3)


### return_sequences=True, return_state=True

In [34]:
from keras.layers import LSTM

# The hidden size is chosen arbitrarily as 3.
hidden_size = 3
lstm = LSTM(hidden_size, return_state=True, return_sequences=True)

# With return_state=True an LSTM returns three tensors: output, final hidden state, final cell state.
hidden_state, last_state, last_cell_state = lstm(train_X)

# With return_sequences=True, hidden_state holds the hidden state of every timestep, shape (1, 4, 3).
# last_state is the hidden state of the final timestep, shape (1, 3), i.e. the last row of hidden_state.
# last_cell_state is the final cell state, which only the LSTM (not the simple RNN) returns.
print('hidden_state : {} \t shape : {}'.format(hidden_state, hidden_state.shape))
print('last_state : {} \t shape : {}'.format(last_state, last_state.shape))
print('last_cell_state : {} \t shape : {}'.format(last_cell_state, last_cell_state.shape))

hidden_state : [[[ 2.0683242e-01 -2.6686648e-01  3.5537310e-02]
  [-5.1455015e-01 -2.1006253e-03  1.1592678e-01]
  [ 7.5408965e-01 -4.2664301e-01  8.1509803e-05]
  [-7.5414830e-01 -1.4390316e-02  2.5813002e-08]]] 	 shape : (1, 4, 3)
last_state : [[-7.5414830e-01 -1.4390316e-02  2.5813002e-08]] 	 shape : (1, 3)
last_cell_state : [[-9.8250484e-01 -1.8275588e+00  2.3699847e-06]] 	 shape : (1, 3)


## 토마토를 먹어보자!!!

In [55]:
#데이터 정리
import numpy as np
import tensorflow as tf
from collections.abc import Sequence

# Vocabulary: the position in this list is the integer id of the character.
# The network works on numbers, so each character is replaced by its index before training.
idx2char = ['토', '마', '를', '먹', '자']

x_data = [[0, 0, 1, 2, 4, 3]]  # input ids:  토 토 마 를 자 먹
y_data = [[0, 1, 0, 2, 3, 4]]  # target ids: 토 마 토 를 먹 자

# Sizes are derived from the data instead of being hard-coded, so they cannot drift
# out of sync with the vocabulary or the sequences.
num_classes = len(idx2char)    # number of distinct tokens (5)
input_dim = num_classes        # width of a one-hot vector equals the vocabulary size (5)
sequence_len = len(x_data[0])  # timesteps per sequence (6); len(x_data) would be the batch size, 1
learning_rate = 0.1

# One-hot encode the input and target id sequences with the same number of classes.
x_one_hot, y_one_hot = (
    tf.keras.utils.to_categorical(ids, num_classes=num_classes)
    for ids in (x_data, y_data)
)

In [56]:
from keras.layers import LSTM

# Stacked model: an LSTM emitting one vector per timestep, a small ReLU layer,
# and a softmax over the vocabulary.
model = Sequential([
    LSTM(units=num_classes,
         activation='tanh',
         return_sequences=True,  # one output per timestep, needed for per-character targets
         input_shape=(sequence_len, input_dim)),
    Dense(32, activation='relu'),
    Dense(units=num_classes, activation='softmax'),
])

# Targets are one-hot vectors, hence categorical cross-entropy.
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

model.summary()

Model: "sequential_8"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_6 (LSTM)               (None, 6, 5)              220       
                                                                 
 dense_9 (Dense)             (None, 6, 32)             192       
                                                                 
 dense_10 (Dense)            (None, 6, 5)              165       
                                                                 
Total params: 577 (2.25 KB)
Trainable params: 577 (2.25 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [57]:
# Train on the single one-hot encoded sequence pair for 10 epochs.
model.fit(x=x_one_hot, y=y_one_hot, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x7df5bda4de40>

In [58]:
# Per-timestep softmax probabilities; the class with the highest probability
# at each timestep is the predicted character.
pred = model.predict(x=x_one_hot)
pred





array([[[5.9562790e-01, 4.0336114e-01, 3.6712965e-05, 6.5421847e-07,
         9.7370730e-04],
        [4.1916433e-01, 5.8070105e-01, 3.9237543e-06, 1.3059368e-08,
         1.3065859e-04],
        [9.8236418e-01, 1.7286567e-02, 4.9848022e-05, 1.7484754e-07,
         2.9921479e-04],
        [2.6231766e-05, 2.1979199e-06, 9.9509180e-01, 7.3998212e-04,
         4.1397261e-03],
        [6.2789900e-06, 6.5955980e-08, 3.6092525e-04, 9.9772197e-01,
         1.9107234e-03],
        [3.6655909e-03, 1.1347920e-03, 6.5774694e-03, 2.0363277e-05,
         9.8860174e-01]]], dtype=float32)

In [63]:
# Decode every predicted sequence back into characters.
# argmax over the class axis yields the INDEX of the most probable class at each
# timestep (not the probability itself); that index is looked up in idx2char.
# The enumerate() index was never used, so the sequences are iterated directly.
for char_ids in np.argmax(pred, axis=-1):
  print(" ".join(idx2char[c] for c in char_ids))

토 마 토 를 먹 자


# GRU

### return_sequences=False, return_state=True

In [71]:
from keras.layers import GRU

# The hidden size is chosen arbitrarily as 3.
hidden_size = 3
gru = GRU(hidden_size, return_state=True, return_sequences=False)

# A GRU with return_state=True returns two tensors: the output and the final state.
# NOTE: despite its name, whole_sequence_output holds only the last timestep here,
# because return_sequences=False; it therefore matches final_state, both of shape (1, 3).
whole_sequence_output, final_state = gru(train_X)

print('whole_sequence_output : {} \t shape : {}'.format(whole_sequence_output, whole_sequence_output.shape))
print('final_state : {} \t shape : {}'.format(final_state, final_state.shape))

whole_sequence_output : [[ 0.9780052  -0.7302151  -0.30159402]] 	 shape : (1, 3)
final_state : [[ 0.9780052  -0.7302151  -0.30159402]] 	 shape : (1, 3)


## 토마토를 먹어보자!!!!!

In [66]:
#데이터 정리
import numpy as np
import tensorflow as tf
from collections.abc import Sequence

# Vocabulary: the position in this list is the integer id of the character.
# The network works on numbers, so each character is replaced by its index before training.
idx2char = ['토', '마', '를', '먹', '자']

x_data = [[0, 0, 1, 2, 4, 3]]  # input ids:  토 토 마 를 자 먹
y_data = [[0, 1, 0, 2, 3, 4]]  # target ids: 토 마 토 를 먹 자

# Sizes are derived from the data instead of being hard-coded, so they cannot drift
# out of sync with the vocabulary or the sequences.
num_classes = len(idx2char)    # number of distinct tokens (5)
input_dim = num_classes        # width of a one-hot vector equals the vocabulary size (5)
sequence_len = len(x_data[0])  # timesteps per sequence (6); len(x_data) would be the batch size, 1
learning_rate = 0.1

# One-hot encode the input and target id sequences with the same number of classes.
x_one_hot, y_one_hot = (
    tf.keras.utils.to_categorical(ids, num_classes=num_classes)
    for ids in (x_data, y_data)
)

In [67]:
from keras.layers import GRU

# Stacked model: a GRU emitting one vector per timestep, a small ReLU layer,
# and a softmax over the vocabulary.
model = Sequential([
    GRU(units=num_classes,
        activation='tanh',
        return_sequences=True,  # one output per timestep, needed for per-character targets
        input_shape=(sequence_len, input_dim)),
    Dense(32, activation='relu'),
    Dense(units=num_classes, activation='softmax'),
])

# Targets are one-hot vectors, hence categorical cross-entropy.
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

model.summary()

Model: "sequential_9"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 gru_1 (GRU)                 (None, 6, 5)              180       
                                                                 
 dense_11 (Dense)            (None, 6, 32)             192       
                                                                 
 dense_12 (Dense)            (None, 6, 5)              165       
                                                                 
Total params: 537 (2.10 KB)
Trainable params: 537 (2.10 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [68]:
# Train on the single one-hot encoded sequence pair for 10 epochs.
model.fit(x=x_one_hot, y=y_one_hot, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x7df5bce04220>

In [69]:
# Per-timestep softmax probabilities; the class with the highest probability
# at each timestep is the predicted character.
pred = model.predict(x=x_one_hot)
pred



array([[[9.56170082e-01, 4.38085198e-02, 1.26824671e-05, 8.63139576e-06,
         6.36264375e-08],
        [7.13717937e-03, 9.91853654e-01, 9.87289706e-04, 3.00907823e-06,
         1.88845261e-05],
        [9.99683380e-01, 3.15327663e-04, 1.30732769e-06, 2.41179841e-08,
         4.35543772e-13],
        [2.18483329e-08, 6.41604947e-06, 9.99992251e-01, 2.42136231e-08,
         1.36901883e-06],
        [9.59996882e-08, 5.08317264e-08, 1.00255114e-07, 9.99999404e-01,
         3.74943909e-07],
        [2.57438038e-12, 2.12871396e-06, 8.42152731e-05, 1.51879433e-07,
         9.99913454e-01]]], dtype=float32)

In [70]:
# Decode every predicted sequence back into characters.
# argmax over the class axis yields the INDEX of the most probable class at each
# timestep (not the probability itself); that index is looked up in idx2char.
# The enumerate() index was never used, so the sequences are iterated directly.
for char_ids in np.argmax(pred, axis=-1):
  print(" ".join(idx2char[c] for c in char_ids))

토 마 토 를 먹 자
