# Evn

In [1]:
import os
import random
import shutil
import json
import zipfile
import math
import copy
import collections
import re

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import tensorflow as tf
import tensorflow.keras.backend as K

from tqdm.notebook import tqdm

In [2]:
# random seed initialize
random_seed = 1234
random.seed(random_seed)
np.random.seed(random_seed)
tf.random.set_seed(random_seed)

In [3]:
!nvidia-smi

NVIDIA-SMI has failed because it couldn't communicate with the NVIDIA driver. Make sure that the latest NVIDIA driver is installed and running.



# 입력 및 Vocab

In [4]:
# 입력 문장
sentences = [
    '나는 오늘 기분이 좋아 나는 오늘 우울해'
]

In [5]:
# 각 문장을 띄어쓰기 단위로 분할
words = []
for sentence in sentences:
    words.extend(sentence.split())

# 중복 단어 제거
words = list(dict.fromkeys(words))

# 각 단어별 고유한 번호 부여
word_to_id = {'[PAD]': 0, '[UNK]': 1}
for word in words:
    word_to_id[word] = len(word_to_id)

# 각 숫자별 단어 부여
id_to_word = {_id:word for word, _id in word_to_id.items()}

word_to_id, id_to_word

({'[PAD]': 0, '[UNK]': 1, '기분이': 4, '나는': 2, '오늘': 3, '우울해': 6, '좋아': 5},
 {0: '[PAD]', 1: '[UNK]', 2: '나는', 3: '오늘', 4: '기분이', 5: '좋아', 6: '우울해'})

In [6]:
# 학습용 입력 데이터 생성
train_inputs = []
for sentence in sentences:
    train_inputs.append([word_to_id[word] for word in sentence.split()])

# train inputs을 numpy array로 변환
train_inputs = np.array(train_inputs)

train_inputs

array([[2, 3, 4, 5, 2, 3, 6]])

In [7]:
# embedding
embedding = tf.keras.layers.Embedding(len(word_to_id), 4)

In [8]:
# 단어벡터
hidden = embedding(train_inputs)
hidden

<tf.Tensor: shape=(1, 7, 4), dtype=float32, numpy=
array([[[ 0.02788493,  0.00853021,  0.02688133, -0.00167429],
        [ 0.00873896, -0.04772998, -0.00643448,  0.04782395],
        [ 0.04810704, -0.03535189,  0.04945729,  0.02813626],
        [-0.02798697, -0.00110071,  0.0133124 ,  0.00637371],
        [ 0.02788493,  0.00853021,  0.02688133, -0.00167429],
        [ 0.00873896, -0.04772998, -0.00643448,  0.04782395],
        [-0.00963336, -0.01013004,  0.01826939,  0.02741292]]],
      dtype=float32)>

# RNN

## return sequence=False, return_state=False

In [30]:
# rnn
rnn_11 = tf.keras.layers.SimpleRNN(units = 5)
output_11 = rnn_11(hidden)
print(output_11)


tf.Tensor([[-0.08182613  0.03313228 -0.08073361 -0.02169145 -0.02739608]], shape=(1, 5), dtype=float32)


In [31]:
# rnn weights
weights = run_11.get_weights()
Wx = weights[0]
Wh = weights[1]
b = weights[2]

print(Wx.shape)
print(Wh.shape)
print(b.shape)

(4, 5)
(5, 5)
(5,)



## return sequence=True, return_state=False

In [23]:
# (return_sequences=True)
rnn_12 = tf.keras.layers.SimpleRNN(units = 5, return_sequences=True)
output_12 = rnn_12(hidden)
output_12

<tf.Tensor: shape=(1, 7, 5), dtype=float32, numpy=
array([[[-0.02564576,  0.0414134 , -0.00366702,  0.00832783,
         -0.02934231],
        [ 0.03541386, -0.03377459,  0.02673792, -0.05788843,
          0.07016172],
        [-0.06389104,  0.11147652,  0.06913166, -0.04338427,
         -0.09603855],
        [-0.00451822, -0.02526461, -0.0423482 , -0.16096975,
          0.13076156],
        [-0.1574293 ,  0.05024347,  0.13930199, -0.00516978,
         -0.11125721],
        [ 0.05772443,  0.06315199,  0.03761888, -0.11873236,
          0.24143262],
        [-0.22489886,  0.0802756 ,  0.02204562, -0.07525434,
         -0.12735648]]], dtype=float32)>

## return sequence=False, return_state=True

In [33]:
# (return_state=True)
rnn_13 = tf.keras.layers.SimpleRNN(units = 5, return_sequences=False, return_state= True)
output_13, fw_h_13 = rnn_13(hidden)
output_13

<tf.Tensor: shape=(1, 5), dtype=float32, numpy=
array([[0.07126223, 0.07867122, 0.15415551, 0.07626989, 0.08865989]],
      dtype=float32)>

## return sequence=True, return_state=True

In [34]:
# (return_sequences=True, return_state=True)
rnn_14 = tf.keras.layers.SimpleRNN(units = 5, return_sequences= True, return_state= True)
output_14, fw_h_14 = rnn_14(hidden)
output_14

<tf.Tensor: shape=(1, 7, 5), dtype=float32, numpy=
array([[[ 0.00314281,  0.01055995, -0.03561208, -0.04000567,
         -0.02335886],
        [ 0.01255181, -0.02320551,  0.00905954, -0.00760356,
         -0.02301774],
        [ 0.02589694,  0.01753716, -0.0588303 , -0.02003263,
         -0.04096285],
        [-0.05704739, -0.0428019 , -0.02890472, -0.02171617,
         -0.02604887],
        [-0.06477052,  0.03514048, -0.00264515, -0.02312021,
         -0.04499031],
        [-0.0199968 , -0.05741857,  0.05183109,  0.01421103,
          0.02709923],
        [ 0.01161555,  0.02362226,  0.00568394,  0.06808266,
          0.04145366]]], dtype=float32)>

## init hidden state

In [None]:
# rnn_11 with fw_h_13
rnn_11(hidden, initial_state=[fw_h_13])  # (bs, units)

In [None]:
# rnn_12 with fw_h_14
rnn_12(hidden, initial_state=[fw_h_14])  # (bs, n_seq, units)

In [None]:
# rnn_13 with fw_h_13
rnn_13(hidden, initial_state=[fw_h_13])  # (bs, units)

In [None]:
# rnn_14 with fw_h_14.
rnn_14(hidden, initial_state=[fw_h_14])  # (bs, n_seq, units)

# LSTM

## return sequence=False, return_state=False

In [40]:
# lstm
lstm_11 = tf.keras.layers.LSTM(units = 5)
output_11 = lstm_11(hidden)
print(output_11)

tf.Tensor([[ 0.00699196  0.00437341  0.0025941  -0.0012665   0.00110581]], shape=(1, 5), dtype=float32)


In [41]:
# lstm weights
weights = lstm_11.get_weights()
Wx = weights[0]
Wh = weights[1]
b = weights[2]

print(Wx.shape)
print(Wh.shape)
print(b.shape)

(4, 20)
(5, 20)
(20,)


## return sequence=True, return_state=False

In [43]:
# (return_sequences=True)
# lstm
lstm_12 = tf.keras.layers.LSTM(units = 5, return_state=True)
output_12 = lstm_12(hidden)
print(output_12)

[<tf.Tensor: shape=(1, 5), dtype=float32, numpy=
array([[ 0.00511028,  0.0013761 ,  0.00686375, -0.01015518,  0.01634173]],
      dtype=float32)>, <tf.Tensor: shape=(1, 5), dtype=float32, numpy=
array([[ 0.00511028,  0.0013761 ,  0.00686375, -0.01015518,  0.01634173]],
      dtype=float32)>, <tf.Tensor: shape=(1, 5), dtype=float32, numpy=
array([[ 0.01022531,  0.00275458,  0.01357382, -0.02040049,  0.03267181]],
      dtype=float32)>]


In [44]:
weights = lstm_12.get_weights()
Wx = weights[0]
Wh = weights[1]
b = weights[2]

print(Wx.shape)
print(Wh.shape)
print(b.shape)

(4, 20)
(5, 20)
(20,)


## return sequence=False, return_state=True

In [45]:
# (return_state=True)
# (return_sequences=True)
# lstm
lstm_13 = tf.keras.layers.LSTM(units = 5, return_state=True, return_sequences=False)
output_13, fw_h_13, fw_c_13 = lstm_13(hidden)
print(output_13)

tf.Tensor([[0.00106037 0.01103816 0.00193962 0.00628064 0.00620387]], shape=(1, 5), dtype=float32)


## return sequence=True, return_state=True

In [48]:
# (return_sequences=True, return_state=True)
# (return_sequences=True)
# lstm
# lstm
lstm_14 = tf.keras.layers.LSTM(units = 5, return_state=True, return_sequences=True)
output_14, fw_h_14, fw_c_14 = lstm_14(hidden)
print(output_14)

tf.Tensor(
[[[ 0.00059376 -0.00109264 -0.00108926  0.00101494  0.00198455]
  [ 0.00290888  0.00565168  0.00687769  0.00021707  0.00627362]
  [ 0.00752213  0.00836171  0.00851842 -0.00200741  0.01095115]
  [ 0.00779969  0.00591761  0.00644313 -0.00502358  0.00607413]
  [ 0.00741617  0.00263899  0.00481624 -0.00240269  0.00604138]
  [ 0.00901949  0.00788073  0.01222465 -0.00220951  0.00895655]
  [ 0.00983144  0.00677333  0.01250601 -0.00395866  0.00803152]]], shape=(1, 7, 5), dtype=float32)


## init hidden state

In [46]:
# lstm_11 with fw_h_13, fw_c_13
lstm_11(hidden, initial_state=[fw_h_13, fw_c_13])

[<tf.Tensor: shape=(1, 5), dtype=float32, numpy=
 array([[ 0.01271887,  0.00741965, -0.00714908, -0.00294344,  0.01038941]],
       dtype=float32)>, <tf.Tensor: shape=(1, 5), dtype=float32, numpy=
 array([[ 0.01271887,  0.00741965, -0.00714908, -0.00294344,  0.01038941]],
       dtype=float32)>, <tf.Tensor: shape=(1, 5), dtype=float32, numpy=
 array([[ 0.02527537,  0.01490136, -0.01427226, -0.00584871,  0.02064005]],
       dtype=float32)>]

In [49]:
# lstm_12 with fw_h_14, fw_c_14
lstm_12(hidden, initial_state=[fw_h_14, fw_c_14])

[<tf.Tensor: shape=(1, 5), dtype=float32, numpy=
 array([[ 0.0062019 ,  0.00035636,  0.0090909 , -0.01227063,  0.01810135]],
       dtype=float32)>, <tf.Tensor: shape=(1, 5), dtype=float32, numpy=
 array([[ 0.0062019 ,  0.00035636,  0.0090909 , -0.01227063,  0.01810135]],
       dtype=float32)>, <tf.Tensor: shape=(1, 5), dtype=float32, numpy=
 array([[ 0.0124105 ,  0.00071342,  0.01796763, -0.02466788,  0.03621345]],
       dtype=float32)>]

In [50]:
# lstm_13 with fw_h_13, fw_c_13
lstm_13(hidden, initial_state=[fw_h_13, fw_c_13])

[<tf.Tensor: shape=(1, 5), dtype=float32, numpy=
 array([[0.0025416 , 0.01290802, 0.00091365, 0.00714734, 0.00699123]],
       dtype=float32)>, <tf.Tensor: shape=(1, 5), dtype=float32, numpy=
 array([[0.0025416 , 0.01290802, 0.00091365, 0.00714734, 0.00699123]],
       dtype=float32)>, <tf.Tensor: shape=(1, 5), dtype=float32, numpy=
 array([[0.00510934, 0.02574032, 0.00183873, 0.01427506, 0.01398708]],
       dtype=float32)>]

In [51]:
# lstm_14 with fw_h_14, fw_c_14
lstm_14(hidden, initial_state=[fw_h_14, fw_c_14])

[<tf.Tensor: shape=(1, 7, 5), dtype=float32, numpy=
 array([[[ 0.00935489,  0.00305615,  0.00975044, -0.00203683,
           0.00747476],
         [ 0.01096141,  0.00799067,  0.01630442, -0.00225187,
           0.00999205],
         [ 0.01486224,  0.00946736,  0.01653919, -0.00406788,
           0.01367347],
         [ 0.014371  ,  0.00622153,  0.0133067 , -0.00673656,
           0.00812526],
         [ 0.01348843,  0.00240546,  0.01071598, -0.00384305,
           0.00764984],
         [ 0.01467262,  0.00731951,  0.0173389 , -0.0034216 ,
           0.01022267],
         [ 0.01492038,  0.00600435,  0.01689395, -0.00497877,
           0.00911159]]], dtype=float32)>,
 <tf.Tensor: shape=(1, 5), dtype=float32, numpy=
 array([[ 0.01492038,  0.00600435,  0.01689395, -0.00497877,  0.00911159]],
       dtype=float32)>,
 <tf.Tensor: shape=(1, 5), dtype=float32, numpy=
 array([[ 0.02964479,  0.01213768,  0.03377584, -0.00993087,  0.01825538]],
       dtype=float32)>]

# GRU

## return sequence=False, return_state=False

In [None]:
# GRU

In [None]:
# GRU weights

## return sequence=True, return_state=False

In [None]:
# (return_sequences=True)

## return sequence=False, return_state=True

In [None]:
# (return_state=True)

## return sequence=True, return_state=True

In [None]:
# (return_sequences=True, return_state=True)

## init hidden state

In [None]:
gru_11(hidden, initial_state=[fw_h_13])  # (bs, units)

<tf.Tensor: shape=(1, 5), dtype=float32, numpy=
array([[-0.00140842,  0.01592344, -0.01623747, -0.00670336, -0.01101907]],
      dtype=float32)>

In [None]:
gru_12(hidden, initial_state=[fw_h_14])  # (bs, n_seq, units)

<tf.Tensor: shape=(1, 7, 5), dtype=float32, numpy=
array([[[-0.00300019, -0.00323922, -0.01454308, -0.00958822,
          0.01849951],
        [-0.00319965, -0.00230684, -0.00040348,  0.0188137 ,
          0.02772161],
        [-0.01684529, -0.02090497, -0.00366238,  0.02805248,
          0.04653085],
        [-0.00582952, -0.00502904,  0.00368652,  0.0209058 ,
          0.02341765],
        [-0.01303946, -0.01359165, -0.00516605,  0.01199155,
          0.02483771],
        [-0.01180617, -0.00767625,  0.00535123,  0.03328608,
          0.03071455],
        [-0.00979845, -0.00366684,  0.01045386,  0.03020769,
          0.02639962]]], dtype=float32)>

In [None]:
gru_13(hidden, initial_state=[fw_h_13])  # (bs, units), (bs, units)

[<tf.Tensor: shape=(1, 5), dtype=float32, numpy=
 array([[-0.00188146,  0.01085301,  0.00768062,  0.01064109, -0.00205139]],
       dtype=float32)>, <tf.Tensor: shape=(1, 5), dtype=float32, numpy=
 array([[-0.00188146,  0.01085301,  0.00768062,  0.01064109, -0.00205139]],
       dtype=float32)>]

In [None]:
gru_14(hidden, initial_state=[fw_h_14])  # (bs, n_seq, units), (bs, units)

[<tf.Tensor: shape=(1, 7, 5), dtype=float32, numpy=
 array([[[ 0.00120912,  0.01436689, -0.00526015, -0.00245608,
           0.01752719],
         [ 0.00106435,  0.0148916 , -0.01622299, -0.01184704,
           0.01214018],
         [ 0.00249824,  0.02077618, -0.01704834, -0.01405097,
           0.03059548],
         [ 0.01132524,  0.01315177, -0.01130751, -0.0072835 ,
           0.01345716],
         [ 0.00543236,  0.0139883 , -0.00539421, -0.00206096,
           0.01916937],
         [ 0.00361664,  0.0148993 , -0.01673315, -0.01203724,
           0.01301165],
         [ 0.00488993,  0.01504152, -0.01344696, -0.01023171,
           0.01024941]]], dtype=float32)>,
 <tf.Tensor: shape=(1, 5), dtype=float32, numpy=
 array([[ 0.00488993,  0.01504152, -0.01344696, -0.01023171,  0.01024941]],
       dtype=float32)>]

# Bidirectional RNN

## return sequence=False, return_state=False

In [52]:
# bi rnn
bi_rnn_11 = tf.keras.layers.Bidirectional(tf.keras.layers.SimpleRNN(units=5))
output_11 = bi_rnn_11(hidden)  # (bs, units * 2)
print(output_11)

tf.Tensor(
[[-0.1538529  -0.0256653  -0.10163426  0.07767909  0.07455868 -0.05295386
   0.1637144   0.03707305  0.03730908  0.071001  ]], shape=(1, 10), dtype=float32)


In [53]:
# bi rnn weights
weights = bi_rnn_11.get_weights()
Wx = weights[0]
Wh = weights[1]
b = weights[2]
print(Wx.shape)  # (d_model, unit)
print(Wh.shape)  # (unit, unit)
print(b.shape)  # (unit,)

(4, 5)
(5, 5)
(5,)


## return sequence=True, return_state=False

In [54]:
# (return_sequences=True)
bi_rnn_12 = tf.keras.layers.Bidirectional(tf.keras.layers.SimpleRNN(units=5, return_sequences=True))
output_12 = bi_rnn_12(hidden)  # (bs, seq, units* 2)
print(output_12)

tf.Tensor(
[[[ 0.00042999  0.01861845  0.03380582 -0.0277166  -0.01102985
    0.09142408  0.00431319  0.00246824  0.05289203  0.09165204]
  [-0.0199765  -0.08215499 -0.00507331 -0.02071114  0.00154722
    0.10291745  0.02190209 -0.00072478  0.0481161   0.09873346]
  [ 0.02733728  0.0313317   0.02779091 -0.06298349 -0.00207244
    0.04727722  0.03279176 -0.00913265  0.05059399  0.07428965]
  [-0.02965109 -0.08778792 -0.02174974  0.05879542 -0.00517825
    0.06004259 -0.00342074 -0.02809907  0.00461782  0.04789938]
  [ 0.02749345  0.1200547   0.01559181 -0.0602874  -0.01332535
    0.02862095  0.0208326   0.01079698  0.00695901  0.06470383]
  [-0.03542952 -0.18849123  0.00832944 -0.03625308  0.00036279
    0.04553087  0.03963753 -0.00164247  0.02317123  0.04454299]
  [ 0.07033625  0.10363517 -0.07253242  0.0577006   0.00304899
    0.0081668   0.02353677 -0.01344333  0.01915813  0.0063339 ]]], shape=(1, 7, 10), dtype=float32)


In [55]:
bi_rnn_13 = tf.keras.layers.Bidirectional(tf.keras.layers.SimpleRNN(units=5, return_state=True))
output_13, fw_h_13, bw_h_13 = bi_rnn_13(hidden)  # (bs, units * 2), (bs, units), (bs, units)
print(output_13)
print(fw_h_13)
print(bw_h_13)


tf.Tensor(
[[ 0.206432   -0.05204804  0.04417088  0.18773906 -0.05972274  0.01808426
  -0.04471127  0.10663958  0.02962003 -0.03498914]], shape=(1, 10), dtype=float32)
tf.Tensor([[ 0.206432   -0.05204804  0.04417088  0.18773906 -0.05972274]], shape=(1, 5), dtype=float32)
tf.Tensor([[ 0.01808426 -0.04471127  0.10663958  0.02962003 -0.03498914]], shape=(1, 5), dtype=float32)


## return sequence=False, return_state=True

In [56]:
# (return_state=True)
bi_rnn_13 = tf.keras.layers.Bidirectional(tf.keras.layers.SimpleRNN(units=5, return_state=True))
output_13, fw_h_13, bw_h_13 = bi_rnn_13(hidden)  # (bs, units * 2), (bs, units), (bs, units)
print(output_13)
print(fw_h_13)
print(bw_h_13)

tf.Tensor(
[[ 0.04503809  0.04798897 -0.20629981 -0.1335842   0.10895995 -0.08954202
  -0.09520603 -0.03947056 -0.01417204  0.08453585]], shape=(1, 10), dtype=float32)
tf.Tensor([[ 0.04503809  0.04798897 -0.20629981 -0.1335842   0.10895995]], shape=(1, 5), dtype=float32)
tf.Tensor([[-0.08954202 -0.09520603 -0.03947056 -0.01417204  0.08453585]], shape=(1, 5), dtype=float32)


## return sequence=True, return_state=True

In [57]:
# (return_sequences=True, return_state=True)
bi_rnn_14 = tf.keras.layers.Bidirectional(tf.keras.layers.SimpleRNN(units=5, return_sequences=True, return_state=True))
output_14, fw_h_14, bw_h_14 = bi_rnn_14(hidden)  # (bs, seq, units * 2), (bs, units), (bs, units)
print(output_14)
print(fw_h_14)
print(bw_h_14)

tf.Tensor(
[[[-0.03654061 -0.01259436 -0.02535491  0.00358858  0.01374843
   -0.02927018 -0.03968795 -0.0244882   0.08984867 -0.01691988]
  [ 0.05188211 -0.06143777 -0.04022064 -0.02887872  0.01530047
    0.03710962  0.03534402 -0.03509644  0.01543774 -0.07236759]
  [ 0.02795666 -0.05425167 -0.00503947  0.03997935  0.02919459
    0.01712837 -0.05636651 -0.04480319 -0.02348963 -0.00660498]
  [ 0.01572133 -0.02547044  0.04144435  0.03498301  0.01682415
   -0.00151243 -0.00118696  0.06849623 -0.00633021 -0.03975024]
  [-0.06495085 -0.03319306 -0.0188917   0.04856754  0.0422868
    0.01389622  0.0197619  -0.01622705  0.07262667  0.00065615]
  [ 0.03255654 -0.11876653 -0.05118802 -0.02234135  0.03045251
    0.03254187 -0.03589057 -0.02252691  0.01893168 -0.04384354]
  [ 0.10011837 -0.04485774  0.02435796  0.05851143 -0.01472043
    0.03365615 -0.00776441  0.00355732 -0.02814699 -0.03967385]]], shape=(1, 7, 10), dtype=float32)
tf.Tensor([[ 0.10011837 -0.04485774  0.02435796  0.05851143 -0.01

## init hidden state

In [58]:
# bi_rnn_11 with fw_h_13, bw_h_13
bi_rnn_11(hidden, initial_state=[fw_h_13, bw_h_13])  # (bs, units * 2)


<tf.Tensor: shape=(1, 10), dtype=float32, numpy=
array([[-0.02445327, -0.0429127 , -0.13820861, -0.12921065,  0.16704144,
         0.01838053,  0.25191048,  0.07055118,  0.03653603, -0.02291922]],
      dtype=float32)>

In [60]:
# bi_rnn_12 with fw_h_14, bw_h_14
bi_rnn_12(hidden, initial_state=[fw_h_14, bw_h_14])  # (bs, n_seq, units * 2)



<tf.Tensor: shape=(1, 7, 10), dtype=float32, numpy=
array([[[-8.03275108e-02,  6.39105439e-02, -2.07288675e-02,
         -4.83145490e-02,  5.41797839e-02,  1.09192491e-01,
          4.49565798e-03,  9.70413685e-02,  9.10641998e-02,
          8.98473635e-02],
        [ 7.04681575e-02, -1.18037112e-01,  7.28718638e-02,
         -4.20283712e-02, -1.09757641e-02,  9.85137746e-02,
         -8.05854797e-05, -2.08925270e-02,  1.47080824e-01,
          8.88869017e-02],
        [-5.40950596e-02,  4.41776365e-02, -1.85662042e-02,
          5.99041767e-03,  4.71798107e-02,  7.85225853e-02,
         -5.55330813e-02,  3.36597897e-02,  6.15281537e-02,
          9.10483599e-02],
        [ 2.35754270e-02, -5.26550375e-02,  5.66508807e-02,
         -1.59932524e-02, -2.21062526e-02,  3.53901312e-02,
         -3.88984717e-02, -7.77679607e-02,  4.32101823e-02,
          1.20095305e-01],
        [-3.46872285e-02,  5.87878935e-02,  7.90379709e-04,
          2.99538951e-02, -7.79460976e-03,  1.76729169e-02,


In [None]:
# bi_rnn_13 with fw_h_13, bw_h_13
bi_rnn_13(hidden, initial_state=[fw_h_13, bw_h_13])

In [62]:
# bi_rnn_14 with fw_h_14, bw_h_14
# bi_rnn_12 with fw_h_14, bw_h_14
bi_rnn_14(hidden, initial_state=[fw_h_14, bw_h_14]) 

[<tf.Tensor: shape=(1, 7, 10), dtype=float32, numpy=
 array([[[-0.07718934, -0.012771  ,  0.0676244 ,  0.0796417 ,
           0.02517752, -0.08205486, -0.08334333,  0.0021776 ,
           0.05497647,  0.04968703],
         [-0.04249839, -0.09924039, -0.08939911,  0.00644598,
           0.06163979, -0.01623391,  0.08073498,  0.0330582 ,
           0.04128516, -0.10219134],
         [ 0.04838944, -0.16440527, -0.04845624,  0.00711544,
           0.01170926,  0.00899977, -0.0546614 , -0.08796946,
           0.07196388,  0.00339738],
         [ 0.08253065, -0.05447004,  0.06977543,  0.10235824,
          -0.05450636,  0.02365367, -0.01770282,  0.01630176,
          -0.02138313, -0.12535733],
         [-0.14453664, -0.01905016,  0.04933581,  0.10601061,
           0.00991273,  0.04497056,  0.04774151, -0.07008178,
           0.00884983,  0.05155825],
         [-0.06307929, -0.14326672, -0.12626547, -0.02124311,
           0.02627003, -0.02146599, -0.08442739,  0.01927119,
          -0.04387

# Bidirectional LSTM

## return sequence=False, return_state=False

In [64]:
# lstm
bi_lstm_11 = tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(units=5))
output_11 = bi_lstm_11(hidden)  # (bs, seq, units * 2)
print(output_11)


tf.Tensor(
[[-0.00412068 -0.0062705   0.00786117  0.00181883 -0.00162668 -0.0088844
  -0.01367826 -0.00613328  0.01330866  0.00518704]], shape=(1, 10), dtype=float32)


In [67]:
# lstm weights
weights = bi_lstm_11.get_weights()
Wx = weights[0]
Wh = weights[1]
b = weights[2]
print(Wx.shape)  # (d_model, unit * 4) (Wxf, Wxi, Wxc, Wxo)
print(Wh.shape)  # (unit, unit * 4) (Whf, Whi, Whc, Who)
print(b.shape)  # (unit * 4) (bf, bi, bc, bo)

(4, 20)
(5, 20)
(20,)


## return sequence=True, return_state=False

In [68]:
# (return_sequences=True)
bi_lstm_12 = tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(units=5, return_sequences=True))
output_12 = bi_lstm_12(hidden)  # (bs, seq, units * 2)
print(output_12)

tf.Tensor(
[[[ 5.59307868e-03 -1.43693970e-03 -2.16347538e-03 -4.02175169e-03
   -1.36370887e-03  9.51858889e-03 -1.36263305e-02 -8.11715145e-04
   -5.24394400e-03  4.79393842e-04]
  [ 5.42495074e-03  1.75273663e-03 -2.21556053e-03  3.66599346e-03
   -3.88882589e-03  6.46521710e-03 -1.36089344e-02 -3.78564722e-03
   -4.98389499e-03 -7.19355606e-03]
  [ 1.49750849e-02  2.32107146e-03 -4.79207095e-03  6.20269042e-04
   -7.12032057e-03  9.11296066e-03 -1.23678548e-02  2.69932346e-03
   -8.43632687e-03  8.28444783e-04]
  [ 8.26100074e-03  1.74974441e-03  1.26565783e-03  3.66413454e-03
   -7.73442164e-03  2.78827571e-03 -5.03092073e-03 -9.94121539e-04
   -1.13388768e-03 -6.38574653e-04]
  [ 1.08838975e-02 -2.75366183e-05 -8.40049470e-04 -1.01485569e-03
   -7.62888556e-03  5.97890420e-03 -7.91593362e-03  7.06475286e-04
   -2.18938058e-03 -8.14863713e-04]
  [ 8.65415949e-03  2.90750898e-03 -1.04452297e-03  6.02334784e-03
   -8.87185894e-03  1.17542944e-03 -5.90494182e-03 -3.59494379e-03
    6

## return sequence=False, return_state=True

In [69]:
# (return_state=True)
bi_lstm_13 = tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(units=5, return_state=True))
output_13, fw_h_13, fw_c_13, bw_h_13, bw_c_13 = bi_lstm_13(hidden)  # (bs, units * 2), (bs, units), (bs, units), (bs, units), (bs, units)
print(output_13)
print(fw_h_13)
print(fw_c_13)
print(bw_h_13)
print(bw_c_13)

tf.Tensor(
[[-0.01847664 -0.01717866 -0.01455096 -0.00403581  0.00520225  0.00247622
   0.00704844  0.00227592 -0.00264071 -0.00672963]], shape=(1, 10), dtype=float32)
tf.Tensor([[-0.01847664 -0.01717866 -0.01455096 -0.00403581  0.00520225]], shape=(1, 5), dtype=float32)
tf.Tensor([[-0.03701148 -0.03410234 -0.02905312 -0.0080827   0.01050455]], shape=(1, 5), dtype=float32)
tf.Tensor([[ 0.00247622  0.00704844  0.00227592 -0.00264071 -0.00672963]], shape=(1, 5), dtype=float32)
tf.Tensor([[ 0.00494606  0.01426817  0.00458811 -0.00530699 -0.01349762]], shape=(1, 5), dtype=float32)


## return sequence=True, return_state=True

In [70]:
# (return_sequences=True, return_state=True)
bi_lstm_14 = tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(units=5, return_sequences=True, return_state=True))
output_14, fw_h_14, fw_c_14, bw_h_14, bw_c_14 = bi_lstm_14(hidden)  # (bs, seq, units), (bs, units), (bs, units), (bs, units), (bs, units)
print(output_14)
print(fw_h_14)
print(fw_c_14)
print(bw_h_14)
print(bw_c_14)

tf.Tensor(
[[[-0.00631327  0.00020226 -0.00249581 -0.00222378  0.00160725
   -0.00153126  0.0051608   0.01623868 -0.00138526 -0.00184595]
  [-0.0001583  -0.00177069  0.00636381  0.00476604  0.0052069
    0.00422231  0.00383904  0.01914648  0.00116657 -0.00043464]
  [-0.0062639  -0.00064289  0.00462585  0.00631612  0.00794407
   -0.00317268  0.00138541  0.0158445  -0.00347207 -0.00254659]
  [-0.00425812  0.00132639  0.00020326  0.00859033  0.00416241
    0.00119956 -0.00057372  0.00453301  0.00108235  0.00030032]
  [-0.00955186  0.0014321  -0.0024945   0.0039799   0.00525159
    0.0003114   0.00297156  0.00970575 -0.00076363 -0.00191017]
  [-0.00279158 -0.0006622   0.00623789  0.00911862  0.00841716
    0.00671738  0.00150737  0.00944808  0.00333584  0.00035412]
  [-0.00274947 -0.00060721  0.00312654  0.01213499  0.00786342
    0.00029456 -0.00075656  0.00234319  0.00150466 -0.00042868]]], shape=(1, 7, 10), dtype=float32)
tf.Tensor([[-0.00274947 -0.00060721  0.00312654  0.01213499  0.00

## init hidden state

In [71]:
# bi_lstm_11 with fw_h_13, fw_c_13, bw_h_13, bw_c_13
bi_lstm_11(hidden, initial_state=[fw_h_13, fw_c_13, bw_h_13, bw_c_13])  # (bs, units * 2)


<tf.Tensor: shape=(1, 10), dtype=float32, numpy=
array([[-0.00902691, -0.00892308,  0.00632085,  0.00116464, -0.0021296 ,
        -0.00840026, -0.0118921 , -0.00639943,  0.01211318,  0.00544223]],
      dtype=float32)>

In [72]:
# bi_lstm_12 with fw_h_14, fw_c_14, bw_h_14, bw_c_14
bi_lstm_12(hidden, initial_state=[fw_h_14, fw_c_14, bw_h_14, bw_c_14])

<tf.Tensor: shape=(1, 7, 10), dtype=float32, numpy=
array([[[ 4.3676449e-03, -2.3316750e-03,  5.0557783e-04,  3.6014901e-03,
          4.9915407e-03,  8.9759538e-03, -1.3292466e-02,  6.4740999e-04,
         -6.1985389e-03,  1.5676529e-05],
        [ 5.2558701e-03,  7.2890782e-04, -1.0477982e-05,  8.3481669e-03,
          1.1715934e-03,  5.7764943e-03, -1.3144056e-02, -1.7902823e-03,
         -6.1819195e-03, -7.8645637e-03],
        [ 1.5494184e-02,  1.2466809e-03, -2.8881596e-03,  3.4885674e-03,
         -3.1175134e-03,  8.2810819e-03, -1.1703683e-02,  5.5934950e-03,
         -9.9405237e-03, -8.7428030e-05],
        [ 9.1938851e-03,  6.9283909e-04,  2.8346109e-03,  5.3434507e-03,
         -4.5058327e-03,  1.7769254e-03, -4.0512504e-03,  3.1620655e-03,
         -2.8954390e-03, -1.8517172e-03],
        [ 1.2047971e-02, -1.0430265e-03,  5.1833072e-04, -8.5475231e-06,
         -5.0180829e-03,  4.7896118e-03, -6.4506782e-03,  6.5659727e-03,
         -4.2417059e-03, -2.3764395e-03],
        

In [73]:
# bi_lstm_13 with fw_h_13, fw_c_13, bw_h_13, bw_c_13
bi_lstm_13(hidden, initial_state=[fw_h_13, fw_c_13, bw_h_13, bw_c_13]) 

[<tf.Tensor: shape=(1, 10), dtype=float32, numpy=
 array([[-0.01915487, -0.02142809, -0.01631224, -0.0055634 ,  0.00611594,
          0.00344053,  0.00820547,  0.00241343, -0.00211693, -0.00703022]],
       dtype=float32)>, <tf.Tensor: shape=(1, 5), dtype=float32, numpy=
 array([[-0.01915487, -0.02142809, -0.01631224, -0.0055634 ,  0.00611594]],
       dtype=float32)>, <tf.Tensor: shape=(1, 5), dtype=float32, numpy=
 array([[-0.03838354, -0.04251697, -0.03255511, -0.01115599,  0.01236029]],
       dtype=float32)>, <tf.Tensor: shape=(1, 5), dtype=float32, numpy=
 array([[ 0.00344053,  0.00820547,  0.00241343, -0.00211693, -0.00703022]],
       dtype=float32)>, <tf.Tensor: shape=(1, 5), dtype=float32, numpy=
 array([[ 0.00687043,  0.01661347,  0.00486459, -0.00425266, -0.01409592]],
       dtype=float32)>]

In [74]:
# bi_lstm_14 with fw_h_14, fw_c_14, bw_h_14, bw_c_14
bi_lstm_14(hidden, initial_state=[fw_h_14, fw_c_14, bw_h_14, bw_c_14])

[<tf.Tensor: shape=(1, 7, 10), dtype=float32, numpy=
 array([[[-0.00834693,  0.00018947, -0.00125444,  0.00696849,
           0.00818104, -0.0012792 ,  0.00637476,  0.01650755,
          -0.00118886, -0.00059616],
         [-0.00181054, -0.0014957 ,  0.00649423,  0.01139424,
           0.01081412,  0.00449474,  0.00546267,  0.0198998 ,
           0.00154908,  0.00093327],
         [-0.00764754, -0.00023186,  0.0041062 ,  0.01103737,
           0.01245812, -0.00289433,  0.00349131,  0.01736823,
          -0.00288366, -0.00109792],
         [-0.00548853,  0.0017705 , -0.0006314 ,  0.01181266,
           0.00777215,  0.00141378,  0.00208272,  0.00721565,
           0.00188393,  0.00175016],
         [-0.01064796,  0.00187603, -0.003428  ,  0.00612769,
           0.00810057,  0.00037239,  0.00628921,  0.01424147,
           0.00017834, -0.00067411],
         [-0.00381151, -0.00023899,  0.00535089,  0.01045593,
           0.01071067,  0.00647881,  0.00556784,  0.0167798 ,
           0.00417

# Bidirectional GRU

## return sequence=False, return_state=False

In [None]:
# bi GRU

In [None]:
# bi GRU weights

## return sequence=True, return_state=False

In [None]:
# (return_sequences=True)

## return sequence=False, return_state=True

In [None]:
# (return_state=True)

## return sequence=True, return_state=True

In [None]:
# (return_sequences=True, return_state=True)

## init hidden state

In [None]:
# bi_gru_11 with fw_h_13, bw_h_13

In [None]:
# bi_gru_12 with fw_h_14, bw_h_14

In [None]:
# bi_gru_13 with fw_h_13, bw_h_13

In [None]:
# bi_gru_14 with fw_h_14, bw_h_14

# RNN 모델

In [83]:
def build_model(n_vocab, d_model, n_seq, n_out):
    """
    RNN 모델
    :param n_vocab: vocabulary 단어 수
    :param d_model: 단어를 의미하는 벡터의 차원 수
    :param n_seq: 문장길이 (단어 수)
    :param n_out: 예측할 class 개수
    """
    input = tf.keras.layers.Input(shape=(n_seq,))

    embedding = tf.keras.layers.Embedding(n_vocab, d_model)  # (n_vocab x d_model)
    hidden = embedding(input)  # (bs, 1, d_model)

    ########################################
    hidden = tf.keras.layers.SimpleRnn(units=128, return_sequence=True)(hidden)
    
    ########################################

    output = tf.keras.layers.Dense(n_out, activation=tf.nn.softmax)(hidden)  # (bs, 1, n_vocab)

    model = tf.keras.Model(inputs=input, outputs=output)
    return model

In [84]:
# 모델 생성
model = build_model(len(word_to_id), 8, 7, 2)
# 모델 내용 그래프 출력
tf.keras.utils.plot_model(model, 'model.png', show_shapes=True)

AttributeError: ignored

In [None]:
model.predict(train_inputs)