# Env

In [None]:
# imports
import argparse
import os
import random
import shutil
import json
import zipfile
import math
import copy
import collections
import re

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import sentencepiece as spm
import tensorflow as tf
import tensorflow.keras.backend as K

from tqdm.notebook import tqdm, trange

In [None]:
# Runtime configuration, kept in a Namespace so values are read as attributes
args = argparse.Namespace(
    seed=1234,  # random seed value
)

print(args)

In [None]:
# Seed every random number generator used in this notebook
def set_seed(seed):
    """Seed the Python, NumPy and TensorFlow random number generators.

    Args:
        seed: Seed value passed unchanged to each generator.
    """
    random.seed(seed)
    np.random.seed(seed)
    tf.random.set_seed(seed)


# Apply the configured seed; without this call args.seed is never used.
set_seed(args.seed)

In [None]:
# Data directory (hard-coded absolute path).
# NOTE(review): the /content/drive prefix looks like a Colab Google Drive mount — confirm it is mounted before running.
data_dir = '/content/drive/MyDrive/문서/강의계획서/삼성전기/삼성전기.20220228/data'
# List the directory contents as the cell output
os.listdir(data_dir)

# Vocabulary

In [None]:
# Load the SentencePiece model stored at <data_dir>/kowiki/kowiki_32000.model
vocab = spm.SentencePieceProcessor()
vocab.load(os.path.join(data_dir, 'kowiki', 'kowiki_32000.model'))

# Data & Input

In [None]:
# Input sentence (encoded into token ids in the next cell)
sentence = "나는 수학 학생 영어 선생님 과학 조교 친구 입니다"

In [None]:
# Build the training input: a one-sentence batch of SentencePiece token ids,
# converted straight to a numpy array
train_inputs = np.array([vocab.encode_as_ids(sentence)])

train_inputs

In [None]:
# Embedding table: one 4-dimensional vector per vocabulary entry
embedding = tf.keras.layers.Embedding(len(vocab), 4)

In [None]:
# Word vectors: look up the embedding of every token id in train_inputs
hidden = embedding(train_inputs)
hidden

# RNN

## tutorial

In [None]:
# SimpleRNN with 3 units that returns the hidden state of every time step
rnn = tf.keras.layers.SimpleRNN(units=3, return_sequences=True)
rnn_hidden = rnn(hidden)  # (bs, n_seq, units)
rnn_hidden

In [None]:
# Pull out the layer's three weight arrays; the manual loop below uses them as
# input weights (Wx), recurrent weights (Wh) and bias (b)
Wx, Wh, b = rnn.get_weights()
Wx, Wh, b

In [None]:
# Inspect the shapes of the extracted SimpleRNN weights
Wx.shape, Wh.shape, b.shape

In [None]:
# Drop only the batch axis so the time steps can be iterated one by one.
# axis=0 is explicit: a bare tf.squeeze would also remove the sequence axis
# if the sentence happened to encode to a single token.
t_hidden = tf.squeeze(hidden, axis=0)
t_hidden

In [None]:
# Reproduce the SimpleRNN step by step with the extracted weights.
# The initial hidden state is all zeros, shaped (1, 3) to match units=3.
hidden_prev = np.zeros((1, 3)).astype(np.float32)
for i in range(t_hidden.shape[0]):
    # Slice rather than index so x keeps a leading axis for tf.matmul
    x = t_hidden[i:i+1]
    # print(x)
    # h_t = tanh(h_{t-1} Wh + x_t Wx + b)
    hidden_next = tf.tanh(tf.matmul(hidden_prev, Wh) + tf.matmul(x, Wx) + b)
    print(hidden_next)
    # The new state becomes the previous state of the next step
    hidden_prev = hidden_next

## return_sequences=False, return_state=False

In [None]:
# SimpleRNN with default output options: a single vector per batch item
rnn = tf.keras.layers.SimpleRNN(units=5)
rnn_hidden = rnn(hidden)  # (bs, units)
rnn_hidden

## return_sequences=True, return_state=False

In [None]:
# return_sequences=True: the output keeps the sequence axis
rnn = tf.keras.layers.SimpleRNN(units=5, return_sequences=True)
rnn_hidden = rnn(hidden)  # (bs, n_seq, units)
rnn_hidden

## return_sequences=False, return_state=True

In [None]:
# return_state=True: the call returns the output plus the layer's state
rnn = tf.keras.layers.SimpleRNN(units=5, return_state=True)
rnn_hidden, fw_state = rnn(hidden)  # (bs, units), (bs, units)
rnn_hidden, fw_state

In [None]:
# Run a second, separate SimpleRNN layer, starting from the state returned
# by the previous cell
rnn2 = tf.keras.layers.SimpleRNN(units=5)
rnn_hidden = rnn2(hidden, initial_state=[fw_state])  # (bs, units)
print(rnn_hidden)

## return_sequences=True, return_state=True

In [None]:
# return_sequences=True and return_state=True: per-step outputs plus the state
rnn = tf.keras.layers.SimpleRNN(units=5, return_sequences=True, return_state=True)
rnn_hidden, fw_state = rnn(hidden)  # (bs, n_seq, units), (bs, units)
rnn_hidden, fw_state

In [None]:
# Run a second, separate SimpleRNN layer, starting from the state returned
# by the previous cell
rnn2 = tf.keras.layers.SimpleRNN(units=5)
rnn_hidden = rnn2(hidden, initial_state=[fw_state])  # (bs, units)
print(rnn_hidden)

# LSTM

## tutorial

In [None]:
# LSTM with 3 units that returns the hidden state of every time step
lstm = tf.keras.layers.LSTM(units=3, return_sequences=True)
lstm_hidden = lstm(hidden)  # (bs, n_seq, units)
print(lstm_hidden)

In [None]:
# Pull out the layer's three weight arrays; the following cells split each
# of them into per-gate blocks
Wx, Wh, b = lstm.get_weights()
Wx, Wh, b

In [None]:
# Inspect the shapes of the extracted LSTM weights
Wx.shape, Wh.shape, b.shape

In [None]:
# Split the input weights into four 3-column blocks (3 = units):
# Wx = (Wxi, Wxf, Wxc, Wxo), used below for the input gate, forget gate,
# cell candidate and output gate respectively
Wxi, Wxf, Wxc, Wxo = Wx[:, 0:3], Wx[:, 3:6], Wx[:, 6:9], Wx[:, 9:12]
Wxi, Wxf, Wxc, Wxo

In [None]:
# Split the recurrent weights the same way:
# Wh = (Whi, Whf, Whc, Who)
Whi, Whf, Whc, Who = Wh[:, 0:3], Wh[:, 3:6], Wh[:, 6:9], Wh[:, 9:12]
Whi, Whf, Whc, Who

In [None]:
# Split the bias vector into the matching four blocks:
# b = (bi, bf, bc, bo)
bi, bf, bc, bo = b[0:3], b[3:6], b[6:9], b[9:12]
bi, bf, bc, bo

In [None]:
# Drop only the batch axis so the time steps can be iterated one by one.
# axis=0 is explicit: a bare tf.squeeze would also remove the sequence axis
# if the sentence happened to encode to a single token.
t_hidden = tf.squeeze(hidden, axis=0)
t_hidden

In [None]:
# Reproduce the LSTM step by step with the per-gate weights split above.
# Hidden state and cell state both start at zero, shaped (1, 3) to match units=3.
hidden_prev = np.zeros((1, 3)).astype(np.float32)
c_prev = np.zeros((1, 3)).astype(np.float32)

for i in range(t_hidden.shape[0]):
    # Slice rather than index so x keeps a leading axis for tf.matmul
    x = t_hidden[i:i+1]
    # print(x)
    # Forget gate: f_t = sigmoid(h_{t-1} Whf + x_t Wxf + bf)
    ft = tf.sigmoid(tf.matmul(hidden_prev, Whf) + tf.matmul(x, Wxf) + bf)
    # print(ft)
    # Input gate: i_t = sigmoid(h_{t-1} Whi + x_t Wxi + bi)
    it = tf.sigmoid(tf.matmul(hidden_prev, Whi) + tf.matmul(x, Wxi) + bi)
    # print(it)
    # Output gate: o_t = sigmoid(h_{t-1} Who + x_t Wxo + bo)
    ot = tf.sigmoid(tf.matmul(hidden_prev, Who) + tf.matmul(x, Wxo) + bo)
    # print(ot)
    # Cell candidate: c~_t = tanh(h_{t-1} Whc + x_t Wxc + bc)
    c_tilde = tf.tanh(tf.matmul(hidden_prev, Whc) + tf.matmul(x, Wxc) + bc)
    # print(c_tilde)
    # New cell state: c_t = f_t * c_{t-1} + i_t * c~_t
    c_next = ft * c_prev + it * c_tilde
    # print(c_next)
    c_prev = c_next
    # New hidden state: h_t = o_t * tanh(c_t)
    hidden_next = ot * tf.tanh(c_next)
    print(hidden_next)
    # The new state becomes the previous state of the next step
    hidden_prev = hidden_next

In [None]:
# Keras LSTM output from the tutorial cell, shown for comparison with the
# values printed by the manual loop
lstm_hidden

## return_sequences=False, return_state=False

In [None]:
# LSTM with default output options: a single vector per batch item
lstm = tf.keras.layers.LSTM(units=5)
lstm_hidden = lstm(hidden)  # (bs, units)
lstm_hidden

## return_sequences=True, return_state=False

In [None]:
# return_sequences=True: the output keeps the sequence axis
lstm = tf.keras.layers.LSTM(units=5, return_sequences=True)
lstm_hidden = lstm(hidden)  # (bs, n_seq, units)
lstm_hidden

## return_sequences=False, return_state=True

In [None]:
# return_state=True: the call returns the output plus two states,
# unpacked here as hidden state (fw_state) and cell state (fw_cell)
lstm = tf.keras.layers.LSTM(units=5, return_state=True)
lstm_hidden, fw_state, fw_cell = lstm(hidden)  # (bs, units), (bs, units), (bs, units)
lstm_hidden, fw_state, fw_cell

In [None]:
# Run a second, separate LSTM layer, starting from the hidden and cell
# states returned by the previous cell
lstm2 = tf.keras.layers.LSTM(units=5)
lstm_hidden = lstm2(hidden, initial_state=[fw_state, fw_cell])  # (bs, units)
lstm_hidden

## return_sequences=True, return_state=True

In [None]:
# return_sequences=True and return_state=True: per-step outputs plus the
# hidden and cell states
lstm = tf.keras.layers.LSTM(units=5, return_sequences=True, return_state=True)
lstm_hidden, fw_state, fw_cell = lstm(hidden)  # (bs, n_seq, units), (bs, units), (bs, units)
lstm_hidden, fw_state, fw_cell

In [None]:
# Run a second, separate LSTM layer, starting from the hidden and cell
# states returned by the previous cell
lstm2 = tf.keras.layers.LSTM(units=5)
lstm_hidden = lstm2(hidden, initial_state=[fw_state, fw_cell])  # (bs, units)
lstm_hidden

# GRU

## tutorial

In [None]:
# GRU with 3 units that returns the hidden state of every time step
gru = tf.keras.layers.GRU(units=3, return_sequences=True)
gru_hidden = gru(hidden)  # (bs, n_seq, units)
gru_hidden

In [None]:
# Pull out the layer's three weight arrays; the following cells split each
# of them into per-gate blocks
Wx, Wh, b = gru.get_weights()
Wx, Wh, b

In [None]:
# Inspect the shapes of the extracted GRU weights
Wx.shape, Wh.shape, b.shape

In [None]:
# Split the input weights into three 3-column blocks (3 = units):
# Wx = (Wxu, Wxr, Wxg), used below for the update gate, reset gate and
# candidate state respectively
Wxu, Wxr, Wxg = Wx[:, 0:3], Wx[:, 3:6], Wx[:, 6:9]
Wxu, Wxr, Wxg

In [None]:
# Split the recurrent weights the same way:
# Wh = (Whu, Whr, Whg)
Whu, Whr, Whg = Wh[:, 0:3], Wh[:, 3:6], Wh[:, 6:9]
Whu, Whr, Whg

In [None]:
# The bias is indexed as two rows: row 0 holds the biases added to the input
# terms and row 1 those added to the recurrent terms in the loop below.
# b = ((bxu, bxr, bxg), (bhu, bhr, bhg))
bxu, bxr, bxg = b[0, 0:3], b[0, 3:6], b[0, 6:9]
bhu, bhr, bhg = b[1, 0:3], b[1, 3:6], b[1, 6:9]
bxu, bxr, bxg, bhu, bhr, bhg

In [None]:
# Drop only the batch axis so the time steps can be iterated one by one.
# axis=0 is explicit: a bare tf.squeeze would also remove the sequence axis
# if the sentence happened to encode to a single token.
t_hidden = tf.squeeze(hidden, axis=0)
t_hidden

In [None]:
# Reproduce the GRU step by step with the per-gate weights split above.
# The initial hidden state is all zeros, shaped (1, 3) to match units=3.
hidden_prev = np.zeros((1, 3)).astype(np.float32)

for i in range(t_hidden.shape[0]):
    # Slice rather than index so x keeps a leading axis for tf.matmul
    x = t_hidden[i:i+1]
    # print(x)
    # Reset gate: r_t = sigmoid(h_{t-1} Whr + bhr + x_t Wxr + bxr)
    rt = tf.sigmoid(tf.matmul(hidden_prev, Whr) + bhr + tf.matmul(x, Wxr) + bxr)
    # print(rt)
    # Update gate: u_t = sigmoid(h_{t-1} Whu + bhu + x_t Wxu + bxu)
    ut = tf.sigmoid(tf.matmul(hidden_prev, Whu) + bhu + tf.matmul(x, Wxu) + bxu)
    # print(ut)
    # Candidate state: the reset gate scales the recurrent term after its bias
    # is added: h~_t = tanh(r_t * (h_{t-1} Whg + bhg) + x_t Wxg + bxg)
    h_tilde = tf.tanh(rt * (tf.matmul(hidden_prev, Whg) + bhg) + tf.matmul(x, Wxg) + bxg)
    # print(h_tilde)
    # New hidden state: h_t = u_t * h_{t-1} + (1 - u_t) * h~_t
    hidden_next = ut * hidden_prev + (1 - ut) * h_tilde
    print(hidden_next)
    # The new state becomes the previous state of the next step
    hidden_prev = hidden_next

In [None]:
# Keras GRU output from the tutorial cell, shown for comparison with the
# values printed by the manual loop
gru_hidden

## return_sequences=False, return_state=False

In [None]:
# GRU with default output options: a single vector per batch item
gru = tf.keras.layers.GRU(units=5)
gru_hidden = gru(hidden)  # (bs, units)
gru_hidden

## return_sequences=True, return_state=False

In [None]:
# return_sequences=True: the output keeps the sequence axis
gru = tf.keras.layers.GRU(units=5, return_sequences=True)
gru_hidden = gru(hidden)  # (bs, n_seq, units)
gru_hidden

## return_sequences=False, return_state=True

In [None]:
# return_state=True: the call returns the output plus the layer's state
gru = tf.keras.layers.GRU(units=5, return_state=True)
gru_hidden, fw_state = gru(hidden)  # (bs, units), (bs, units)
gru_hidden, fw_state

In [None]:
# Run a second, separate GRU layer, starting from the state returned by the
# previous cell
gru2 = tf.keras.layers.GRU(units=5)
gru_hidden = gru2(hidden, initial_state=[fw_state])  # (bs, units)
gru_hidden

## return_sequences=True, return_state=True

In [None]:
# return_sequences=True and return_state=True: per-step outputs plus the state
gru = tf.keras.layers.GRU(units=5, return_sequences=True, return_state=True)
gru_hidden, fw_state = gru(hidden)  # (bs, n_seq, units), (bs, units)
gru_hidden, fw_state

In [None]:
# Run a second, separate GRU layer, starting from the state returned by the
# previous cell
gru2 = tf.keras.layers.GRU(units=5)
gru_hidden = gru2(hidden, initial_state=[fw_state])  # (bs, units)
gru_hidden

# Bidirectional RNN

## return_sequences=False, return_state=False

In [None]:
# Bidirectional SimpleRNN; only the forward layer is passed to the wrapper
fw_rnn = tf.keras.layers.SimpleRNN(units=5)
bi_rnn = tf.keras.layers.Bidirectional(fw_rnn)
rnn_hidden = bi_rnn(hidden)  # (bs, units * 2)
rnn_hidden

In [None]:
# Unpack the wrapper's six weight arrays: three named for the forward layer
# (Wfx, Wfh, bf) and three for the backward layer (Wbx, Wbh, bb)
Wfx, Wfh, bf, Wbx, Wbh, bb = bi_rnn.get_weights()
Wfx, Wfh, bf, Wbx, Wbh, bb

In [None]:
# Bidirectional SimpleRNN with an explicitly supplied backward layer
# (created with go_backwards=True)
fw_rnn = tf.keras.layers.SimpleRNN(units=5)
bw_rnn = tf.keras.layers.SimpleRNN(units=5, go_backwards=True)
bi_rnn = tf.keras.layers.Bidirectional(fw_rnn, backward_layer=bw_rnn)
rnn_hidden = bi_rnn(hidden)  # (bs, units * 2)
rnn_hidden

In [None]:
# Unpack the wrapper's six weight arrays: three named for the forward layer
# (Wfx, Wfh, bf) and three for the backward layer (Wbx, Wbh, bb)
Wfx, Wfh, bf, Wbx, Wbh, bb = bi_rnn.get_weights()
Wfx, Wfh, bf, Wbx, Wbh, bb

## return_sequences=True, return_state=False

In [None]:
# Bidirectional SimpleRNN with return_sequences=True: the output keeps the
# sequence axis
fw_rnn = tf.keras.layers.SimpleRNN(units=5, return_sequences=True)
bi_rnn = tf.keras.layers.Bidirectional(fw_rnn)
rnn_hidden = bi_rnn(hidden)  # (bs, seq, units* 2)
rnn_hidden

In [None]:
# Same as the previous cell, but with an explicitly supplied backward layer
# (go_backwards=True, return_sequences=True)
fw_rnn = tf.keras.layers.SimpleRNN(units=5, return_sequences=True)
bw_rnn = tf.keras.layers.SimpleRNN(units=5, go_backwards=True, return_sequences=True)
bi_rnn = tf.keras.layers.Bidirectional(fw_rnn, backward_layer=bw_rnn)
rnn_hidden = bi_rnn(hidden)  # (bs, seq, units* 2)
rnn_hidden

## return_sequences=False, return_state=True

In [None]:
# Bidirectional SimpleRNN with return_state=True: the call returns the output
# plus one state per direction (unpacked as fw_state, bw_state)
fw_rnn = tf.keras.layers.SimpleRNN(units=5, return_state=True)
bi_rnn = tf.keras.layers.Bidirectional(fw_rnn)
rnn_hidden, fw_state, bw_state = bi_rnn(hidden)  # (bs, units* 2), (bs, units), (bs, units)
rnn_hidden, fw_state, bw_state

In [None]:
# Run a second, separate bidirectional SimpleRNN, passing the forward and
# backward states from the previous cell as its initial state
fw_rnn = tf.keras.layers.SimpleRNN(units=5)
bi_rnn2 = tf.keras.layers.Bidirectional(fw_rnn)
rnn_hidden = bi_rnn2(hidden, initial_state=[fw_state, bw_state])
rnn_hidden

## return_sequences=True, return_state=True

In [None]:
# Bidirectional SimpleRNN with return_sequences=True and return_state=True:
# per-step outputs plus one state per direction
fw_rnn = tf.keras.layers.SimpleRNN(units=5, return_sequences=True, return_state=True)
bi_rnn = tf.keras.layers.Bidirectional(fw_rnn)
rnn_hidden, fw_state, bw_state = bi_rnn(hidden)  # (bs, seq, units* 2), (bs, units), (bs, units)
rnn_hidden, fw_state, bw_state

In [None]:
# Run a second, separate bidirectional SimpleRNN, passing the forward and
# backward states from the previous cell as its initial state
fw_rnn = tf.keras.layers.SimpleRNN(units=5)
bi_rnn2 = tf.keras.layers.Bidirectional(fw_rnn)
rnn_hidden = bi_rnn2(hidden, initial_state=[fw_state, bw_state])
rnn_hidden

# Bidirectional LSTM

## return_sequences=False, return_state=False

In [None]:
# Bidirectional LSTM; only the forward layer is passed to the wrapper
fw_lstm = tf.keras.layers.LSTM(units=5)
bi_lstm = tf.keras.layers.Bidirectional(fw_lstm)
lstm_hidden = bi_lstm(hidden)  # (bs, units * 2)
lstm_hidden

In [None]:
# bi lstm weights: unpack the wrapper's six weight arrays, three named for the
# forward layer (Wfx, Wfh, bf) and three for the backward layer (Wbx, Wbh, bb)
Wfx, Wfh, bf, Wbx, Wbh, bb = bi_lstm.get_weights()
Wfx, Wfh, bf, Wbx, Wbh, bb

In [None]:
# Bidirectional LSTM with an explicitly supplied backward layer
# (created with go_backwards=True)
fw_lstm = tf.keras.layers.LSTM(units=5)
bw_lstm = tf.keras.layers.LSTM(units=5, go_backwards=True)
bi_lstm = tf.keras.layers.Bidirectional(fw_lstm, backward_layer=bw_lstm)
lstm_hidden = bi_lstm(hidden)  # (bs, units * 2)
lstm_hidden

In [None]:
# bi lstm weights: unpack the wrapper's six weight arrays, three named for the
# forward layer (Wfx, Wfh, bf) and three for the backward layer (Wbx, Wbh, bb)
Wfx, Wfh, bf, Wbx, Wbh, bb = bi_lstm.get_weights()
Wfx, Wfh, bf, Wbx, Wbh, bb

## return_sequences=True, return_state=False

In [None]:
# Bidirectional LSTM with return_sequences=True: the output keeps the
# sequence axis
fw_lstm = tf.keras.layers.LSTM(units=5, return_sequences=True)
bi_lstm = tf.keras.layers.Bidirectional(fw_lstm)
lstm_hidden = bi_lstm(hidden)  # (bs, seq, units* 2)
lstm_hidden

In [None]:
# Bidirectional LSTM with return_sequences=True and an explicitly supplied
# backward layer (go_backwards=True, return_sequences=True)
fw_lstm = tf.keras.layers.LSTM(units=5, return_sequences=True)
bw_lstm = tf.keras.layers.LSTM(units=5, go_backwards=True, return_sequences=True)
# Must be bound to bi_lstm (not bi_rnn) so the call below runs this explicit
# forward/backward pair rather than the wrapper left over from the previous cell.
bi_lstm = tf.keras.layers.Bidirectional(fw_lstm, backward_layer=bw_lstm)
lstm_hidden = bi_lstm(hidden)  # (bs, seq, units* 2)
lstm_hidden

## return_sequences=False, return_state=True

In [None]:
# Bidirectional LSTM with return_state=True: the call returns the output plus
# hidden and cell states for each direction
# (unpacked as fw_state, fw_cell, bw_state, bw_cell)
fw_lstm = tf.keras.layers.LSTM(units=5, return_state=True)
bi_lstm = tf.keras.layers.Bidirectional(fw_lstm)
lstm_hidden, fw_state, fw_cell, bw_state, bw_cell = bi_lstm(hidden)  # (bs, units* 2), (bs, units), (bs, units), (bs, units), (bs, units)
lstm_hidden, fw_state, fw_cell, bw_state, bw_cell

In [None]:
# Run a second, separate bidirectional LSTM, passing the four states from the
# previous cell (forward hidden/cell, backward hidden/cell) as its initial state
fw_lstm = tf.keras.layers.LSTM(units=5)
bi_lstm2 = tf.keras.layers.Bidirectional(fw_lstm)
lstm_hidden = bi_lstm2(hidden, initial_state=[fw_state, fw_cell, bw_state, bw_cell])
lstm_hidden

## return_sequences=True, return_state=True

In [None]:
# Bidirectional LSTM with return_sequences=True and return_state=True:
# per-step outputs plus hidden and cell states for each direction
fw_lstm = tf.keras.layers.LSTM(units=5, return_sequences=True, return_state=True)
bi_lstm = tf.keras.layers.Bidirectional(fw_lstm)
lstm_hidden, fw_state, fw_cell, bw_state, bw_cell = bi_lstm(hidden)  # (bs, seq, units* 2), (bs, units), (bs, units), (bs, units), (bs, units)
lstm_hidden, fw_state, fw_cell, bw_state, bw_cell

In [None]:
# Run a second, separate bidirectional LSTM, passing the four states from the
# previous cell (forward hidden/cell, backward hidden/cell) as its initial state
fw_lstm = tf.keras.layers.LSTM(units=5)
bi_lstm2 = tf.keras.layers.Bidirectional(fw_lstm)
lstm_hidden = bi_lstm2(hidden, initial_state=[fw_state, fw_cell, bw_state, bw_cell])
lstm_hidden

# Bidirectional GRU


## return_sequences=False, return_state=False

In [None]:
# Bidirectional GRU; only the forward layer is passed to the wrapper
fw_gru = tf.keras.layers.GRU(units=5)
bi_gru = tf.keras.layers.Bidirectional(fw_gru)
gru_hidden = bi_gru(hidden)  # (bs, units * 2)
gru_hidden

In [None]:
# bi gru weights: unpack the wrapper's six weight arrays, three named for the
# forward layer (Wfx, Wfh, bf) and three for the backward layer (Wbx, Wbh, bb)
Wfx, Wfh, bf, Wbx, Wbh, bb = bi_gru.get_weights()
Wfx, Wfh, bf, Wbx, Wbh, bb

In [None]:
# Bidirectional GRU with an explicitly supplied backward layer
# (created with go_backwards=True)
fw_gru = tf.keras.layers.GRU(units=5)
bw_gru = tf.keras.layers.GRU(units=5, go_backwards=True)
bi_gru = tf.keras.layers.Bidirectional(fw_gru, backward_layer=bw_gru)
gru_hidden = bi_gru(hidden)  # (bs, units * 2)
gru_hidden

In [None]:
# bi gru weights: unpack the wrapper's six weight arrays, three named for the
# forward layer (Wfx, Wfh, bf) and three for the backward layer (Wbx, Wbh, bb)
Wfx, Wfh, bf, Wbx, Wbh, bb = bi_gru.get_weights()
Wfx, Wfh, bf, Wbx, Wbh, bb

## return_sequences=True, return_state=False

In [None]:
# Bidirectional GRU with return_sequences=True: the output keeps the
# sequence axis
fw_gru = tf.keras.layers.GRU(units=5, return_sequences=True)
bi_gru = tf.keras.layers.Bidirectional(fw_gru)
gru_hidden = bi_gru(hidden)  # (bs, seq, units* 2)
gru_hidden

In [None]:
# Same as the previous cell, but with an explicitly supplied backward layer
# (go_backwards=True, return_sequences=True)
fw_gru = tf.keras.layers.GRU(units=5, return_sequences=True)
bw_gru = tf.keras.layers.GRU(units=5, go_backwards=True, return_sequences=True)
bi_gru = tf.keras.layers.Bidirectional(fw_gru, backward_layer=bw_gru)
gru_hidden = bi_gru(hidden)  # (bs, seq, units* 2)
gru_hidden

## return_sequences=False, return_state=True

In [None]:
# Bidirectional GRU with return_state=True: the call returns the output plus
# one state per direction (unpacked as fw_state, bw_state)
fw_gru = tf.keras.layers.GRU(units=5, return_state=True)
bi_gru = tf.keras.layers.Bidirectional(fw_gru)
gru_hidden, fw_state, bw_state = bi_gru(hidden)  # (bs, units* 2), (bs, units), (bs, units)
gru_hidden, fw_state, bw_state

In [None]:
# Run a second, separate bidirectional GRU, passing the forward and backward
# states from the previous cell as its initial state
fw_gru = tf.keras.layers.GRU(units=5)
bi_gru2 = tf.keras.layers.Bidirectional(fw_gru)
gru_hidden = bi_gru2(hidden, initial_state=[fw_state, bw_state])
gru_hidden

## return_sequences=True, return_state=True

In [None]:
# Bidirectional GRU with return_sequences=True and return_state=True:
# per-step outputs plus one state per direction
fw_gru = tf.keras.layers.GRU(units=5, return_sequences=True, return_state=True)
bi_gru = tf.keras.layers.Bidirectional(fw_gru)
gru_hidden, fw_state, bw_state = bi_gru(hidden)  # (bs, seq, units* 2), (bs, units), (bs, units)
gru_hidden, fw_state, bw_state

In [None]:
# Run a second, separate bidirectional GRU, passing the forward and backward
# states from the previous cell as its initial state
fw_gru = tf.keras.layers.GRU(units=5)
bi_gru2 = tf.keras.layers.Bidirectional(fw_gru)
gru_hidden = bi_gru2(hidden, initial_state=[fw_state, bw_state])
gru_hidden

# 실습
- RNN, LSTM, GRU, BiRNN, BiLSTM, BiGRU 모두를 사용한 문장분류 모델을 생성해 보세요.
- 생성된 모델의 plot을 그려보세요.