## RNN
현재 정보가 이전 정보 위에 점층적으로 쌓이면서 정보를 표현하는 모델이다.    
현재 정보 - Input State    
이전 정보 - Hidden State   

입력 문장을 순차적으로 입력만 하고 마지막으로 입력한 시점에 출력 정보를 뽑아 영화 평점을 예측한다.    
마지막 스텝에 나온 은닉 상태에는 문장 전체의 정보가 담겨 있으므로, 이 은닉 상태를 이용해 LR 또는 Binary Classification을 수행한다.

In [None]:
%tensorflow_version 2.x 

In [None]:
# Seed value shared by the notebook's random-number setup.
SEED_NUM = 1234

import tensorflow as tf

# Seed TensorFlow's global random generator, then report the installed version.
tf.random.set_seed(SEED_NUM)
print(f"Tensorflow version {tf.__version__}")

In [None]:
# Resolve the TPU cluster and print the 'worker' entry of its cluster spec.
tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
print('Running on TPU ', tpu.cluster_spec().as_dict()['worker'])

# Connect to the resolved cluster and initialize the TPU system; both steps
# run before the distribution strategy is created below.
tf.config.experimental_connect_to_cluster(tpu)
tf.tpu.experimental.initialize_tpu_system(tpu)

# Distribution strategy bound to the resolved TPU; prints how many replicas
# it keeps in sync.
strategy = tf.distribute.experimental.TPUStrategy(tpu)
print('REPLICAS: ', strategy.num_replicas_in_sync)

In [None]:
import numpy as np
import json
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Directory and file names of the saved training data.
DATA_PATH = '/content/drive/MyDrive/movie-review-data/'
TRAIN_INPUT_DATA = 'train_input.npy'
TRAIN_LABEL_DATA = 'train_label.npy'
DATA_CONFIGS = 'data_configs.json'

# Each file is opened in a `with` block so its handle is closed as soon as the
# contents have been read (the bare `open(...)` calls used before were never
# closed).
with open(DATA_PATH + TRAIN_INPUT_DATA, 'rb') as input_file:
    train_input = np.load(input_file)
# maxlen is taken from the array's own second dimension.
train_input = pad_sequences(train_input, maxlen=train_input.shape[1])

with open(DATA_PATH + TRAIN_LABEL_DATA, 'rb') as label_file:
    train_label = np.load(label_file)

with open(DATA_PATH + DATA_CONFIGS, 'r') as config_file:
    prepro_configs = json.load(config_file)

In [None]:
# Training settings.
model_name = 'rnn_classifier_en'
BATCH_SIZE = 128
NUM_EPOCHS = 5
VALID_SPLIT = 0.1
MAX_LEN = train_input.shape[1]

# Keyword arguments handed to the model constructor; the vocabulary size comes
# from the loaded preprocessing config.
kargs = dict(
    model_name=model_name,
    vocab_size=prepro_configs['vocab_size'],
    embedding_dimension=100,
    dropout_rate=0.2,
    lstm_dimension=150,
    dense_dimension=150,
    output_dimension=1,
)

In [None]:
class RNNClassifier(tf.keras.Model):
  """Embedding -> two stacked LSTMs -> tanh dense -> sigmoid output.

  The constructor reads these keys from its keyword arguments: model_name,
  vocab_size, embedding_dimension, lstm_dimension, dropout_rate,
  dense_dimension and output_dimension.
  """

  def __init__(self, **kargs):
    super().__init__(name=kargs['model_name'])
    keras_layers = tf.keras.layers
    activations = tf.keras.activations

    self.embedding = keras_layers.Embedding(
        input_dim=kargs['vocab_size'],
        output_dim=kargs['embedding_dimension'],
    )
    # With return_sequences=True the LSTM emits the hidden state of every
    # step as a sequence, which is what the second LSTM consumes.
    self.lstm_1_layer = keras_layers.LSTM(kargs['lstm_dimension'], return_sequences=True)
    self.lstm_2_layer = keras_layers.LSTM(kargs['lstm_dimension'])
    # A single Dropout layer instance is reused at three points in call().
    self.dropout = keras_layers.Dropout(kargs['dropout_rate'])
    self.fc1 = keras_layers.Dense(units=kargs['dense_dimension'], activation=activations.tanh)
    self.fc2 = keras_layers.Dense(units=kargs['output_dimension'], activation=activations.sigmoid)

  def call(self, x):
    """Run the forward pass and return the output of the final sigmoid layer."""
    embedded = self.dropout(self.embedding(x))
    step_states = self.lstm_1_layer(embedded)
    final_state = self.dropout(self.lstm_2_layer(step_states))
    features = self.dropout(self.fc1(final_state))
    return self.fc2(features)

In [None]:
# Construct and compile the classifier inside the distribution strategy's scope.
with strategy.scope():
  model = RNNClassifier(**kargs)
  model.compile(
      optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
      loss=tf.keras.losses.BinaryCrossentropy(),
      metrics=[tf.keras.metrics.BinaryAccuracy(name='accuracy')],
  )


In [None]:
import os 
# Early-stopping callback: watches validation accuracy with a patience of 2.
earlystop_callbacks = tf.keras.callbacks.EarlyStopping(
    monitor='val_accuracy',
    min_delta=0.0001,
    patience=2,
)

checkpoint_path = DATA_PATH + model_name + '/weights.h5'
checkpoint_dir = os.path.dirname(checkpoint_path)

# Create the checkpoint folder when it is missing and report which case applied.
if not os.path.exists(checkpoint_dir):
  os.makedirs(checkpoint_dir, exist_ok=True)
  print("{} -- Folder create complete \n".format(checkpoint_dir))
else:
  print("{} -- Folder already exists \n".format(checkpoint_dir))

# Checkpoint callback: saves weights only, and only when validation accuracy
# is the best seen so far.
cp_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_path,
    monitor='val_accuracy',
    verbose=1,
    save_best_only=True,
    save_weights_only=True,
)

In [None]:
# Train the model, holding out VALID_SPLIT of the data for validation, with
# the early-stopping and checkpoint callbacks attached.
history = model.fit(
    x=train_input,
    y=train_label,
    batch_size=BATCH_SIZE,
    epochs=NUM_EPOCHS,
    validation_split=VALID_SPLIT,
    callbacks=[earlystop_callbacks, cp_callback],
)

In [None]:
import matplotlib.pyplot as plt

def vis(history, name):
    """Plot one metric's training and validation curves on the current axes.

    Args:
        history: Object whose ``history`` attribute is a dict mapping metric
            names to per-epoch value sequences.
        name: Metric name; the validation series is looked up under
            ``"val_" + name``.

    Raises:
        KeyError: If neither the training nor the validation series for
            ``name`` is present in ``history.history``.
    """
    value = history.history.get(name)
    val_value = history.history.get(f"val_{name}")
    if value is None and val_value is None:
        # Fail with the metric name instead of an opaque TypeError from len(None).
        raise KeyError(f"metric {name!r} not found in history")

    plt.title(name.upper())
    plt.xlabel('epochs')
    plt.ylabel(name.lower())

    # The epoch axis follows the training series when it exists; a metric that
    # was only recorded for validation falls back to the validation series.
    reference = value if value is not None else val_value
    epochs = range(1, len(reference) + 1)
    if value is not None:
        plt.plot(epochs, value, 'b-', label=f'training {name}')
    if val_value is not None:
        plt.plot(epochs, val_value, 'r:', label=f'validation {name}')
    plt.legend(loc='upper center', bbox_to_anchor=(0.05, 1.2), fontsize=10, ncol=1)
    
def plot_history(history):
    """Draw one subplot per recorded metric, side by side, and show the figure.

    Args:
        history: Object whose ``history`` attribute is a dict mapping metric
            names (optionally prefixed with ``"val_"``) to per-epoch values.
    """
    prefix = "val_"
    # Strip only a *leading* "val_" so names that merely contain that text
    # (e.g. "interval_loss") are left intact. dict.fromkeys de-duplicates while
    # keeping first-seen order, so the subplot order is the same on every run.
    metric_names = list(dict.fromkeys(
        key[len(prefix):] if key.startswith(prefix) else key
        for key in history.history
    ))

    plt.figure(figsize=(12, 4))
    for position, metric in enumerate(metric_names, start=1):
        plt.subplot(1, len(metric_names), position)
        vis(history, metric)
    plt.tight_layout()
    plt.show()

In [None]:
plot_history(history)