<a href="https://colab.research.google.com/github/kimhwijin/TensorflowWithKeras/blob/master/RNN/RNN_GRU.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import os
import tensorflow as tf
import numpy as np
import re
import shutil

CHECKPOINT_DIR = 'drive/MyDrive/Colab Notebooks/models/RNN/alice'
DATA_DIR = 'drive/MyDrive/Datasets/alice'


In [6]:
def download_and_read(urls):
  texts = []
  for i, url in enumerate(urls):
    p = tf.keras.utils.get_file("ex1-{:d}.txt".format(i), url, cache_dir=".")
    text = open(p, "r").read()
    text = text.replace("\ufeff", "")
    text = text.replace("\n", " ")
    text = re.sub(r'\s+', " ", text)
    texts.extend(text)
  return texts

texts = download_and_read([
                           "https://www.gutenberg.org/cache/epub/28885/pg28885.txt",
                           "https://www.gutenberg.org/files/12/12-0.txt"
])

vocab = sorted(set(texts))
print("vocab size:{:d}".format(len(vocab)))

#어휘 문자에서 정수로 매핑
char2idx = {c:i for i, c in enumerate(vocab)}
idx2char = {i:c for c , i in char2idx.items()}

#텍스트 수치화
texts_as_ints = np.array([char2idx[c] for c in texts])
data = tf.data.Dataset.from_tensor_slices(texts_as_ints)

#예측하기 전에 보여줄 문자 개수
seq_length = 100
sequences = data.batch(seq_length + 1, drop_remainder=True)

def split_train_labels(sequence):
  input_seq = sequence[0:-1]
  output_seq = sequence[1:]
  return input_seq, output_seq

sequences = sequences.map(split_train_labels)
batch_size = 64
steps_per_epoch = len(texts) // seq_length // batch_size
dataset = sequences.shuffle(10000).batch(batch_size, drop_remainder=True)


vocab size:92


In [10]:
class CharGenModel(tf.keras.Model):
  def __init__(self, vocab_size, num_timesteps, embedding_dim, rnn_output_dim, **kwargs):
    super(CharGenModel, self).__init__(**kwargs)
    self.embedding_layer = tf.keras.layers.Embedding(vocab_size, embedding_dim)
    self.rnn_layer = tf.keras.layers.GRU(num_timesteps, recurrent_initializer="glorot_uniform", recurrent_activation="sigmoid", stateful=True, return_sequences=True)
    self.dense_layer = tf.keras.layers.Dense(vocab_size)
  def call(self, x):
    x = self.embedding_layer(x)
    x = self.rnn_layer(x)
    x = self.dense_layer(x)
    return x

vocab_size = len(vocab)
embedding_dim = 256
rnn_output_dim = 1024

model = CharGenModel(vocab_size, seq_length, embedding_dim, rnn_output_dim)
model.build(input_shape=(batch_size, seq_length))

In [13]:
def loss(labels, predictions):
  return tf.keras.losses.sparse_categorical_crossentropy(labels, predictions, from_logits=True)
model.compile(optimizer=tf.optimizers.Adam(), loss=loss)

In [11]:
def generate_text(model, prefix_string, char2idx, idx2char, num_chars_to_generate=1000, temperature=1.0):
  input = [char2idx[s] for s in prefix_string]
  input = tf.expand_dims(input, 0)
  text_generated = []
  model.reset_states()
  for i in range(num_chars_to_generate):
    preds = model(input)
    preds = tf.squeeze(preds, 0) / temperature
    #모델이 반환한 문자 예측
    pred_id = tf.random.categorical(preds, num_samples=1)[-1, 0].numpy()
    text_generated.append(idx2char[pred_id])
  
    #예측을 모델의 다음 입력으로 전달
    input = tf.expand_dims([pred_id], 0)
  return prefix_string + "".join(text_generated)


In [16]:
num_epochs = 50
for i in range(num_epochs // 10):
  model.fit(
      dataset.repeat(),
      epochs=10,
      steps_per_epoch=steps_per_epoch
      #callbacks=[checkpoint_callback, tensorboard_callback]
  )

  checkpoint_file = os.path.join(CHECKPOINT_DIR, "model_eopch_{:d}".format(i+1))
  model.save_weights(checkpoint_file)

  gen_model = CharGenModel(vocab_size, seq_length, embedding_dim, rnn_output_dim)
  gen_model.load_weights(checkpoint_file)
  gen_model.build(input_shape=(1, seq_length))
  print("after epoch: {:d}".format(i+1)*10)
  print(generate_text(gen_model, "Alice ", char2idx, idx2char))
  print("---" * 5)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
after epoch: 1after epoch: 1after epoch: 1after epoch: 1after epoch: 1after epoch: 1after epoch: 1after epoch: 1after epoch: 1after epoch: 1
Alice said happenot a long sevone of Rice! “Who wear umbetting and bestaning a little a dade_—I'd see Tweedledum, as she moatial of the great readd. And very not in our in the stificaps and what's vare--only: "of real: you’re jecatiem---but there know, you shaken evelt say, he was poor way to fory! And I don't van't suppose like to uld then she could not winto its opposiding a glow hearing up of 10ve’s'_ an aven last. "You must. He mortillar of a wroking fine you. It’s tire, it was getting on, you know," said lawly lide had only gried out the to, "help middle of my up out examing houghter proence U," ! Do you day see.” “Now and —in disan her so out on this can see mistart! Howard to--only and doad on silenberg-of Are a deept wither live v

In [19]:
model.save_weights("drive/MyDrive/Colab Notebooks/models/RNN/alice/model_epoch_5")
print(generate_text(gen_model, "Alice ", char2idx, idx2char))

Alice couldn’t no arms and dey she! “ORTY )AN. INFS. PILIIDER WIAD AF At free!” said the Mock Turtle conchies concour these must be sure. “Of cousely and moment week ‘I was turned like more appled of away. ‘Yes, the Rubbin: “I really fady had knout up the come was indigg knows stuse and much; I only would not no more, but a non distribute--now.” The King sthank oftlation] She I’ll said namar?" The Queen, so my exem-trange the soring have Helaster, and asking. The Queen was a joke off the accessed at the Fayshemarke jum to knocking her dreature her riddlatioled with Alice, with as everything not bact stalkinging to the prettiting to pirst in the dist" Project Gutenberg-tm eBooks as sounded each passed a great fecched to gute beforawarms with her. CHAPTER ITWEMD WK. I'n over nicer of the work agaid, and like!” Humpty Dumpty with one now in a beinnst be would statian certor of duch! I’d reme the right know I happen: "the crawning had to hold, she was I was mark and smaller, worse her head