# Chapter 16

In [1]:
from tensorflow import keras
import numpy as np
import tensorflow as tf

In [2]:
shakespeare_url = "https://homl.info/shakespeare" # shortcut URL
filepath = keras.utils.get_file("shakespeare.txt", shakespeare_url)
with open(filepath) as f:
    shakespeare_text = f.read()

In [3]:
tokenizer = keras.preprocessing.text.Tokenizer(char_level=True)
tokenizer.fit_on_texts(shakespeare_text)

In [4]:
max_id = len(tokenizer.word_index)
dataset_size = tokenizer.document_count

In [5]:
[encoded] = np.array(tokenizer.texts_to_sequences([shakespeare_text]))

## create training data

In [6]:
train_size = dataset_size * 90 // 100
n_steps = 100
window_length = n_steps + 1
batch_size = 32

In [7]:
dataset = tf.data.Dataset.from_tensor_slices(encoded[:train_size])
dataset = dataset.window(window_length, shift=1, drop_remainder=True)
dataset = dataset.flat_map(lambda window: window.batch(window_length))
dataset = dataset.shuffle(10000).batch(batch_size)
dataset = dataset.map(lambda window: (window[:,:-1], window[:, 1:]))

Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Index'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Index'


In [8]:
dataset = dataset.prefetch(1)

## Create and train model

In [9]:
embedding_dim = 5

In [10]:
model = keras.models.Sequential([
    keras.layers.InputLayer(input_shape=[n_steps], batch_size=batch_size),
    keras.layers.Embedding(input_dim=max_id+1, output_dim=embedding_dim),
    keras.layers.GRU(128, return_sequences=True, dropout=0.2, recurrent_dropout=0.2),
    #keras.layers.GRU(128, return_sequences=True, dropout=0.2),
    #keras.layers.GRU(128, return_sequences=True, dropout=0.2),
    keras.layers.TimeDistributed(keras.layers.Dense(max_id, activation="softmax"))
])



In [11]:
optimizer = keras.optimizers.Adam(learning_rate=0.01)
model.compile(optimizer=optimizer, loss="sparse_categorical_crossentropy")

In [12]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (32, 100, 5)              200       
_________________________________________________________________
gru (GRU)                    (32, 100, 128)            51840     
_________________________________________________________________
time_distributed (TimeDistri (32, 100, 39)             5031      
Total params: 57,071
Trainable params: 57,071
Non-trainable params: 0
_________________________________________________________________


In [13]:
from time import strftime
from os import makedirs, path

In [14]:
run_dir = './logs/char_rnn/'
log_dir = run_dir + strftime('%Y%m%d-%H%M%S')
if not path.exists(log_dir):
    makedirs(log_dir)

In [15]:
tensorboard_cb = keras.callbacks.TensorBoard(log_dir=log_dir)
model_cb = keras.callbacks.ModelCheckpoint(run_dir+'char_rnn.h5', save_best_only=True, monitor='loss')
early_stopping_cb = keras.callbacks.EarlyStopping(patience=5, monitor='loss')
callbacks = [tensorboard_cb, model_cb, early_stopping_cb]

In [None]:
history = model.fit(dataset, epochs=1)

  31236/Unknown - 2262s 72ms/step - loss: nan