In [21]:
import numpy as np
import tensorflow as tf
import keras
from datetime import datetime

In [22]:
path_to_file = keras.utils.get_file(
    'shakespeare.txt',
    'https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt'
)
print(path_to_file)

with open(path_to_file) as f:
    shakespeare_text = f.read()
print(shakespeare_text[:148])

shakespeare_tensor = tf.constant([shakespeare_text])

/tmp/.keras/datasets/shakespeare.txt
First Citizen:
Before we proceed any further, hear me speak.

All:
Speak, speak.

First Citizen:
You are all resolved rather to die than to famish?



In [23]:
vocab = list(set(shakespeare_text.lower().strip()))
print(vocab)
print(f"vocab size = {len(vocab)}")

['3', ' ', 'p', '!', 'j', 'g', 'z', 'l', 'e', ',', 'w', 'm', 'r', 'x', 'q', ':', '-', 'h', ';', 'y', 'u', 'k', 's', 'c', 'b', 'i', '\n', "'", 't', 'o', '?', '.', 'v', 'a', 'd', 'n', '$', '&', 'f']
vocab size = 39


In [24]:
tokenizer = tf.keras.preprocessing.text.Tokenizer(char_level=True, lower=True)
tokenizer.fit_on_texts(shakespeare_text.lower())
config = tokenizer.get_config()
tokenizer.texts_to_sequences(["hello world", "hello world"])

[[7, 2, 12, 12, 4, 1, 17, 4, 9, 12, 13],
 [7, 2, 12, 12, 4, 1, 17, 4, 9, 12, 13]]

In [25]:
n_tokens = len(tokenizer.word_index)
dataset_size = tokenizer.document_count

In [26]:
[encoded] = np.array(tokenizer.texts_to_sequences([shakespeare_text])) - 1
print(f"{encoded[:100]}")

[19  5  8  7  2  0 18  5  2  5 35  1  9 23 10 21  1 19  3  8  1  0 16  1
  0 22  8  3 18  1  1 12  0  4  9 15  0 19 13  8  2  6  1  8 17  0  6  1
  4  8  0 14  1  0  7 22  1  4 24 26 10 10  4 11 11 23 10  7 22  1  4 24
 17  0  7 22  1  4 24 26 10 10 19  5  8  7  2  0 18  5  2  5 35  1  9 23
 10 15  3 13]


# Create Dataset

In [27]:
train_size = dataset_size * 90 // 100
print(f"train_size = {train_size}")

train_size = 1003854


In [28]:
dataset = tf.data.Dataset.from_tensor_slices(encoded)
#dataset = dataset.repeat()
n_steps = 50
dataset = dataset.window(n_steps + 1, shift=1, drop_remainder=True)
dataset = dataset.flat_map(lambda window_ds: window_ds.batch(n_steps + 1))

In [29]:
for x in dataset.take(2):
    print(x)

tf.Tensor(
[19  5  8  7  2  0 18  5  2  5 35  1  9 23 10 21  1 19  3  8  1  0 16  1
  0 22  8  3 18  1  1 12  0  4  9 15  0 19 13  8  2  6  1  8 17  0  6  1
  4  8  0], shape=(51,), dtype=int64)
tf.Tensor(
[ 5  8  7  2  0 18  5  2  5 35  1  9 23 10 21  1 19  3  8  1  0 16  1  0
 22  8  3 18  1  1 12  0  4  9 15  0 19 13  8  2  6  1  8 17  0  6  1  4
  8  0 14], shape=(51,), dtype=int64)


2025-05-08 18:34:38.814581: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int64 and shape [1115394]
	 [[{{node Placeholder/_0}}]]
2025-05-08 18:34:38.814732: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int64 and shape [1115394]
	 [[{{node Placeholder/_0}}]]


In [30]:
dataset = dataset.batch(128)

In [31]:
for x in dataset.take(1):
    print(x.shape)

(128, 51)


2025-05-08 18:34:38.881393: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int64 and shape [1115394]
	 [[{{node Placeholder/_0}}]]
2025-05-08 18:34:38.881511: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int64 and shape [1115394]
	 [[{{node Placeholder/_0}}]]


In [32]:
dataset = dataset.map(lambda window: (window[:, :-1], window[:, 1:]))

In [33]:
dataset = dataset.cache().prefetch(tf.data.experimental.AUTOTUNE)

In [34]:
for x, y in dataset.take(1):
    print(f"x = {x.shape}")
    print(f"y = {y.shape}")    

2025-05-08 18:34:38.950623: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int64 and shape [1115394]
	 [[{{node Placeholder/_0}}]]
2025-05-08 18:34:38.950768: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int64 and shape [1115394]
	 [[{{node Placeholder/_0}}]]
2025-05-08 18:34:38.967550: W tensorflow/core/kernels/data/cache_dataset_ops.cc:856] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input

x = (128, 50)
y = (128, 50)


In [35]:
transform = lambda x, y: (tf.one_hot(x, depth=n_tokens), y)
dataset = dataset.map(transform)
for x, y in dataset.take(1):
    print(f"x = {x.shape}")
    print(f"y = {y.shape}")

2025-05-08 18:34:39.032739: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int64 and shape [1115394]
	 [[{{node Placeholder/_0}}]]
2025-05-08 18:34:39.032874: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int64 and shape [1115394]
	 [[{{node Placeholder/_0}}]]
2025-05-08 18:34:39.052032: W tensorflow/core/kernels/data/cache_dataset_ops.cc:856] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input

x = (128, 50, 39)
y = (128, 50)


# Create Models

In [36]:
m1 = keras.Sequential(
    [
        keras.layers.Input(shape=[None, n_tokens]),
        keras.layers.GRU(128, return_sequences=True),
        keras.layers.Dense(n_tokens, activation="softmax")
    ]
)
m1.compile(loss="sparse_categorical_crossentropy", optimizer="adam")
m1.summary()
model = m1

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 gru_1 (GRU)                 (None, None, 128)         64896     
                                                                 
 dense_1 (Dense)             (None, None, 39)          5031      
                                                                 
Total params: 69,927
Trainable params: 69,927
Non-trainable params: 0
_________________________________________________________________


2025-05-08 18:34:39.181176: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_2_grad/concat/split_2/split_dim' with dtype int32
	 [[{{node gradients/split_2_grad/concat/split_2/split_dim}}]]
2025-05-08 18:34:39.181852: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_grad/concat/split/split_dim' with dtype int32
	 [[{{node gradients/split_grad/concat/split/split_dim}}]]
2025-05-08 18:34:39.182525: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You mus

In [37]:
text = "to be "
tokens = tokenizer.texts_to_sequences([text])
inp_tf = tf.constant(tokens)
inp_tf = tf.one_hot(inp_tf, depth=n_tokens)
print(inp_tf.shape)
y = model.predict(inp_tf)
print(y.shape)
y = tf.argmax(y[0,-1])

(1, 6, 39)
(1, 6, 39)


2025-05-08 18:34:39.292180: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_2_grad/concat/split_2/split_dim' with dtype int32
	 [[{{node gradients/split_2_grad/concat/split_2/split_dim}}]]
2025-05-08 18:34:39.293060: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_grad/concat/split/split_dim' with dtype int32
	 [[{{node gradients/split_grad/concat/split/split_dim}}]]
2025-05-08 18:34:39.293998: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You mus

In [38]:
gpus = tf.config.experimental.list_physical_devices('GPU')
print("Num GPUs Available: ", len(gpus))

print(tf.__version__)
print(tf.config.list_physical_devices('GPU'))

Num GPUs Available:  0
2.12.0
[]


In [39]:
early_stopping = keras.callbacks.EarlyStopping(monitor="loss", min_delta=0.002, patience=2)
history = m1.fit(dataset, epochs=10, steps_per_epoch=train_size // 128, callbacks=[early_stopping], verbose=2)
m1.save("models/shakespeare_gru_1.keras")

Epoch 1/10


2025-05-08 18:34:39.417169: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int64 and shape [1115394]
	 [[{{node Placeholder/_0}}]]
2025-05-08 18:34:39.417318: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int64 and shape [1115394]
	 [[{{node Placeholder/_0}}]]
2025-05-08 18:34:39.503009: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_2_grad/concat/split_2/split_d

7842/7842 - 329s - loss: 1.9071 - 329s/epoch - 42ms/step
Epoch 2/10
7842/7842 - 36s - loss: 1.6950 - 36s/epoch - 5ms/step


In [40]:
m2 = keras.Sequential(
    [
        keras.layers.Input(shape=[None, n_tokens]),
        keras.layers.LSTM(128, return_sequences=True),
        keras.layers.Dense(n_tokens, activation="softmax")
    ]
)
m2.compile(loss="sparse_categorical_crossentropy", optimizer="adam")
m2.summary()
early_stopping = keras.callbacks.EarlyStopping(monitor="loss", min_delta=0.002, patience=2)
# start_time = time.time.now()
h2 = m2.fit(dataset, epochs=10, steps_per_epoch=train_size // 128, callbacks=[early_stopping], verbose=2)
# runtime = time.perf_counter() - start_time
# print(f"Time {runtime:,.0f} seconds.")
m2.save("models/shakespeare_lstm_1.keras")

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, None, 128)         86016     
                                                                 
 dense_2 (Dense)             (None, None, 39)          5031      
                                                                 
Total params: 91,047
Trainable params: 91,047
Non-trainable params: 0
_________________________________________________________________
Epoch 1/10


2025-05-08 18:40:44.655335: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_2_grad/concat/split_2/split_dim' with dtype int32
	 [[{{node gradients/split_2_grad/concat/split_2/split_dim}}]]
2025-05-08 18:40:44.655960: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_grad/concat/split/split_dim' with dtype int32
	 [[{{node gradients/split_grad/concat/split/split_dim}}]]
2025-05-08 18:40:44.656378: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You mus

7842/7842 - 400s - loss: 2.0275 - 400s/epoch - 51ms/step
Epoch 2/10
7842/7842 - 34s - loss: 1.7448 - 34s/epoch - 4ms/step


In [41]:
m3 = keras.Sequential(
        [
                keras.layers.Input(shape=(None, n_tokens)),
                keras.layers.GRU(128, return_sequences=True),
                keras.layers.GRU(128, return_sequences=True),
                keras.layers.Dense(n_tokens, activation="softmax")
                ],
        )
m3.compile(loss="sparse_categorical_crossentropy", optimizer="adam")
m3.summary()
model = m3

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 gru_2 (GRU)                 (None, None, 128)         64896     
                                                                 
 gru_3 (GRU)                 (None, None, 128)         99072     
                                                                 
 dense_3 (Dense)             (None, None, 39)          5031      
                                                                 
Total params: 168,999
Trainable params: 168,999
Non-trainable params: 0
_________________________________________________________________


2025-05-08 18:47:58.655210: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_2_grad/concat/split_2/split_dim' with dtype int32
	 [[{{node gradients/split_2_grad/concat/split_2/split_dim}}]]
2025-05-08 18:47:58.655861: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_grad/concat/split/split_dim' with dtype int32
	 [[{{node gradients/split_grad/concat/split/split_dim}}]]
2025-05-08 18:47:58.656333: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You mus

In [None]:
early_stopping = keras.callbacks.EarlyStopping(monitor="loss", min_delta=0.002, patience=2)
start_time = datetime.now()
history3 = m3.fit(dataset, epochs=10, steps_per_epoch=train_size // 128, callbacks=[early_stopping], verbose=2)
elapsed_time3 = datetime.now() - start_time
print(f"Training completed in {elapsed_time3:.0f} seconds.")
m3.save("models/shakespeare_gru_2.keras")

In [None]:
import matplotlib.pyplot as plt

# Extract the loss values from the three history objects
loss1 = history.history['loss']
loss2 = h2.history['loss']
loss3 = history3.history['loss']

# Plot the training losses
plt.plot(loss1, label="Run 1 (GRU)", linestyle='-', color='blue')
plt.plot(loss2, label="Run 2 (LSTM)", linestyle='--', color='orange')
plt.plot(loss3, label="Run 3 (Stacked GRU)", linestyle='-.', color='green')

# Add labels, title, and legend
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.title("Comparison of Training Histories")
plt.legend()

# Display the plot
plt.show()