**The idea is to generate poem text using Shakespeare data**

# Import libraries

In [2]:
import tensorflow as tf
print(tf.__version__)

2.12.0


In [3]:
import numpy as np

# Get Shakespeare data

In [5]:
# Fetch the corpus with Keras' download helper; the returned value is the
# local path of the file, which is then read in one go.
shakespeare_url = 'https://homl.info/shakespeare'
shakespeare_filepath = tf.keras.utils.get_file('shakespeare.txt', shakespeare_url)

# Pin the encoding so decoding does not depend on the platform's default locale.
with open(shakespeare_filepath, encoding="utf-8") as f:
  shakespeare_text = f.read()

# Sanity check: show the first 50 characters of the corpus.
print(shakespeare_text[:50])

First Citizen:
Before we proceed any further, hear


#  Build model

## Text Vectorization

In [9]:
# Character-level tokenizer: the text is lower-cased, then every single
# character becomes one token.
text_vec_layer = tf.keras.layers.TextVectorization(
    standardize="lower",
    split="character",
)

# Build the vocabulary from the whole corpus, passed as a one-document batch.
text_vec_layer.adapt([shakespeare_text])

# The layer returns one row per input document; there is only one document,
# so row 0 is the entire corpus as a 1-D tensor of token IDs.
encoded = text_vec_layer([shakespeare_text])[0]

encoded

<tf.Tensor: shape=(1115394,), dtype=int64, numpy=array([21,  7, 10, ..., 22, 28, 12])>

In [10]:
# Shift IDs down by 2 so the reserved IDs 0 (padding) and 1 (unknown token) are dropped.
# Recomputed from the vectorizer rather than `encoded -= 2`, so re-running this
# cell cannot shift the IDs a second time.
encoded = text_vec_layer([shakespeare_text])[0] - 2

In [13]:
# Number of distinct character tokens, not counting the two reserved IDs
# that were shifted away from `encoded`.
n_tokens = text_vec_layer.vocabulary_size() - 2

# Total number of encoded characters in the corpus.
dataset_size = len(encoded)

n_tokens

39

## Make windows of text, shuffle and batch them

In [14]:
def to_dataset(sequence, length, shuffle=False, seed=None, batch_size=32,
               shuffle_buffer_size=100_000):
  """Turn a token sequence into batched (input, target) windows.

  Slides a window of `length + 1` tokens over `sequence` one step at a time.
  Each window is split into an input (all tokens but the last) and a target
  (all tokens but the first), i.e. the target is the input shifted by one.

  Args:
    sequence: 1-D sequence of token IDs to slice into windows.
    length: number of tokens in each input (and in each target).
    shuffle: if True, shuffle the windows before batching.
    seed: random seed passed to the shuffle step.
    batch_size: number of windows per batch.
    shuffle_buffer_size: size of the shuffle buffer; only used when
      `shuffle` is True.

  Returns:
    A `tf.data.Dataset` yielding `(inputs, targets)` pairs, with prefetching
    of one batch enabled.
  """
  ds = tf.data.Dataset.from_tensor_slices(sequence)

  # One extra token per window so the targets can be the inputs shifted by one.
  # Trailing windows shorter than length + 1 are dropped.
  ds = ds.window(length + 1, shift=1, drop_remainder=True)

  # window() yields nested datasets; batching each one to its full size and
  # flattening converts them into plain tensors of length + 1 tokens.
  ds = ds.flat_map(lambda window_ds: window_ds.batch(length + 1))

  if shuffle:
    ds = ds.shuffle(buffer_size=shuffle_buffer_size, seed=seed)

  ds = ds.batch(batch_size)

  # Split every window into (all but last token, all but first token).
  return ds.map(lambda window: (window[:, :-1], window[:, 1:])).prefetch(1)



## Generate train, validation, test data

In [15]:
# Every example is a window of 100 input characters.
length = 100

tf.random.set_seed(42)

# Split by position in the corpus: the first 1,000,000 characters are used for
# training (shuffled), the next 60,000 for validation, and the rest for testing.
train_set = to_dataset(encoded[:1_000_000], shuffle=True, seed=42, length=length)

validation_set = to_dataset(encoded[1_000_000:1_060_000], length=length)

test_set = to_dataset(encoded[1_060_000:], length=length)

## Build core model

In [None]:
# Character-level language model: token IDs -> 16-dimensional embeddings ->
# GRU emitting an output at every time step -> softmax over the n_tokens characters.
model = tf.keras.Sequential(
    name="shakespeare_poem",
    layers=[
        tf.keras.layers.Embedding(input_dim=n_tokens, output_dim=16),
        tf.keras.layers.GRU(units=128, return_sequences=True),
        tf.keras.layers.Dense(units=n_tokens, activation="softmax"),
    ],
)

# Targets are integer token IDs, hence the sparse categorical loss.
model.compile(
    optimizer="nadam",
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)

# Keep only the checkpoint with the best validation accuracy seen so far.
model_chkpt = tf.keras.callbacks.ModelCheckpoint(
    filepath="shakespeare_poem",
    monitor="val_accuracy",
    save_best_only=True,
)

history = model.fit(
    x=train_set,
    validation_data=validation_set,
    epochs=10,
    callbacks=[model_chkpt],
)

# Final model with text preprocessing: raw strings are vectorized, the IDs are
# shifted down by 2 (the same shift applied to the training data), and the
# result is fed to the trained model.
shakespeare_model = tf.keras.Sequential([
    text_vec_layer,
    tf.keras.layers.Lambda(lambda token_ids: token_ids - 2),
    model,
])


Epoch 1/10
     66/Unknown - 32s 124ms/step - loss: 3.2223 - accuracy: 0.1453