In [1]:
import tensorflow as tf
import numpy as np
import os
import time

## Read Data

In [2]:
path_to_file = tf.keras.utils.get_file('shakespeare.txt', 'https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt')

In [3]:
text=open(path_to_file,'rb').read().decode(encoding='utf8')

In [4]:
print(f'Length of text :{len(text)} characters')

Length of text :1115394 characters


In [5]:
print(text[:250])

First Citizen:
Before we proceed any further, hear me speak.

All:
Speak, speak.

First Citizen:
You are all resolved rather to die than to famish?

All:
Resolved. resolved.

First Citizen:
First, you know Caius Marcius is chief enemy to the people.


In [6]:
vocab=sorted(set(text))
print(f'{len(vocab)} unique caracters')

65 unique caracters


## Preprocess the text

### Vectorize the text

In [7]:
example_texts=['abcdefg','xyz']
chars=tf.strings.unicode_split(example_texts,input_encoding='UTF-8')
chars

<tf.RaggedTensor [[b'a', b'b', b'c', b'd', b'e', b'f', b'g'], [b'x', b'y', b'z']]>

In [8]:
ids_from_chars = tf.keras.layers.StringLookup(vocabulary=list(vocab),mask_token=None)

In [9]:
ids=ids_from_chars(chars)
ids

<tf.RaggedTensor [[40, 41, 42, 43, 44, 45, 46], [63, 64, 65]]>

In [10]:
chars_from_ids=tf.keras.layers.StringLookup(vocabulary=ids_from_chars.get_vocabulary(),mask_token=None,invert=True)

In [11]:
chars=chars_from_ids(ids)
chars

<tf.RaggedTensor [[b'a', b'b', b'c', b'd', b'e', b'f', b'g'], [b'x', b'y', b'z']]>

In [12]:
tf.strings.reduce_join(chars,axis=-1).numpy()

array([b'abcdefg', b'xyz'], dtype=object)

In [13]:
def text_from_ids(ids):
  return tf.strings.reduce_join(chars_from_ids(ids),axis=-1)

In [14]:
tf.strings.unicode_split(text,'UTF-8')

<tf.Tensor: shape=(1115394,), dtype=string, numpy=array([b'F', b'i', b'r', ..., b'g', b'.', b'\n'], dtype=object)>

In [15]:
all_ids=ids_from_chars(tf.strings.unicode_split(text,'UTF-8'))
all_ids

<tf.Tensor: shape=(1115394,), dtype=int64, numpy=array([19, 48, 57, ..., 46,  9,  1])>

In [16]:
sequence_length=100

In [17]:
ids_dataset=tf.data.Dataset.from_tensor_slices(all_ids)
for ids in ids_dataset.take(10):
  print(chars_from_ids(ids))

tf.Tensor(b'F', shape=(), dtype=string)
tf.Tensor(b'i', shape=(), dtype=string)
tf.Tensor(b'r', shape=(), dtype=string)
tf.Tensor(b's', shape=(), dtype=string)
tf.Tensor(b't', shape=(), dtype=string)
tf.Tensor(b' ', shape=(), dtype=string)
tf.Tensor(b'C', shape=(), dtype=string)
tf.Tensor(b'i', shape=(), dtype=string)
tf.Tensor(b't', shape=(), dtype=string)
tf.Tensor(b'i', shape=(), dtype=string)


In [18]:
sequences=ids_dataset.batch(sequence_length+1,drop_remainder=True)
for seq in sequences.take(1):
  print(chars_from_ids(seq))

tf.Tensor(
[b'F' b'i' b'r' b's' b't' b' ' b'C' b'i' b't' b'i' b'z' b'e' b'n' b':'
 b'\n' b'B' b'e' b'f' b'o' b'r' b'e' b' ' b'w' b'e' b' ' b'p' b'r' b'o'
 b'c' b'e' b'e' b'd' b' ' b'a' b'n' b'y' b' ' b'f' b'u' b'r' b't' b'h'
 b'e' b'r' b',' b' ' b'h' b'e' b'a' b'r' b' ' b'm' b'e' b' ' b's' b'p'
 b'e' b'a' b'k' b'.' b'\n' b'\n' b'A' b'l' b'l' b':' b'\n' b'S' b'p' b'e'
 b'a' b'k' b',' b' ' b's' b'p' b'e' b'a' b'k' b'.' b'\n' b'\n' b'F' b'i'
 b'r' b's' b't' b' ' b'C' b'i' b't' b'i' b'z' b'e' b'n' b':' b'\n' b'Y'
 b'o' b'u' b' '], shape=(101,), dtype=string)


In [19]:
for seq in sequences.take(1):
  print(text_from_ids(seq).numpy())

b'First Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou '


### Preprocess the Text

In [20]:
def split_input_target(sequence):
    input_text = sequence[:-1]
    target_text = sequence[1:]
    return input_text, target_text

In [21]:
dataset=sequences.map(split_input_target)

In [22]:
for input_example,output_example in dataset.take(1):
  print(f'Input : {text_from_ids(input_example)}')
  print(f'Output : {text_from_ids(output_example)}')

Input : b'First Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou'
Output : b'irst Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou '


In [23]:
BATCH_SIZE=64
BUFFER_SIZE=10000

dataset=(
    dataset
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE,drop_remainder=True)
    .prefetch(tf.data.experimental.AUTOTUNE)
)
dataset

<_PrefetchDataset element_spec=(TensorSpec(shape=(64, 100), dtype=tf.int64, name=None), TensorSpec(shape=(64, 100), dtype=tf.int64, name=None))>

## Build Model

In [24]:
embedding_dim=256
vocab_size=len(ids_from_chars.get_vocabulary())
rnn_units=1024

In [25]:
class MyModel(tf.keras.Model):
  def __init__(self, vocab_size, embedding_dim, rnn_units):
    super().__init__(self)
    self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
    self.gru = tf.keras.layers.GRU(rnn_units,
                                   return_sequences=True,
                                   return_state=True)
    self.dense = tf.keras.layers.Dense(vocab_size)


  @tf.function()
  def call(self, inputs, states=None, return_state=False, training=False):
    x = inputs
    x = self.embedding(x, training=training)
    if states is None:
      states = self.gru.get_initial_state(x)
    x, states = self.gru(x, initial_state=states, training=training)
    x = self.dense(x, training=training)

    if return_state:
      return x, states
    else:
      return x



In [26]:
model=MyModel(
    vocab_size=vocab_size,
    embedding_dim=embedding_dim,
    rnn_units=rnn_units,)

In [27]:
for input_example_batch, target_example_batch in dataset.take(1):
    example_batch_predictions = model(input_example_batch)
    print(example_batch_predictions.shape, "# (batch_size, sequence_length, vocab_size)")

(64, 100, 66) # (batch_size, sequence_length, vocab_size)


In [28]:
model.summary()

Model: "my_model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       multiple                  16896     
                                                                 
 gru (GRU)                   multiple                  3938304   
                                                                 
 dense (Dense)               multiple                  67650     
                                                                 
Total params: 4022850 (15.35 MB)
Trainable params: 4022850 (15.35 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [29]:
sampled_indices = tf.random.categorical(example_batch_predictions[0], num_samples=1)
sampled_indices = tf.squeeze(sampled_indices, axis=-1).numpy()
sampled_indices.shape

(100,)

In [30]:
print("Input:\n", text_from_ids(input_example_batch[0]).numpy())
print()
print("Next Char Predictions:\n", text_from_ids(sampled_indices).numpy())

Input:
 b"k wherein my soul recorded\nThe history of all her secret thoughts:\nSo smooth he daub'd his vice with"

Next Char Predictions:
 b'ipzhQWdR!doDem[UNK]!zaQLA\n-reLP??;q3z-pJ?TBpiehUiEKIf3Y3KGS.:!-CzibWyQ;psdbFIksridSWFE:r &au3zdWeTOi$YWP'


In [31]:
loss=tf.losses.SparseCategoricalCrossentropy(from_logits=True)

In [32]:
example_batch_mean_loss = loss(target_example_batch, example_batch_predictions)
print("Prediction shape: ", example_batch_predictions.shape, " # (batch_size, sequence_length, vocab_size)")
print("Mean loss:        ", example_batch_mean_loss)

Prediction shape:  (64, 100, 66)  # (batch_size, sequence_length, vocab_size)
Mean loss:         tf.Tensor(4.1892157, shape=(), dtype=float32)


In [33]:
tf.exp(example_batch_mean_loss).numpy()

65.97103

In [34]:
model.compile(optimizer='adam',loss=loss)

In [35]:
checkpoint_dir='/content/training_checkpoints'
checkpoint_prefix=os.path.join(checkpoint_dir,'ckpt_{epoch}')
checkpoint_callback=tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_prefix,
    save_weight=True
)

In [36]:
epochs=30
history=model.fit(dataset,epochs=epochs,callbacks=[checkpoint_callback])

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


## Generate Text

In [37]:
skip_ids = ids_from_chars(['[UNK]'])[:, None]
skip_ids

<tf.Tensor: shape=(1, 1), dtype=int64, numpy=array([[0]])>

In [38]:
values=[-float('inf')]*len(skip_ids)

In [39]:
indices=skip_ids,
dense_shape=[len(ids_from_chars.get_vocabulary())]

In [40]:
dense_shape

[66]

In [41]:
sparse_mask = tf.SparseTensor(
        # Put a -inf at each bad index.
        values=[-float('inf')]*len(skip_ids),
        indices=skip_ids,
        # Match the shape to the vocabulary
        dense_shape=[len(ids_from_chars.get_vocabulary())])

In [42]:
inputs=['ROMEO']
input_chars = tf.strings.unicode_split(inputs, 'UTF-8')

In [43]:
class OneStep(tf.keras.Model):
  def __init__(self, model, chars_from_ids, ids_from_chars, temperature=1.0):
    super().__init__()
    self.temperature = temperature
    self.model = model
    self.chars_from_ids = chars_from_ids
    self.ids_from_chars = ids_from_chars

    # Create a mask to prevent "[UNK]" from being generated.
    skip_ids = self.ids_from_chars(['[UNK]'])[:, None]
    sparse_mask = tf.SparseTensor(
        # Put a -inf at each bad index.
        values=[-float('inf')]*len(skip_ids),
        indices=skip_ids,
        # Match the shape to the vocabulary
        dense_shape=[len(ids_from_chars.get_vocabulary())])
    self.prediction_mask = tf.sparse.to_dense(sparse_mask)

  @tf.function
  def generate_one_step(self, inputs, states=None):
    # Convert strings to token IDs.
    input_chars = tf.strings.unicode_split(inputs, 'UTF-8')
    input_ids = self.ids_from_chars(input_chars).to_tensor()

    # Run the model.
    # predicted_logits.shape is [batch, char, next_char_logits]
    predicted_logits, states = self.model(inputs=input_ids, states=states,
                                          return_state=True)
    # Only use the last prediction.
    predicted_logits = predicted_logits[:, -1, :]
    predicted_logits = predicted_logits/self.temperature
    # Apply the prediction mask: prevent "[UNK]" from being generated.
    predicted_logits = predicted_logits + self.prediction_mask

    # Sample the output logits to generate token IDs.
    predicted_ids = tf.random.categorical(predicted_logits, num_samples=1)
    predicted_ids = tf.squeeze(predicted_ids, axis=-1)

    # Convert from token ids to characters
    predicted_chars = self.chars_from_ids(predicted_ids)

    # Return the characters and model state.
    return predicted_chars, states

one_step_model = OneStep(model, chars_from_ids, ids_from_chars)




In [44]:
one_step_model = OneStep(model, chars_from_ids, ids_from_chars)

In [45]:
start=time.time()
states=None
next_char=tf.constant(['ROMEO'])
result=[next_char]
states=None

for n in range(1000):
  next_char,states=one_step_model.generate_one_step(next_char,states=states)
  result.append(next_char)
result = tf.strings.join(result)
end = time.time()
print(result[0].numpy().decode('utf-8'), '\n\n' + '_'*80)
print('\nRun time:', end - start)


ROMEO:
The arm of lions are full of death.
Before I see the end of orph mine eyes aloud
Which now you speak us unhappy? whence 'tis won:
How may I call it: brother, there, be patient: I
House or death: pray she might loathed toward sometimes,
There rust his several peace thou offices' legs,
The noble houses of tears and treason'd Richmond,
So disvused with conscience and our hearts
Will use to swell as cast, and I have heard of sen
And keep the move of you, return'd,
Can make the restless pine of my present pain,
And let mild outrunness and this fash and stuff,
Of the first word with one give thee roy
To sea with an asurn to the end;
And to my rights, to determine of this place,
And this I cannot come there gentle Warwick,
Let him be sent for your lass end you better.
God pardon give no more behind that makes but sworn to the
subject, such as you, and many faith of honour breath.

EDWARD:
Now, sir, this must be proud to hide his losality.
I hear this knees, that we shall happy more res

## CUSTOM Training

In [46]:
tf.saved_model.save(one_step_model, 'one_step')
one_step_reloaded = tf.saved_model.load('one_step')



In [47]:
class CustomTraining(MyModel):

  @tf.function
  def train_on_step(self,inputs):
    inputs,labels=inputs
    with tf.GradientTape() as tape:
      predictions=self(inputs,training=True)
      loss=self.loss(predictions,labels)
    grad=tape.gradient(loss,self.trainable_varibales)
    self.optimizer.apply_gradients(zip(grad,self.trainable_variables))

    return {'loss':loss}


In [48]:
model = CustomTraining(
    vocab_size=len(ids_from_chars.get_vocabulary()),
    embedding_dim=embedding_dim,
    rnn_units=rnn_units)

model.compile(optimizer = tf.keras.optimizers.Adam(),
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True))

In [49]:
model.fit(dataset, epochs=1)



<keras.src.callbacks.History at 0x7f3fc0309ed0>

In [50]:
epochs=30

mean=tf.metrics.Mean()
for epoch in range(epochs):
  start=time.time()
  mean.reset_states()
  for (batch_n,(inp,target)) in enumerate(dataset):
    logs=model.train_step([inp,target])
    mean.update_state(logs['loss'])
    if batch_n % 50==0:
      template=f"Epoch {epoch+1} Batch {batch_n} Loss {logs['loss']:.4f}"
      print(template)

  if(epoch+1)%5==0:
    model.save_weights(checkpoint_prefix.format(epoch=epoch))

  print()
  print(f'Epoch {epoch+1} Loss: {mean.result().numpy():.4f}')
  print(f'Time taken for 1 epoch {time.time() - start:.2f} sec')
  print("_"*80)
model.save_weights(checkpoint_prefix.format(epoch=epoch))



Epoch 1 Batch 0 Loss 2.6966
Epoch 1 Batch 50 Loss 2.5645
Epoch 1 Batch 100 Loss 2.4590
Epoch 1 Batch 150 Loss 2.3708

Epoch 1 Loss: 2.4971
Time taken for 1 epoch 40.95 sec
________________________________________________________________________________
Epoch 2 Batch 0 Loss 2.3358
Epoch 2 Batch 50 Loss 2.2632
Epoch 2 Batch 100 Loss 2.2006
Epoch 2 Batch 150 Loss 2.1448

Epoch 2 Loss: 2.2218
Time taken for 1 epoch 26.20 sec
________________________________________________________________________________
Epoch 3 Batch 0 Loss 2.1229
Epoch 3 Batch 50 Loss 2.0741
Epoch 3 Batch 100 Loss 2.0308
Epoch 3 Batch 150 Loss 1.9922

Epoch 3 Loss: 2.0453
Time taken for 1 epoch 26.11 sec
________________________________________________________________________________
Epoch 4 Batch 0 Loss 1.9765
Epoch 4 Batch 50 Loss 1.9409
Epoch 4 Batch 100 Loss 1.9097
Epoch 4 Batch 150 Loss 1.8810

Epoch 4 Loss: 1.9199
Time taken for 1 epoch 40.95 sec
_____________________________________________________________________

In [51]:
one_step_model = OneStep(model, chars_from_ids, ids_from_chars)

In [52]:
start=time.time()
states=None
next_char=tf.constant(['ROMEO'])
result=[next_char]
states=None

for n in range(1000):
  next_char,states=one_step_model.generate_one_step(next_char,states=states)
  result.append(next_char)
result = tf.strings.join(result)
end = time.time()
print(result[0].numpy().decode('utf-8'), '\n\n' + '_'*80)
print('\nRun time:', end - start)

ROMEO:
The boldness in your weeding king, my mother
To slay thyself now! Still till Nor Ille
He cometh agreeds; bawd to reprehely me
Above a bloody mind. For well I wot.

GRUMIO:
And ye see there is no frightony than their good seconds
cold courtier as she is not univantled,
And every thin a dial of you then in France,
His youngest daughter, Shepherd's one;
I was awake her sighn; like a fish, how here,
I bud a lady man as woman
For my deserting, Romeo, thence on his treasure,
Nothing but so: and to mine honour beast!
Besides, his favourive with me in four clothes!
Or you have purgest neat, cry 'Who is oft! I have said,
His very man would faint a Jack and reason;
The love thou said, for much subbling than thou hast evident
All that which in their rodes manswer, hath some ill un
appleants back against the last with griefs,
Making anon with kinning secrect love.

KING HENRY VI

RIVERS:
Madam, his heart as much as in a lovelimy house:
It were remain a little helping head.

Second Murderer: