In [1]:
!wget --show-progress --continue -O /content/shakespeare.txt http://www.gutenberg.org/files/100/100-0.txt

--2024-06-08 11:33:29--  http://www.gutenberg.org/files/100/100-0.txt
Resolving www.gutenberg.org (www.gutenberg.org)... 152.19.134.47, 2610:28:3090:3000:0:bad:cafe:47
Connecting to www.gutenberg.org (www.gutenberg.org)|152.19.134.47|:80... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://www.gutenberg.org/files/100/100-0.txt [following]
--2024-06-08 11:33:29--  https://www.gutenberg.org/files/100/100-0.txt
Connecting to www.gutenberg.org (www.gutenberg.org)|152.19.134.47|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 5618815 (5.4M) [text/plain]
Saving to: ‘/content/shakespeare.txt’


2024-06-08 11:33:29 (17.3 MB/s) - ‘/content/shakespeare.txt’ saved [5618815/5618815]



In [2]:
!head -n5 /content/shakespeare.txt
!echo "..."
!shuf -n5 /content/shakespeare.txt

*** START OF THE PROJECT GUTENBERG EBOOK THE COMPLETE WORKS OF WILLIAM
SHAKESPEARE ***
﻿The Complete Works of William Shakespeare

by William Shakespeare
...
I swear ’tis better to be much abus’d

KING JOHN.
It is spoke as a Christians ought to speak.
Now, Kate, I am a husband for your turn;


In [3]:
# Collecting data and setting methods for pre-processing

import numpy as np
import tensorflow as tf
import os

from packaging import version
if version.parse(tf.__version__)<version.parse('2.0'):
  raise Exception('This notebook is compatible with TensorFlow 2.0 or higer.')

SHAKESPEARE_TXT = '/content/shakespeare.txt'

def transform(txt):
  return np.asarray([ord(c) for c in txt if ord(c) < 255], dtype =np.int32)

def input_fn(seq_len=100, batch_size=1024):
  """Retrun a dataset of source and target sequences for training."""
  with tf.io.gfile.GFile(SHAKESPEARE_TXT,'r') as f:
    txt = f.read()
  source = tf.constant(transform(txt), dtype=tf.int32)

  ds = tf.data.Dataset.from_tensor_slices(source).batch(seq_len+1,drop_remainder =True)

  def split_input_target(chunk):
    input_text = chunk[:-1]
    target_text = chunk[1:]
    return input_text, target_text

  BUFFER_SIZE = 10000

  ds = ds.map(split_input_target).shuffle(BUFFER_SIZE).batch(batch_size, drop_remainder = True)

  return ds.repeat()

In [12]:
from tensorflow.keras.layers import BatchNormalization, Dropout, Attention
from tensorflow.keras.regularizers import l2

EMBEDDING_DIM = 512
DROPOUT_RATE = 0.3
L2 = 0.00

def gru_model(seq_len=100, batch_size=None, stateful=True):
    source = tf.keras.Input(name='seed', shape=(seq_len,), batch_size=batch_size, dtype=tf.int32)

    embedding = tf.keras.layers.Embedding(input_dim=256, output_dim=EMBEDDING_DIM)(source)
    embedding = BatchNormalization()(embedding)

    gru = embedding
    for i in range(5):  # Using fewer layers to start with
        gru = tf.keras.layers.GRU(
            EMBEDDING_DIM, stateful=stateful, return_sequences=True,
            kernel_regularizer=l2(L2)
        )(gru)
        gru = BatchNormalization()(gru)
        gru = Dropout(DROPOUT_RATE)(gru)

    # Adding an attention layer
    attention = Attention()([gru, gru])
    combined = tf.keras.layers.Concatenate()([gru, attention])

    predicted_char = tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(256, activation='softmax'))(combined)
    return tf.keras.Model(inputs=[source], outputs=[predicted_char])


In [13]:
# training the model

# BATCH_SIZE = 512
# SEQ_LEN = 100

from tensorflow.keras.callbacks import EarlyStopping

EPOCHS = 50
LEARNING_RATE = 0.01

# Set seed for reproducibility
tf.random.set_seed(42)

# Define the Reduce learning rate on plateau to make an adapting learning rate to escape the local minimum
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
    monitor = 'loss',
    factor = 0.5,
    patience=2,
    min_lr=0.0001,
    verbose=1
)
early_stopping = EarlyStopping(monitor='loss',patience=3, restore_best_weights=True)


try:
  print("Tensorflow version " + tf.__version__)

  try:
    tf.keras.backend.clear_session()
    # try this block if being able to connect to TPU v2
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()  # TPU detection
    print(f'Running on a TPU w/{tpu.num_accelerators()["TPU"]} cores')

    # Check if the TPU system has already been initialized
    if not tf.config.list_logical_devices('TPU'):
      tf.config.experimental_connect_to_cluster(tpu)
      tf.tpu.experimental.initialize_tpu_system(tpu)
    else:
        print("TPU system has already been initialized.")
    tpu_strategy = tf.distribute.TPUStrategy(tpu)

    with tpu_strategy.scope():
      training_model = gru_model(seq_len=100, stateful=False)
      training_model.compile(optimizer=tf.keras.optimizers.RMSprop(learning_rate=LEARNING_RATE),
                             loss='sparse_categorical_crossentropy',
                             metrics=['sparse_categorical_accuracy'])
      training_model.fit(
          input_fn(),
          steps_per_epoch=100,
          validation_data=input_fn(),
          validation_steps=10,
          epochs=EPOCHS,
          callbacks=[early_stopping, reduce_lr]
      )
      training_model.save_weights('/tmp/bard.h5', overwrite=True)
  except ValueError:
    # raise BaseException('ERROR: Not connected to a TPU runtime; please see the previous cell in this notebook for instructions!')
    print("ERROR: Not connected to a TPU runtime; trying deprecated TPU connection...")
    # if couldn't connect to TPU v2, connect to TPU (deprecated)



    tf.keras.backend.clear_session()
    resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu='grpc://' + os.environ['COLAB_TPU_ADDR'])
    tf.config.experimental_connect_to_cluster(resolver)
    tf.tpu.experimental.initialize_tpu_system(resolver)
    print('All devices', tf.config.list_logical_devices('TPU'))

     # strategy = tf.distribute.TPUStrategy(resolver)
    strategy = tf.distribute.experimental.TPUStrategy(resolver)

    with strategy.scope():
      training_model = gru_model(seq_len=100, stateful=False)
      training_model.compile(optimizer=tf.keras.optimizers.RMSprop(learning_rate=LEARNING_RATE),
                            loss='sparse_categorical_crossentropy',
                            metrics=['sparse_categorical_accuracy'])
      history = training_model.fit(
          input_fn(),
          steps_per_epoch=100,
          validation_data=input_fn(),
          validation_steps=10,
          epochs=EPOCHS,
          callbacks=[early_stopping, reduce_lr]
      )
      training_model.save_weights('/tmp/bard.h5', overwrite=True)

# Connecting to CPU/GPU if couldn't connnect to either TPU
except Exception as e:
  print(f"TPU connection failed with error: {e}, falling back to CPU/GPU")
  tf.keras.backend.clear_session()
  training_model = gru_model(seq_len=100, stateful=False)
  training_model.compile(optimizer=tf.keras.optimizers.RMSprop(learning_rate=LEARNING_RATE),
                         loss='sparse_categorical_crossentropy',
                         metrics=['sparse_categorical_accuracy'])
  training_model.fit(
          input_fn(),
          steps_per_epoch=100,
          validation_data=input_fn(),
          validation_steps=10,
          epochs=EPOCHS,
          callbacks=[early_stopping, reduce_lr]
      )
  training_model.save_weights('/tmp/bard.h5', overwrite=True)

Tensorflow version 2.15.0
Running on a TPU w/8 cores
TPU system has already been initialized.
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [14]:
# Predicting values (generating text)

BATCH_SIZE = 5
PREDICT_LEN = 250

prediction_model = gru_model(seq_len=1, batch_size=BATCH_SIZE, stateful=True)
prediction_model.load_weights('/tmp/bard.h5')
# print(prediction_model.shape)

seed_txt = 'Looks it not like the king?  Verily, we must go! '
seed = transform(seed_txt)
seed = np.repeat(np.expand_dims(seed,0),BATCH_SIZE, axis=0)

prediction_model.reset_states()
for i in range(len(seed_txt)-1):
  prediction_model.predict(seed[:,i:i+1], verbose=0)

predictions = [seed[:,-1:]]
for i in range(PREDICT_LEN):
  last_word = predictions[-1]
  last_word = np.array(last_word).reshape((-1,1))
  # print(last_word.shape)
  next_probits = prediction_model.predict(last_word, verbose=0)[:, 0, :]

  next_idx = [
      np.random.choice(256, p=next_probits[i])
      for i in range(BATCH_SIZE)
  ]
  predictions.append(np.asarray(next_idx,dtype=np.int32))

for i in range (BATCH_SIZE):
  print('PREDICTION %d\n\n' % i)
  p = [predictions[j][i] for j in range(PREDICT_LEN)]
  # generated = ''.join([chr(c) for c in p])
  generated = ''.join([chr(int(c)) for c in p])
  print(generated)
  # print()
  assert len(generated) == PREDICT_LEN, 'Generated text too short.'

PREDICTION 0


 Richard in ùastand.
Their _ad for smild chasted not       606

KING ÑIIBAN.
As have                      942
¯arge een quality by Xenwick,
What he oments name but, sir
INGER III. Norsoner falseing ¢alice 9SHON IISHAM.
~ow enter OctaviisBan
PREDICTION 1


 Ill  to
 INGHAM AND IV ANDRANW OF SYRACUSE.
The Æhe lay we do me,
And of one ´ON HENRY.
Now enter them      Somerset like !ingd well they Áen.
Shall quarrel this by Hesmen Caesars ales üIBIX  BLEUT
HAMLET AND FARTHER.
O, and a ambition my ma
PREDICTION 2


 Look and Eather guard æaly free prepare
The ing }erberance Claudio in  princess by the issue to dead Netrack
Let us entistance and I ïPHESUS FRYER.
 stand so hardRhinked before have us any noble 9man your ¢ing:
In be part engable be(
Makes th
PREDICTION 3


 you sand of 2enà
This once had not Ill dimn did
   And he ûhat the ¢ales Âd now Temptì

SECOND OF SYRACUSE.
To infect notwith of favours arcellence.
Have every oljerly have pay n

  generated = ''.join([chr(int(c)) for c in p])
