In [1]:
!wget --show-progress --continue -O /content/shakespeare.txt http://www.gutenberg.org/files/100/100-0.txt

--2024-05-21 12:16:20--  http://www.gutenberg.org/files/100/100-0.txt
Resolving www.gutenberg.org (www.gutenberg.org)... 152.19.134.47, 2610:28:3090:3000:0:bad:cafe:47
Connecting to www.gutenberg.org (www.gutenberg.org)|152.19.134.47|:80... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://www.gutenberg.org/files/100/100-0.txt [following]
--2024-05-21 12:16:21--  https://www.gutenberg.org/files/100/100-0.txt
Connecting to www.gutenberg.org (www.gutenberg.org)|152.19.134.47|:443... connected.
HTTP request sent, awaiting response... 416 Requested Range Not Satisfiable

    The file is already fully retrieved; nothing to do.



In [2]:
!head -n5 /content/shakespeare.txt
!echo "..."
!shuf -n5 /content/shakespeare.txt

﻿*** START OF THE PROJECT GUTENBERG EBOOK THE COMPLETE WORKS OF WILLIAM
SHAKESPEARE ***
﻿The Complete Works of William Shakespeare

by William Shakespeare
...
My heart prays for him, though my tongue do curse.
Remembering how I love thy company.

breath?
Well, sir, we must have you find your legs.


In [3]:
# Collecting data and setting methods for pre-processing

import numpy as np
import tensorflow as tf
import os

from packaging import version
if version.parse(tf.__version__)<version.parse('2.0'):
  raise Exception('This notebook is compatible with TensorFlow 2.0 or higer.')

SHAKESPEARE_TXT = '/content/shakespeare.txt'

def transform(txt):
  return np.asarray([ord(c) for c in txt if ord(c) < 255], dtype =np.int32)

def input_fn(seq_len=100, batch_size=1024):
  """Retrun a dataset of source and target sequences for training."""
  with tf.io.gfile.GFile(SHAKESPEARE_TXT,'r') as f:
    txt = f.read()
  source = tf.constant(transform(txt), dtype=tf.int32)

  ds = tf.data.Dataset.from_tensor_slices(source).batch(seq_len+1,drop_remainder =True)

  def split_input_target(chunk):
    input_text = chunk[:-1]
    target_text = chunk[1:]
    return input_text, target_text

  BUFFER_SIZE = 10000

  ds = ds.map(split_input_target).shuffle(BUFFER_SIZE).batch(batch_size, drop_remainder = True)

  return ds.repeat()

In [4]:
# Building the model using LSTM to retain coherency in longer sentences

from tensorflow.keras.layers import BatchNormalization, Dropout

EMBEDDING_DIM = 512
DROPOUT_RATE=0.3

def lstm_model(seq_len=100,batch_size=None, stateful=True):
  source = tf.keras.Input(name ='seed', shape=(seq_len,),batch_size=batch_size, dtype=tf.int32)

  embedding = tf.keras.layers.Embedding(input_dim=256, output_dim=EMBEDDING_DIM)(source)
  lstm_1 = (tf.keras.layers.LSTM(EMBEDDING_DIM, stateful=stateful, return_sequences=True))(embedding)
  lstm_1 = BatchNormalization()(lstm_1)
  lstm_1 = Dropout(DROPOUT_RATE)(lstm_1)

  lstm_2 = (tf.keras.layers.LSTM(EMBEDDING_DIM, stateful=stateful, return_sequences=True))(lstm_1)
  lstm_2 = BatchNormalization()(lstm_2)
  lstm_2 = Dropout(DROPOUT_RATE)(lstm_2)

  lstm_3 = (tf.keras.layers.LSTM(EMBEDDING_DIM, stateful=stateful, return_sequences=True))(lstm_2)
  lstm_3 = BatchNormalization()(lstm_3)
  lstm_3 = Dropout(DROPOUT_RATE)(lstm_3)

  lstm_4 = (tf.keras.layers.LSTM(EMBEDDING_DIM, stateful=stateful, return_sequences=True))(lstm_3)
  lstm_4 = BatchNormalization()(lstm_4)
  lstm_4 = Dropout(DROPOUT_RATE)(lstm_4)

  lstm_5 = (tf.keras.layers.LSTM(EMBEDDING_DIM, stateful=stateful, return_sequences=True))(lstm_4)
  lstm_5 = BatchNormalization()(lstm_5)
  lstm_5 = Dropout(DROPOUT_RATE)(lstm_5)

  lstm_6 = (tf.keras.layers.LSTM(EMBEDDING_DIM, stateful=stateful, return_sequences=True))(lstm_5)
  lstm_6 = BatchNormalization()(lstm_6)
  lstm_6 = Dropout(DROPOUT_RATE)(lstm_6)

  lstm_7 = (tf.keras.layers.LSTM(EMBEDDING_DIM, stateful=stateful, return_sequences=True))(lstm_6)
  lstm_7 = BatchNormalization()(lstm_7)
  lstm_7 = Dropout(DROPOUT_RATE)(lstm_7)

  '''commented out due to power requirements. It uses too much computational power if not using TPU, otherwise, works well.'''
  # lstm_8 = (tf.keras.layers.LSTM(EMBEDDING_DIM, stateful=stateful, return_sequences=True))(lstm_7)
  # lstm_8 = BatchNormalization()(lstm_8)
  # lstm_8 = Dropout(DROPOUT_RATE)(lstm_8)

  # lstm_9 = (tf.keras.layers.LSTM(EMBEDDING_DIM, stateful=stateful, return_sequences=True))(lstm_8)
  # lstm_9 = BatchNormalization()(lstm_9)
  # lstm_9 = Dropout(DROPOUT_RATE)(lstm_9)

  # lstm_10 = (tf.keras.layers.LSTM(EMBEDDING_DIM, stateful=stateful, return_sequences=True))(lstm_9)
  # lstm_10 = BatchNormalization()(lstm_10)
  # lstm_10 = Dropout(DROPOUT_RATE)(lstm_10)

  predicted_char = tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(256,activation='softmax'))(lstm_7)

  return tf.keras.Model(inputs=[source], outputs=[predicted_char])

In [5]:
# training the model

from tensorflow.keras.callbacks import EarlyStopping

early_stopping = EarlyStopping(monitor='loss',patience=3, restore_best_weights=True)

try:
  tf.keras.backend.clear_session()
  resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu='grpc://' + os.environ['COLAB_TPU_ADDR'])
  tf.config.experimental_conect_to_cluster(resolver)

  tf.tpu.experimental.initialize_tpu_system(resolver)
  print('All devices', tf.config.list_logical_devices('TPU'))

  strategy = tf.distribute.experimental.TPUStrategy(resolver)
  with strategy.scope():
    training_model = lstm_model(seq_len=100, stateful=True)
    training_model.compile(optimizer=tf.keras.optimizers.RMSprop(learning_rate=0.01),
                          loss='sparse_categorical_crossentropy',
                          metrics=['sparse_categorical_accuracy'])
    training_model.fit(
        input_fn(),
        steps_per_epoch=100,
        epochs=10
    )
    training_model.save_weights('tmp/bard.h5', overwrite=True)
except:
  training_model = lstm_model(seq_len=100, stateful=False)
  training_model.compile(optimizer=tf.keras.optimizers.RMSprop(learning_rate=0.01),
                         loss='sparse_categorical_crossentropy',
                         metrics=['sparse_categorical_accuracy'])
  training_model.fit(
      input_fn(),
      steps_per_epoch=100,
      epochs=10,
      callbacks=[early_stopping]
  )
  training_model.save_weights('/tmp/bard.h5', overwrite=True)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [6]:
# Predicting values (generating text)

BATCH_SIZE = 5
PREDICT_LEN = 250

prediction_model = lstm_model(seq_len=1, batch_size=BATCH_SIZE, stateful=True)
prediction_model.load_weights('/tmp/bard.h5')
# print(prediction_model.shape)

seed_txt = 'Looks it not like the king?  Verily, we must go! '
seed = transform(seed_txt)
seed = np.repeat(np.expand_dims(seed,0),BATCH_SIZE, axis=0)

prediction_model.reset_states()
for i in range(len(seed_txt)-1):
  prediction_model.predict(seed[:,i:i+1], verbose=0)

predictions = [seed[:,-1:]]
for i in range(PREDICT_LEN):
  last_word = predictions[-1]
  last_word = np.array(last_word).reshape((-1,1))
  # print(last_word.shape)
  next_probits = prediction_model.predict(last_word, verbose=0)[:, 0, :]

  next_idx = [
      np.random.choice(256, p=next_probits[i])
      for i in range(BATCH_SIZE)
  ]
  predictions.append(np.asarray(next_idx,dtype=np.int32))

for i in range (BATCH_SIZE):
  print('PREDICTION %d\n\n' % i)
  p = [predictions[j][i] for j in range(PREDICT_LEN)]
  # generated = ''.join([chr(c) for c in p])
  generated = ''.join([chr(int(c)) for c in p])
  print(generated)
  print()
  assert len(generated) == PREDICT_LEN, 'Generated text too short.'

PREDICTION 0


 Gress shere, batter for not?
for shy, hear firt she tell her pill fall answee wervary hreatfer-take saftor dear. I this mut nimgenss respare him graise.
Camrece her time ear wombon mut deed on the cheece hear sorth house dobe hus the desench to fa

PREDICTION 1


 And have, but be clet Caunst doth not come is you,
The breets un faite anotheres did. O heart perker?

DALVOLIA.
Come in Nevery seem, a nutt troke three!
I she not grom into his the depar not vort his deps, grace to dist him
Ajen sliph the sin

PREDICTION 2


 That to prodc?

PURTIES.
I the arrow the know.

mOLENNA.
Fire may you ro sith Prove and the service most year woflocion.

 A_LIALO.
Nes mreet her servan god Mesporm, no; bo wo. the ever: and I am was so has is sith him me be
thy demtire an

PREDICTION 3


 Loint mut the head adm?
The honours caur not muscoul
Ne you dusines, chall not not less, shile thy are I haster
art afpq faes is hath the greath that Dosn cart sork, her tume sonsp

  generated = ''.join([chr(int(c)) for c in p])
