In [1]:
# Download the Project Gutenberg "Complete Works of William Shakespeare" corpus.
# Request the HTTPS URL directly: the plain-HTTP address only answers with a
# redirect to it, so going through HTTP adds an unencrypted hop for no benefit.
!wget --show-progress --continue -O /content/shakespeare.txt https://www.gutenberg.org/files/100/100-0.txt

--2024-05-23 15:52:21--  http://www.gutenberg.org/files/100/100-0.txt
Resolving www.gutenberg.org (www.gutenberg.org)... 152.19.134.47, 2610:28:3090:3000:0:bad:cafe:47
Connecting to www.gutenberg.org (www.gutenberg.org)|152.19.134.47|:80... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://www.gutenberg.org/files/100/100-0.txt [following]
--2024-05-23 15:52:21--  https://www.gutenberg.org/files/100/100-0.txt
Connecting to www.gutenberg.org (www.gutenberg.org)|152.19.134.47|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 5618815 (5.4M) [text/plain]
Saving to: ‘/content/shakespeare.txt’


2024-05-23 15:52:22 (16.5 MB/s) - ‘/content/shakespeare.txt’ saved [5618815/5618815]



In [2]:
# Sanity-check the download: print the first five lines of the corpus, a
# separator, and then five lines picked at random by `shuf`.
!head -n5 /content/shakespeare.txt
!echo "..."
!shuf -n5 /content/shakespeare.txt

﻿*** START OF THE PROJECT GUTENBERG EBOOK THE COMPLETE WORKS OF WILLIAM
SHAKESPEARE ***
﻿The Complete Works of William Shakespeare

by William Shakespeare
...
I’m sorry they offend you, heartily;
sound, but not in government.

My brother and thy uncle, call’d Antonio—
Sir, I have not you by the hand.


In [3]:
# Collecting data and setting methods for pre-processing

import numpy as np
import tensorflow as tf
import os

from packaging import version

# Fail fast on TensorFlow 1.x instead of hitting an obscure error further down.
if version.parse(tf.__version__) < version.parse('2.0'):
  raise RuntimeError('This notebook is compatible with TensorFlow 2.0 or higher.')

# Path of the plain-text training corpus read by input_fn().
SHAKESPEARE_TXT = '/content/shakespeare.txt'

def transform(txt, vocab_size=256):
  """Encode text as an int32 array of Unicode code points.

  Characters whose code point does not fit in the vocabulary are dropped, so
  the result may be shorter than `txt`.

  Args:
    txt: The string to encode.
    vocab_size: Number of distinct ids available; only code points in
      [0, vocab_size) are kept. The default of 256 keeps every code point
      from 0 to 255 inclusive.

  Returns:
    A 1-D np.int32 array with one entry per kept character, in input order.
  """
  code_points = (ord(c) for c in txt)
  # Strictly-less-than vocab_size: the highest valid id is vocab_size - 1.
  return np.asarray([cp for cp in code_points if cp < vocab_size], dtype=np.int32)

def input_fn(seq_len=100, batch_size=1024):
  """Build the endlessly repeating training dataset.

  The corpus at SHAKESPEARE_TXT is encoded with transform() and cut into
  consecutive windows of seq_len + 1 characters. Each window yields an
  (input, target) pair in which the target is the input shifted by one
  character. Pairs are shuffled and grouped into batches.

  Args:
    seq_len: Number of characters in each input (and target) sequence.
    batch_size: Number of sequence pairs per batch; incomplete batches are
      dropped.

  Returns:
    A repeating tf.data.Dataset of (input, target) int32 batches.
  """
  shuffle_buffer = 10000

  def to_input_and_target(window):
    # Inputs drop the last character, targets drop the first.
    return window[:-1], window[1:]

  with tf.io.gfile.GFile(SHAKESPEARE_TXT, 'r') as corpus_file:
    corpus = corpus_file.read()
  encoded = tf.constant(transform(corpus), dtype=tf.int32)

  # One extra character per window so input and target both have seq_len items.
  windows = tf.data.Dataset.from_tensor_slices(encoded).batch(
      seq_len + 1, drop_remainder=True)
  pairs = windows.map(to_input_and_target)
  batches = pairs.shuffle(shuffle_buffer).batch(batch_size, drop_remainder=True)

  return batches.repeat()

In [9]:
# Building the model using LSTM to retain coherency in longer sentences

from tensorflow.keras.layers import BatchNormalization, Dropout
from tensorflow.keras.regularizers import l2

# Width of the character embedding; also used as the unit count of every LSTM.
EMBEDDING_DIM = 512
# Dropout rate applied after each LSTM + batch-normalisation stage.
DROPOUT_RATE = 0.2
# L2 penalty factor for the LSTM kernels; 0.0 means no penalty is added.
L2 = 0.0

def lstm_model(seq_len=100,batch_size=None, stateful=True, num_layers=15):
  """Build the stacked-LSTM next-character model.

  Architecture: an embedding over 256 character ids, followed by `num_layers`
  stages of LSTM -> BatchNormalization -> Dropout, followed by a per-timestep
  softmax over the 256 character ids.

  Every LSTM is unidirectional on purpose. The model is trained to predict the
  character at position t+1 from positions <= t; a backward-running LSTM would
  read the characters after t (the targets themselves) during training, and
  that future context does not exist when text is generated one character at
  a time with seq_len=1.

  Args:
    seq_len: Number of characters per input sequence.
    batch_size: Fixed batch size of the input, or None to leave it unspecified.
    stateful: Passed to every LSTM layer; when True the layers keep their
      state between successive batches.
    num_layers: Number of LSTM stages in the stack. Training and prediction
      models must use the same value for saved weights to be loadable.

  Returns:
    A tf.keras.Model mapping int32 ids of shape (batch, seq_len) to character
    probabilities of shape (batch, seq_len, 256).
  """
  source = tf.keras.Input(name='seed', shape=(seq_len,), batch_size=batch_size, dtype=tf.int32)

  hidden = tf.keras.layers.Embedding(input_dim=256, output_dim=EMBEDDING_DIM)(source)

  for _ in range(num_layers):
    hidden = tf.keras.layers.LSTM(
        EMBEDDING_DIM,
        stateful=stateful,
        return_sequences=True,
        kernel_regularizer=l2(L2))(hidden)
    hidden = BatchNormalization()(hidden)
    hidden = Dropout(DROPOUT_RATE)(hidden)

  predicted_char = tf.keras.layers.TimeDistributed(
      tf.keras.layers.Dense(256, activation='softmax'))(hidden)

  return tf.keras.Model(inputs=[source], outputs=[predicted_char])

In [10]:
# Train the model on a TPU when one is reachable, otherwise on CPU/GPU.

from tensorflow.keras.callbacks import EarlyStopping

EPOCHS = 50
LEARNING_RATE = 0.01
STEPS_PER_EPOCH = 100
TRAIN_SEQ_LEN = 100
WEIGHTS_PATH = '/tmp/bard.h5'

# Set seed for reproducibility
tf.random.set_seed(42)

# Halve the learning rate whenever the training loss stops improving, so the
# optimiser can settle instead of bouncing around a minimum.
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='loss',
    factor=0.5,
    patience=2,
    min_lr=0.0001,
    verbose=1
)
early_stopping = EarlyStopping(monitor='loss', patience=3, restore_best_weights=True)


def _select_strategy():
  """Return the distribution strategy to train under.

  Tries, in order: an auto-detected TPU, the legacy Colab TPU address taken
  from the COLAB_TPU_ADDR environment variable, and finally the default
  (CPU/GPU) strategy. Only connection set-up is guarded here; errors raised
  later during training are not swallowed.
  """
  try:
    try:
      tf.keras.backend.clear_session()
      tpu = tf.distribute.cluster_resolver.TPUClusterResolver()  # TPU detection
      print(f'Running on a TPU w/{tpu.num_accelerators()["TPU"]} cores')

      # Initialise the TPU system only once per runtime.
      if not tf.config.list_logical_devices('TPU'):
        tf.config.experimental_connect_to_cluster(tpu)
        tf.tpu.experimental.initialize_tpu_system(tpu)
      else:
        print("TPU system has already been initialized.")
      return tf.distribute.TPUStrategy(tpu)
    except ValueError:
      print("ERROR: Not connected to a TPU runtime; trying deprecated TPU connection...")
      tf.keras.backend.clear_session()
      resolver = tf.distribute.cluster_resolver.TPUClusterResolver(
          tpu='grpc://' + os.environ['COLAB_TPU_ADDR'])
      tf.config.experimental_connect_to_cluster(resolver)
      tf.tpu.experimental.initialize_tpu_system(resolver)
      print('All devices', tf.config.list_logical_devices('TPU'))
      return tf.distribute.experimental.TPUStrategy(resolver)
  except Exception as e:
    # Covers a missing COLAB_TPU_ADDR as well as any TPU connection failure.
    print(f"TPU connection failed with error: {e}, falling back to CPU/GPU")
    tf.keras.backend.clear_session()
    # Outside any scope this is the default (single-device) strategy.
    return tf.distribute.get_strategy()


def _train_and_save(strategy):
  """Build, compile and fit the model under `strategy`, then save its weights.

  Returns:
    A (model, history) tuple: the trained model and the object returned by fit().
  """
  with strategy.scope():
    model = lstm_model(seq_len=TRAIN_SEQ_LEN, stateful=False)
    model.compile(optimizer=tf.keras.optimizers.RMSprop(learning_rate=LEARNING_RATE),
                  loss='sparse_categorical_crossentropy',
                  metrics=['sparse_categorical_accuracy'])
    fit_history = model.fit(
        input_fn(seq_len=TRAIN_SEQ_LEN),
        steps_per_epoch=STEPS_PER_EPOCH,
        epochs=EPOCHS,
        callbacks=[early_stopping, reduce_lr]
    )
    model.save_weights(WEIGHTS_PATH, overwrite=True)
  return model, fit_history


print("Tensorflow version " + tf.__version__)
training_model, history = _train_and_save(_select_strategy())

Tensorflow version 2.12.0
Running on a TPU w/8 cores
TPU system has already been initialized.
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 15: ReduceLROnPlateau reducing learning rate to 0.004999999888241291.
Epoch 16/50


In [11]:
# Predicting values (generating text)

BATCH_SIZE = 5
PREDICT_LEN = 250

# Same network as in training, rebuilt to consume one character per call;
# stateful=True makes the LSTMs carry their state from one call to the next.
prediction_model = lstm_model(seq_len=1, batch_size=BATCH_SIZE, stateful=True)
prediction_model.load_weights('/tmp/bard.h5')

seed_txt = 'Looks it not like the king?  Verily, we must go! '
seed = transform(seed_txt)
if seed.size == 0:
  raise ValueError('Seed text contains no characters the model can encode.')
# Feed the same seed to every row of the batch.
seed = np.repeat(np.expand_dims(seed, 0), BATCH_SIZE, axis=0)

# Prime the model state with every seed character except the last one.
# Iterate over the encoded length: transform() may have dropped characters,
# so len(seed_txt) can be larger than the encoded seed.
prediction_model.reset_states()
for pos in range(seed.shape[1] - 1):
  prediction_model.predict(seed[:, pos:pos + 1], verbose=0)

# The last seed character is the first input; only sampled characters are
# collected, so the output holds exactly PREDICT_LEN generated characters.
last_chars = seed[:, -1:]
predictions = []
for _ in range(PREDICT_LEN):
  next_probs = prediction_model.predict(last_chars, verbose=0)[:, 0, :]

  # Sample each row's next character from its predicted distribution.
  next_idx = np.asarray(
      [np.random.choice(256, p=next_probs[row]) for row in range(BATCH_SIZE)],
      dtype=np.int32)
  predictions.append(next_idx)
  last_chars = next_idx.reshape((-1, 1))

for i in range(BATCH_SIZE):
  print('PREDICTION %d\n\n' % i)
  generated = ''.join(chr(int(step[i])) for step in predictions)
  print(generated)
  print()
  assert len(generated) == PREDICT_LEN, 'Generated text too short.'

PREDICTION 0


 uouoe u yo, ,ouot y i Ioncd u Io;cn Io.oIlI .ou u uo?oIo?ftcpou ?ouwIosohbh twIbI Y Ion T ucn . uoCouoI yoao;fI I uw?ouo? .o?wuos i .o?oo: iouohok uoao?o.oIcIo; IodoufIoIue.rateinnntngdea?heaegrhae.eveatg,eised-rats.irEnngtsmedegteidngetnnvegenRep

PREDICTION 1


 t n uouosoyl!oaou .oa n df; y soe I uoulu : IcnouoIo,f?o; uoSluo;cuot uououoyfsb,oIbIo,oIog oyoIoto, J IotoI do; IonoI I .buoIoI ;lI I ! ;oioIou ?cnoIouoIoIl? IoIo? ; .o
cI Çouot Ioto;o; ?buoIoaouo. . to.ou !o;oposo?osoIw?ouo?oYodoyo, uwdoIotctw? u

PREDICTION 2


 hotoeou CoIou r , I s ,ou Io;o;cIo?o,o.ono?o?oIo,bIoI tcsceou uo? ,lIly Iod yo?oToI ,lpo; ? I I uoy nl,o;o? u Io: Iouo?ouo!lu soI ybioI ? , uÍenintenisame-ngadeptenn-eesasriexe-i.gnimseshL-dnetnnggsetnsiindisrnd-RsueL--RdnnN-sA.anÀs-g-pn_iismitgnn

PREDICTION 3


 tos nos Ioy tonotokou go!on e uoy YÍnrgshneUaredeasetiesidnntme.s,aht.-ne.eiesanaeeigdnrthisehd-siptddmesAddnted.n__-ngmas.-ne8sniisdr.e.nRsMAedUvtdnsm-Rti-EsgsEsagren¶Adde.m.gnMAsUZ