In [1]:
from keras.preprocessing import sequence
import keras
import tensorflow as tf
import os
import numpy as np

In [2]:
# Download the Shakespeare corpus and keep the local path it was saved to.
path_to_file = tf.keras.utils.get_file(
    fname='shakespeare.txt',
    origin='https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt',
)

Downloading data from https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt
[1m1115394/1115394[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 0us/step


In [3]:
# Read the corpus as raw bytes and decode it explicitly as UTF-8.
# The context manager closes the file handle as soon as the read finishes
# instead of leaving it open until garbage collection.
with open(path_to_file, 'rb') as corpus_file:
    text = corpus_file.read().decode(encoding='utf-8')
print('Length of text: {} characters'.format(len(text)))

Length of text: 1115394 characters


In [4]:
# Preview the first 250 characters of the corpus.
print(text[:250])

First Citizen:
Before we proceed any further, hear me speak.

All:
Speak, speak.

First Citizen:
You are all resolved rather to die than to famish?

All:
Resolved. resolved.

First Citizen:
First, you know Caius Marcius is chief enemy to the people.



In [5]:
# Character-level vocabulary: every distinct character in the corpus, sorted.
vocab = sorted(set(text))
# Lookup tables: character -> integer id, and integer id -> character.
char2idx = {ch: idx for idx, ch in enumerate(vocab)}
idx2char = np.array(vocab)


def text_to_int(text):
  """Encode a string as a NumPy array of vocabulary ids looked up in `char2idx`."""
  ids = [char2idx[ch] for ch in text]
  return np.array(ids)


# The whole corpus as integer ids.
text_as_int = text_to_int(text)

In [6]:
# Show the first 13 characters of the corpus next to their integer encoding.
print("Text:", text[:13])
print("Encoded:", text_to_int(text[:13]))

Text: First Citizen
Encoded: [18 47 56 57 58  1 15 47 58 47 64 43 52]


In [7]:
def int_to_text(ints):
  """Decode vocabulary ids back into a string.

  Args:
    ints: integer ids usable as an index into `idx2char`. If the object
      exposes a `.numpy()` method (e.g. elements yielded by a tf.data
      dataset), it is converted with that method first.

  Returns:
    The characters selected from `idx2char`, joined into one `str`.
  """
  try:
    ints = ints.numpy()
  except AttributeError:
    # No `.numpy()` method: index `idx2char` with the value as given.
    # Only AttributeError is ignored so that real failures are not hidden.
    pass
  return ''.join(idx2char[ints])

# Round-trip check: decode the first 13 encoded characters.
print(int_to_text(text_as_int[:13]))

First Citizen


In [8]:
# Sequence length used when chunking the corpus (chunks hold seq_length + 1 ids).
seq_length = 100
# Number of whole (seq_length + 1)-character chunks that fit in the corpus.
examples_per_epoch = len(text)//(seq_length+1)
# Dataset that yields the corpus one character id at a time.
char_dataset = tf.data.Dataset.from_tensor_slices(text_as_int)

In [9]:
# Group the character stream into chunks of seq_length + 1 ids; a shorter final chunk is dropped.
sequences = char_dataset.batch(seq_length+1, drop_remainder=True)
def split_input_target(chunk):
    """Split one chunk into an (input, target) pair shifted by one position.

    The input is the chunk without its last element; the target is the
    chunk without its first element.
    """
    return chunk[:-1], chunk[1:]

# Apply split_input_target to every chunk to obtain (input, target) pairs.
dataset = sequences.map(split_input_target)

In [10]:
# Print the first two (input, target) pairs decoded back to text.
for x, y in dataset.take(2):
  print("\n\nEXAMPLE\n")
  print("INPUT")
  print(int_to_text(x))
  print("\nOUTPUT")
  print(int_to_text(y))



EXAMPLE

INPUT
First Citizen:
Before we proceed any further, hear me speak.

All:
Speak, speak.

First Citizen:
You

OUTPUT
irst Citizen:
Before we proceed any further, hear me speak.

All:
Speak, speak.

First Citizen:
You 


EXAMPLE

INPUT
are all resolved rather to die than to famish?

All:
Resolved. resolved.

First Citizen:
First, you 

OUTPUT
re all resolved rather to die than to famish?

All:
Resolved. resolved.

First Citizen:
First, you k


In [11]:
BATCH_SIZE = 64  # sequences per training batch
VOCAB_SIZE = len(vocab)  # number of distinct characters in the corpus
EMBEDDING_DIM = 256  # passed to build_model as embedding_dim
RNN_UNITS = 1024  # passed to build_model as rnn_units
BUFFER_SIZE = 10000  # buffer size handed to Dataset.shuffle

# Shuffle the (input, target) pairs, then batch them; an incomplete final batch is dropped.
data = dataset.shuffle(BUFFER_SIZE).batch(BATCH_SIZE, drop_remainder=True)

In [12]:
def build_model(vocab_size, embedding_dim, rnn_units, batch_size):
  """Build the Embedding -> stateful LSTM -> Dense character model.

  Args:
    vocab_size: number of distinct characters; sizes both the embedding
      table and the output layer.
    embedding_dim: width of each character embedding.
    rnn_units: number of LSTM units.
    batch_size: fixed batch size the model will be called with. The LSTM
      is stateful, so the batch size is declared on the model input.
      Pass None to leave it undeclared until the first call.

  Returns:
    An uncompiled `tf.keras.Sequential` whose final Dense layer (no
    activation) emits `vocab_size` values for every time step.
  """
  layers = []
  if batch_size is not None:
    # `batch_size` used to be accepted but ignored. Declaring it on an
    # explicit Input builds the stateful LSTM for the requested batch size.
    layers.append(
        tf.keras.Input(shape=(None,), batch_size=batch_size, dtype='int32'))
  layers += [
    tf.keras.layers.Embedding(vocab_size, embedding_dim),
    tf.keras.layers.LSTM(rnn_units,
                        return_sequences=True,
                        stateful=True,
                        recurrent_initializer='glorot_uniform'),
    tf.keras.layers.Dense(vocab_size)
  ]
  return tf.keras.Sequential(layers)

# Instantiate the model used for training (BATCH_SIZE is passed as batch_size) and print its summary.
model = build_model(VOCAB_SIZE,EMBEDDING_DIM, RNN_UNITS, BATCH_SIZE)
model.summary()

In [13]:
# Run a single batch through the model (before any training) to check the output shape.
for input_example_batch, target_example_batch in data.take(1):
  example_batch_predictions = model(input_example_batch)
  print(example_batch_predictions.shape, "# (batch_size, sequence_length, vocab_size)")

(64, 100, 65) # (batch_size, sequence_length, vocab_size)


In [14]:
# Inspect the batch of predictions: its length is the number of sequences in the batch.
print(len(example_batch_predictions))
print(example_batch_predictions)

64
tf.Tensor(
[[[ 5.46729832e-04  7.41950935e-04 -5.43905701e-03 ...  3.00657796e-03
    1.57341175e-03  1.71764963e-03]
  [-5.86823607e-03  9.29657894e-04  3.66302498e-04 ...  5.33052627e-03
    2.95066333e-04 -4.90337657e-03]
  [ 2.17773672e-03  2.17570621e-03  4.27259738e-03 ...  3.46313836e-03
    2.92041013e-03 -4.99127479e-03]
  ...
  [ 3.44167277e-03 -7.69976899e-03  2.61976197e-03 ...  3.53078358e-03
   -1.07914628e-02  3.55802523e-03]
  [ 3.57957208e-04 -5.63974399e-03  4.45354171e-03 ...  6.41012099e-04
   -8.73361342e-03  2.99073989e-04]
  [-6.20317645e-03 -5.77627029e-03  8.76121782e-03 ...  4.48932359e-03
   -6.54788036e-03 -5.02902037e-03]]

 [[-1.91156229e-03 -4.38543595e-03 -1.49552524e-03 ...  2.51645059e-03
    8.28294782e-04  1.82810496e-03]
  [-6.94392109e-03 -1.40007259e-03 -2.89825164e-03 ...  3.41509841e-03
    2.65887217e-03  3.67767410e-03]
  [-9.67427157e-03 -5.01240883e-03  3.37761012e-04 ...  7.88868591e-03
    2.65447190e-03  6.88262330e-03]
  ...
  [-9.850

In [15]:
# Predictions for the first sequence in the batch: one row per time step.
pred = example_batch_predictions[0]
print(len(pred))
print(pred)

100
tf.Tensor(
[[ 0.00054673  0.00074195 -0.00543906 ...  0.00300658  0.00157341
   0.00171765]
 [-0.00586824  0.00092966  0.0003663  ...  0.00533053  0.00029507
  -0.00490338]
 [ 0.00217774  0.00217571  0.0042726  ...  0.00346314  0.00292041
  -0.00499127]
 ...
 [ 0.00344167 -0.00769977  0.00261976 ...  0.00353078 -0.01079146
   0.00355803]
 [ 0.00035796 -0.00563974  0.00445354 ...  0.00064101 -0.00873361
   0.00029907]
 [-0.00620318 -0.00577627  0.00876122 ...  0.00448932 -0.00654788
  -0.00502902]], shape=(100, 65), dtype=float32)


In [16]:
# Prediction at the first time step: one value per vocabulary entry.
time_pred = pred[0]
print(len(time_pred))
print(time_pred)

65
tf.Tensor(
[ 5.4672983e-04  7.4195093e-04 -5.4390570e-03 -3.6077448e-03
  4.7703357e-05 -3.0778986e-03  1.2298357e-03  8.5867178e-03
 -2.8979824e-05 -2.0733692e-03  2.2469151e-03  2.7372208e-03
 -4.5727440e-03 -6.1822166e-03  5.6347335e-03 -6.4780042e-03
 -1.4981368e-03 -4.2473315e-03 -6.8715340e-03  7.4205577e-04
  8.0974615e-04 -7.0028906e-03 -6.6542812e-04 -8.1322120e-05
 -1.4590849e-03  3.0104632e-03 -5.8698014e-04  5.9225801e-03
 -6.3152015e-03 -2.9143582e-03 -7.6319629e-05 -2.2738036e-03
 -1.8678951e-03 -1.0015913e-03  1.3279354e-03 -1.4485371e-03
  6.7552151e-03 -1.4432193e-03 -3.3720999e-03 -5.9255832e-03
 -5.4084943e-03  7.6772799e-03 -9.3590264e-04  7.3791138e-04
 -2.9151833e-03 -9.6841430e-04 -3.4587011e-03 -1.0307385e-02
 -7.8002216e-05 -1.9191904e-03  1.2375545e-03  4.6493500e-04
  1.7275227e-03  1.1511173e-03 -3.4073191e-03 -1.2087051e-03
  2.8767814e-03  1.1176458e-02  4.7900300e-03 -2.3786162e-03
  2.7159341e-03 -5.1799137e-04  3.0065780e-03  1.5734117e-03
  1.717649

In [17]:
# Draw one sample per row of `pred` (each row is treated as a vector of logits).
sampled_indices = tf.random.categorical(pred, num_samples=1)

# Flatten the samples into a 1-D array of ids, then decode them to characters.
sampled_indices = np.reshape(sampled_indices, (1, -1))[0]
predicted_chars = int_to_text(sampled_indices)

# Display what the model predicts before any training.
predicted_chars

"yp-LKSoAULt.-NeEMMW$Z!OeY3BX3or-oesEk'jbodOvWnnsJf'LWpyddIYhG?&qCxErWaTRfbRgXIte:Lvq&MzWW g uQN\nuDBO"

In [18]:
def loss(labels, logits):
  """Sparse categorical cross-entropy between integer labels and raw logits."""
  return tf.keras.losses.sparse_categorical_crossentropy(
      y_true=labels, y_pred=logits, from_logits=True)

In [19]:
# Configure training: Adam optimizer with the custom `loss` function.
model.compile(optimizer='adam', loss=loss)

In [20]:
# Directory that receives the weight files written during training.
checkpoint_dir = './training_checkpoints'
# File name template; the `{epoch}` placeholder is filled in by the callback.
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}.weights.h5")

# Callback that saves only the model weights to `checkpoint_prefix`.
checkpoint_callback=tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_prefix,
    save_weights_only=True)

In [21]:
# Train for 50 epochs, saving weights through `checkpoint_callback`.
history = model.fit(data, epochs=50, callbacks=[checkpoint_callback])

Epoch 1/50
[1m172/172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 70ms/step - loss: 2.8500
Epoch 2/50
[1m172/172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 72ms/step - loss: 1.8343
Epoch 3/50
[1m172/172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 72ms/step - loss: 1.5897
Epoch 4/50
[1m172/172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 73ms/step - loss: 1.4663
Epoch 5/50
[1m172/172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 72ms/step - loss: 1.3916
Epoch 6/50
[1m172/172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 71ms/step - loss: 1.3414
Epoch 7/50
[1m172/172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 71ms/step - loss: 1.2981
Epoch 8/50
[1m172/172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 71ms/step - loss: 1.2637
Epoch 9/50
[1m172/172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 72ms/step - loss: 1.2319
Epoch 10/50
[1m172/172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14

In [22]:
# Re-create the architecture for text generation, passing batch_size=1.
# NOTE: this rebinds `model` to a fresh instance; the trained weights are loaded from a checkpoint file afterwards.
model = build_model(VOCAB_SIZE, EMBEDDING_DIM, RNN_UNITS, batch_size=1)

In [38]:
# Restore trained weights into the generation model.
# In the recorded run `tf.train.latest_checkpoint` found nothing in this
# directory (the callback writes `ckpt_<epoch>.weights.h5` files), so when it
# returns nothing we fall back to the highest-numbered `ckpt_<epoch>.weights.h5`
# file rather than assuming a fixed epoch count.
latest_checkpoint_file = tf.train.latest_checkpoint(checkpoint_dir)
if latest_checkpoint_file:
    print(f"Found latest checkpoint: {latest_checkpoint_file}")
    weights_to_load = latest_checkpoint_file
else:
    ckpt_prefix, ckpt_suffix = 'ckpt_', '.weights.h5'
    saved_checkpoints = []  # (epoch number, file name) pairs found on disk
    if os.path.isdir(checkpoint_dir):
        for file_name in os.listdir(checkpoint_dir):
            if file_name.startswith(ckpt_prefix) and file_name.endswith(ckpt_suffix):
                epoch_part = file_name[len(ckpt_prefix):-len(ckpt_suffix)]
                if epoch_part.isdigit():
                    saved_checkpoints.append((int(epoch_part), file_name))
    if not saved_checkpoints:
        # Fail loudly: continuing would generate text from untrained weights.
        raise FileNotFoundError(
            f"Error: Could not find latest checkpoint or any ckpt_<epoch>.weights.h5 file in: {checkpoint_dir}")
    manual_checkpoint_path = os.path.join(checkpoint_dir, max(saved_checkpoints)[1])
    print(f"Loading weights from: {manual_checkpoint_path}")
    weights_to_load = manual_checkpoint_path

model.build(tf.TensorShape([1, None])) # Build the model before loading weights
model.load_weights(weights_to_load)



Loading weights from: ./training_checkpoints/ckpt_50.weights.h5


In [39]:
# Build the generation model for inputs of shape (1, None) and load the epoch-50 weight file.
# NOTE(review): the preceding cell already builds the model and loads weights — confirm whether both cells are needed.
model.build(tf.TensorShape([1, None]))
model.load_weights("./training_checkpoints/ckpt_50.weights.h5") # Correct usage for .h5 files

In [40]:
def generate_text(model, start_string, num_generate=800, temperature=1.0):
  """Generate text from `model`, sampling one character at a time.

  Args:
    model: model that maps a batch of one id sequence to per-step logits
      over the vocabulary.
    start_string: non-empty seed text. Every character must be a key of
      `char2idx`; an unknown character raises KeyError.
    num_generate: number of characters to generate after the seed.
    temperature: positive divisor applied to the logits before sampling.
      Values below 1 sharpen the sampling distribution, values above 1
      flatten it.

  Returns:
    `start_string` followed by the generated characters.

  Raises:
    ValueError: if `start_string` is empty or `temperature` is not positive.
  """
  if not start_string:
    raise ValueError("start_string must contain at least one character")
  if temperature <= 0:
    raise ValueError("temperature must be positive")

  # Encode the seed and add a leading batch dimension of 1.
  input_eval = [char2idx[s] for s in start_string]
  input_eval = tf.expand_dims(input_eval, 0)

  text_generated = []

  # Clear the carried-over state of every stateful layer, so the result does
  # not depend on an earlier call or on the layer's position in the model.
  for layer in model.layers:
    if getattr(layer, 'stateful', False):
      layer.reset_states()

  for _ in range(num_generate):
    predictions = model(input_eval)

    # Drop the batch dimension, leaving one row of logits per time step.
    predictions = tf.squeeze(predictions, 0)

    predictions = predictions / temperature
    # Sample from every row but keep only the sample for the last time step.
    predicted_id = tf.random.categorical(predictions, num_samples=1)[-1,0].numpy()

    # Feed only the new character back in; earlier context is carried by the
    # stateful layer's internal state.
    input_eval = tf.expand_dims([predicted_id], 0)

    text_generated.append(idx2char[predicted_id])

  return (start_string + ''.join(text_generated))

In [42]:
# Prompt for a seed string and print the generated continuation.
inp = input("Type a starting string: ")
print(generate_text(model, inp))

Type a starting string: perchance
perchance of the king.

GLOUCESTER:
Nay, hence, good Morth, and know no holy a man's hounds;
Knew me so old as you are early in being,
And make my mind to death, my lord of Claudio;
So thrive I in my author, but a lord.

PAGE:
Are you man that doth not these
words please, as his head upon the gaol.
Could we be satisfied! England's death, clapp'd our curbing face;
For I have fearful to thy lord.

GLOUCESTER:
No doubt, no doubt; O, 'tis a part, and good nurse, here comes the French
And none away to ve one of the other: who should keeps him,
He hurling so blind,
That with upright traitor to the party, having advantage of his worth
and unspark'd that hath devour'd the realmy: and now about
More than to stand in narrow tander
Asher my bold's vault,
That makes his feether than I looked anyour honour.

IS
