In [None]:
import tensorflow as tf
import numpy as np
import os
import time

In [None]:
# Fetch the Shakespeare corpus and keep the local path that get_file returns.
path_to_file = tf.keras.utils.get_file(
    fname='shakespeare.txt',
    origin='https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt',
)

Downloading data from https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt
[1m1115394/1115394[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1us/step


In [None]:
# Read the raw bytes and decode them as UTF-8. The context manager closes the
# file handle as soon as the read is done instead of leaving it open.
with open(path_to_file, 'rb') as corpus_file:
  text = corpus_file.read().decode(encoding='utf-8')

# Length of the text is the number of characters in it
print(f'length of text: {len(text)} characters')

length of text: 1115394 characters


In [None]:
# Preview the first 250 characters of the corpus.
print(text[:250])

First Citizen:
Before we proceed any further, hear me speak.

All:
Speak, speak.

First Citizen:
You are all resolved rather to die than to famish?

All:
Resolved. resolved.

First Citizen:
First, you know Caius Marcius is chief enemy to the people.



In [None]:
# Vocabulary: every distinct character of the corpus, in sorted order.
vocab = sorted(set(text))
print(f'{len(vocab)} unique characters')

65 unique characters


In [None]:
# Two toy strings of different lengths, split into per-character tokens.
example_texts = ['abcdefg', 'xyz']
chars = tf.strings.unicode_split(example_texts, input_encoding='UTF-8')
chars

<tf.RaggedTensor [[b'a', b'b', b'c', b'd', b'e', b'f', b'g'], [b'x', b'y', b'z']]>

In [None]:
# Lookup layer that maps each vocabulary character to an integer ID.
ids_from_chars = tf.keras.layers.StringLookup(
    vocabulary=list(vocab),
    mask_token=None,
)

In [None]:
# Map every character of the toy examples to its integer ID.
ids=ids_from_chars(chars)
ids

<tf.RaggedTensor [[40, 41, 42, 43, 44, 45, 46], [63, 64, 65]]>

In [None]:
# Inverse lookup: reuses the forward layer's vocabulary so that IDs map back
# to the characters they were produced from.
chars_from_ids = tf.keras.layers.StringLookup(
    vocabulary=ids_from_chars.get_vocabulary(),
    invert=True,
    mask_token=None,
)

In [None]:
# Round trip: map the IDs back to characters (this rebinds `chars`).
chars=chars_from_ids(ids)
chars

<tf.RaggedTensor [[b'a', b'b', b'c', b'd', b'e', b'f', b'g'], [b'x', b'y', b'z']]>

In [None]:
# Join the characters of each row back into a single string.
tf.strings.reduce_join(chars, axis=-1).numpy()

array([b'abcdefg', b'xyz'], dtype=object)

In [None]:
def text_from_ids(ids):
  """Convert token IDs to characters and join them along the last axis."""
  decoded_chars = chars_from_ids(ids)
  return tf.strings.reduce_join(decoded_chars, axis=-1)

In [None]:
# Sanity check: decode a single ID.
text_from_ids([45])

<tf.Tensor: shape=(), dtype=string, numpy=b'f'>

In [None]:
# Encode the whole corpus as character IDs.
all_ids = ids_from_chars(tf.strings.unicode_split(text, input_encoding='UTF-8'))
all_ids

<tf.Tensor: shape=(1115394,), dtype=int64, numpy=array([19, 48, 57, ..., 46,  9,  1])>

In [None]:
# Wrap the encoded corpus in a tf.data pipeline that yields it slice by slice.
ids_dataset=tf.data.Dataset.from_tensor_slices(all_ids)

In [None]:
# Peek at the first ten elements. The loop variable is deliberately not named
# `ids`, so it does not overwrite the `ids` tensor created in an earlier cell
# (which would change the result of re-running the cells that read it).
for char_id in ids_dataset.take(10):
  print(chars_from_ids(char_id).numpy().decode('utf-8'))

F
i
r
s
t
 
C
i
t
i


In [None]:
# Number of input characters per training example.
seq_length = 100

In [None]:
# Group the ID stream into chunks of seq_length + 1 IDs (one extra so each
# chunk can later be split into an input and a one-step-shifted target).
# drop_remainder=True discards a final chunk that would come up short.
sequences = ids_dataset.batch(seq_length + 1, drop_remainder=True)

# Show the first chunk as individual characters.
for seq in sequences.take(1):
  print(chars_from_ids(seq))

tf.Tensor(
[b'F' b'i' b'r' b's' b't' b' ' b'C' b'i' b't' b'i' b'z' b'e' b'n' b':'
 b'\n' b'B' b'e' b'f' b'o' b'r' b'e' b' ' b'w' b'e' b' ' b'p' b'r' b'o'
 b'c' b'e' b'e' b'd' b' ' b'a' b'n' b'y' b' ' b'f' b'u' b'r' b't' b'h'
 b'e' b'r' b',' b' ' b'h' b'e' b'a' b'r' b' ' b'm' b'e' b' ' b's' b'p'
 b'e' b'a' b'k' b'.' b'\n' b'\n' b'A' b'l' b'l' b':' b'\n' b'S' b'p' b'e'
 b'a' b'k' b',' b' ' b's' b'p' b'e' b'a' b'k' b'.' b'\n' b'\n' b'F' b'i'
 b'r' b's' b't' b' ' b'C' b'i' b't' b'i' b'z' b'e' b'n' b':' b'\n' b'Y'
 b'o' b'u' b' '], shape=(101,), dtype=string)


In [None]:
# Decode the first five chunks back to text to check the chunking.
for seq in sequences.take(5):
  print(text_from_ids(seq).numpy())

b'First Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou '
b'are all resolved rather to die than to famish?\n\nAll:\nResolved. resolved.\n\nFirst Citizen:\nFirst, you k'
b"now Caius Marcius is chief enemy to the people.\n\nAll:\nWe know't, we know't.\n\nFirst Citizen:\nLet us ki"
b"ll him, and we'll have corn at our own price.\nIs't a verdict?\n\nAll:\nNo more talking on't; let it be d"
b'one: away, away!\n\nSecond Citizen:\nOne word, good citizens.\n\nFirst Citizen:\nWe are accounted poor citi'


In [None]:
def split_input_target(sequence):
  """Split a sequence into an (input, target) pair offset by one position.

  The input is the sequence without its last element; the target is the
  sequence without its first element.
  """
  return sequence[:-1], sequence[1:]

In [None]:
# Demonstrate the split on a plain Python list of characters.
split_input_target(list('Tensorflow'))

(['T', 'e', 'n', 's', 'o', 'r', 'f', 'l', 'o'],
 ['e', 'n', 's', 'o', 'r', 'f', 'l', 'o', 'w'])

In [None]:
# Turn every chunk into an (input, target) pair.
dataset=sequences.map(split_input_target)

In [None]:
# Decode one (input, target) pair to confirm the one-character offset.
for input_example, target_example in dataset.take(1):
  decoded_input = text_from_ids(input_example).numpy()
  decoded_target = text_from_ids(target_example).numpy()
  print('input: ', decoded_input)
  print('target: ', decoded_target)

input:  b'First Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou'
target:  b'irst Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou '


In [None]:
# Creating training batches.

# Number of sequences per batch, and the size of the shuffle buffer.
BATCH_SIZE = 64
BUFFER_SIZE = 10000

# Build the pipeline from `sequences` rather than from `dataset` itself, so
# that re-running this cell does not batch an already-batched dataset.
dataset = (
    sequences
    .map(split_input_target)
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE, drop_remainder=True)
    # tf.data.AUTOTUNE replaces the deprecated tf.data.experimental.AUTOTUNE.
    .prefetch(tf.data.AUTOTUNE)
)

dataset

<_PrefetchDataset element_spec=(TensorSpec(shape=(64, 100), dtype=tf.int64, name=None), TensorSpec(shape=(64, 100), dtype=tf.int64, name=None))>

In [None]:
# Size of the vocabulary held by the StringLookup layer
vocab_size = len(ids_from_chars.get_vocabulary())

# Width of each embedding vector
embedding_dim = 256

# Number of units in the recurrent layer
rnn_units = 1024

In [None]:
# Functional-API model: Embedding -> LSTM -> Dense.
# The input is a variable-length sequence of token IDs.
inputs = tf.keras.Input(shape=(None,))

# Embedding layer
x = tf.keras.layers.Embedding(vocab_size, embedding_dim)(inputs)

# LSTM layer: returns the full output sequence plus the final hidden and cell
# states. Only the sequence output is wired into the model below.
x, hidden_state, cell_state = tf.keras.layers.LSTM(
    rnn_units, return_sequences=True, return_state=True
)(x)

# Dense layer producing vocab_size values per timestep from the sequence output.
outputs = tf.keras.layers.Dense(vocab_size)(x)

# Build the model. Its only output is `outputs`: calling it takes no `states`
# argument and returns no recurrent state.
model = tf.keras.Model(inputs=inputs, outputs=outputs)


model.summary()

In [None]:
# Run a single batch through the model and report the output shape.
for input_example_batch, target_example_batch in dataset.take(1):
  example_batch_predictions = model(input_example_batch)
  print(example_batch_predictions.shape, '# (batch_size, sequence_length, vocab_size)')

(64, 100, 66) # (batch_size, sequence_length, vocab_size)


In [None]:
# Sample one ID per timestep from the predictions for the first sequence in
# the batch, drop the trailing sample axis, and convert to a NumPy array.
sampled_indices = tf.squeeze(
    tf.random.categorical(example_batch_predictions[0], num_samples=1),
    axis=-1,
).numpy()

In [None]:
# Display the sampled character IDs.
sampled_indices

array([ 9,  9, 31, 30, 27, 61, 44, 38, 18, 51, 64, 23, 38, 45, 47, 65,  8,
       54, 55, 65, 39,  3, 51, 55, 65, 33,  5, 40, 48, 62, 47, 10, 43, 17,
       18, 23,  8, 44, 56, 54, 64, 25, 46,  1,  1, 15, 14, 19, 27, 34, 53,
       56, 40, 50, 28, 13, 32, 28, 56,  9,  1,  1, 15, 58,  6, 31, 56, 22,
       32,  9, 32,  9, 12, 34, 60, 42, 60, 56, 10, 10, 19, 59, 13,  0, 30,
       44, 17, 39, 50,  4, 29, 41, 58, 10, 17, 36, 19,  5, 34, 59])

In [None]:
# Decode the first input sequence and the IDs sampled for it, then show both.
first_input_text = text_from_ids(input_example_batch[0]).numpy()
sampled_text = text_from_ids(sampled_indices).numpy()

print('Input: \n', first_input_text)
print()
print('Next char Predictions:\n', sampled_text)

Input: 
 b'\nWe are advertised by our loving friends\nThat they do hold their course toward Tewksbury:\nWe, having'

Next char Predictions:
 b"..RQNveYElyJYfhz-opzZ!lpzT&aiwh3dDEJ-eqoyLg\n\nBAFNUnqakO?SOq.\n\nBs'RqIS.S.;Uucuq33Ft?[UNK]QeDZk$Pbs3DWF&Ut"


*** Train the model ***

In [None]:
# Sparse categorical cross-entropy, configured with from_logits=True.
loss=tf.losses.SparseCategoricalCrossentropy(from_logits=True)

In [None]:
# Evaluate the loss on the example batch.
example_batch_mean_loss = loss(target_example_batch, example_batch_predictions)

print('prediction shape:', example_batch_predictions.shape, "# batch_size,sequence_length, vocab_size)")
print('Mean loss:    ', example_batch_mean_loss)

prediction shape: (64, 100, 66) # batch_size,sequence_length, vocab_size)
Mean loss:     tf.Tensor(4.190137, shape=(), dtype=float32)


In [None]:
# Exponential of the mean loss on the example batch.
tf.exp(example_batch_mean_loss).numpy()

66.03183

In [None]:
# Configure training with the Adam optimizer and the `loss` object.
model.compile(optimizer='adam', loss=loss)

In [None]:
# Directory that will hold the checkpoint files
checkpoint_dir = './training_checkpoints'
# Filename pattern handed to ModelCheckpoint as `filepath`
checkpoint_prefix = os.path.join(checkpoint_dir, 'ckpt_{epoch}.weights.h5')

# Callback that saves weights only and logs each save (verbose=1).
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_prefix,
    save_weights_only=True,
    verbose=1,
)

In [None]:
# Number of training epochs passed to model.fit.
EPOCHS=20

In [None]:
# Train on the batched dataset with the checkpoint callback attached.
history=model.fit(dataset, epochs=EPOCHS, callbacks=[checkpoint_callback])

Epoch 1/20
[1m172/172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 69ms/step - loss: 3.1365
Epoch 1: saving model to ./training_checkpoints/ckpt_1.weights.h5
[1m172/172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 70ms/step - loss: 3.1338
Epoch 2/20
[1m172/172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 71ms/step - loss: 2.0746
Epoch 2: saving model to ./training_checkpoints/ckpt_2.weights.h5
[1m172/172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 72ms/step - loss: 2.0741
Epoch 3/20
[1m172/172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 72ms/step - loss: 1.7783
Epoch 3: saving model to ./training_checkpoints/ckpt_3.weights.h5
[1m172/172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 73ms/step - loss: 1.7781
Epoch 4/20
[1m172/172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 75ms/step - loss: 1.6103
Epoch 4: saving model to ./training_checkpoints/ckpt_4.weights.h5
[1m172/172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[

In [None]:
class OneStep(tf.keras.Model):
  """Generates text one character at a time from a character-level model.

  The wrapped model is never called directly. Its Embedding, recurrent and
  Dense layers are looked up in `model.layers` and applied in that order, so
  the recurrent state can be passed in and read back even when the model's
  own call signature has no `states` argument.

  Args:
    model: Keras model whose `layers` contain an Embedding layer, a recurrent
      layer created with return_sequences=True and return_state=True, and a
      Dense layer. The first layer of each kind is used.
    chars_from_ids: lookup layer mapping token IDs to characters.
    ids_from_chars: lookup layer mapping characters to token IDs.
    temperature: positive divisor applied to the logits before sampling.

  Raises:
    ValueError: if `temperature` is not positive.
  """

  def __init__(self, model, chars_from_ids, ids_from_chars, temperature=1.0):
    super().__init__()
    if temperature <= 0:
      # Logits are divided by the temperature, so zero or negative is invalid.
      raise ValueError(f'temperature must be positive, got {temperature}')
    self.temperature = temperature
    self.model = model
    self.chars_from_ids = chars_from_ids
    self.ids_from_chars = ids_from_chars

    # Create a mask to prevent '[UNK]' from being generated.
    skip_ids = self.ids_from_chars(['[UNK]'])[:, None]
    sparse_mask = tf.SparseTensor(
        # Put a -inf at each bad index.
        values=[-float('inf')] * len(skip_ids),
        indices=skip_ids,
        # Match the shape to the vocabulary.
        dense_shape=[len(ids_from_chars.get_vocabulary())],
    )
    self.prediction_mask = tf.sparse.to_dense(sparse_mask)

  def _layer_of_type(self, layer_type, description):
    """Return the first layer of `layer_type` in the wrapped model."""
    for layer in self.model.layers:
      if isinstance(layer, layer_type):
        return layer
    raise ValueError(f'OneStep needs {description} layer in the wrapped model')

  def _run_model(self, input_ids, states):
    """Apply embedding -> recurrent -> dense, threading the recurrent state.

    Returns:
      (logits, new_states), where new_states is the list of state tensors
      returned by the recurrent layer after its output sequence.
    """
    embedding = self._layer_of_type(tf.keras.layers.Embedding, 'an Embedding')
    rnn = self._layer_of_type(tf.keras.layers.RNN, 'a recurrent')
    dense = self._layer_of_type(tf.keras.layers.Dense, 'a Dense')
    if not (rnn.return_sequences and rnn.return_state):
      raise ValueError(
          'the recurrent layer must be built with return_sequences=True '
          'and return_state=True')

    x = embedding(input_ids)
    # With return_state=True the layer returns the output sequence followed
    # by its state tensor(s); initial_state=None starts from the default state.
    x, *new_states = rnn(x, initial_state=states)
    return dense(x), new_states

  def generate(self, inputs, states=None):
    """Sample the next character for every string in `inputs`.

    Args:
      inputs: string tensor holding the text generated so far (one entry per
        batch element).
      states: recurrent state returned by a previous call, or None to start
        from the recurrent layer's default initial state.

    Returns:
      (predicted_chars, states): the sampled characters and the recurrent
      state to pass to the next call.
    """
    # Convert strings to token IDs.
    input_chars = tf.strings.unicode_split(inputs, 'UTF-8')
    input_ids = self.ids_from_chars(input_chars).to_tensor()

    # Run the model.
    # predicted_logits.shape is [batch, char, next_char_logits]
    predicted_logits, states = self._run_model(input_ids, states)
    # Only use the last prediction.
    predicted_logits = predicted_logits[:, -1, :]
    predicted_logits = predicted_logits / self.temperature
    # Apply the prediction mask: prevent "[UNK]" from being generated.
    predicted_logits = predicted_logits + self.prediction_mask

    # Sample the output logits to generate token IDs.
    predicted_ids = tf.random.categorical(predicted_logits, num_samples=1)
    predicted_ids = tf.squeeze(predicted_ids, axis=-1)

    # Convert from token IDs to characters.
    predicted_chars = self.chars_from_ids(predicted_ids)
    # Return the characters and the model state.
    return predicted_chars, states


In [None]:
# Wrap the model and both lookup layers in the single-step generator
# (temperature left at its default).
one_step_model=OneStep(model,chars_from_ids, ids_from_chars)

In [None]:
start = time.time()
states = None
# Seed text; it is also the first piece of the generated result.
next_char = tf.constant(['ROMEO:'])
result = [next_char]

for n in range(1000):
  # generate() returns (characters, recurrent state). Feeding the state back
  # in lets each step continue from the previous one, so only the newest
  # character has to be passed as input.
  next_char, states = one_step_model.generate(next_char, states=states)
  result.append(next_char)

result = tf.strings.join(result)
end = time.time()
print(result[0].numpy().decode('utf-8'), '\n\n' + '_'*80)
print('\n Run time: ', end-start)

TypeError: got an unexpected keyword argument 'states'