In [1]:
import tensorflow as tf
from tensorflow.keras.layers.experimental import preprocessing

import numpy as np
import os
import time

In [2]:
text = open('ptter4.txt', 'rb').read().decode(encoding='utf-8')
# length of text is the number of characters in it
print('Length of text: {} characters'.format(len(text)))

Length of text: 1107547 characters


In [3]:
print(text[:250])

HARRY POTTER AND THE GOBLET OF FIRE

CHAPTER ONE - THE RIDDLE HOUSE

	The villagers of Little Hangleron still called it "the Riddle House," even though it had been many years since the Riddle family had lived there.  It stood on a hill overlooking th


In [4]:
vocab = sorted(set(text))
print('{} unique characters'.format(len(vocab)))

80 unique characters


In [5]:
ids_from_chars = preprocessing.StringLookup(
    vocabulary=list(vocab))

In [6]:
chars_from_ids = tf.keras.layers.experimental.preprocessing.StringLookup(
    vocabulary=ids_from_chars.get_vocabulary(), invert=True)

In [7]:
def text_from_ids(ids):
    return tf.strings.reduce_join(chars_from_ids(ids), axis=-1)

In [8]:
all_ids = ids_from_chars(tf.strings.unicode_split(text, 'UTF-8'))
all_ids 

<tf.Tensor: shape=(1107547,), dtype=int64, numpy=array([33, 26, 43, ..., 12,  3,  3])>

In [9]:
ids_dataset = tf.data.Dataset.from_tensor_slices(all_ids)

In [10]:
for ids in ids_dataset.take(10):
    print(chars_from_ids(ids).numpy().decode('utf-8'))

H
A
R
R
Y
 
P
O
T
T


In [11]:
seq_length = 100
examples_per_epoch = len(text)//(seq_length+1)

In [12]:
sequences = ids_dataset.batch(seq_length+1, drop_remainder=True)

for seq in sequences.take(2):
    print(chars_from_ids(seq))

tf.Tensor(
[b'H' b'A' b'R' b'R' b'Y' b' ' b'P' b'O' b'T' b'T' b'E' b'R' b' ' b'A'
 b'N' b'D' b' ' b'T' b'H' b'E' b' ' b'G' b'O' b'B' b'L' b'E' b'T' b' '
 b'O' b'F' b' ' b'F' b'I' b'R' b'E' b'\n' b'\n' b'C' b'H' b'A' b'P' b'T'
 b'E' b'R' b' ' b'O' b'N' b'E' b' ' b'-' b' ' b'T' b'H' b'E' b' ' b'R'
 b'I' b'D' b'D' b'L' b'E' b' ' b'H' b'O' b'U' b'S' b'E' b'\n' b'\n' b'\t'
 b'T' b'h' b'e' b' ' b'v' b'i' b'l' b'l' b'a' b'g' b'e' b'r' b's' b' '
 b'o' b'f' b' ' b'L' b'i' b't' b't' b'l' b'e' b' ' b'H' b'a' b'n' b'g'
 b'l' b'e' b'r'], shape=(101,), dtype=string)
tf.Tensor(
[b'o' b'n' b' ' b's' b't' b'i' b'l' b'l' b' ' b'c' b'a' b'l' b'l' b'e'
 b'd' b' ' b'i' b't' b' ' b'"' b't' b'h' b'e' b' ' b'R' b'i' b'd' b'd'
 b'l' b'e' b' ' b'H' b'o' b'u' b's' b'e' b',' b'"' b' ' b'e' b'v' b'e'
 b'n' b' ' b't' b'h' b'o' b'u' b'g' b'h' b' ' b'i' b't' b' ' b'h' b'a'
 b'd' b' ' b'b' b'e' b'e' b'n' b' ' b'm' b'a' b'n' b'y' b' ' b'y' b'e'
 b'a' b'r' b's' b' ' b's' b'i' b'n' b'c' b'e' b' ' b't' b'h' b'e' b' '
 b'R

In [13]:
for seq in sequences.take(5):
    print(text_from_ids(seq).numpy())

b'HARRY POTTER AND THE GOBLET OF FIRE\n\nCHAPTER ONE - THE RIDDLE HOUSE\n\n\tThe villagers of Little Hangler'
b'on still called it "the Riddle House," even though it had been many years since the Riddle family had'
b' lived there.  It stood on a hill overlooking the village, some of its windows boarded, tiles missing'
b' from its roof, and ivy spreading unchecked over its face.  Once a fine-looking manor, and easily the'
b' largest and grandest building for miles around, the Riddle House was now damp, derelict, and unoccup'


In [14]:
def split_input_target(sequence):
    input_text = sequence[:-1]
    target_text = sequence[1:]
    return input_text, target_text

In [16]:
dataset = sequences.map(split_input_target)

In [18]:
for input_example, target_example in  dataset.take(2):
    print("Input :", text_from_ids(input_example).numpy())
    print("Target:", text_from_ids(target_example).numpy())

Input : b'HARRY POTTER AND THE GOBLET OF FIRE\n\nCHAPTER ONE - THE RIDDLE HOUSE\n\n\tThe villagers of Little Hangle'
Target: b'ARRY POTTER AND THE GOBLET OF FIRE\n\nCHAPTER ONE - THE RIDDLE HOUSE\n\n\tThe villagers of Little Hangler'
Input : b'on still called it "the Riddle House," even though it had been many years since the Riddle family ha'
Target: b'n still called it "the Riddle House," even though it had been many years since the Riddle family had'


In [19]:
# Batch size
BATCH_SIZE = 64

# Buffer size to shuffle the dataset
# (TF data is designed to work with possibly infinite sequences,
# so it doesn't attempt to shuffle the entire sequence in memory. Instead,
# it maintains a buffer in which it shuffles elements).
BUFFER_SIZE = 10000

dataset = (
    dataset
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE, drop_remainder=True)
    .prefetch(tf.data.experimental.AUTOTUNE))

dataset

<PrefetchDataset shapes: ((64, 100), (64, 100)), types: (tf.int64, tf.int64)>

In [20]:

vocab_size = len(vocab)

# The embedding dimension
embedding_dim = 256

# Number of RNN units
rnn_units = 1024

In [21]:
class MyModel(tf.keras.Model):
    def __init__(self, vocab_size, embedding_dim, rnn_units):
        super().__init__(self)
        self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
        self.gru = tf.keras.layers.GRU(rnn_units,
                                   return_sequences=True, 
                                   return_state=True)
        self.dense = tf.keras.layers.Dense(vocab_size)
    def call(self, inputs, states=None, return_state=False, training=False):
        x=inputs
        x=self.embedding(x, training=training)
        if states is None:
            states = self.gru.get_initial_state(x)
        x, states = self.gru(x, initial_state=states, training=training)
        x = self.dense(x, training=training)
        
        if return_state:
            return x, states
        else:
            return x


In [22]:
model = MyModel(
    vocab_size = len(ids_from_chars.get_vocabulary()),
    embedding_dim = embedding_dim,
    rnn_units = rnn_units
)

In [23]:
for input_example_batch, target_example_batch in dataset.take(1):
    example_batch_predictions = model(input_example_batch)
    print(example_batch_predictions.shape, "# (batch_size, sequence_length, vocab_size)")

(64, 100, 82) # (batch_size, sequence_length, vocab_size)


In [24]:
model.summary()

Model: "my_model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        multiple                  20992     
_________________________________________________________________
gru (GRU)                    multiple                  3938304   
_________________________________________________________________
dense (Dense)                multiple                  84050     
Total params: 4,043,346
Trainable params: 4,043,346
Non-trainable params: 0
_________________________________________________________________


In [25]:
loss = tf.losses.SparseCategoricalCrossentropy(from_logits=True)

In [26]:
model.compile(optimizer='adam', loss=loss)

In [27]:
checkpoint_dir = './training_checkpointsV2'
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}")

checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_prefix,
    save_weights_only=True)

In [28]:
EPOCHS = 60

In [29]:
history = model.fit(dataset, epochs=EPOCHS, callbacks=[checkpoint_callback])

Epoch 1/60
Epoch 2/60
Epoch 3/60
Epoch 4/60
Epoch 5/60
Epoch 6/60
Epoch 7/60
Epoch 8/60
Epoch 9/60
Epoch 10/60
Epoch 11/60
Epoch 12/60
Epoch 13/60
Epoch 14/60
Epoch 15/60
Epoch 16/60
Epoch 17/60
Epoch 18/60
Epoch 19/60
Epoch 20/60
Epoch 21/60
Epoch 22/60
Epoch 23/60
Epoch 24/60
Epoch 25/60
Epoch 26/60
Epoch 27/60
Epoch 28/60
Epoch 29/60
Epoch 30/60
Epoch 31/60
Epoch 32/60
Epoch 33/60
Epoch 34/60
Epoch 35/60
Epoch 36/60
Epoch 37/60
Epoch 38/60
Epoch 39/60
Epoch 40/60
Epoch 41/60
Epoch 42/60
Epoch 43/60
Epoch 44/60
Epoch 45/60
Epoch 46/60
Epoch 47/60
Epoch 48/60
Epoch 49/60
Epoch 50/60
Epoch 51/60
Epoch 52/60
Epoch 53/60
Epoch 54/60
Epoch 55/60
Epoch 56/60
Epoch 57/60
Epoch 58/60
Epoch 59/60
Epoch 60/60


In [30]:
class NextStep(tf.keras.Model):
    def __init__(self, model, chars_from_ids, ids_from_chars, temperature=1.0):
        super().__init__()
        self.temperature=temperature
        self.model = model
        self.chars_from_ids = chars_from_ids
        self.ids_from_chars = ids_from_chars

    # Create a mask to prevent "" or "[UNK]" from being generated.
        skip_ids = self.ids_from_chars(['','[UNK]'])[:, None]
        sparse_mask = tf.SparseTensor(
        # Put a -inf at each bad index.
        values=[-float('inf')]*len(skip_ids),
        indices = skip_ids,
        # Match the shape to the vocabulary
        dense_shape=[len(ids_from_chars.get_vocabulary())]) 
        self.prediction_mask = tf.sparse.to_dense(sparse_mask)

    @tf.function
    def generate_next_step(self, inputs, states=None):
        # Convert strings to token IDs.
        input_chars = tf.strings.unicode_split(inputs, 'UTF-8')
        input_ids = self.ids_from_chars(input_chars).to_tensor()
        print('Input shape',input_ids.shape)
        # Run the model.
        # predicted_logits.shape is [batch, char, next_char_logits] 
        predicted_logits, states =  self.model(inputs=input_ids, states=states, 
                                          return_state=True)
        # Only use the last prediction.
        predicted_logits = predicted_logits[:, -1, :]
        predicted_logits = predicted_logits/self.temperature
        # Apply the prediction mask: prevent "" or "[UNK]" from being generated.
        predicted_logits = predicted_logits + self.prediction_mask

        # Sample the output logits to generate token IDs.
        predicted_ids = tf.random.categorical(predicted_logits, num_samples=1)
        predicted_ids = tf.squeeze(predicted_ids, axis=-1)

        # Convert from token ids to characters
        predicted_chars = self.chars_from_ids(predicted_ids)
        
        # Return the characters and model state.
        return predicted_chars, states

In [31]:
next_step_model = NextStep(model, chars_from_ids, ids_from_chars)

In [32]:
start = time.time()
states = None
next_char = tf.constant(['Forest', 'the train', 'Platform', 'Bank', 'spell'])
result = [next_char]

for n in range(100):
    next_char, states = next_step_model.generate_next_step(next_char, states=states)
    result.append(next_char)

result = tf.strings.join(result)
end = time.time()

print(result, '\n\n' + '_'*80)


print(f"\nRun time: {end - start}")

Input shape (5, None)
Input shape (5, None)
tf.Tensor(
[b'Forestble friends."\nDumbledore gave the paper with Padma asked him toward them. "We\'re still worried, and '
 b'the train began to move up.  For Harry took the right-panished, he threw giants, his arms were filiedly down '
 b'Platformarm," said the cold voice.  "Where\'s Hermione, who are going to lunching much less in his companion '
 b'Bankant-colored straws.  He looked hard in, then got up, walked over the hemento a hurried voice.  "What'
 b'spelled, giving Harry Potter is buszed and silvery unperturned ropes of pance that Monday was not a fleas'], shape=(5,), dtype=string) 

________________________________________________________________________________

Run time: 1.2780439853668213


In [33]:
tf.saved_model.save(next_step_model, 'potterv2')


Input shape (5, None)
Input shape (5, None)




INFO:tensorflow:Assets written to: potterv2/assets


INFO:tensorflow:Assets written to: potterv2/assets
