In [15]:
import numpy as np
import tensorflow as tf
import tensorflow.keras as keras
import os, time

In [65]:
from tensorflow.keras.layers.experimental.preprocessing import StringLookup

In [3]:
# Display the installed TensorFlow version so the recorded outputs can be tied to it.
tf.__version__

'2.6.0'

In [5]:
# Resolve a local path to the Shakespeare corpus through keras.utils.get_file.
path_to_file = keras.utils.get_file(
    fname="shakespeare.txt",
    origin="https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt",
)

In [119]:
# Read the corpus as raw bytes and decode it as UTF-8. The context manager
# closes the file handle as soon as the read finishes instead of leaving it
# open for the lifetime of the kernel.
with open(path_to_file, "rb") as corpus_file:
    text = corpus_file.read().decode(encoding="utf-8")
print(f"the text is {len(text)} characters long")
# Preview the first 1000 characters only; the full corpus is too long to display.
print(text[:1000])

the text is 1115394 characters long
First Citizen:
Before we proceed any further, hear me speak.

All:
Speak, speak.

First Citizen:
You are all resolved rather to die than to famish?

All:
Resolved. resolved.

First Citizen:
First, you know Caius Marcius is chief enemy to the people.

All:
We know't, we know't.

First Citizen:
Let us kill him, and we'll have corn at our own price.
Is't a verdict?

All:
No more talking on't; let it be done: away, away!

Second Citizen:
One word, good citizens.

First Citizen:
We are accounted poor citizens, the patricians good.
What authority surfeits on would relieve us: if they
would yield us but the superfluity, while it were
wholesome, we might guess they relieved us humanely;
but they think we are too dear: the leanness that
afflicts us, the object of our misery, is as an
inventory to particularise their abundance; our
sufferance is a gain to them Let us revenge this with
our pikes, ere we become rakes: for the gods know I
speak this in hunger for

In [7]:
# The vocabulary is every distinct character of the corpus, in sorted order.
vocab = sorted({ch for ch in text})
print(f"{len(vocab)} unique characters")

65 unique characters


In [10]:
#Todo 2
# Two sample strings of different lengths, split into one token per character.
example_texts = ["abcdefg", "xyz"]

chars = tf.strings.unicode_split(input=example_texts, input_encoding="UTF-8")
chars

<tf.RaggedTensor [[b'a', b'b', b'c', b'd', b'e', b'f', b'g'], [b'x', b'y', b'z']]>

In [26]:
# Forward lookup layer: character -> integer id. No mask token is reserved.
ids_from_chars = StringLookup(mask_token=None, vocabulary=list(vocab))

In [27]:
# Map the example character tokens to their integer ids and display them.
ids = ids_from_chars(chars)
ids

<tf.RaggedTensor [[40, 41, 42, 43, 44, 45, 46], [63, 64, 65]]>

In [30]:
# Inverse lookup layer: integer id -> character. It is built from the forward
# layer's own vocabulary so both directions use the same mapping.
chars_from_ids = StringLookup(
    invert=True,
    mask_token=None,
    vocabulary=ids_from_chars.get_vocabulary(),
)

In [31]:
# Round-trip check: convert the example ids back into characters.
# Note that this rebinds `chars` to the decoded result.
chars = chars_from_ids(ids)
chars

<tf.RaggedTensor [[b'a', b'b', b'c', b'd', b'e', b'f', b'g'], [b'x', b'y', b'z']]>

In [32]:
# Join the characters of each row along the last axis to recover whole strings.
tf.strings.reduce_join(chars, axis=-1).numpy()

array([b'abcdefg', b'xyz'], dtype=object)

In [33]:
def text_from_ids(ids):
    """Decode ids to characters and join them along the last axis.

    Args:
        ids: ids accepted by the notebook-level `chars_from_ids` lookup layer.

    Returns:
        The result of `tf.strings.reduce_join` over the decoded characters
        (a string tensor; call `.numpy()` to get bytes).
    """
    char_tokens = chars_from_ids(ids)
    return tf.strings.reduce_join(char_tokens, axis=-1)

In [34]:
#Todo 3
# Encode the entire corpus: split it into characters, then map each to its id.
all_ids = ids_from_chars(
    tf.strings.unicode_split(text, input_encoding="UTF-8")
)
all_ids

<tf.Tensor: shape=(1115394,), dtype=int64, numpy=array([19, 48, 57, ..., 46,  9,  1], dtype=int64)>

In [36]:
# Wrap the id tensor in a tf.data pipeline that yields one id per element.
ids_dataset = tf.data.Dataset.from_tensor_slices(all_ids)

In [37]:
# Print the first ten characters of the corpus, one per line.
# The loop variable is deliberately not named `ids`: that name already holds
# the example id tensor defined earlier in the notebook, and overwriting it
# with a scalar would change what the earlier cells produce when re-run.
for char_id in ids_dataset.take(10):
    print(chars_from_ids(char_id).numpy().decode("utf-8"))

F
i
r
s
t
 
C
i
t
i


In [38]:
# Length of each model input, in characters.
seq_length = 100
# Number of (seq_length + 1)-character chunks that fit in the corpus; the extra
# character is needed because targets are the inputs shifted by one position.
examples_per_epoch = len(text) // (seq_length + 1)

In [39]:
# Chunk the id stream into windows of seq_length + 1 ids, dropping the final
# partial window, then show the first window decoded back to characters.
sequences = ids_dataset.batch(batch_size=seq_length + 1, drop_remainder=True)
for seq in sequences.take(1):
    print(chars_from_ids(seq))

tf.Tensor(
[b'F' b'i' b'r' b's' b't' b' ' b'C' b'i' b't' b'i' b'z' b'e' b'n' b':'
 b'\n' b'B' b'e' b'f' b'o' b'r' b'e' b' ' b'w' b'e' b' ' b'p' b'r' b'o'
 b'c' b'e' b'e' b'd' b' ' b'a' b'n' b'y' b' ' b'f' b'u' b'r' b't' b'h'
 b'e' b'r' b',' b' ' b'h' b'e' b'a' b'r' b' ' b'm' b'e' b' ' b's' b'p'
 b'e' b'a' b'k' b'.' b'\n' b'\n' b'A' b'l' b'l' b':' b'\n' b'S' b'p' b'e'
 b'a' b'k' b',' b' ' b's' b'p' b'e' b'a' b'k' b'.' b'\n' b'\n' b'F' b'i'
 b'r' b's' b't' b' ' b'C' b'i' b't' b'i' b'z' b'e' b'n' b':' b'\n' b'Y'
 b'o' b'u' b' '], shape=(101,), dtype=string)


In [40]:
# Show the first five windows as joined byte strings for easier reading.
for seq in sequences.take(5):
    print(text_from_ids(seq).numpy())

b'First Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou '
b'are all resolved rather to die than to famish?\n\nAll:\nResolved. resolved.\n\nFirst Citizen:\nFirst, you k'
b"now Caius Marcius is chief enemy to the people.\n\nAll:\nWe know't, we know't.\n\nFirst Citizen:\nLet us ki"
b"ll him, and we'll have corn at our own price.\nIs't a verdict?\n\nAll:\nNo more talking on't; let it be d"
b'one: away, away!\n\nSecond Citizen:\nOne word, good citizens.\n\nFirst Citizen:\nWe are accounted poor citi'


In [41]:
def split_input_target(sequence):
    """Split a sequence into an (input, target) pair offset by one position.

    Args:
        sequence: any object supporting slicing (string, list, tensor, ...).

    Returns:
        A tuple `(sequence[:-1], sequence[1:])`: everything but the last
        element, and everything but the first element.
    """
    return sequence[:-1], sequence[1:]

In [42]:
# Sanity check on a plain Python string: the target is the input shifted by one.
split_input_target("Tensorflow")

('Tensorflo', 'ensorflow')

In [43]:
# Turn every window into an (input, target) pair.
dataset = sequences.map(split_input_target)

In [46]:
# Decode the first (input, target) pair to confirm the one-character offset.
for input_example, output_example in dataset.take(1):
    print("Input example: ", text_from_ids(input_example).numpy())
    print("Output_example: ", text_from_ids(output_example).numpy())

Input example:  b'First Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou'
Output_example:  b'irst Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou '


In [47]:
# Number of sequences per training batch.
BATCH_SIZE = 64
# Number of sequences held in the shuffle buffer.
BUFFER_SIZE = 10000

# Build the training pipeline directly from `sequences` instead of re-wrapping
# `dataset`: the cell is then idempotent, and re-running it can never shuffle
# and batch an already-batched dataset.
dataset = (sequences.map(split_input_target)
           .shuffle(BUFFER_SIZE)
           .batch(BATCH_SIZE, drop_remainder=True)
           .prefetch(tf.data.AUTOTUNE))
dataset

<PrefetchDataset shapes: ((64, 100), (64, 100)), types: (tf.int64, tf.int64)>

In [48]:
# Take the number of classes from the lookup layer rather than from `vocab`:
# the layer emits ids up to 65 for 65 unique characters (and the model's output
# has 66 logits), so len(vocab) would be one short if used to size the model.
vocab_size = len(ids_from_chars.get_vocabulary())
# Width of the character embedding vectors.
embedding_dim = 256
# Number of units in the GRU layer.
rnn_units = 1024

In [68]:
class MyModel(tf.keras.Model):
    """Character-level language model: Embedding -> GRU -> Dense logits."""

    def __init__(self, vocab_size, embedding_dim, rnn_units):
        """Create the three layers.

        Args:
            vocab_size: number of distinct ids; used as the embedding input
                size and as the number of output logits.
            embedding_dim: size of each embedding vector.
            rnn_units: number of GRU units.
        """
        # No positional arguments: the base initializer does not take the
        # instance as an explicit parameter.
        super().__init__()
        self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
        # return_sequences gives an output per timestep; return_state also
        # returns the final state so generation can carry it between calls.
        self.gru = tf.keras.layers.GRU(rnn_units, return_sequences=True, return_state=True)
        self.dense = tf.keras.layers.Dense(vocab_size)

    def call(self, inputs, states=None, return_state=False, training=False):
        """Run the model on a batch of id sequences.

        Args:
            inputs: integer ids fed to the embedding layer.
            states: optional GRU state to start from; when None the GRU's
                initial state is used.
            return_state: when True, also return the GRU state after the pass.
            training: forwarded to every layer.

        Returns:
            The dense layer's output for every timestep, or a tuple of
            (output, states) when `return_state` is True.
        """
        x = self.embedding(inputs, training=training)

        if states is None:
            states = self.gru.get_initial_state(x)
        x, states = self.gru(x, initial_state=states, training=training)
        x = self.dense(x, training=training)

        if return_state:
            return x, states
        return x

In [69]:
# Instantiate the model. The class count comes from the lookup layer's
# vocabulary rather than from len(vocab).
model=MyModel(vocab_size = len(ids_from_chars.get_vocabulary()),
              embedding_dim = embedding_dim,
              rnn_units = rnn_units)

In [70]:
# Run one batch through the untrained model to build it and check the output shape.
for input_example_batch, output_example_batch in dataset.take(1):
    example_batch_predictions = model(input_example_batch)
    print(example_batch_predictions.shape,
          "# batch_size, sequence_length, vocab_size")

(64, 100, 66) # batch_size, sequence_length, vocab_size


In [71]:
# Layer-by-layer parameter counts (available once the model has been called).
model.summary()

Model: "my_model_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      multiple                  16896     
_________________________________________________________________
gru (GRU)                    multiple                  3938304   
_________________________________________________________________
dense (Dense)                multiple                  67650     
Total params: 4,022,850
Trainable params: 4,022,850
Non-trainable params: 0
_________________________________________________________________


In [73]:
# Sample one id per timestep from the logits of the first sequence in the
# batch, then drop the trailing num_samples axis and convert to NumPy.
sampled_indices = tf.squeeze(
    tf.random.categorical(example_batch_predictions[0], num_samples=1),
    axis=-1,
).numpy()
sampled_indices

array([15, 61,  9, 44,  7, 29, 59,  5,  1, 22, 12, 22, 30, 18, 60, 49, 34,
       28, 17, 31, 63, 64, 13, 43, 18, 59, 18, 57, 42, 35, 38, 31,  9, 41,
       60, 63, 50,  8, 62, 49, 52,  8, 49, 33, 42, 12,  8,  9, 27, 23, 45,
       62, 35, 31, 11, 37, 26, 65, 18, 31, 40, 42, 38, 40, 43,  7,  7, 30,
       16, 57, 30, 35, 64, 58, 65,  3, 11,  1, 32, 15, 16, 57, 13, 60, 34,
        1, 53, 44, 36, 44, 21, 10, 49, 26,  5, 15, 29, 31, 19,  1],
      dtype=int64)

In [75]:
# Compare an input sequence with what the untrained model samples for it;
# the predictions are expected to be gibberish at this point.
print("Input: \n", text_from_ids(input_example_batch[0]).numpy())
print()
print("Next char predictions: \n", text_from_ids(sampled_indices).numpy())

Input: 
 b"orshipp'd sun\nPeer'd forth the golden window of the east,\nA troubled mind drave me to walk abroad;\nW"

Next char predictions: 
 b'Bv.e,Pt&\nI;IQEujUODRxy?dEtErcVYR.buxk-wjm-jTc;-.NJfwVR:XMzERacYad,,QCrQVysz!:\nSBCr?uU\nneWeH3jM&BPRF\n'


In [90]:
# The model outputs raw logits (no softmax), hence from_logits=True.
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

In [96]:
# Check the loss of the untrained model on one batch.
# Predictions are recomputed from the batch drawn here: the dataset is
# shuffled, so logits left over from an earlier cell belong to different
# sequences than these targets and would give a meaningless loss.
for input_example_batch, target_example_batch in dataset.take(1):
    example_batch_predictions = model(input_example_batch)
    example_batch_mean_loss = loss(target_example_batch, example_batch_predictions)
    print("Prediction shape: ", example_batch_predictions.shape, " # (batch_size, sequence_length, vocab_size)")
    print("Mean Loss: ", example_batch_mean_loss)
    # exp(mean loss) should be close to the number of classes before training.
    print(tf.exp(example_batch_mean_loss).numpy())

Prediction shape:  (64, 100, 66)  # (batch_size, sequence_length, vocab_size)
Mean Loss:  tf.Tensor(4.1899133, shape=(), dtype=float32)
66.01707


In [97]:
# Configure training with the Adam optimizer and the loss defined above.
model.compile(optimizer="adam", loss = loss)

In [99]:
# Checkpoint callback that stores weights only; the file name template embeds
# the epoch number, so each epoch gets its own checkpoint.
checkpoint_dir = "./training_checkpoints"
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}")
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    save_weights_only=True,
    filepath=checkpoint_prefix,
)

In [100]:
# Train for 10 epochs, writing a checkpoint through the callback.
history = model.fit(dataset, epochs=10, callbacks = [checkpoint_callback])

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [107]:
class OneStep(tf.keras.Model):
    """Wraps a trained model so that each call samples one next character."""

    def __init__(self, model, chars_from_ids, ids_from_chars, temperature=1.0):
        """Store the model and lookup layers and precompute the logit mask.

        Args:
            model: model called with `inputs`, `states` and `return_state`.
            chars_from_ids: lookup layer mapping ids to characters.
            ids_from_chars: lookup layer mapping characters to ids.
            temperature: divisor applied to the logits before sampling.
        """
        super().__init__()
        self.temperature = temperature
        self.model = model
        self.chars_from_ids = chars_from_ids
        self.ids_from_chars = ids_from_chars

        # Additive mask over the vocabulary: -inf at the id of "[UNK]" so it
        # can never be sampled, and the sparse default everywhere else.
        unk_ids = self.ids_from_chars(["[UNK]"])[:, None]
        neg_inf_values = [-float("inf")] * len(unk_ids)
        vocab_len = len(ids_from_chars.get_vocabulary())
        self.prediction_mask = tf.sparse.to_dense(
            tf.SparseTensor(
                indices=unk_ids,
                values=neg_inf_values,
                dense_shape=[vocab_len],
            )
        )

    @tf.function
    def generate_one_step(self, inputs, states=None):
        """Sample the next character for every string in `inputs`.

        Args:
            inputs: string tensor; each entry is split into UTF-8 characters.
            states: state passed through to the wrapped model (None to start).

        Returns:
            A tuple (predicted_chars, states): the sampled characters and the
            state returned by the wrapped model.
        """
        # Strings -> characters -> ids, converted to a dense tensor.
        token_ids = self.ids_from_chars(
            tf.strings.unicode_split(inputs, "UTF-8")
        ).to_tensor()

        logits, states = self.model(inputs=token_ids, states=states, return_state=True)

        # Keep only the last timestep, scale by temperature, then apply the mask.
        last_step_logits = logits[:, -1, :] / self.temperature + self.prediction_mask

        # Draw one id per row and drop the num_samples axis.
        next_ids = tf.squeeze(
            tf.random.categorical(last_step_logits, num_samples=1), axis=-1
        )

        return self.chars_from_ids(next_ids), states

In [108]:
# Single-step generator around the trained model, using the default temperature.
one_step_model = OneStep(model, chars_from_ids, ids_from_chars)

In [110]:
# Generate 1000 characters from the prompt "ROMEO:", feeding each sampled
# character and the returned state back into the next step, and time the run.
start = time.time()
states = None
next_char = tf.constant(["ROMEO:"])
result = [next_char]

for n in range(1000):
    next_char, states = one_step_model.generate_one_step(next_char, states=states)
    result.append(next_char)

# Concatenate the prompt and all sampled characters into one string per row.
result = tf.strings.join(result)
end = time.time()
print(result[0].numpy().decode("utf-8"), "\n\n" + "_" * 80)
print("\nRun time: ", end-start)

ROMEO:
It is spite your wrockly, till they're?

GRUMIO:
Fell so: yea, leanness sibtle pardon.

SICINIUS:
What, host madeage?

ANGELO:
My lord.

GREMIO:
O gallable!
How sir, I do be so, by thee?

DUKE VINCENTIO:
Come all do: for it shall not have it, to receive from the glother's faith.
But say, say 'em!' That's your quarrel: till I'll tell her ignoran,
And he my grace geneward hanting: but if a
Dow from sline of weathing her young pride.

TENTIIO:
Not at that kingled father, and faults,
Exam to you.

LUCIO:
Why, none than any world to brawn Claudio?

MARCIUS:
Ne'er trust to be into myself.

HORTENSIO:
Condend thee, with all but strength in meth,
If brother having for me in headness,
As be us to our pretry floxe.

FRIAR LAURENCE:

PAULINA:
Nay, tut,
And all thy brother He mistands from
sings at me, were they go about this beauteful friend
Hath spoken fair and tale them king,
May not remained eyes but life look.
My father gave him baw thy choice nor inkeen.
And when I love the likelict s

In [113]:
# Generate 1000 characters for five identical prompts in a single batch and
# time the run. Only the first of the five continuations is printed.
start = time.time()
states = None
next_char = tf.constant(["ROMEO:", "ROMEO:", "ROMEO:", "ROMEO:", "ROMEO:"])
result = [next_char]

for n in range(1000):
    next_char, states = one_step_model.generate_one_step(next_char, states=states)
    result.append(next_char)

# Concatenate the prompt and all sampled characters into one string per row.
result = tf.strings.join(result)
end = time.time()
print(result[0].numpy().decode("utf-8"), "\n\n" + "_" * 80)
print("\nRun time: ", end-start)

ROMEO:
Even what morions will have found you try death?
Here's it friends in the noble duke ert report.

GREMIO:
And you in Tutnot will, my lord.

VILINIUS:
If, therefore from thy banish is't
for your true dead days levy.

SLY:
'Tis like all for I would have look'st envion against
The world is breath; and then it say that which he request,
As east will sleep not in the city feed;
But I'll want the best,
To strike up your rabs, the oracle-forced thee for,
My receives which he ishined in the bea.
But Henry of his warding broken
Those fear of mine eyes to me for't:
I chy point thou be content;
And strock and scorns the king's death, alive,
Out of a grave with all prace eyes with words:
And so I think, no man common buy.
Farewell.

GLOUCESTER:
Neaven, I'll tribund to o'er the perit;
And thrive my violan of the fearful lamfs,
God growing by the garland of Menenity'
Was he will prevails not that you but weth.
Nay think my poor from tain'd Clifford,
Which thy arguous thursdary: let our rascel

In [115]:
# Export the single-step generator to the "one_step" directory and load it
# back, to confirm it works without the original Python objects.
tf.saved_model.save(one_step_model, "one_step")
one_step_reloaded = tf.saved_model.load("one_step")





INFO:tensorflow:Assets written to: one_step\assets


INFO:tensorflow:Assets written to: one_step\assets


In [116]:
# Generate 1000 characters with the reloaded generator. `states` is passed
# positionally here, unlike the keyword form used with the in-memory model.
states = None
next_char = tf.constant(["ROMEO:"])
result = [next_char]

for n in range(1000):
    next_char, states = one_step_reloaded.generate_one_step(next_char, states)
    result.append(next_char)

# Concatenate the prompt and all sampled characters into one string per row.
result = tf.strings.join(result)
print(result[0].numpy().decode("utf-8"))

ROMEO:
Nay, with all persuading friends,
Your willingly as art by right,
Good morrily; for thy thy chose them further night
Shall lose his brother; and my frozen content.

DUKE VINCENTIO:
It is the adverse ballads that I had pund
But for this trual friends your voices.

YORK:
If any lady courses and death will pit it is;
And yet I'll pitiful army: therefore, by a fale,
since his woefully, take it not,
That think changes will appeal me claip or his firm
As is he and an hurters cock,
This triughts traitor is no less will; but no,
Though I am continent again.

JOHN OF GAUNT:
Well, I may not done: this gracious Lord of God, thou hast
A fool thy words a vallad fount beward of man.
Dest I proceed, and for are gold with the nobles,
Sometimes they erw commatuness. Being masters,
I crave forth fear that shrift arms enemy nay, when I have
Ert remomest an arry too.

BRUTUS:
I do court to prison; this world's; and
Shall'st thou reely sent this busicu.

ELBOW:
Fie frumble times against this world.
