In [1]:
import tensorflow as tf
import numpy as np
import os
import time

In [2]:
path_to_file = "shakespeare.txt"

In [3]:
# Read the corpus inside a context manager so the file handle is closed
# deterministically instead of being left open until garbage collection.
with open(path_to_file, 'rb') as corpus_file:
    # The first 37 characters of the decoded text are dropped.
    # NOTE(review): presumably a header preceding the first sonnet -- confirm
    # against the actual file before changing the offset.
    text = corpus_file.read().decode(encoding='utf-8')[37:]
print(f'length of text: {len(text)} characters')

length of text: 94238 characters


In [4]:
print(text[:250])

From fairest creatures we desire increase,
That thereby beauty's rose might never die,
But as the riper should by time decease,
His tender heir might bear his memory:
But thou contracted to thine own bright eyes,
Feed'st thy light's flame with self-s


In [5]:
vocab = sorted(set(text))
print(f'total vocabulary size: {len(vocab)}')

total vocabulary size: 61


In [6]:
# Create the string lookup layer that maps each character to an integer id.
# The layer's vocabulary is one entry larger than `vocab` (62 vs. 61 in the
# recorded run) because it also contains the out-of-vocabulary "[UNK]" token.
ids_from_chars = tf.keras.layers.StringLookup(vocabulary=list(vocab))

In [7]:
example_text = ['abcde', 'fgh']
chars = tf.strings.unicode_split(example_text, input_encoding='UTF-8')

ids = ids_from_chars(chars)
ids

<tf.RaggedTensor [[36, 37, 38, 39, 40], [41, 42, 43]]>

In [8]:
# Inverse lookup layer: maps integer ids back to characters. It is built from
# the forward layer's own vocabulary so both directions share the same
# id <-> character assignment.
chars_from_ids = tf.keras.layers.StringLookup(vocabulary=ids_from_chars.get_vocabulary(), invert=True)

In [9]:
chars = chars_from_ids(ids)
chars

<tf.RaggedTensor [[b'a', b'b', b'c', b'd', b'e'], [b'f', b'g', b'h']]>

In [10]:
# join the characters
tf.strings.reduce_join(chars, axis=-1).numpy()

array([b'abcde', b'fgh'], dtype=object)

In [11]:
def text_from_ids(ids):
    """Turn a tensor of character ids into joined string(s).

    The ids are mapped back to characters with ``chars_from_ids`` and the
    characters are then concatenated along the last axis.
    """
    characters = chars_from_ids(ids)
    return tf.strings.reduce_join(characters, axis=-1)

In [12]:
all_ids = ids_from_chars(tf.strings.unicode_split(text, 'UTF-8'))
all_ids

<tf.Tensor: shape=(94238,), dtype=int64, numpy=array([18, 53, 50, ..., 17, 26, 16], dtype=int64)>

In [13]:
ids_dataset = tf.data.Dataset.from_tensor_slices(all_ids)

In [14]:
for ids in ids_dataset.take(10):
    print(chars_from_ids(ids).numpy().decode('utf-8'))

F
r
o
m
 
f
a
i
r
e


In [15]:
seq_len = 40

In [16]:
# Chunk the stream of character ids into sequences of seq_len+1 ids; the extra
# id lets each chunk later be split into an input and a target of seq_len ids.
# drop_remainder=True discards a final chunk that would be shorter.
sequences = ids_dataset.batch(seq_len+1, drop_remainder=True)

# Peek at the first chunk as individual characters.
for seq in sequences.take(1):
    print(chars_from_ids(seq))

tf.Tensor(
[b'F' b'r' b'o' b'm' b' ' b'f' b'a' b'i' b'r' b'e' b's' b't' b' ' b'c'
 b'r' b'e' b'a' b't' b'u' b'r' b'e' b's' b' ' b'w' b'e' b' ' b'd' b'e'
 b's' b'i' b'r' b'e' b' ' b'i' b'n' b'c' b'r' b'e' b'a' b's' b'e'], shape=(41,), dtype=string)


In [17]:
for seq in sequences.take(2):
    print(text_from_ids(seq).numpy().decode('utf-8'))

From fairest creatures we desire increase
,
That thereby beauty's rose might never 


In [18]:
# Build an (input, target) pair from a single sequence.
def split_input_label(sequence):
    """Return ``(sequence[:-1], sequence[1:])``.

    The target is the input shifted one position to the left, so element
    ``i`` of the target is the item that follows element ``i`` of the input.
    """
    return sequence[:-1], sequence[1:]

In [19]:
print(split_input_label(list('Shakesphere')))

(['S', 'h', 'a', 'k', 'e', 's', 'p', 'h', 'e', 'r'], ['h', 'a', 'k', 'e', 's', 'p', 'h', 'e', 'r', 'e'])


In [20]:
dataset = sequences.map(split_input_label)

In [21]:
# Show one training pair: the input text and its target, which is the same
# text shifted by one character.
for input_example, target_example in dataset.take(1):
    print(f'Input: {text_from_ids(input_example).numpy()}')
    # This line prints the target; it was previously mislabeled as "Input".
    print(f'Target: {text_from_ids(target_example).numpy()}')

Input: b'From fairest creatures we desire increas'
Input: b'rom fairest creatures we desire increase'


In [22]:
# Number of sequences per training batch.
BATCH_SIZE = 32

# Number of elements held in the shuffle buffer.
BUFFER_SIZE = 10000

# NOTE: this cell rebinds `dataset` to its own batched version, so running it
# a second time without re-running the cell that created `dataset` would
# batch the already-batched data again.
dataset = (
    dataset
    .shuffle(BUFFER_SIZE)
    # drop_remainder=True keeps every batch at exactly BATCH_SIZE sequences.
    .batch(BATCH_SIZE, drop_remainder=True)
    # tf.data.AUTOTUNE replaces the deprecated tf.data.experimental.AUTOTUNE
    # alias; the prefetch depth is still chosen by tf.data at runtime.
    .prefetch(tf.data.AUTOTUNE)
)

dataset

<PrefetchDataset element_spec=(TensorSpec(shape=(32, 40), dtype=tf.int64, name=None), TensorSpec(shape=(32, 40), dtype=tf.int64, name=None))>

In [23]:
# Length of the vocabulary in StringLookup Layer
vocab_size = len(ids_from_chars.get_vocabulary())

# The embedding dimension
embedding_dim = 256

# Number of RNN units
rnn_units = 1024

vocab_size

62

In [24]:
class MyRNNModel(tf.keras.Model):
    """Character-level model: embedding -> SimpleRNN -> dense logits.

    Args:
        vocab_size: Number of distinct token ids; used both as the embedding
            input size and as the number of output logits.
        embedding_dim: Size of each embedding vector.
        rnn_units: Number of units in each SimpleRNN layer.
    """

    def __init__(self, vocab_size, embedding_dim, rnn_units):
        # The original call was ``super().__init__(self)``, which hands the
        # instance to the base constructor a second time as a stray
        # positional argument; the base class expects no such argument.
        super().__init__()
        self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
        # NOTE: rnn1 is constructed but not wired into ``call``; only rnn2
        # takes part in the forward pass. It is kept so the model's
        # attributes stay unchanged.
        self.rnn1 = tf.keras.layers.SimpleRNN(rnn_units, return_sequences=True, return_state=True)
        self.rnn2 = tf.keras.layers.SimpleRNN(rnn_units, return_sequences=True, return_state=True)
        self.dense = tf.keras.layers.Dense(vocab_size)

    def call(self, inputs, states=None, return_state=False, training=False):
        """Run the forward pass.

        Args:
            inputs: Token ids to embed.
            states: Initial recurrent state for the RNN layer, or ``None`` to
                start from the layer's default initial state.
            return_state: When true, also return the final recurrent state.
            training: Forwarded to the embedding and RNN layers.

        Returns:
            The logits, or ``(logits, final_state)`` when ``return_state`` is
            true.
        """
        x = self.embedding(inputs, training=training)
        # Identity check: ``== None`` on a tensor goes through the tensor's
        # overloaded equality operator instead of testing for "not provided".
        if states is None:
            # Ask the layer that actually consumes the state for its default.
            states = self.rnn2.get_initial_state(x)
        x, states = self.rnn2(x, initial_state=states, training=training)
        x = self.dense(x)

        if return_state:
            return x, states
        return x

In [25]:
model = MyRNNModel(
    vocab_size=vocab_size,
    embedding_dim=embedding_dim,
    rnn_units=rnn_units
)

In [26]:
for input_example_batch, target_example_batch in dataset.take(1):
    example_batch_predictions = model(input_example_batch)
    print(example_batch_predictions.shape, "# (batch_size, sequence_length, vocab_size)")

(32, 40, 62) # (batch_size, sequence_length, vocab_size)


In [27]:
model.summary()

Model: "my_rnn_model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       multiple                  15872     
                                                                 
 simple_rnn (SimpleRNN)      multiple                  0 (unused)
                                                                 
 simple_rnn_1 (SimpleRNN)    multiple                  1311744   
                                                                 
 dense (Dense)               multiple                  63550     
                                                                 
Total params: 1,391,166
Trainable params: 1,391,166
Non-trainable params: 0
_________________________________________________________________


In [28]:
# Draw one id per position of the first sequence in the batch by sampling from
# the model's logits (rather than taking the argmax).
sampled_indices = tf.random.categorical(example_batch_predictions[0], num_samples=1)
# Drop the trailing num_samples axis and convert to a NumPy array.
sampled_indices = tf.squeeze(sampled_indices, axis=-1).numpy()

In [29]:
sampled_indices

array([44,  8, 48, 51, 52, 44, 14, 26, 31, 21,  5, 24, 49, 18, 57, 28, 19,
       61, 57,  9, 35, 41, 12, 36, 12, 51, 29, 35, 55, 58, 35, 16, 11, 38,
       41,  1,  4, 58, 24, 59], dtype=int64)

In [30]:
print(f"Input:\n {text_from_ids(input_example_batch[0]).numpy()}")
print()
print(f"Output from untrained model:\n {text_from_ids(sampled_indices).numpy()}")

Input:
 b'its,\nWhen I am sometime absent from thy '

Output from untrained model:
 b"i-mpqiBNTI(LnFvPGzv.Yf?a?pRYtwYD;cf\n'wLx"


In [31]:
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

In [32]:
example_batch_mean_loss = loss(target_example_batch, example_batch_predictions)
print("Prediction shape: ", example_batch_predictions.shape, " # (batch_size, sequence_length, vocab_size)")
print("Mean loss:        ", example_batch_mean_loss)

Prediction shape:  (32, 40, 62)  # (batch_size, sequence_length, vocab_size)
Mean loss:         tf.Tensor(4.1356616, shape=(), dtype=float32)


In [33]:
tf.exp(example_batch_mean_loss).numpy()

62.530945

In [34]:
model.compile(optimizer='adam', loss=loss, metrics=[tf.keras.metrics.SparseCategoricalAccuracy()])

In [35]:
# Directory that receives the weight checkpoints written during training.
checkpoint_directory = './training_checkpoints'

# File name prefix for each checkpoint; the "{epoch}" placeholder is left for
# the callback to fill in.
checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt_{epoch}")

# Save weights only (not the full model) while training runs.
# NOTE(review): an integer save_freq is presumably counted in batches, so
# 10*64 would mean "every 640 batches". With 94238 characters, 41-id chunks
# and BATCH_SIZE = 32 an epoch is 71 batches, not 64 -- confirm the intended
# checkpoint interval.
checkpoint_callbacks = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_prefix,
    save_weights_only=True,
    save_freq=10*64
)

In [36]:
EPOCHS = 30

In [37]:
# Train the RNN model. The epoch count comes from the EPOCHS constant defined
# in the preceding cell instead of a second hard-coded 30, so the two cannot
# drift apart.
history = model.fit(dataset, epochs=EPOCHS, callbacks=[checkpoint_callbacks])

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [38]:
class OneStep(tf.keras.Model):
    """Single-step sampler wrapped around a character model.

    Holds the model and both lookup layers, and exposes
    ``generate_one_step``, which samples one next character per batch entry
    and returns it together with the updated recurrent state.
    """

    def __init__(self, model, chars_from_ids, ids_from_chars, temperature=1.0):
        super().__init__()
        self.temperature = temperature
        self.model = model
        self.chars_from_ids = chars_from_ids
        self.ids_from_chars = ids_from_chars

        # Additive logit mask: -inf at the id of "[UNK]" so that token can
        # never be sampled; the mask has one entry per vocabulary item.
        unk_ids = self.ids_from_chars(['[UNK]'])[:, None]
        vocabulary_size = len(ids_from_chars.get_vocabulary())
        unk_mask = tf.SparseTensor(
            indices=unk_ids,
            values=[-float('inf')] * len(unk_ids),
            dense_shape=[vocabulary_size],
        )
        self.prediction_mask = tf.sparse.to_dense(unk_mask)

    @tf.function
    def generate_one_step(self, inputs, states=None):
        """Sample the next character for each input string.

        Args:
            inputs: Batch of UTF-8 strings to condition on.
            states: Recurrent state from the previous step, or ``None``.

        Returns:
            ``(predicted_chars, states)``: the sampled characters and the
            state returned by the wrapped model.
        """
        # Strings -> characters -> token ids (dense tensor).
        token_ids = self.ids_from_chars(
            tf.strings.unicode_split(inputs, 'UTF-8')).to_tensor()

        # logits.shape is [batch, char, next_char_logits]
        logits, states = self.model(
            inputs=token_ids, states=states, return_state=True)

        # Keep only the last position, apply the temperature, then add the
        # mask that rules out "[UNK]".
        last_logits = logits[:, -1, :]
        last_logits = last_logits / self.temperature
        last_logits = last_logits + self.prediction_mask

        # Sample one id per batch entry and drop the num_samples axis.
        sampled_ids = tf.squeeze(
            tf.random.categorical(last_logits, num_samples=1), axis=-1)

        # Ids -> characters, returned alongside the model state.
        return self.chars_from_ids(sampled_ids), states

In [39]:
one_step_model = OneStep(model, chars_from_ids, ids_from_chars)

In [40]:
# Generate 2000 characters from a seed prompt with the RNN sampler, echoing
# each character as soon as it is sampled, and time the whole run.
start = time.time()
states = None
next_char = tf.constant(['From fairiest creature'])
result = [next_char]

for n in range(2000):
    # Feed the previously sampled character back in, carrying the state along.
    next_char, states = one_step_model.generate_one_step(next_char, states=states)
    result.append(next_char)
    print(next_char[0].numpy().decode('utf-8'), end='')

# Seed prompt plus every sampled character, joined into one string tensor.
result = tf.strings.join(result)
end = time.time()

print()
print("_"*100 + "\n")
print('\nRun time:', end - start)

 hrign.
And you on smy not my best love Andordw:
An who haster soul:  
More of this ?
You in subjust all in witter, your creater,
And beauty slumbosed by the tell.  
Freen's eye is sabred with wearned by black should not so breath, but in pleasure of my friend,
So long as yet love remembered from leed.
Then if desire influenct usbonqunctyed:
No march my bust did call thy show,
Then it lives it a widow'st waspess that before with all,
And more delight,
Though me with dey,
My self I'll bosd fixst intwomn acquaiving age,
Be you will, thy odners reemed,
That I should despite hate freeved less?
The old aptime time lost did stape their scare thee gright be't of dorpost
To lim sigce)
As I lived when in thie character,
Which hate not false I swear,
Lesdows nou.



Saye laytle corpentt,  
Somesing old thy good rich lips.
Howempilich in our oftaris ey
So fell my move,
And that when thou rest confound,
Time did child!
Loobs he vich that living thought have nomen's byet,
The play oy dead?
For thin

In [41]:
class MyLSTMModel(tf.keras.Model):
    """Character-level model: embedding -> LSTM -> dense logits.

    Args:
        vocab_size: Number of distinct token ids; used both as the embedding
            input size and as the number of output logits.
        embedding_dim: Size of each embedding vector.
        lstm_units: Number of units in each LSTM layer.
    """

    def __init__(self, vocab_size, embedding_dim, lstm_units):
        # The original call was ``super().__init__(self)``, which hands the
        # instance to the base constructor a second time as a stray
        # positional argument; the base class expects no such argument.
        super().__init__()
        self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
        # NOTE: lstm1 is constructed but not wired into ``call``; only lstm2
        # takes part in the forward pass. It is kept so the model's
        # attributes stay unchanged.
        self.lstm1 = tf.keras.layers.LSTM(lstm_units, return_sequences=True, return_state=True)
        self.lstm2 = tf.keras.layers.LSTM(lstm_units, return_sequences=True, return_state=True)
        self.dense = tf.keras.layers.Dense(vocab_size)

    def call(self, inputs, c_state=None, states=None, return_state=False, training=False):
        """Run the forward pass.

        Args:
            inputs: Token ids to embed.
            c_state: Initial LSTM cell state, or ``None`` for the default.
            states: Initial LSTM hidden state, or ``None`` for the default.
            return_state: When true, also return the final states.
            training: Forwarded to the embedding and LSTM layers.

        Returns:
            The logits, or ``(logits, [hidden_state, cell_state])`` when
            ``return_state`` is true.
        """
        x = self.embedding(inputs, training=training)
        # Fill in whichever state is missing. The original only tested
        # ``states`` (and with ``== None``), so a call that supplied
        # ``states`` without ``c_state`` passed ``None`` on to the LSTM.
        if states is None or c_state is None:
            default_h, default_c = self.lstm2.get_initial_state(x)
            if states is None:
                states = default_h
            if c_state is None:
                c_state = default_c
        x, states, c_state = self.lstm2(x, initial_state=[states, c_state], training=training)
        x = self.dense(x)

        if return_state:
            return x, [states, c_state]
        return x

In [42]:
model = MyLSTMModel(
    vocab_size=vocab_size,
    embedding_dim=embedding_dim,
    lstm_units=rnn_units
)

In [43]:
model.compile(optimizer='adam', loss=loss, metrics=[tf.keras.metrics.SparseCategoricalAccuracy()])

In [44]:
# Train the LSTM model. `callbacks` is passed as a list, matching the RNN
# training call.
# NOTE(review): this reuses the checkpoint callback created for the RNN run,
# so it writes to the same "./training_checkpoints/ckpt_{epoch}" prefix and
# presumably overwrites that run's files -- confirm this is intended.
model.fit(dataset, epochs=20, callbacks=[checkpoint_callbacks])

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x17c16c9ddf0>

In [45]:
class OneStep(tf.keras.Model):
    """Single-step sampler for a model that carries hidden and cell state.

    Holds the model and both lookup layers, and exposes
    ``generate_one_step``, which samples one next character per batch entry
    and returns it together with the updated ``[states, c_state]`` pair.
    Defining this class rebinds the name ``OneStep`` used earlier in the
    notebook.
    """

    def __init__(self, model, chars_from_ids, ids_from_chars, temperature=1.0):
        super().__init__()
        self.temperature = temperature
        self.model = model
        self.chars_from_ids = chars_from_ids
        self.ids_from_chars = ids_from_chars

        # Additive logit mask: -inf at the id of "[UNK]" so that token can
        # never be sampled; the mask has one entry per vocabulary item.
        unk_ids = self.ids_from_chars(['[UNK]'])[:, None]
        vocabulary_size = len(ids_from_chars.get_vocabulary())
        unk_mask = tf.SparseTensor(
            indices=unk_ids,
            values=[-float('inf')] * len(unk_ids),
            dense_shape=[vocabulary_size],
        )
        self.prediction_mask = tf.sparse.to_dense(unk_mask)

    @tf.function
    def generate_one_step(self, inputs, states=None, c_state=None):
        """Sample the next character for each input string.

        Args:
            inputs: Batch of UTF-8 strings to condition on.
            states: Hidden state from the previous step, or ``None``.
            c_state: Cell state from the previous step, or ``None``.

        Returns:
            ``(predicted_chars, [states, c_state])``: the sampled characters
            and the two states returned by the wrapped model.
        """
        # Strings -> characters -> token ids (dense tensor).
        token_ids = self.ids_from_chars(
            tf.strings.unicode_split(inputs, 'UTF-8')).to_tensor()

        # logits.shape is [batch, char, next_char_logits]
        logits, (states, c_state) = self.model(
            inputs=token_ids, states=states, c_state=c_state, return_state=True)

        # Keep only the last position, apply the temperature, then add the
        # mask that rules out "[UNK]".
        last_logits = logits[:, -1, :]
        last_logits = last_logits / self.temperature
        last_logits = last_logits + self.prediction_mask

        # Sample one id per batch entry and drop the num_samples axis.
        sampled_ids = tf.squeeze(
            tf.random.categorical(last_logits, num_samples=1), axis=-1)

        # Ids -> characters, returned alongside both model states.
        return self.chars_from_ids(sampled_ids), [states, c_state]

In [46]:
one_step_model = OneStep(model, chars_from_ids, ids_from_chars)

In [47]:
# Generate 2000 characters from a seed prompt with the LSTM sampler, echoing
# each character as soon as it is sampled, and time the whole run.
start = time.time()
states, c_state = None, None
next_char = tf.constant(['From the fairiest creatures'])
result = [next_char]

for n in range(2000):
    # Feed the previously sampled character back in, carrying both states along.
    next_char, [states, c_state] = one_step_model.generate_one_step(next_char, states=states, c_state=c_state)
    result.append(next_char)
    print(next_char[0].numpy().decode('utf-8'), end='')

# Seed prompt plus every sampled character, joined into one string tensor.
result = tf.strings.join(result)
end = time.time()

print()
print("_"*100 + "\n")
print('\nRun time:', end - start)

 in hy shadow in his torter,
The spepit title of hy life hid soil this beauty is summer's grace,
And heary I lost, the olates you canny
An bllows all lively paces, I sow, that thou dost wake elsewhere,
Though wauting posy, youth not so much hath a tarve autuse their sprite,
What canOw I swear it out every than time?
For that scire of brane dhymen, and therein subling hand,
Art love in thou wilt befrives to ither-salve to misure me,
And every change agising,
On storn befored, and night:  
Of Though west with own wordand primake image wirds comblexy Creal,
Came thee bast and wrinkles flame should aloue,
That eyes cange I were not know my heaven thems in thy beauty do I beckay.


'Tife thou shalt not boast that id a fam for weter-say,  
Nor hawe and I precious juilt be but time decay,
Of place whereof may nightly but thy die, must exconst,
Lath give an limitious lives in thy bud, to-losied phare,
I must I one then do mine eye is in my sing,
And the frillich first thou haster vistor on the