# Deep N-grams

In [46]:
import os
import traceback
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

import shutil
import numpy as np
import random as  rnd

import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.layers import Input

from termcolor import colored

import w1_unittest


# set random seed
rnd.seed(32)

# Load data

In [47]:
lines = []
dirname = "data/"
path = "shakespeare_data.txt"

with open(os.path.join(dirname, path)) as file:
    for line in file.readlines():
        line = line.strip()
        if line:
            lines.append(line)

n_lines = len(lines)
print(f'Number of line: {n_lines}')

Number of line: 125097


In [48]:
print('\n'.join(lines[506: 514]))

BENVOLIO	Here were the servants of your adversary,
And yours, close fighting ere I did approach:
I drew to part them: in the instant came
The fiery Tybalt, with his sword prepared,
Which, as he breathed defiance to my ears,
He swung about his head and cut the winds,
Who nothing hurt withal hiss'd him in scorn:
While we were interchanging thrusts and blows,


# Create vocabulary

In [49]:
text = '\n'.join(lines)
vocab = sorted(set(text))
vocab.insert(0, '[UNK]') # Special character for every unknown
vocab.insert(1, "") # Empty character for padding

In [50]:
print(f"Vocab has {len(vocab)} unique characters")
print(f"Unique character: {' '.join(vocab)}")

Vocab has 82 unique characters
Unique character: [UNK]  	 
   ! $ & ' ( ) , - . 0 1 2 3 4 5 6 7 8 9 : ; ? A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ ] a b c d e f g h i j k l m n o p q r s t u v w x y z |


# Convert a line to a tensor

In [51]:
msg = "Hello world!"
chars = tf.strings.unicode_split(msg, input_encoding="UTF-8")
print(chars)

tf.Tensor([b'H' b'e' b'l' b'l' b'o' b' ' b'w' b'o' b'r' b'l' b'd' b'!'], shape=(12,), dtype=string)


In [52]:
ids = tf.keras.layers.StringLookup(vocabulary=vocab, mask_token=None)(chars)

In [53]:
ids

<tf.Tensor: shape=(12,), dtype=int64, numpy=array([34, 59, 66, 66, 69,  4, 77, 69, 72, 66, 58,  5])>

In [54]:
def line_to_tensor(line, vocab):
    """
    Converts a line of text into a tensor of integer values representing characters.

    Args:
        line (str): A single line of text.
        vocab (list): A list containing the vocabulary of unique characters.

    Returns:
        tf.Tensor(dtype=int64): A tensor containing integers (unicode values) corresponding to the characters in the `line`.
    """

    chars = tf.strings.unicode_split(line, 'UTF-8')
    ids = tf.keras.layers.StringLookup(vocabulary=vocab, mask_token=None)(chars)
    return ids

In [55]:
# Test your function
tmp_ids = line_to_tensor('abc xyz', vocab)
print(f"Result: {tmp_ids}")
print(f"Output type: {type(tmp_ids)}")

Result: [55 56 57  4 78 79 80]
Output type: <class 'tensorflow.python.framework.ops.EagerTensor'>


In [56]:
# UNIT TEST
w1_unittest.test_line_to_tensor(line_to_tensor)

[92mAll test passed!


# Convert a tensor of ids to a line

In [57]:
chars_converted = tf.keras.layers.StringLookup(vocabulary=vocab, invert=True)(ids)

In [58]:
chars_converted

<tf.Tensor: shape=(12,), dtype=string, numpy=
array([b'H', b'e', b'l', b'l', b'o', b' ', b'w', b'o', b'r', b'l', b'd',
       b'!'], dtype=object)>

In [59]:
msg_converted = tf.strings.reduce_join(chars_converted)

In [60]:
msg_converted

<tf.Tensor: shape=(), dtype=string, numpy=b'Hello world!'>

In [61]:
def text_from_ids(ids, vocab):
    """
    Converts a tensor of integer values into human-readable text.

    Args:
        ids (tf.Tensor): A tensor containing integer values (unicode IDs).
        vocab (list): A list containing the vocabulary of unique characters.

    Returns:
        str: A string containing the characters in human-readable format.
    """

    chars = tf.keras.layers.StringLookup(vocabulary=vocab, mask_token=None, invert=True) (ids)
    return tf.strings.reduce_join(chars)

In [62]:
text_from_ids(ids, vocab).numpy()

b'Hello world!'

# Prepare data for training and testing

In [63]:
train_lines = lines[:-1000]
test_lines = lines[-1000:]

print(f'Number of training lines: {len(train_lines)}')
print(f'Number of validation lines: {len(test_lines)}')

Number of training lines: 124097
Number of validation lines: 1000


# Create tensorflow dataset

In [64]:
all_ids = line_to_tensor("\n".join(["Hello world!", "Generative AI"]), vocab)
all_ids

<tf.Tensor: shape=(26,), dtype=int64, numpy=
array([34, 59, 66, 66, 69,  4, 77, 69, 72, 66, 58,  5,  3, 33, 59, 68, 59,
       72, 55, 74, 63, 76, 59,  4, 27, 35])>

In [65]:
ids_dataset = tf.data.Dataset.from_tensor_slices(all_ids)
ids_dataset

<_TensorSliceDataset element_spec=TensorSpec(shape=(), dtype=tf.int64, name=None)>

In [66]:
print([text_from_ids(ids, vocab).numpy() for ids in ids_dataset.take(10)])

[b'H', b'e', b'l', b'l', b'o', b' ', b'w', b'o', b'r', b'l']


In [67]:
seq_length = 10
data_generator = ids_dataset.batch(seq_length + 1, drop_remainder=True)

In [68]:
for seq in data_generator.take(2):
    print(text_from_ids(seq, vocab).numpy())

b'Hello world'
b'!\nGenerativ'


# Generate input and output for your model

In [69]:
def split_input_target(sequence):
    """
    Splits the input sequence into two sequences, where one is shifted by one position.

    Args:
        sequence (tf.Tensor or list): A list of characters or a tensor.

    Returns:
        tf.Tensor, tf.Tensor: Two tensors representing the input and output sequences for the model.
    """

    input_text = sequence[:-1]
    target_text = sequence[1:]
    return input_text, target_text

In [70]:
split_input_target(list("Tensorflow"))

(['T', 'e', 'n', 's', 'o', 'r', 'f', 'l', 'o'],
 ['e', 'n', 's', 'o', 'r', 'f', 'l', 'o', 'w'])

# Create batch dataset

In [71]:
def create_batch_dataset(lines, vocab, seq_length=100, batch_size=64):
    """
    Creates a batch dataset from a list of text lines.

    Args:
        lines (list): A list of strings with the input data, one line per row.
        vocab (list): A list containing the vocabulary.
        seq_length (int): The desired length of each sample.
        batch_size (int): The batch size.

    Returns:
        tf.data.Dataset: A batch dataset generator.
    """
    BUFFER_SIZE = 10000
    ids = line_to_tensor('\n'.join(lines), vocab)
    dataset_ids = tf.data.Dataset.from_tensor_slices(ids)

    data_generator = dataset_ids.batch(seq_length+1, drop_remainder=True)

    dataset_xy = data_generator.map(split_input_target)

    dataset = dataset_xy.shuffle(BUFFER_SIZE).batch(batch_size, drop_remainder=True).prefetch(tf.data.experimental.AUTOTUNE)

    return dataset

In [72]:
dataset = create_batch_dataset(train_lines[1:100], vocab, seq_length=16, batch_size=2)

In [73]:
for batch in dataset.take(1):
    input_ids, target_ids = batch
    print(f'{text_from_ids(input_ids, vocab=vocab)}')
    print(f'{text_from_ids(target_ids, vocab=vocab)}')

b's hearing to divs in the brine\nT'
b' hearing to divi in the brine\nTh'


In [74]:
tf.random.set_seed(1)
dataset = create_batch_dataset(train_lines[1:100], vocab, seq_length=16, batch_size=2)

print("Prints the elements into a single batch. The batch contains 2 elements: ")

for input_example, target_example in dataset.take(1):
    print("\n\033[94mInput0\t:", text_from_ids(input_example[0], vocab).numpy())
    print("\n\033[93mTarget0\t:", text_from_ids(target_example[0], vocab).numpy())
    
    print("\n\n\033[94mInput1\t:", text_from_ids(input_example[1], vocab).numpy())
    print("\n\033[93mTarget1\t:", text_from_ids(target_example[1], vocab).numpy())

Prints the elements into a single batch. The batch contains 2 elements: 

[94mInput0	: b'and sight distra'

[93mTarget0	: b'nd sight distrac'


[94mInput1	: b'when in his fair'

[93mTarget1	: b'hen in his fair '


In [75]:
w1_unittest.test_create_batch_dataset(create_batch_dataset)


[92mAll test passed!


In [76]:
BATCH_SIZE = 64
dataset = create_batch_dataset(train_lines, vocab, seq_length=100, batch_size=BATCH_SIZE)

# Defining the Gated Recurrent Unit Langauge Model (GRULM)

In [77]:
class GRULM(tf.keras.models.Model):
    def __init__(self, vocab_size, embedding_dim, rnn_units):
        super().__init__()
        self.embedding = tf.keras.layers.Embedding(input_dim=vocab_size, output_dim=embedding_dim)
        self.gru = tf.keras.layers.GRU(units=rnn_units, return_sequences=True, return_state=True)
        self.dense = tf.keras.layers.Dense(units=vocab_size, activation=None)

    def call(self, inputs, states=None, return_state=False, training=False):
        x = self.embedding(inputs, training=training)
        x, states = self.gru(x, training=training)
        x = self.dense(x, training=training)
        if return_state:
            return x, states
        return x

In [78]:
# Length of the vocabulary in StringLookup Layer
vocab_size = 82

# The embedding dimension
embedding_dim = 256

# RNN layers
rnn_units = 512

model = GRULM(
    vocab_size=vocab_size,
    embedding_dim=embedding_dim,
    rnn_units = rnn_units)

In [79]:
model.call(Input(shape=(100,)))

<KerasTensor shape=(None, 100, 82), dtype=float32, sparse=False, name=keras_tensor_9>

In [80]:
model.summary()

In [180]:
for input_example_batch, target_example_batch in dataset.take(1):
    print("Input: ", input_example_batch[0].numpy()) # Lets use only the first sequence on the batch
    print(f'Input: {text_from_ids(input_example_batch[0], vocab=vocab)}')
    example_batch_predictions = model(tf.constant([input_example_batch[0].numpy()]))
    print("\n",example_batch_predictions.shape, "# (batch_size, sequence_length, vocab_size)")

Input:  [28 27 38 46  2 42 55 74 63 59 68 57 59  4 70 59 72 60 69 72 57 59  4 77
 63 74 62  4 77 63 66 60 75 66  4 57 62 69 66 59 72  4 67 59 59 74 63 68
 61  3 39 55 65 59 73  4 67 79  4 60 66 59 73 62  4 74 72 59 67 56 66 59
  4 63 68  4 74 62 59 63 72  4 58 63 60 60 59 72 59 68 74  4 61 72 59 59
 74 63 68 61]
Input: b'BALT\tPatience perforce with wilful choler meeting\nMakes my flesh tremble in their different greeting'

 (1, 100, 82) # (batch_size, sequence_length, vocab_size)


In [181]:
example_batch_predictions[0][99]

<tf.Tensor: shape=(82,), dtype=float32, numpy=
array([ -9.335007  , -10.044508  ,  -3.799502  ,   7.751006  ,
         7.382581  ,   5.946813  ,  -9.440785  ,  -9.95324   ,
         3.911009  ,  -6.196901  ,  -7.3926563 ,   8.592711  ,
         5.633838  ,   9.332524  , -10.640779  ,  -9.331552  ,
        -8.409723  ,  -8.244709  , -11.856878  ,  -8.053814  ,
        -9.172806  , -11.2245865 ,  -9.17948   , -10.480613  ,
         8.286085  ,   8.092345  ,   6.090642  ,  -7.0127234 ,
        -7.8652663 ,  -8.919567  ,  -9.288979  ,  -7.019684  ,
       -10.476525  ,  -7.589359  ,  -6.1545577 ,  -8.582107  ,
        -6.5651436 , -13.611316  , -11.747117  ,  -9.744756  ,
        -7.3090816 ,  -4.9205675 ,  -8.930671  ,  -8.476044  ,
        -8.429318  ,  -3.5739174 ,  -7.377135  ,  -7.4286294 ,
        -8.729064  ,  -7.7372675 ,  -7.8771424 , -10.645777  ,
        -8.237826  ,  -7.335555  ,   2.9264512 ,   0.79800963,
         2.7181623 ,  -4.9360857 ,  -0.2498487 ,   4.345892  ,
        

In [182]:
sampled_indices = tf.math.argmax(example_batch_predictions[0], axis=1)

In [183]:
sampled_indices

<tf.Tensor: shape=(100,), dtype=int64, numpy=
array([75, 44, 46, 34, 27, 59, 72, 63, 59, 68, 57, 59, 11, 63, 72, 72, 57,
       69, 72, 67, 59, 11, 67, 63, 74, 62,  4, 62, 62, 68, 66, 75, 66,  4,
       73, 69, 55, 69, 59, 72,  4, 69, 55, 55, 74, 73, 68, 61,  4, 46, 55,
       79, 59,  4,  4, 67, 59,  4, 70, 55, 59, 73, 62,  4, 55, 62, 75, 55,
       56, 66, 63,  4, 73, 68,  4, 74, 62, 59,  4, 72,  4, 57, 59, 73, 60,
       59, 72, 59, 68, 57, 13, 73, 72, 69, 59, 68, 63, 68, 61, 73])>

In [184]:
input_text = text_from_ids(input_example_batch[0], vocab)

In [185]:
input_text

<tf.Tensor: shape=(), dtype=string, numpy=b'BALT\tPatience perforce with wilful choler meeting\nMakes my flesh tremble in their different greeting'>

In [186]:
predicted_text = text_from_ids(sampled_indices, vocab)

In [187]:
predicted_text

<tf.Tensor: shape=(), dtype=string, numpy=b'uRTHAerience,irrcorme,mith hhnlul soaoer oaatsng Taye  me paesh ahuabli sn the r cesferenc.sroenings'>

# Training

In [89]:
def compile_model(model):
    """
    Sets the loss and optimizer for the given model

    Args:
        model (tf.keras.Model): The model to compile.

    Returns:
        tf.keras.Model: The compiled model.
    """

    loss = tf.losses.SparseCategoricalCrossentropy(from_logits=True)
    opt = tf.keras.optimizers.Adam(learning_rate=0.00125)
    model.compile(optimizer=opt, loss=loss)
    return model

In [90]:
EPOCHS = 10

# Compile the model
model = compile_model(model)
# Fit the model
history = model.fit(dataset, epochs=EPOCHS)

Epoch 1/10
[1m790/790[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m228s[0m 287ms/step - loss: 2.2889
Epoch 2/10
[1m790/790[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m226s[0m 285ms/step - loss: 1.4984
Epoch 3/10
[1m790/790[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m229s[0m 289ms/step - loss: 1.3947
Epoch 4/10
[1m790/790[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m235s[0m 296ms/step - loss: 1.3484
Epoch 5/10
[1m790/790[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m236s[0m 298ms/step - loss: 1.3212
Epoch 6/10
[1m790/790[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m241s[0m 305ms/step - loss: 1.3010
Epoch 7/10
[1m790/790[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m240s[0m 303ms/step - loss: 1.2851
Epoch 8/10
[1m790/790[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m245s[0m 310ms/step - loss: 1.2726
Epoch 9/10
[1m790/790[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m241s[0m 304ms/step - loss: 1.2621
Epoch 10/10
[1m790/790[0m [32m━━━━━━━━━━━━━━━━━━━━[

In [92]:
model.save_weights('./weights/model.weights.h5')

# Calculate perplexity

In [149]:
def log_perplexity(preds, target):
    """
    Function to calculate the log perplexity of a model.

    Args:
        preds (tf.Tensor): Predictions of a list of batches of tensors corresponding to lines of text.
        target (tf.Tensor): Actual list of batches of tensors corresponding to lines of text.

    Returns:
        float: The log perplexity of the model.
    """
    PADDING_ID = 1
    #print(f'Preds: {preds}')
    print(f'preds.shape[-1]: {preds.shape[-1]}')
    print(f'preds.shape: {preds.shape}') # [1, 41573, 82]
    print(f'one hot shape: {tf.one_hot(target, preds.shape[-1]).shape}')
    print(f'one hot[0][0]: {tf.one_hot(target, preds.shape[-1])[0][0]}')
    print(f'target shape: {target.shape}')
    print(f'target[0][0]: {target[0][0]}')
    print(f'one hot[0][0][31]: {tf.one_hot(target, preds.shape[-1])[0][0][31]}')

    # preds are a tensor of logprobs of shape [1, 41573, 82] which means for each word there exist logprobs in relation to vocab
    # Target has shape [1, 41573] where one item is the id of the target word. To be compatibe and to be able to calculate
    # the sum of the logprobs we need to convert the target to one-hot representation with shape [1, 41573, 82]
    log_p = np.sum(preds * tf.one_hot(target, preds.shape[-1]), axis=-1) # vector multiplication across the last axis gives shape [1, 41573]
    print(f'target.shape: {target.shape}')
    print(f'np.equal(target, PADDING_ID): {np.equal(target, PADDING_ID)}')
    non_pad = 1.0 - np.equal(target, PADDING_ID)
    print(f'non-pad: {non_pad}')
    print(f'non-pad.shape: {non_pad.shape}')
    print(f'log_p.shape: {log_p.shape}')
    log_p = log_p * non_pad

    log_ppx = np.sum(log_p, axis = -1) / np.sum(non_pad, axis=-1)
    log_ppx = np.mean(log_ppx)

    return -log_ppx
    

In [94]:
w1_unittest.test_test_model(log_perplexity)


[92mAll test passed!


  log_ppx = np.sum(log_p, axis = -1) / np.sum(non_pad, axis=-1)


In [106]:
eval_text = '\n'.join(test_lines)
eval_ids = line_to_tensor([eval_text], vocab)
print(eval_ids)
print(tf.squeeze(eval_ids, axis=0))
input_ids, target_ids = split_input_target(tf.squeeze(eval_ids, axis=0))

<tf.RaggedTensor [[53, 31, 78, ..., 68, 74, 54]]>
tf.Tensor([53 31 78 ... 68 74 54], shape=(41574,), dtype=int64)


In [107]:
target_ids

<tf.Tensor: shape=(41573,), dtype=int64, numpy=array([31, 78, 59, ..., 68, 74, 54])>

In [108]:
tf.expand_dims(target_ids, 0) # Seems model takes a row vector thereby needs to expand the first axis

<tf.Tensor: shape=(1, 41573), dtype=int64, numpy=array([[31, 78, 59, ..., 68, 74, 54]])>

In [110]:
preds, status = model(tf.expand_dims(input_ids, 0), training=False, states=None, return_state=True)

In [150]:
#Get the log perplexity
log_ppx = log_perplexity(preds, tf.expand_dims(target_ids, 0))
print(f'The log perplexity and perplexity of your model are {log_ppx} and {np.exp(log_ppx)} respectively')

preds.shape[-1]: 82
preds.shape: (1, 41573, 82)
one hot shape: (1, 41573, 82)
one hot[0][0]: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
target shape: (1, 41573)
target[0][0]: 31
one hot[0][0][31]: 1.0
target.shape: (1, 41573)
np.equal(target, PADDING_ID): [[False False False ... False False False]]
non-pad: [[1. 1. 1. ... 1. 1. 1.]]
non-pad.shape: (1, 41573)
log_p.shape: (1, 41573)
The log perplexity and perplexity of your model are -10.416288321936621 and 2.9940804110460913e-05 respectively


# Generating language with your own model

In [151]:
def temperature_random_sampling(log_probs, temperature=1.0):
    """Temperature Random sampling from a categorical distribution. The higher the temperature, the more 
       random the output. If temperature is close to 0, it means that the model will just return the index
       of the character with the highest input log_score
    
    Args:
        log_probs (tf.Tensor): The log scores for each characeter in the dictionary
        temperature (number): A value to weight the random noise. 
    Returns:
        int: The index of the selected character
    """
   # Generate uniform random numbers with a slight offset to avoid log(0)
    u = tf.random.uniform(minval=1e-6, maxval=1.0 - 1e-6, shape=log_probs.shape)
    
    # Apply the Gumbel distribution transformation for randomness
    g = -tf.math.log(-tf.math.log(u))
    
    # Adjust the logits with the temperature and choose the character with the highest score
    return tf.math.argmax(log_probs + g * temperature, axis=-1)

In [177]:
class GenerativeModel(tf.keras.Model):
    def __init__(self, model, vocab, temperature=1.0):
        """
        A generative model for text generation.

        Args:
            model (tf.keras.Model): The underlying model for text generation.
            vocab (list): A list containing the vocabulary of unique characters.
            temperature (float, optional): A value to control the randomness of text generation. Defaults to 1.0.
        """
        super().__init__()
        self.temperature = temperature
        self.model = model
        self.vocab = vocab
    
    @tf.function
    def generate_one_step(self, inputs, states=None):
        """
        Generate a single character and update the model state.

        Args:
            inputs (string): The input string to start with.
            states (tf.Tensor): The state tensor.

        Returns:
            tf.Tensor, states: The predicted character and the current GRU state.
        """
        # Convert strings to token IDs.
        
        ### START CODE HERE ###

        # Transform the inputs into tensors
        input_ids = line_to_tensor(inputs, self.vocab)
        input_ids = input_ids.to_tensor()
        # Predict the sequence for the given input_ids. Use the states and return_state=True
        predicted_logits, states = self.model(input_ids, states=states, return_state=True)
        tf.print(f'States: {states}')
        # Get only last element of the sequence
        print(f'predicted logits.shape: {predicted_logits.shape}')
        print(f'Predicted logits: {predicted_logits}')
        predicted_logits = predicted_logits[0, -1, :]
        print(f'Predicted logits: {predicted_logits}')
        tf.print("Predicted logits:", predicted_logits)
        # Use the temperature_random_sampling to generate the next character. 
        predicted_ids = temperature_random_sampling(predicted_logits, self.temperature)
        # Use the chars_from_ids to transform the code into the corresponding char
        predicted_chars = text_from_ids([predicted_ids], self.vocab)
        
        ### END CODE HERE ###
        
        # Return the characters and model state.
        return tf.expand_dims(predicted_chars, 0), states
    
    def generate_n_chars(self, num_chars, prefix):
        """
        Generate a text sequence of a specified length, starting with a given prefix.

        Args:
            num_chars (int): The length of the output sequence.
            prefix (string): The prefix of the sequence (also referred to as the seed).

        Returns:
            str: The generated text sequence.
        """
        states = None
        next_char = tf.constant([prefix])
        result = [next_char]
        for n in range(num_chars):
            print(f'Generating one step based on char: {next_char}')
            next_char, states = self.generate_one_step(next_char, states=states)
            result.append(next_char)

        return tf.strings.join(result)[0].numpy().decode('utf-8')

In [178]:
tf.random.set_seed(272)
gen = GenerativeModel(model, vocab, temperature=0.5)

print(gen.generate_n_chars(32, " "), '\n\n' + '_'*80)
print(gen.generate_n_chars(32, "Dear"), '\n\n' + '_'*80)
print(gen.generate_n_chars(32, "KING"), '\n\n' + '_'*80)

Generating one step based on char: [b' ']
predicted logits.shape: (1, None, 82)
Predicted logits: Tensor("grulm_1_1/dense_1_1/Add:0", shape=(1, None, 82), dtype=float32)
Predicted logits: Tensor("strided_slice:0", shape=(82,), dtype=float32)
States: Tensor("grulm_1_1/gru_1_1/while:4", shape=(1, 512), dtype=float32)
Predicted logits: [-9.40867424 -9.60759926 -8.10737514 ... 1.93442428 -4.65694284 -6.42707968]
Generating one step based on char: [b'h']
predicted logits.shape: (1, None, 82)
Predicted logits: Tensor("grulm_1_1/dense_1_1/Add:0", shape=(1, None, 82), dtype=float32)
Predicted logits: Tensor("strided_slice:0", shape=(82,), dtype=float32)
States: Tensor("grulm_1_1/gru_1_1/while:4", shape=(1, 512), dtype=float32)
Predicted logits: [-10.1960106 -10.7844095 2.33636236 ... 5.66943312 -4.81247616 -6.25971889]
Generating one step based on char: [b'e']
States: Tensor("grulm_1_1/gru_1_1/while:4", shape=(1, 512), dtype=float32)
Predicted logits: [-7.46717787 -7.47703123 -1.03507066 ... 2

In [165]:
tf.random.set_seed(np.random.randint(1, 1000))
gen = GenerativeModel(model, vocab, temperature=0.8)
import time
start = time.time()
print(gen.generate_n_chars(1000, "ROMEO "), '\n\n' + '_'*80)
print('\nRun time:', time.time() - start)

predicted logits.shape: (1, None, 82)
predicted logits.shape: (1, None, 82)
ROMEO ELLINGELFise de s beyolaranthindint asepayo r.
BEx an atad.
To inoul miling woout, KI
Whamy omand ntw d,
Herithan AToulon ter ay Yoour old thand.
ANYom, cond woncoul areealkes  sthe my thise astither l wnomeno netond cey che steaiveded, GO	|
LA were ho'str,
Whay, areceth the,
SS	THI baby I mimulingreth im is wanowaf yourlobe inu witar fisce umanthale, setanondoveronthintoowindormyouchithenive pe fllin	MON	|
FFovein	TRO, thtigre nd ERDESIGAne rere,
RCUEDorise m ine wam s thett thather f f
Thange agimathist ndarer, jer tht wind f foulan hand blatheauleves;
BAn nd,
ASAndshie,
Tho I anendobree n bato; th esund'sthe d gen tite;
TERAns ple LESHAngomecelll; angr sind our wimangowis s t in ft gind mono mavatre d dithes dur
D	N	ICEUCKI s ndelanount t tenee,
Busthte's se, t hecinde of t;
Toune ht 'S, h n?
K	He t ye our, ty net s sowalsond g f we, othore,
the onowanayo woncaneais d te cl a hit tuge t aninieyond owha

In [160]:
print(tf.__version__)

2.16.1


In [162]:
import keras

In [163]:
print(keras.__version__)

3.2.1


# Important

The number of `rnn_units` doesnt correspond to how many time-steps or time-step-computation unit. It instead sets the length or the dimension of the hidden state h. For example, if the number of `rnn-units`
 is `512` then the dimension of h is `[1, 512]`