In [None]:
!pip install tensorflow
!pip install numpy

In [None]:
import tensorflow as tf
from tensorflow.keras.layers.experimental import preprocessing 
import numpy as np 
import os 
import time

In [None]:
import ssl

# SECURITY: this swaps the default HTTPS context factory for the unverified
# one, so HTTPS requests made through the default context in this process
# (including the dataset download) skip certificate verification.
# NOTE(review): presumably a workaround for a local certificate-store problem —
# confirm it is still needed and prefer fixing the certificates instead.
ssl._create_default_https_context = ssl._create_unverified_context

In [None]:
# Fetch the Shakespeare corpus through the Keras download helper and keep the
# local path it returns.
path_to_file = tf.keras.utils.get_file(
    fname='shakespeare.txt',
    origin='https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt',
)

In [None]:
print(path_to_file)
# Read the raw bytes inside a context manager so the file handle is closed
# as soon as the corpus has been loaded, then decode them as UTF-8.
with open(path_to_file, 'rb') as corpus_file:
    text = corpus_file.read().decode(encoding = 'utf-8')
print(f'Length of text: {len(text)} characters')

In [None]:
# Preview the first 250 characters of the corpus.
print(text[:250])

In [None]:
# Vocabulary: every distinct character that occurs in the corpus, sorted.
vocab = sorted(set(text))
print(f'{len(vocab)} unique characters')

In [None]:
# Toy input used to demonstrate the lookup layers: each string is split into
# its individual UTF-8 characters.
example_texts = ['abcdefg', 'xyz'] # 'abcdefg' -> ['a', 'b', 'c', 'd' ...]
chars = tf.strings.unicode_split(example_texts, input_encoding = 'UTF-8')
chars

In [None]:
# Lookup layer that maps each character to an integer id, built from the
# corpus vocabulary.
ids_froms_chars = preprocessing.StringLookup(vocabulary=list(vocab))

In [None]:
# Encode the example characters as integer ids.
ids = ids_froms_chars(chars)

# Inverse lookup (id -> character). It is built from the forward layer's own
# vocabulary so the two mappings stay aligned.
chars_from_ids = preprocessing.StringLookup(
    vocabulary=ids_froms_chars.get_vocabulary(),
    invert=True,
)

In [None]:
# Map the example ids back to characters; note that this rebinds `chars`.
chars = chars_from_ids(ids)
chars

In [None]:
# Join the characters along the last axis to get whole strings back.
tf.strings.reduce_join(chars, axis =-1).numpy()

In [None]:
def text_from_ids(ids):
    """Decode token ids back into text.

    Args:
        ids: Token ids accepted by the notebook-level `chars_from_ids` layer.

    Returns:
        The looked-up characters joined along the last axis, converted with
        `.numpy()`.
    """
    # Look up the characters for the ids that were actually passed in; the
    # previous version ignored `ids` and joined the notebook-level `chars`.
    return tf.strings.reduce_join(chars_from_ids(ids), axis=-1).numpy()

In [None]:
#Training and testing
all_ids = ids_froms_chars(tf.strings.unicode_split(text, 'UTF-8'))
all_ids

In [None]:
# Build the tf.data dataset used for training from the encoded corpus.
ids_dataset = tf.data.Dataset.from_tensor_slices(all_ids)

In [None]:
# Show the first ten elements of the dataset as characters. The loop variable
# is deliberately not called `ids`, so the notebook-level `ids` tensor from the
# lookup example is not overwritten when this cell runs.
for char_id in ids_dataset.take(10):
    print(chars_from_ids(char_id).numpy().decode('utf-8'))

In [None]:
# Each chunk holds seq_length + 1 ids: the extra one is needed because the
# input and the target are the same chunk shifted by one position.
seq_length = 100 
examples_per_epoch = len(text) // (seq_length + 1)

examples_per_epoch

In [None]:
# Group the character ids into chunks of seq_length + 1, dropping the final
# partial chunk, then show the first chunk as characters.
sequence = ids_dataset.batch(seq_length + 1, drop_remainder = True)
for seq in sequence.take(1):
    print(chars_from_ids(seq))

In [None]:
# Decode the first five chunks back to text.
for seq in sequence.take(5):
    print(text_from_ids(seq))

In [None]:
def split_input_target(sequence):
    """Split a sequence into an (input, target) pair offset by one position.

    The input is everything except the last element and the target is
    everything except the first, so target[i] is the element that follows
    input[i] in the original sequence.
    """
    return sequence[:-1], sequence[1:]

# Quick demonstration on a plain string.
split_input_target("Tensorflow")

In [None]:
# Turn every chunk into an (input, target) training pair.
dataset = sequence.map(split_input_target)

In [None]:
# Inspect one training pair: the target is the input shifted by one position.
for input_example, target_example in dataset.take(1):
    print(text_from_ids(input_example.numpy()))
    print(text_from_ids(target_example.numpy()))

In [None]:
BATCH_SIZE = 64
BUFFER_SIZE = 10000

# Build the training pipeline from `sequence` rather than from `dataset`
# itself, so re-running this cell does not shuffle and batch a dataset that
# has already been batched.
dataset = (
    sequence
    .map(split_input_target)
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE, drop_remainder = True)
    .prefetch(tf.data.experimental.AUTOTUNE)
)

dataset

In [None]:
# Model hyperparameters.
# NOTE(review): train_model sizes the model from ids_froms_chars.get_vocabulary()
# rather than from vocab_size — confirm whether vocab_size is still needed.
vocab_size = len(vocab)

embedding_dim = 256
rnn_units = 1024

In [None]:
class MyModel(tf.keras.Model):
    """Character-level language model: Embedding -> GRU -> Dense.

    The Dense layer has `vocab_size` units and no activation, so the model
    outputs one unnormalised score per vocabulary entry for every position.
    """

    def __init__(self, vocab_size, embedding_dim, rnn_units):
        """Create the three layers.

        Args:
            vocab_size: Input dimension of the Embedding layer and number of
                units of the output Dense layer.
            embedding_dim: Output dimension of the Embedding layer.
            rnn_units: Number of units of the GRU layer.
        """
        # The zero-argument super() call already binds the instance; passing
        # `self` again would hand it to Keras as a stray positional argument.
        super().__init__()
        self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
        self.gru = tf.keras.layers.GRU(rnn_units,
                                       return_sequences=True,
                                       return_state=True)
        self.dense = tf.keras.layers.Dense(vocab_size)

    def call(self, inputs, states=None, return_state=False, training=False):
        """Run the model on a batch of inputs.

        Args:
            inputs: Values fed to the Embedding layer.
            states: Initial GRU state. When None, the state is obtained from
                `self.gru.get_initial_state`.
            return_state: When True, also return the GRU state after the run.
            training: Forwarded to every layer.

        Returns:
            The Dense layer output, or a `(output, states)` tuple when
            `return_state` is True.
        """
        x = self.embedding(inputs, training=training)
        if states is None:
            # NOTE(review): the argument expected by get_initial_state may
            # differ between Keras releases — confirm for the installed one.
            states = self.gru.get_initial_state(x)
        x, states = self.gru(x, initial_state=states, training=training)
        x = self.dense(x, training=training)

        if return_state:
            return x, states
        return x

In [None]:
def train_model(EPOCHS = 20):
    """Build, compile and fit a MyModel on the notebook-level `dataset`.

    The model is sized from the vocabulary of `ids_froms_chars` and from the
    notebook-level `embedding_dim` and `rnn_units`. Weights are checkpointed
    under './training_checkpoints' through a ModelCheckpoint callback.

    Args:
        EPOCHS: Number of epochs passed to `model.fit`.

    Returns:
        The fitted model.
    """
    lookup_vocabulary = ids_froms_chars.get_vocabulary()
    model = MyModel(
        vocab_size = len(lookup_vocabulary),
        embedding_dim = embedding_dim,
        rnn_units = rnn_units,
    )

    # The model outputs raw scores, hence from_logits = True.
    model.compile(
        optimizer = 'adam',
        loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits = True),
    )

    # Weights-only checkpoints; "{epoch}" is part of the file name template.
    save_weights_callback = tf.keras.callbacks.ModelCheckpoint(
        filepath = os.path.join('./training_checkpoints', "ckpt_{epoch}"),
        save_weights_only = True,
    )

    model.fit(dataset, epochs = EPOCHS, callbacks = [save_weights_callback])
    return model

model = train_model(40)


In [None]:
class OneStep(tf.keras.Model):
  """Wraps a trained model to sample one next character per call."""

  def __init__(self, model, chars_from_ids, ids_from_chars, temperature=1.0):
    """Store the model and lookup layers and build the prediction mask.

    Args:
      model: Model called as `model(inputs=..., states=..., return_state=True)`
        and returning `(logits, states)`.
      chars_from_ids: Layer mapping token ids to characters.
      ids_from_chars: Layer mapping characters to token ids.
      temperature: Divisor applied to the logits before sampling.
    """
    super().__init__()
    self.temperature = temperature
    self.model = model
    self.chars_from_ids = chars_from_ids
    self.ids_from_chars = ids_from_chars

    # Create a mask to prevent "" or "[UNK]" from being generated.
    skip_ids = self.ids_from_chars(['', '[UNK]'])
    # The two tokens can resolve to the same id; the sparse mask needs every
    # index once and in ascending order, so de-duplicate and sort first.
    unique_skip_ids, _ = tf.unique(skip_ids)
    skip_ids = tf.sort(unique_skip_ids)[:, None]
    sparse_mask = tf.SparseTensor(
        # Put a -inf at each bad index.
        values=[-float('inf')]*len(skip_ids),
        indices=skip_ids,
        # Match the shape to the vocabulary
        dense_shape=[len(ids_from_chars.get_vocabulary())])
    self.prediction_mask = tf.sparse.to_dense(sparse_mask)

  @tf.function
  def generate_one_step(self, inputs, states=None):
    """Sample the next character for each input string.

    Args:
      inputs: Strings to continue.
      states: Model state from the previous step, or None on the first step.

    Returns:
      A `(predicted_chars, states)` tuple: the sampled characters and the
      model state to pass to the next call.
    """
    # Convert strings to token IDs.
    input_chars = tf.strings.unicode_split(inputs, 'UTF-8')
    input_ids = self.ids_from_chars(input_chars).to_tensor()

    # Run the model.
    # predicted_logits.shape is [batch, char, next_char_logits]
    predicted_logits, states = self.model(inputs=input_ids, states=states,
                                          return_state=True)
    # Only use the last prediction.
    predicted_logits = predicted_logits[:, -1, :]
    predicted_logits = predicted_logits/self.temperature
    # Apply the prediction mask: prevent "" or "[UNK]" from being generated.
    predicted_logits = predicted_logits + self.prediction_mask

    # Sample the output logits to generate token IDs.
    predicted_ids = tf.random.categorical(predicted_logits, num_samples=1)
    predicted_ids = tf.squeeze(predicted_ids, axis=-1)

    # Convert from token ids to characters
    predicted_chars = self.chars_from_ids(predicted_ids)

    # Return the characters and model state.
    return predicted_chars, states

In [None]:
# Wrap the trained model together with both lookup layers for sampling.
one_step_model = OneStep(model, chars_from_ids, ids_froms_chars)


In [None]:
def generate_n_lines(lines = 1000, starter_text = 'ROMEO: ', one_step_model = None):
    """Generate text by repeatedly sampling from a one-step model.

    Args:
        lines: Number of sampling steps to run. Each step appends the output
            of a single `generate_one_step` call, so despite its name this
            counts steps rather than lines of text.
        starter_text: Prompt that seeds the generation; it is also the start
            of the returned text.
        one_step_model: Object exposing
            `generate_one_step(inputs, states=...)` that returns a
            `(next_chars, states)` tuple. Required.

    Returns:
        The starter text followed by everything that was generated, as a str.

    Raises:
        ValueError: If `one_step_model` is None.
    """
    if one_step_model is None:
        raise ValueError('There was no model passed. Please pass a model using the one_step_model parameter')

    start = time.time()
    states = None
    next_char = tf.constant([starter_text])
    result = [next_char]

    for _ in range(lines):
        next_char, states = one_step_model.generate_one_step(next_char, states = states)
        result.append(next_char)

    # Joining the pieces element-wise yields a batch holding one string.
    joined = tf.strings.join(result)
    end = time.time()

    print(f'Time: {end - start}')
    # Take the single batch entry directly instead of joining a second time.
    return joined[0].numpy().decode("utf-8")

# The generator has no default model, so the one-step model must be passed
# explicitly; omitting it raises ValueError.
text1 = generate_n_lines(lines = 500, one_step_model = one_step_model)
print(text1)


In [14]:
#Reload the model
# Export the generator first when no saved copy exists, so this cell also works
# after Restart & Run All instead of depending on a directory left behind by an
# earlier session.
# NOTE(review): a SavedModel presumably only exposes generate_one_step for the
# argument signatures it has already been called with — confirm it has been
# run (with and without states) before this export.
if not os.path.exists('one-step2'):
    tf.saved_model.save(one_step_model, 'one-step2')
one_step_reloaded = tf.saved_model.load('one-step2')
text1 = generate_n_lines(lines = 1000, starter_text = 'MERCUTIO: ', one_step_model = one_step_reloaded)
print(text1)

Time: 2.340980052947998
MERCUTIO: Well, I would
thou wert a god o' the middle; and beat your life
Be so for a maid.

MARCIUS:
Nay, then he sings with wolves to do you
That I must die to-morrow.

LUCIO:

ISABELLA:

DUREE:
Plead them not.

Capervet me: Even to the Tower!

WARWICK:
This must be needle, each one foul wrong,
And those whose deeds ere lancetted people on the nost
With cloth of all scorns of her own rouble try.

DUKE OF AUMERLE:
Then, England's house, sir, change it first sees again,
Like pleasant travels or two successment,
Give him dead midi's limit.

CAMILLO:
Nay, but it is too to?

Second Murderer:
A black way to our forcenting tent;
While we bethink a needle rest in that beggar.
Or, for us thus can lensteners that I had;
Thou strokedst musters presuring treaths
from my browh, thou art a traitor and a rap
Hate the wars. O that hath thrust the old
friendloved and the friar, the smiles of Came hath made
With heavier acond him with the regal crowning?
Thou stood to me; my st