<a href="https://colab.research.google.com/github/saied-ahemd/TextGeneration/blob/main/NLPTextGenrator.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from keras.preprocessing import sequence
import keras
import tensorflow as tf
import os
import numpy as np

In [None]:
# Load the data for the text generator.
# get_file downloads the Shakespeare data set (or reuses a cached copy) and returns its local path.
path_to_file = tf.keras.utils.get_file('shakespeare.txt', 'https://storage.googleapis.com/download.tensorflow.org'
                                                          '/data/shakespeare.txt')
# Read the file as bytes and decode it as UTF-8; the with-block closes the file
# handle as soon as reading is done instead of leaving it open.
with open(path_to_file, 'rb') as shakespeare_file:
    text = shakespeare_file.read().decode(encoding='utf-8')
# Now let's encode our text.
# Iterating over a string yields characters, so this is the sorted list of unique characters.
vocb = sorted(set(text))
# Creating a mapping from unique characters to indices
char2index = {u: i for i, u in enumerate(vocb)}
# Array form of the vocabulary, so that an index maps back to its character.
index2char = np.array(vocb)


# now let's create functions that convert text to integer indices and back

def text_to_int(txt):
    """Encode text as a NumPy array of vocabulary indices.

    Every character of ``txt`` is looked up in the module-level ``char2index``
    mapping; a character that is not a key of that mapping raises ``KeyError``.
    """
    indices = [char2index[character] for character in txt]
    return np.array(indices)


def int_to_text(integer):
    """Decode a sequence of vocabulary indices back into a string.

    Args:
        integer: Iterable of integer indices into the module-level
            ``index2char`` array.

    Returns:
        The characters selected by those indices, concatenated in order
        (an empty string for an empty input).
    """
    # str.join builds the result in one pass instead of growing a string
    # with repeated ``+=`` inside a loop.
    return "".join(index2char[num] for num in integer)


# Encode the whole corpus as an array of integer character indices.
text_as_int = text_to_int(text)
# Now let's create some training examples.
seq_length = 100  # length of the input sequence for a training example
# Number of (seq_length + 1)-character chunks that fit in the text.
examples_per_epoch = len(text) // (seq_length + 1)

# Create training examples / targets: a dataset built from the individual character indices.
char_dataset = tf.data.Dataset.from_tensor_slices(text_as_int)
# Next we use the batch method to turn this stream of characters into chunks of
# seq_length + 1 characters (one extra so each chunk can later be split into an input
# and a target shifted by one). drop_remainder=True discards a shorter final chunk.
sequences = char_dataset.batch(seq_length + 1, drop_remainder=True)


# Now we need to use these sequences of length 101 and split them into input and output.


def split_input_target(chunk):
    """Split one chunk into an (input, target) pair offset by one position.

    The input is the chunk without its last element and the target is the
    chunk without its first element, e.g. "hello" -> ("hell", "ello").
    """
    return chunk[:-1], chunk[1:]


dataset = sequences.map(split_input_target)  # map applies the function above to every chunk
# Finally we need to make training batches.
BATCH_SIZE = 64  # number of sequences per training batch
VOCAB_SIZE = len(vocb)  # number of unique characters in the text
EMBEDDING_DIM = 256  # passed to build_model as the embedding dimension
RNN_UNITS = 1024  # passed to build_model as the number of GRU units
# Size of the buffer that Dataset.shuffle uses when shuffling the examples.
BUFFER_SIZE = 10000
# Shuffle the examples, then group them into batches of BATCH_SIZE;
# drop_remainder=True discards a final batch with fewer than BATCH_SIZE examples.
data = dataset.shuffle(BUFFER_SIZE).batch(BATCH_SIZE, drop_remainder=True)


# now let's build our model


def build_model(vocab_len, embedding_dim, rnn_units, batch_size):
    """Assemble the character-level model: Embedding -> GRU -> Dense.

    Args:
        vocab_len: Size of the vocabulary; used as the embedding input
            dimension and as the number of output units of the Dense layer.
        embedding_dim: Dimension of the embedding vectors.
        rnn_units: Number of units in the GRU layer.
        batch_size: Fixed batch dimension of the model input (the GRU is
            stateful); the sequence dimension is left as ``None``.

    Returns:
        An uncompiled ``tf.keras.Sequential`` model. The Dense layer has no
        activation, so the model outputs one value per vocabulary entry for
        every position of the input sequence.
    """
    embedding = tf.keras.layers.Embedding(
        vocab_len,
        embedding_dim,
        batch_input_shape=[batch_size, None],
    )
    recurrent = tf.keras.layers.GRU(
        rnn_units,
        return_sequences=True,
        stateful=True,
        recurrent_initializer='glorot_uniform',
    )
    projection = tf.keras.layers.Dense(vocab_len)
    return tf.keras.Sequential([embedding, recurrent, projection])


# Build the training model; BATCH_SIZE is used as the fixed batch dimension of its input.
model = build_model(VOCAB_SIZE, EMBEDDING_DIM, RNN_UNITS, BATCH_SIZE)


# creating the loss function


def loss(labels, logits):
    """Sparse categorical cross-entropy between integer labels and logits.

    ``from_logits=True`` tells Keras that ``logits`` are unnormalized scores
    rather than probabilities.
    """
    crossentropy = tf.keras.losses.sparse_categorical_crossentropy
    return crossentropy(labels, logits, from_logits=True)


# Compile the model with the Adam optimizer and the loss function defined above.
model.compile(optimizer='adam', loss=loss)
# Directory where the checkpoints will be saved
checkpoint_dir = './training_checkpoints'
# Name of the checkpoint files; "{epoch}" is a placeholder that ModelCheckpoint
# fills in with the epoch number.
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}")

# Now we set up and configure our model to save checkpoints as it trains.
# This will allow us to load our model from a checkpoint and continue training it.
# save_weights_only=True stores only the model's weights, not the full model.
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_prefix,
    save_weights_only=True)

# Now let's train the model for 40 epochs; the value returned by fit is kept in `his`.
his = model.fit(data, epochs=40, callbacks=[checkpoint_callback])


## Rebuild the model


In [None]:
# Loading the model:
# we rebuild the model with a batch_size of 1 (the trained weights are loaded from a
# checkpoint afterwards) so that we can feed one piece of text to the model and have
# it make a prediction.
model = build_model(VOCAB_SIZE, EMBEDDING_DIM, RNN_UNITS, batch_size=1)

Once the model has finished training, we can find the latest checkpoint, which stores the model's weights, using the following line.

In [None]:
# Restore the weights from the most recent checkpoint found in checkpoint_dir.
model.load_weights(tf.train.latest_checkpoint(checkpoint_dir))
# Build the model for inputs with a batch size of 1 and an unspecified sequence length.
model.build(tf.TensorShape([1, None]))

# Generate text
Now we will write a function to generate our text.


In [None]:
def genrate_text(model, start_string, num_generate=800, temperature=1.0):
    """Generate text with the trained model, one character at a time.

    Args:
        model: Stateful character-level model built with a batch size of 1.
        start_string: Non-empty seed text; every character must be a key of
            the module-level ``char2index`` mapping.
        num_generate: Number of characters to generate after the seed.
        temperature: Positive factor the logits are divided by before
            sampling. Low temperatures result in more predictable text,
            higher temperatures in more surprising text.

    Returns:
        ``start_string`` followed by the generated characters.

    Raises:
        ValueError: If ``start_string`` is empty or ``temperature`` is not
            positive.
        KeyError: If ``start_string`` contains a character that is not in
            ``char2index``.
    """
    # Fail early with a clear message: an empty seed gives the model nothing
    # to predict from, and a non-positive temperature breaks the logit scaling.
    if not start_string:
        raise ValueError("start_string must contain at least one character")
    if temperature <= 0:
        raise ValueError("temperature must be positive")

    # Converting our start string to numbers (vectorizing) and adding a batch dimension
    input_eval = tf.expand_dims([char2index[s] for s in start_string], 0)

    # Generated characters are collected here and joined once at the end
    text_generated = []

    # Here batch size == 1; clear any hidden state left over from earlier calls
    model.reset_states()
    for _ in range(num_generate):
        predictions = model(input_eval)
        # remove the batch dimension
        predictions = tf.squeeze(predictions, 0)
        # using a categorical distribution to predict the character returned by the model;
        # only the sample for the last position of the input is kept
        predictions = predictions / temperature
        predicted_id = tf.random.categorical(predictions, num_samples=1)[-1, 0].numpy()
        # We pass the predicted character as the next input to the model
        # along with the previous hidden state
        input_eval = tf.expand_dims([predicted_id], 0)

        text_generated.append(index2char[predicted_id])
    return start_string + ''.join(text_generated)

In [None]:
# Ask the user for a seed string and print the text the model generates from it.
inp = input("Type a starting string: ")
print(genrate_text(model, inp))

Type a starting string: ROMEO
ROMEO:
No, God have one son, so vile thee! at thou canst not
Hath satisfy the blamertiest war Netth thee by me what best.
I cannot tempt him.

First Murderer:
What stay yet the deputy with the deer? what a peril to him;
his noble counsel and conspirators:
Lo, have you not beat for barking
Than a trade that ever
I was born so fair a thousand, ho! Alas!
If they do sway with those that have worn all your sins:
I hear 'twixt him for a sermonation.

ISABELLA:
We never hear us any thing.

JULIET:
Ay, like a prophecy, Gremio?

JULIET:
What birds green like to him, as now her son
Without your own last living, their own good will,
Is this their marks on Warwick?

SICINIUS:
All but closely: now for the eagle-stick.

QUEEN ELIZABETH:
Thus hum to fear?

FLORIZEL:
Now, good sweet nurse,--

STANLEY:
Why, bro
