<a href="https://www.kaggle.com/code/sagorkumarmitra/natural-language-processing?scriptVersionId=145777762" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf

In [None]:
# Location of the Shakespeare corpus inside the Kaggle input directory.
path_to_file = "/kaggle/input/nlp-shakespeare/shakespeare.txt"

In [None]:
# Read the whole corpus into memory as a single string.  The context manager
# closes the file handle as soon as the read finishes, and the explicit
# encoding keeps decoding independent of the platform's default locale.
with open(path_to_file, 'r', encoding='utf-8') as corpus_file:
    text = corpus_file.read()

In [None]:
# Echo the first 500 characters of the corpus as a raw string
# (escape sequences such as newlines stay visible in the cell output).
text[:500]

In [None]:
# Print the same first 500 characters, this time rendered as formatted text.
print(text[:500])

In [None]:
# Print a 5000-character excerpt starting at offset 50000 to see what the
# corpus looks like away from the beginning.
print(text[50000:55000])

In [None]:
# Vocabulary: every distinct character of the corpus, in sorted order so
# that each character gets a stable position (its integer id).
unique_chars = set(text)
vocab = sorted(unique_chars)

In [None]:
# Echo the sorted list of unique characters.
vocab

In [None]:
# Number of distinct characters in the corpus.
len(vocab)

In [None]:
# Show each (index, character) pair to make the id assignment visible.
for index, char in enumerate(vocab):
    print((index, char))

In [None]:
# Forward lookup: character -> integer id (its position in ``vocab``).
char_to_ind = dict(zip(vocab, range(len(vocab))))
# Reverse lookup: integer id -> character.  Stored as a NumPy array so that
# a whole array of ids can be decoded with a single indexing operation.
ind_to_char = np.array(vocab)

In [None]:
# Encode the full corpus: replace every character by its integer id.
char_ids = [char_to_ind[ch] for ch in text]
encoded_text = np.array(char_ids)

In [None]:
# Shape of the encoded corpus; it holds one integer id per character of ``text``.
encoded_text.shape

In [None]:
# Keep the first 500 characters as a small sample for side-by-side comparison
# with their encoded form.
sample=text[:500]

In [None]:
# Print the raw-text sample.
print(sample)

In [None]:
# Print the integer ids of the same first 500 characters.
print(encoded_text[:500])

In [None]:
# A short three-line excerpt whose character count is measured in the next
# cell; presumably used as a guide for picking the training sequence length.
lines='''
From fairest creatures we desire increase,
  That thereby beauty's rose might never die,
  But as the riper should by time decease,
'''

In [None]:
# Character count of the excerpt above (newlines and indentation included).
len(lines)

In [None]:
# Number of characters in each training input sequence.
seq_len=120

In [None]:
# How many complete chunks of (seq_len + 1) characters fit in the corpus.
# The extra character is needed because each chunk is later split into an
# input and a target that are offset from each other by one position.
total_num_seq=len(text) // (seq_len+1)

In [None]:
# Echo the number of full-length chunks available.
total_num_seq

In [None]:
# Wrap the encoded corpus in a tf.data pipeline that yields one character id
# per element.
char_dataset=tf.data.Dataset.from_tensor_slices(encoded_text)

In [None]:
# Group consecutive character ids into chunks of seq_len + 1 elements;
# drop_remainder discards the final chunk if it is shorter than that.
sequences=char_dataset.batch(seq_len+1,drop_remainder=True)

In [None]:
def create_seq_targets(seq):
    """Split one chunk into an (input, target) pair shifted by one position.

    The input is every element except the last; the target is every element
    except the first, so ``target[i]`` is the element that follows
    ``input[i]`` in ``seq``.

    Args:
        seq: Any sliceable sequence (tensor, array, list, string).

    Returns:
        Tuple ``(seq[:-1], seq[1:])``.
    """
    return seq[:-1], seq[1:]

In [None]:
# Turn every (seq_len + 1)-long chunk into an (input, target) pair.
dataset=sequences.map(create_seq_targets)

In [None]:
# Sanity check: take a single (input, target) pair and show both the raw ids
# and the decoded characters, so the one-step shift between them is visible.
for input_txt, target_txt in dataset.take(1):
    input_ids = input_txt.numpy()
    target_ids = target_txt.numpy()

    print(input_ids)
    print("".join(ind_to_char[input_ids]))
    print('\n')
    print(target_ids)
    print("".join(ind_to_char[target_ids]))

In [None]:
# Number of (input, target) pairs per training batch.
batch_size = 128
# Size of the buffer from which tf.data draws elements when shuffling.
buffer_size = 10000

# Shuffle the pairs, then group them into batches.  Incomplete final batches
# are dropped so every batch has exactly ``batch_size`` examples.
dataset = dataset.shuffle(buffer_size)
dataset = dataset.batch(batch_size, drop_remainder=True)

In [None]:
# Model hyperparameters.
# Size of the vector each character id is embedded into.
embed_dim=64
# Number of units in the GRU layer.
# NOTE(review): 1026 is an unusual value; 1024 may have been intended — confirm.
rnn_neurons=1026
# One output score per distinct character in the corpus.
vocab_size=len(vocab)

In [None]:
from tensorflow.keras.losses import sparse_categorical_crossentropy

In [None]:
def sparse_cat_loss(y_true, y_pred):
    """Sparse categorical cross-entropy computed on raw logits.

    Thin wrapper around Keras' ``sparse_categorical_crossentropy`` that fixes
    ``from_logits=True``, i.e. ``y_pred`` is treated as unnormalised scores
    rather than probabilities.

    Args:
        y_true: Integer class labels.
        y_pred: Predicted logits, one score per class.

    Returns:
        Whatever ``sparse_categorical_crossentropy`` returns for these inputs.
    """
    loss = sparse_categorical_crossentropy(y_true, y_pred, from_logits=True)
    return loss

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding,GRU,Dense

In [None]:
def create_model(vocab_size, embed_dim, rnn_neurons, batch_size):
    """Build and compile the character-level model.

    The network is Embedding -> stateful GRU -> Dense, where the Dense layer
    emits one raw score per vocabulary entry for every time step.

    Args:
        vocab_size: Number of distinct character ids; used as the embedding
            input size and as the number of output scores.
        embed_dim: Length of each embedding vector.
        rnn_neurons: Number of GRU units.
        batch_size: Fixed batch size baked into the input shape (the GRU is
            created with ``stateful=True``); the sequence length is left
            unspecified.

    Returns:
        The Sequential model, compiled with the Adam optimizer and
        ``sparse_cat_loss``.
    """
    layers = [
        Embedding(vocab_size, embed_dim, batch_input_shape=[batch_size, None]),
        GRU(
            rnn_neurons,
            return_sequences=True,
            stateful=True,
            recurrent_initializer='glorot_uniform',
        ),
        Dense(vocab_size),
    ]
    network = Sequential(layers)
    network.compile(optimizer='adam', loss=sparse_cat_loss)
    return network

In [None]:
# Instantiate the training model.  Its input shape is fixed to the training
# batch size because create_model passes batch_size into batch_input_shape.
model=create_model(vocab_size=vocab_size,
                  embed_dim=embed_dim,
                  rnn_neurons=rnn_neurons,
                  batch_size=batch_size)

In [None]:
# Print the layer-by-layer summary of the model.
model.summary()

In [None]:
# Train for a fixed number of passes over the shuffled, batched dataset.
epochs=20
model.fit(dataset,epochs=epochs)

In [None]:
def generate_text(model, start_seed, gen_size=500, temp=1.0):
    """Generate text one character at a time, starting from a seed string.

    The seed is encoded with ``char_to_ind`` and fed to ``model`` as a batch
    containing a single example.  At every step the scores for the last
    position are divided by ``temp`` and the next character id is sampled
    from the resulting categorical distribution; that id then becomes the
    input for the following step.

    Args:
        model: Model that maps a batch of id sequences to per-position
            scores and exposes ``reset_states()``.  Because the input here
            has batch size 1, a model built with a fixed batch size must
            have been built with batch size 1.
        start_seed: Non-empty string used to prime the model.  Every
            character must be a key of ``char_to_ind``.
        gen_size: Number of characters to generate.
        temp: Sampling temperature, must be > 0.  Values below 1 sharpen
            the distribution (more predictable text); values above 1
            flatten it (more surprising text).

    Returns:
        ``start_seed`` followed by the ``gen_size`` generated characters.

    Raises:
        ValueError: If ``start_seed`` is empty or ``temp`` is not positive.
        KeyError: If ``start_seed`` contains a character that is not in
            ``char_to_ind``.
    """
    # Fail early with a clear message instead of letting an empty input or
    # a division by zero surface as an obscure error inside TensorFlow.
    if not start_seed:
        raise ValueError("start_seed must contain at least one character")
    if temp <= 0:
        raise ValueError("temp must be a positive number")

    # Vectorize the seed text and add a leading batch dimension of size 1.
    input_eval = tf.expand_dims([char_to_ind[s] for s in start_seed], 0)

    # Characters produced so far.
    text_generated = []

    # Clear any recurrent state left over from training or earlier calls.
    model.reset_states()

    for _ in range(gen_size):
        # Scores for every position of the current input.
        predictions = model(input_eval)

        # Remove the batch dimension.
        predictions = tf.squeeze(predictions, 0)

        # Scale by the temperature, then sample from the categorical
        # distribution; only the sample for the last position is kept.
        predictions = predictions / temp
        predicted_id = tf.random.categorical(predictions, num_samples=1)[-1, 0].numpy()

        # The sampled character is the sole input of the next step; the
        # model's recurrent state carries the earlier context.
        input_eval = tf.expand_dims([predicted_id], 0)

        # Translate the id back to its character.
        text_generated.append(ind_to_char[predicted_id])

    return start_seed + "".join(text_generated)