In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf

In [3]:
# Read the whole corpus into memory as a single string.
# The encoding is given explicitly so the decoded characters do not depend
# on the platform's default locale encoding.
path_to_file = 'shakespeare.txt'
with open(path_to_file, 'r', encoding='utf-8') as f:
    text = f.read()
    

In [4]:
# Distinct characters found in the corpus, placed in sorted order so the
# resulting list is the same on every run.
vocab = list(set(text))
vocab.sort()

In [5]:
# Forward lookup: character -> its position in `vocab`.
char_to_ind = dict(zip(vocab, range(len(vocab))))
# Reverse lookup: indexing this array with a position gives the character back.
ind_to_char = np.array(vocab)
# The whole corpus as an array of integer positions, one per character.
encoded_text = np.array(list(map(char_to_ind.__getitem__, text)))
encoded_text

array([ 0,  1,  1, ..., 30, 39, 29])

In [6]:
# Number of characters in each input sequence.
seq_len = 120
# Number of complete chunks of (seq_len + 1) characters that fit in the corpus
# (integer division, so any trailing partial chunk is not counted).
total_num_seq = len(text) // (seq_len + 1)

In [7]:
# Creating training sequences:
# first a dataset that yields the encoded corpus one element at a time ...
char_dataset = tf.data.Dataset.from_tensor_slices(encoded_text)
# ... then group consecutive elements into chunks of seq_len + 1, dropping a
# final chunk that would be shorter than that.
sequences = char_dataset.batch(seq_len + 1, drop_remainder=True)

def create_seq_targets(seq):
    """Split one chunk into an (input, target) pair offset by one position.

    The input is every element except the last; the target is every element
    except the first, so target[i] is the element that follows input[i].

    Args:
        seq: Any sliceable sequence.

    Returns:
        A tuple ``(seq[:-1], seq[1:])``.
    """
    return seq[:-1], seq[1:]



In [8]:
# Turn every chunk in `sequences` into an (input, target) pair.
dataset = sequences.map(create_seq_targets)

In [9]:
batch_size = 128
buffer_size = 10000
# Build the pipeline from `sequences` instead of from `dataset` itself, so that
# re-running this cell does not shuffle and batch an already-batched dataset.
dataset = (
    sequences
    .map(create_seq_targets)
    .shuffle(buffer_size=buffer_size)
    .batch(batch_size, drop_remainder=True)
)


In [10]:
# Number of distinct characters in the vocabulary.
vocab_size = len(vocab)
# Size of the embedding vector passed to create_model as `embed_dim`.
embed_dim = 64
# Number of units in the recurrent layer.
# NOTE(review): 1026 looks like it could be a typo for 1024, but the weights
# loaded from 'shakespeare_gen.h5' later in this notebook presumably match this
# value - confirm before changing it.
rnn_neurons = 1026

In [11]:
from tensorflow.keras.models import Sequential 
from tensorflow.keras.layers import LSTM, Dense, Embedding, Dropout, GRU 
from tensorflow.keras.losses import sparse_categorical_crossentropy

In [12]:
def sparse_cat_loss(y_true,y_pred):
    """Sparse categorical cross-entropy with `y_pred` treated as logits.

    Thin wrapper around `sparse_categorical_crossentropy` that fixes
    `from_logits=True`, so it can be passed to `model.compile` as a
    two-argument loss.
    """
    loss = sparse_categorical_crossentropy(y_true, y_pred, from_logits=True)
    return loss
  
def create_model(vocab_size, embed_dim, rnn_neurons, batch_size):
    """Build and compile the character-level model.

    The stack is Embedding -> GRU -> Dense, compiled with the Adam optimizer
    and `sparse_cat_loss`.

    Args:
        vocab_size: Passed to the Embedding layer as its input dimension and
            to the final Dense layer as its number of units.
        embed_dim: Output dimension of the Embedding layer.
        rnn_neurons: Number of units in the GRU layer.
        batch_size: Fixed batch dimension of the model input; the second
            input dimension is left as None.

    Returns:
        The compiled Sequential model.
    """
    layers = [
        Embedding(vocab_size, embed_dim, batch_input_shape=[batch_size, None]),
        # The GRU is stateful and returns its output at every position of
        # the input, not only the last one.
        GRU(
            rnn_neurons,
            return_sequences=True,
            stateful=True,
            recurrent_initializer='glorot_uniform',
        ),
        # Final Dense layer to predict: one output per vocabulary entry, with
        # no activation specified.
        Dense(vocab_size),
    ]
    model = Sequential(layers)
    model.compile(optimizer='adam', loss=sparse_cat_loss)
    return model

In [13]:
# Model instance whose fixed batch dimension is the training `batch_size`.
model = create_model(vocab_size, embed_dim, rnn_neurons, batch_size)

In [14]:
# Print the layer-by-layer summary (output shapes and parameter counts).
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (128, None, 64)           5376      
_________________________________________________________________
gru (GRU)                    (128, None, 1026)         3361176   
_________________________________________________________________
dense (Dense)                (128, None, 84)           86268     
Total params: 3,452,820
Trainable params: 3,452,820
Non-trainable params: 0
_________________________________________________________________


In [15]:
from tensorflow.keras.models import load_model
# Recreate the same architecture with a batch dimension of 1 and restore the
# saved weights into it. Note that this rebinds `model`, replacing the
# instance created earlier with the larger batch size.
model = create_model(
    vocab_size=vocab_size,
    embed_dim=embed_dim,
    rnn_neurons=rnn_neurons,
    batch_size=1,
)
model.load_weights('shakespeare_gen.h5')
model.build(tf.TensorShape([1, None]))



In [16]:
def generate_text(model, start_seed, gen_size = 100, temp = 1.0):
    """Generate text one character at a time, starting from a seed string.

    The seed is encoded with `char_to_ind` and fed to the model as a batch of
    one sequence. At every step the model output for the last position is
    divided by `temp` and used as the logits for sampling the next character
    id; that id becomes the sole input of the following step.

    Args:
        model: Callable model exposing `reset_states()`. It is called on a
            tensor with a leading batch axis of size 1.
        start_seed: String to start from. Every character must be a key of
            `char_to_ind`.
        gen_size: Number of characters to generate.
        temp: Divisor applied to the model output before sampling. Values
            below 1 sharpen the sampling distribution, values above 1
            flatten it.

    Returns:
        `start_seed` followed by the `gen_size` generated characters.

    Raises:
        ValueError: If `temp` is not positive, or if `start_seed` is empty
            while `gen_size` is greater than zero.
        KeyError: If `start_seed` contains a character missing from
            `char_to_ind`.
    """
    if temp <= 0:
        # Dividing by zero or a negative number would corrupt or invert the
        # sampling distribution.
        raise ValueError("temp must be a positive number")
    if gen_size > 0 and len(start_seed) == 0:
        # With no seed there is no last position to sample from.
        raise ValueError("start_seed must contain at least one character")

    # Encode the seed and add a leading batch axis of size 1.
    input_eval = tf.expand_dims([char_to_ind[s] for s in start_seed], 0)

    text_generated = []

    # Clear any recurrent state left over from earlier calls.
    model.reset_states()

    for _ in range(gen_size):
        # Call the model exactly once per step. A second call with the same
        # input would advance the recurrent state a second time before the
        # next character is sampled.
        predictions = model(input_eval)

        # Drop the batch axis, leaving one row of scores per input position.
        predictions = tf.squeeze(predictions, 0)

        predictions = predictions / temp

        # Sample one id per row and keep the sample for the last row only.
        predicted_id = tf.random.categorical(predictions, num_samples=1)[-1, 0].numpy()

        # The sampled character is the only input of the next step.
        input_eval = tf.expand_dims([predicted_id], 0)

        text_generated.append(ind_to_char[predicted_id])

    return start_seed + ''.join(text_generated)
