In [1]:
import tensorflow as tf
import numpy as np
from tensorflow import keras

In [2]:
# Read the three Hamlet text files (UTF-8) into memory, one string per file.
hamlet_texts = []
for hamlet_path in ('hamlet_1.txt', 'hamlet_2.txt', 'hamlet_3.txt'):
    with open(hamlet_path, encoding='utf-8') as f:
        hamlet_texts.append(f.read())

hamlet_1_text, hamlet_2_text, hamlet_3_text = hamlet_texts

In [3]:
# Character-level tokenizer: every distinct character found in the three
# texts receives its own integer ID.
tokenizer = tf.keras.preprocessing.text.Tokenizer(char_level=True)
tokenizer.fit_on_texts([hamlet_1_text, hamlet_2_text, hamlet_3_text])

In [4]:
# Number of entries in the fitted tokenizer's vocabulary (distinct characters,
# since the tokenizer works at character level). Used below as the one-hot
# depth and as the size of the model's output layer.
max_id = len(tokenizer.word_index)
print(max_id)

42


In [5]:
# Encode every text as an array of character IDs and shift the IDs down by
# one so they run from 0 to max_id - 1.
hamlet_1_encoded, hamlet_2_encoded, hamlet_3_encoded = (
    np.asarray(char_ids) - 1
    for char_ids in tokenizer.texts_to_sequences(
        [hamlet_1_text, hamlet_2_text, hamlet_3_text]
    )
)

In [6]:
# Round-trip sanity check: turn the zero-based IDs back into text.
# Tokenizer.sequences_to_texts joins tokens with a single space even in
# character-level mode, which would insert a space between every character,
# so each ID is looked up in index_word and the characters are concatenated
# directly. The `+ 1` undoes the shift applied when encoding.
# NOTE(review): the tokenizer lower-cases by default, so the result is
# presumably the lower-cased text rather than the exact original -- confirm.
hamlet_1_decoded = ''.join(
    tokenizer.index_word[int(char_id)] for char_id in hamlet_1_encoded + 1
)

In [7]:
# One tf.data dataset per text, each element being a single character ID.
hamlet_1_dataset, hamlet_2_dataset, hamlet_3_dataset = (
    tf.data.Dataset.from_tensor_slices(encoded_text)
    for encoded_text in (hamlet_1_encoded, hamlet_2_encoded, hamlet_3_encoded)
)

In [8]:
# T is the number of characters in each input sequence; a window holds one
# extra character so that the targets can be the inputs shifted by one step.
T = 100
window_length = 1 + T

In [9]:
# Slide a window of window_length characters over each text, advancing one
# character at a time; incomplete trailing windows are dropped.
hamlet_1_dataset, hamlet_2_dataset, hamlet_3_dataset = (
    char_dataset.window(size=window_length, shift=1, drop_remainder=True)
    for char_dataset in (hamlet_1_dataset, hamlet_2_dataset, hamlet_3_dataset)
)

In [10]:
# Each window is itself a small dataset; batching it with window_length and
# flattening turns every window into a single tensor element.
hamlet_1_dataset, hamlet_2_dataset, hamlet_3_dataset = (
    windowed.flat_map(lambda window: window.batch(window_length))
    for windowed in (hamlet_1_dataset, hamlet_2_dataset, hamlet_3_dataset)
)

In [11]:
# Chain the windows of the three texts into one dataset. Windowing was done
# per text, so no window spans the boundary between two files.
hamlet_dataset = (
    hamlet_1_dataset
    .concatenate(hamlet_2_dataset)
    .concatenate(hamlet_3_dataset)
)

In [12]:
# Fix TensorFlow's global seed, then repeat the windows indefinitely, shuffle
# them through a 10000-element buffer and group them into full batches only.
tf.random.set_seed(0)
batch_size = 32
hamlet_dataset = (
    hamlet_dataset
    .repeat()
    .shuffle(buffer_size=10000)
    .batch(batch_size, drop_remainder=True)
)

In [13]:
# Split every window into inputs (all characters but the last) and targets
# (all characters but the first), i.e. the same sequence shifted one step
# ahead. Open-ended slices are used instead of the literals 100/101 so the
# split keeps following window_length if T is changed.
hamlet_dataset = hamlet_dataset.map(
    lambda window_batch: (window_batch[:, :-1], window_batch[:, 1:])
)

In [14]:
# One-hot encode the input character IDs (depth max_id); the targets stay as
# integer IDs.
hamlet_dataset = hamlet_dataset.map(
    lambda inputs, targets: (tf.one_hot(inputs, depth=max_id), targets)
)

In [15]:
# Keep one batch prepared ahead of the consumer.
hamlet_dataset = hamlet_dataset.prefetch(buffer_size=1)

In [16]:
# Each text of length n yields n - T windows (window_length = T + 1, shift 1,
# remainder dropped), so the three texts give (total length - 3 * T) windows;
# dividing by batch_size and truncating gives the batches per pass.
steps_per_epoch = int(
    (
        sum(
            len(encoded_text)
            for encoded_text in (hamlet_1_encoded, hamlet_2_encoded, hamlet_3_encoded)
        )
        - 3 * T
    )
    / batch_size
)

In [17]:
# Two stacked GRU layers that return the full sequence, followed by a softmax
# over the max_id characters applied at every time step. Targets are integer
# IDs, hence the sparse categorical cross-entropy loss.
model = keras.models.Sequential([
    keras.layers.GRU(128, return_sequences=True, input_shape=[None, max_id]),
    keras.layers.GRU(128, return_sequences=True),
    keras.layers.TimeDistributed(
        keras.layers.Dense(max_id, activation="softmax")
    ),
])
model.compile(loss="sparse_categorical_crossentropy", optimizer="adam")

In [18]:
# Load a saved model from disk; this rebinds `model`, replacing the model
# object built and compiled in the previous cell.
model = tf.keras.models.load_model(
    'hamlet_model.h5'
)

In [19]:
def preprocess(texts):
    """One-hot encode texts with the notebook's character tokenizer.

    Args:
        texts: the texts to encode, handed unchanged to
            ``tokenizer.texts_to_sequences``.

    Returns:
        A tensor produced by ``tf.one_hot`` with depth ``max_id`` over the
        character IDs shifted down by one (so they start at 0).
    """
    char_ids = tokenizer.texts_to_sequences(texts)
    zero_based_ids = np.array(char_ids) - 1
    return tf.one_hot(zero_based_ids, max_id)

In [20]:
def next_char(text, temperature=1):
    """Sample the character that follows ``text`` from the model's prediction.

    Args:
        text: the string to use as context for the prediction.
        temperature: divisor applied to the log-probabilities before
            sampling; values below 1 make the choice more conservative,
            values above 1 more random.

    Returns:
        A string containing the single sampled character.
    """
    # Wrap the string in a list so the tokenizer encodes it as ONE sequence.
    # Passing the bare string makes the tokenizer iterate over it character
    # by character, producing len(text) independent one-step sequences, so
    # the model would predict from the last character alone with no context.
    X_new = preprocess([text])
    # Keep only the distribution predicted at the last time step of that
    # single sequence; the `-1:` slice keeps a leading axis of size 1, giving
    # the 2-D input tf.random.categorical requires.
    # NOTE(review): assumes the loaded model outputs one distribution per
    # time step, like the architecture defined in this notebook -- confirm.
    y_proba = model.predict(X_new)[0, -1:, :]
    rescaled_logits = tf.math.log(y_proba) / temperature
    # `+ 1` maps the zero-based class index back to the tokenizer's ID.
    char_id = tf.random.categorical(rescaled_logits, num_samples=1) + 1
    return tokenizer.sequences_to_texts(char_id.numpy())[0]

In [21]:
def complete_text(text, n_chars=50, temperature=1):
    """Extend ``text`` one sampled character at a time.

    Args:
        text: the starting string.
        n_chars: how many characters to append.
        temperature: forwarded to ``next_char`` on every step.

    Returns:
        ``text`` followed by the ``n_chars`` sampled characters; each step is
        given everything generated so far.
    """
    completed = text
    for _step in range(n_chars):
        sampled = next_char(completed, temperature)
        completed = completed + sampled
    return completed

In [22]:
# Generate 1000 further characters, seeded with the full first Hamlet text.
complete_text(
    hamlet_1_text,
    n_chars=1000,
    temperature=1,
)















