In [2]:
import tensorflow as tf
import numpy as np
import os

In [3]:
# Tuned hyperparameters per corpus (checkpoint tag in parentheses):
#   SP      (Ott): EPOCH=200  BATCH_SIZE=64  rnn_units=682   seq_length=80
#   Harry   (Hpp): EPOCH=100  BATCH_SIZE=64  rnn_units=1024  seq_length=50
#   Macbeth (Mcb): EPOCH=150  BATCH_SIZE=64  rnn_units=654   seq_length=80
# The active values below are the "Harry" settings.
EPOCH, BATCH_SIZE = 100, 64
rnn_units, seq_length = 1024, 50

# Vocabulary lookup tables; rebound as globals by perprocess / getsaved_model.
char2idx = dict()   # character -> integer id
idx2char = list()   # integer id -> character

In [4]:
def split_input_target(chunk):
    """Pair a sequence with a copy of itself shifted by one position.

    Args:
        chunk: any sliceable sequence.

    Returns:
        A tuple ``(input_text, target_text)`` where ``input_text`` is
        ``chunk`` without its last element and ``target_text`` is ``chunk``
        without its first element, so ``target_text[i]`` is the element that
        follows ``input_text[i]`` in ``chunk``.
    """
    return chunk[:-1], chunk[1:]

In [5]:
def create_dataset(text_as_int):
    """Build the shuffled, batched (input, target) dataset from encoded text.

    Reads the module-level ``seq_length``, ``BATCH_SIZE`` and ``idx2char``.

    Args:
        text_as_int: the corpus as a sequence of integer character ids.

    Returns:
        A ``tf.data.Dataset`` of ``(input, target)`` batches. Incomplete
        windows and incomplete batches are dropped.
    """
    shuffle_buffer = 10000

    # Cut the flat id stream into windows of seq_length + 1 ids; the extra id
    # is what lets each window be split into an input and a shifted target.
    windows = tf.data.Dataset.from_tensor_slices(text_as_int).batch(
        seq_length + 1, drop_remainder=True)
    pairs = windows.map(split_input_target)

    # Show the first two pairs decoded back to characters.
    for source_ids, target_ids in pairs.take(2):
        print ('Input data: ', repr(''.join(idx2char[source_ids.numpy()])))
        print ('Target data:', repr(''.join(idx2char[target_ids.numpy()])))

    # Shuffle the pairs, then group them into fixed-size batches.
    shuffled = pairs.shuffle(shuffle_buffer)
    return shuffled.batch(BATCH_SIZE, drop_remainder=True)

In [6]:
def perprocess(vocab,text):
    """Encode ``text`` as integer ids and turn it into a training dataset.

    Side effect: rebinds the module-level ``char2idx`` and ``idx2char``
    lookup tables to match ``vocab``.

    Args:
        vocab: the characters of the vocabulary, in the order that defines
            their integer ids.
        text: the corpus; every character must be present in ``vocab``.

    Returns:
        The dataset produced by ``create_dataset`` for the encoded text.
    """
    global char2idx, idx2char

    # Forward (character -> id) and reverse (id -> character) lookup tables.
    char2idx = {symbol: position for position, symbol in enumerate(vocab)}
    idx2char = np.array(vocab)

    # Replace each character of the corpus with its id.
    encoded = np.array(list(map(char2idx.__getitem__, text)))
    return create_dataset(encoded)

In [11]:
def build_model(vocab_size, embedding_dim, rnn_units, batch_size):
  """Assemble the character-level model: Embedding -> SimpleRNN -> Dense.

  Args:
    vocab_size: number of distinct characters; used as the embedding input
      size and as the width of the final Dense layer.
    embedding_dim: size of each character embedding vector.
    rnn_units: number of units in the SimpleRNN layer.
    batch_size: fixed batch size baked into the input shape (the RNN is
      stateful); the sequence length is left unspecified.

  Returns:
    An uncompiled ``tf.keras.Sequential`` model. The Dense layer has no
    activation, so its outputs are logits.
  """
  embedding = tf.keras.layers.Embedding(
      vocab_size, embedding_dim, batch_input_shape=[batch_size, None])
  recurrent = tf.keras.layers.SimpleRNN(
      rnn_units,
      return_sequences=True,
      stateful=True,
      recurrent_initializer='glorot_uniform')
  projection = tf.keras.layers.Dense(vocab_size)
  return tf.keras.Sequential([embedding, recurrent, projection])

In [12]:
def loss(labels, logits):
  """Sparse categorical cross-entropy between integer labels and raw logits.

  ``from_logits=True`` because the model's last layer applies no softmax.
  """
  crossentropy = tf.keras.losses.sparse_categorical_crossentropy
  return crossentropy(labels, logits, from_logits=True)

In [13]:
def evaluate_perplexity(history,epoch):
    """Return the training perplexity recorded for a given epoch.

    Perplexity is the exponential of the cross-entropy loss.

    Args:
        history: object whose ``history['loss']`` holds one loss per epoch
            (as returned by ``model.fit``).
        epoch: 1-based epoch number to evaluate.

    Returns:
        ``tf.exp`` of that epoch's training loss.
    """
    per_epoch_loss = history.history['loss']
    return tf.exp(per_epoch_loss[epoch - 1])

In [14]:
def train_model(model,dataset,checkpoint_dir):
    """Fit ``model`` on ``dataset`` for ``EPOCH`` epochs, checkpointing weights.

    Args:
        model: a compiled Keras model.
        dataset: the training dataset passed to ``model.fit``.
        checkpoint_dir: directory that receives the ``ckpt_{epoch}`` weight
            checkpoints.

    Returns:
        The history object returned by ``model.fit``.
    """
    # "{epoch}" is left as a placeholder for the checkpoint callback to fill in.
    weights_path = os.path.join(checkpoint_dir, "ckpt_{epoch}")
    save_weights = tf.keras.callbacks.ModelCheckpoint(
        filepath=weights_path, save_weights_only=True)
    return model.fit(dataset, epochs=EPOCH, callbacks=[save_weights])

In [15]:
def create_LM(corpus,checkpoint_dir):
    """Train a character-level language model on a text corpus.

    Reads the corpus, builds the vocabulary and dataset (which rebinds the
    global ``char2idx`` / ``idx2char`` via ``perprocess``), trains a new model
    with the module-level ``rnn_units``, ``BATCH_SIZE`` and ``EPOCH``
    settings, and prints the training perplexity of the final epoch.

    Args:
        corpus: path to a UTF-8 encoded text file.
        checkpoint_dir: directory in which weight checkpoints are written.

    Returns:
        The trained Keras model.
    """
    # Use a context manager so the corpus file is closed deterministically.
    with open(corpus, 'rb') as corpus_file:
        text = corpus_file.read().decode(encoding='utf-8')

    # Sorted so that character ids are stable between runs on the same corpus.
    vocab = sorted(set(text))
    dataset = perprocess(vocab, text=text)

    model = build_model(vocab_size=len(vocab), embedding_dim=256,
                        rnn_units=rnn_units, batch_size=BATCH_SIZE)
    model.compile(optimizer='adam', loss=loss)

    history = train_model(model, dataset, checkpoint_dir)
    print(evaluate_perplexity(history, EPOCH))
    return model

In [21]:
def getsaved_model(corpus,checkpoint_dir):
    """Rebuild a trained model for text generation from its latest checkpoint.

    The vocabulary is re-derived from ``corpus`` and the global ``char2idx`` /
    ``idx2char`` tables are rebound to it, so ``generate_text`` afterwards
    encodes and decodes with this corpus's vocabulary. The model is built
    with ``batch_size=1``.

    Args:
        corpus: path to the UTF-8 text file the model was trained on. The
            exact strings ``'Macbeth.txt'`` and ``'Othello.txt'`` select 654
            and 682 RNN units respectively; anything else uses 1024.
        checkpoint_dir: directory containing the saved weight checkpoints.

    Returns:
        A Keras model with the checkpointed weights loaded.

    Raises:
        FileNotFoundError: if ``checkpoint_dir`` contains no checkpoint.
    """
    global char2idx
    global idx2char

    # Use a context manager so the corpus file is closed deterministically.
    with open(corpus, 'rb') as corpus_file:
        text = corpus_file.read().decode(encoding='utf-8')
    vocab = sorted(set(text))
    vocab_size = len(vocab)

    # latest_checkpoint returns None when nothing is found; fail early with a
    # clear message and before touching the global vocabulary tables.
    latest = tf.train.latest_checkpoint(checkpoint_dir)
    if latest is None:
        raise FileNotFoundError(
            "No checkpoint found in directory: {!r}".format(checkpoint_dir))

    char2idx = {u:i for i, u in enumerate(vocab)}
    idx2char = np.array(vocab)

    # The layer width must match the width the checkpoint was trained with.
    units_by_corpus = {'Macbeth.txt': 654, 'Othello.txt': 682}
    rnn_units = units_by_corpus.get(corpus, 1024)

    model = build_model(vocab_size, embedding_dim=256,rnn_units=rnn_units, batch_size=1)
    model.load_weights(latest)
    model.build(tf.TensorShape([1, None]))
    return model

In [17]:
def generate_text(model, start_string,num_generate, temperature=1.0):
  """Sample ``num_generate`` characters from ``model`` after ``start_string``.

  Uses the global ``char2idx`` / ``idx2char`` tables, so they must belong to
  the corpus ``model`` was trained on.

  Args:
    model: a model built with ``batch_size=1`` that maps character ids to
      per-character logits.
    start_string: non-empty seed text; every character must be a key of
      ``char2idx``.
    num_generate: number of characters to generate.
    temperature: positive divisor applied to the logits before sampling.
      The default of 1.0 leaves the logits unchanged; lower values make the
      output more predictable, higher values more surprising.

  Returns:
    ``start_string`` followed by the generated characters.

  Raises:
    ValueError: if ``start_string`` is empty or ``temperature`` is not
      positive.
    KeyError: if ``start_string`` contains a character missing from
      ``char2idx``.
  """
  if not start_string:
    # An empty seed gives the model a zero-length sequence to sample from.
    raise ValueError("start_string must contain at least one character")
  if temperature <= 0:
    raise ValueError("temperature must be positive")

  # Converting our start string to numbers (vectorizing), with a batch axis
  input_eval = [char2idx[s] for s in start_string]
  input_eval = tf.expand_dims(input_eval, 0)

  text_generated = []
  model.reset_states()
  for _ in range(num_generate):
    predictions = model(input_eval)
    # remove the batch dimension
    predictions = tf.squeeze(predictions, 0)
    predictions = predictions / temperature
    # sample from the categorical distribution; keep the draw for the last
    # position of the sequence
    predicted_id = tf.random.categorical(predictions, num_samples=1)[-1,0].numpy()
    # We pass the predicted character as the next input to the model;
    # the stateful RNN carries the previous hidden state
    input_eval = tf.expand_dims([predicted_id], 0)
    text_generated.append(idx2char[predicted_id])
  return (start_string + ''.join(text_generated))

In [34]:
#train the model
# Trains a new model on Othello.txt for EPOCH epochs and writes weight
# checkpoints named ckpt_{epoch} under ./trainingnew_checkpoints.
# Side effect: create_LM (via perprocess) rebinds the global char2idx /
# idx2char tables to the Othello vocabulary.
model=create_LM('Othello.txt',checkpoint_dir ='./trainingnew_checkpoints')

Input data:  'ACT I\r\nSCENE I. Venice. A street.\r\nEnter RODERIGO '
Target data: 'CT I\r\nSCENE I. Venice. A street.\r\nEnter RODERIGO a'
Input data:  'nd IAGO\r\nRODERIGO\r\nTush! never tell me; I take it '
Target data: 'd IAGO\r\nRODERIGO\r\nTush! never tell me; I take it m'
Train for 47 steps
Epoch 1/100
 1/47 [..............................] - ETA: 2:49

KeyboardInterrupt: 

In [28]:
#get the saved model which is already trained
# NOTE: getsaved_model rebinds the global char2idx / idx2char tables to the
# corpus it is given, and generate_text reads those globals. Only the model
# loaded most recently can therefore be used with generate_text; uncomment
# (and run) the line matching the generation cell you are about to execute.
#model1=getsaved_model('Macbeth.txt',checkpoint_dir ='./trainingMcb_checkpoints')
#model2=getsaved_model('Othello.txt',checkpoint_dir ='./trainingOtt_checkpoints')
model3=getsaved_model('Harry1.txt',checkpoint_dir ='./trainingHpp_checkpoints')

In [25]:
#generate text
# Requires model1 to be the model most recently returned by getsaved_model,
# because generate_text uses the global vocabulary tables that call sets.
generated = generate_text(model1,start_string="MACDUFF",num_generate=996)
# Context managers close (and flush) the file before it is read back; the
# explicit encoding matches the UTF-8 decoding used for the corpus.
with open("outputM.txt", "w", encoding="utf-8") as out_file:
    print(generated,file=out_file)
with open('outputM.txt', 'r', encoding="utf-8") as in_file:
    candidate = in_file.read()
print(candidate)

MACDUFF

Those that thubthy of them daunted death bood

That distooy done. I would thou must le's issuetimated with you; and with thoural your hands, put on your naturess,

Ag on the frownter straigot palithte every of the bleed.

LADY MACBETH

Who was your rauld me se, I may become a man;

The which no sooner fortune

S, go the three weirs so,

The pattled me they? Enter Do to the sweeter health, forech.

MALCOLM

Merchally reasen

I shof a haves like the offure

We lath the yed a preply refort

That twhell things. Gomes reth brow, med; the gun vime weel smalth'd the purp'd: why to you were thee an onemor;

And beth bock again.--Pretthen ever'd the lise.

Fits the the look broands.

MACDUFF

Bleer velove on upprewices to in.

MACDUFF

Come, grawn sted my light;

And he heard the unglusitions deep;

Who cluss of yathy

Double cart

That I requited him fuller's propar;

For a fear proy precauce the miding, on his dasters



Ant never and great craid do, and keen

Hoursed bade to bear th

In [27]:
#generate text
# Requires model2 to be the model most recently returned by getsaved_model,
# because generate_text uses the global vocabulary tables that call sets.
generated = generate_text(model2,start_string="LODOVICO",num_generate=720)
# Context managers close (and flush) the file before it is read back; the
# explicit encoding matches the UTF-8 decoding used for the corpus.
with open("outputO.txt", "w", encoding="utf-8") as out_file:
    print(generated,file=out_file)
with open('outputO.txt', 'r', encoding="utf-8") as in_file:
    candidate = in_file.read()
print(candidate)

LODOVICO

I pray you, bring is speed. Great Jime this injured in Cassio were we his weep;

And she's d seastit, and bois, Iago;

And on the practes be such so inmily,

Being not stare; or our mind, I think the sun ragoration make very fortu acaze,

But I'll se's your fear well.

Exeunt OTHELLO and Attendants



OTHELLO

A well i' the nart.

Lies the way! Moor an it; but one cup upon'd,

Must but think that he is honest.

IAGO

O, have's enk, corraise These seen his against any lack,

Will recover me?

IAGO

The hand, bet they are ore on me do might undo yecrive Montty and fear I see this;

Whose what I ams I lie.

DESDEMONA

[Singing] I cannot epre Cassio, I

won, he's all.

IAGO

I will not castaight sume,

The isteru



In [29]:
#generate text
# Requires model3 to be the model most recently returned by getsaved_model,
# because generate_text uses the global vocabulary tables that call sets.
generated = generate_text(model3,start_string="it",num_generate=1906)
# Context managers close (and flush) the file before it is read back; the
# explicit encoding matches the UTF-8 decoding used for the corpus (this text
# contains non-ASCII quotation marks).
with open("outputH.txt", "w", encoding="utf-8") as out_file:
    print(generated,file=out_file)
with open('outputH.txt', 'r', encoding="utf-8") as in_file:
    candidate = in_file.read()
print(candidate)

it’re cut. “how you gave his heart would be sorted it out onto the feeling suddenly. the house. he said sever half past year the dursleys had put hishopes with the weasley tooked hogwarts teacher he was talking noises but the snail and by the note his mother and threwhere’s just have been accoper on harry’s face whispered “and faster than began to lifeast — i get me ” find him anymore. the whole schools left the builda was carrito sixty filch and freced out of bed away improssible.” they’re you going?” said must have been mutter. dursley as though the happin gieading for gryffindor house now far appeared in the hat shop the rain i mean you can take him bit much like?” “worried to understand him! he kept looking chops you too. the dursleys look — ” “yes yese. they seemed to be fright winess in the walls and the full-bind school mcgonagall normat looked as if it want you and there’s d as hary would speak to them. harry woust him sir?” “good,” said hermione. “ rags pet by laugh which was 

In [35]:
# Number of entries in the global character -> id table, i.e. the vocabulary
# size of whichever corpus was processed last (perprocess / getsaved_model).
len(char2idx)

65