In [1]:
import tensorflow as tf
import pandas as pd
import numpy as np

In [2]:
# Fetch the corpus through Keras' download helper and keep the local path.
path = tf.keras.utils.get_file(
    'shakespear.txt',
    'https://cs.stanford.edu/people/karpathy/char-rnn/shakespear.txt')
# Read the raw bytes and decode them explicitly as UTF-8; the context manager
# guarantees the file handle is closed once the text is in memory.
with open(path, 'rb') as corpus_file:
    text = corpus_file.read().decode(encoding='utf-8')
# Corpus size in characters.
print(len(text))

Downloading data from https://cs.stanford.edu/people/karpathy/char-rnn/shakespear.txt
99993


In [4]:
# Vocabulary: the distinct characters of the corpus, in sorted order.
vocab = list(set(text))
vocab.sort()
print(vocab)

['\n', ' ', '!', "'", ',', '-', '.', ':', ';', '?', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']


In [5]:
# Forward lookup: character -> integer id (its position in `vocab`).
char2idx = {u: i for i, u in enumerate(vocab)}
# Reverse lookup: indexing this array with an id (or an array of ids)
# returns the corresponding character(s).
idx2char = np.array(vocab)

# Print the whole mapping. Dicts keep insertion order, so the rows come out
# in the same order as `vocab`.
for char, idx in char2idx.items():
    print('     {:4s}:{:3d},'.format(repr(char), idx))

     '\n':  0,
     ' ' :  1,
     '!' :  2,
     "'" :  3,
     ',' :  4,
     '-' :  5,
     '.' :  6,
     ':' :  7,
     ';' :  8,
     '?' :  9,
     'A' : 10,
     'B' : 11,
     'C' : 12,
     'D' : 13,
     'E' : 14,
     'F' : 15,
     'G' : 16,
     'H' : 17,
     'I' : 18,
     'J' : 19,
     'K' : 20,
     'L' : 21,
     'M' : 22,
     'N' : 23,
     'O' : 24,
     'P' : 25,
     'Q' : 26,
     'R' : 27,
     'S' : 28,
     'T' : 29,
     'U' : 30,
     'V' : 31,
     'W' : 32,
     'X' : 33,
     'Y' : 34,
     'Z' : 35,
     'a' : 36,
     'b' : 37,
     'c' : 38,
     'd' : 39,
     'e' : 40,
     'f' : 41,
     'g' : 42,
     'h' : 43,
     'i' : 44,
     'j' : 45,
     'k' : 46,
     'l' : 47,
     'm' : 48,
     'n' : 49,
     'o' : 50,
     'p' : 51,
     'q' : 52,
     'r' : 53,
     's' : 54,
     't' : 55,
     'u' : 56,
     'v' : 57,
     'w' : 58,
     'x' : 59,
     'y' : 60,
     'z' : 61,


In [8]:
# Encode the full corpus as an array holding one vocabulary id per character.
text_as_int = np.array(list(map(char2idx.__getitem__, text)))

# Peek at the first 50 characters next to their integer encoding.
print(repr(text[:50]))
print('{}'.format(repr(text_as_int[:50])))

"That, poor contempt, or claim'd thou slept so fait"
array([29, 43, 36, 55,  4,  1, 51, 50, 50, 53,  1, 38, 50, 49, 55, 40, 48,
       51, 55,  4,  1, 50, 53,  1, 38, 47, 36, 44, 48,  3, 39,  1, 55, 43,
       50, 56,  1, 54, 47, 40, 51, 55,  1, 54, 50,  1, 41, 36, 44, 55])


In [9]:
# Length of each training input, in characters.
seq_length = 100

# Stream of single character ids, grouped into chunks of seq_length + 1 ids.
# Any trailing ids that do not fill a whole chunk are dropped.
char_dataset = tf.data.Dataset.from_tensor_slices(text_as_int)
sequences = char_dataset.batch(seq_length + 1, drop_remainder=True)

# Decode the first ten chunks back to text as a sanity check.
for item in sequences.take(10):
    chunk_text = ''.join(idx2char[item.numpy()])
    print(repr(chunk_text))

"That, poor contempt, or claim'd thou slept so faithful,\nI may contrive our father; and, in their defe"
'ated queen,\nHer flesh broke me and puttance of expedition house,\nAnd in that same that ever I lament '
'this stomach,\nAnd he, nor Butly and my fury, knowing everything\nGrew daily ever, his great strength a'
"nd thought\nThe bright buds of mine own.\n\nBIONDELLO:\nMarry, that it may not pray their patience.'\n\nKIN"
'G LEAR:\nThe instant common maid, as we may less be\na brave gentleman and joiner: he that finds us wit'
"h wax\nAnd owe so full of presence and our fooder at our\nstaves. It is remorsed the bridal's man his g"
'race\nfor every business in my tongue, but I was thinking\nthat he contends, he hath respected thee.\n\nB'
"IRON:\nShe left thee on, I'll die to blessed and most reasonable\nNature in this honour, and her bosom "
'is safe, some\nothers from his speedy-birth, a bill and as\nForestem with Richard in your heart\nBe ques'
"tion'd on, nor that I was enough:\nWhic

In [10]:
def split_input_target(chunk):
    """Split one chunk into an (input, target) pair offset by one position.

    The input is every element except the last; the target is every element
    except the first, so ``target[i]`` is the element that follows
    ``input[i]`` in ``chunk``.
    """
    return chunk[:-1], chunk[1:]

# Turn every (seq_length + 1)-id chunk into an (input, target) pair.
dataset = sequences.map(split_input_target)

Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Index'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Index'


In [11]:
# Decode the first (input, target) pair to text; the target should read as
# the input shifted one character to the left.
for input_example, target_example in dataset.take(1):
    input_str = ''.join(idx2char[input_example.numpy()])
    target_str = ''.join(idx2char[target_example.numpy()])
    print('Input data: ', repr(input_str))
    print('Target data: ', repr(target_str))

Input data:  "That, poor contempt, or claim'd thou slept so faithful,\nI may contrive our father; and, in their def"
Target data:  "hat, poor contempt, or claim'd thou slept so faithful,\nI may contrive our father; and, in their defe"


In [12]:
# Show the element shapes and dtypes of the (input, target) dataset.
print(dataset)

<MapDataset shapes: ((100,), (100,)), types: (tf.int32, tf.int32)>


In [13]:
# Number of (input, target) pairs per training batch.
BATCH_SIZE = 64
# Size of the buffer from which shuffled elements are drawn.
BUFFER_SIZE = 10000

# Build the pipeline from `sequences` rather than re-wrapping `dataset`, so
# re-running this cell always yields (BATCH_SIZE, seq_length) batches instead
# of batching an already-batched dataset.
dataset = (
    sequences
    .map(split_input_target)
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE, drop_remainder=True)
)
print(dataset)

<BatchDataset shapes: ((64, 100), (64, 100)), types: (tf.int32, tf.int32)>


In [16]:
def build_model(vocab_size,embedding_dim,rnn_units,batch_size):
    """Build the character-level model: Embedding -> stateful LSTM -> Dense.

    Args:
        vocab_size: Number of distinct character ids; used as the embedding
            input size and as the width of the final Dense layer.
        embedding_dim: Size of each character embedding vector.
        rnn_units: Number of units in the LSTM layer.
        batch_size: Fixed batch size baked into the input shape (the sequence
            length is left as ``None``).

    Returns:
        A ``tf.keras.Sequential`` model that emits ``vocab_size`` scores for
        every input position; no activation is passed to the Dense layer.
    """
    embedding = tf.keras.layers.Embedding(
        vocab_size,
        embedding_dim,
        batch_input_shape=[batch_size, None],
    )
    recurrent = tf.keras.layers.LSTM(
        rnn_units,
        return_sequences=True,  # one output per time step, not just the last
        stateful=True,          # keep the recurrent state between calls
        recurrent_initializer='glorot_uniform',
    )
    projection = tf.keras.layers.Dense(vocab_size)
    return tf.keras.Sequential([embedding, recurrent, projection])

# Model hyper-parameters.
vocab_size = len(vocab)  # one output score per distinct character
embedding_dim = 256
rnn_units = 1024

# Training model: its fixed batch size matches the batches in `dataset`.
model = build_model(vocab_size, embedding_dim, rnn_units, batch_size=BATCH_SIZE)

In [17]:
# Print each layer's output shape and parameter count.
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (64, None, 256)           15872     
_________________________________________________________________
lstm_1 (LSTM)                (64, None, 1024)          5246976   
_________________________________________________________________
dense (Dense)                (64, None, 62)            63550     
Total params: 5,326,398
Trainable params: 5,326,398
Non-trainable params: 0
_________________________________________________________________


In [18]:
# Check the shape of one training batch: inputs and targets should match.
shape_note = "# (batch_size, sequence_lenght)"
for input_example_batch, target_example_batch in dataset.take(1):
    print("Input:", input_example_batch.shape, shape_note)
    print("Target:", target_example_batch.shape, shape_note)

Input: (64, 100) # (batch_size, sequence_lenght)
Target: (64, 100) # (batch_size, sequence_lenght)


In [19]:
# Push a single batch through the model and report the shape of its output.
for input_example_batch,target_example_batch in dataset.take(1):
    example_batch_predictions = model(input_example_batch)
    prediction_shape = example_batch_predictions.shape
    print("Prediction: ", prediction_shape, "# (batch_size,sequence_lenght,vocab_size)")

Prediction:  (64, 100, 62) # (batch_size,sequence_lenght,vocab_size)


In [20]:
# Scores for the first example of the batch: one row per time step.
first_example_logits = example_batch_predictions[0]
# Draw one character id per time step from the categorical distribution
# defined by each row of scores.
sampled_indices = tf.random.categorical(first_example_logits, num_samples=1)
# Drop the trailing num_samples axis to get a flat array of ids.
sampled_indices_characters = tf.squeeze(sampled_indices, axis=-1).numpy()
print(sampled_indices_characters)

[40 52  3 14 49 34 16 51 37  6 26 35  5 51 40 51  2 56 52 43 47 13 18 11
 26 13 12  2 42 59 45 33  1 58  7 43 12 48 15 55 41 61 29  5 56 11 57 11
 30 51  7  2 26 51 23 54 28 41 14 32 45 58 27 59 38  7  1 18 32  2 13 11
 41 58 40  5  1 47 11 54 28 57 55 23 23 36 56 42 41  2 14 48 49  7 15 49
 52  8 53 16]


In [21]:
def loss(labels,logits):
    """Sparse categorical cross-entropy computed directly on raw scores.

    ``labels`` are the integer target ids and ``logits`` the model outputs;
    ``from_logits=True`` is passed because the outputs are not probabilities.
    """
    return tf.keras.losses.sparse_categorical_crossentropy(
        y_true=labels,
        y_pred=logits,
        from_logits=True,
    )

# Train with Adam against the loss defined above.
model.compile(loss=loss, optimizer='adam')

In [22]:
import os

In [25]:
# Directory that receives the checkpoint files.
checkpoint_dir = './training_checkpoints'
# File name template handed to the callback; it contains an "{epoch}" field.
checkpoint_prefix = os.path.join(checkpoint_dir,"ckpt_{epoch}")

# Callback that stores only the model weights, not the full model.
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_prefix, save_weights_only=True)

In [26]:
# Number of epochs to train for.
EPOCHS = 50
# Fit on the batched dataset; `checkpoint_callback` is passed so weights are
# written to disk during training. The returned History object is kept.
history = model.fit(dataset, epochs=EPOCHS, callbacks=[checkpoint_callback])

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [28]:
# Rebuild the same architecture with batch_size=1 so text can be generated
# one sequence at a time (the batch size is fixed when the model is built).
model = build_model(vocab_size,embedding_dim,rnn_units,batch_size=1)

# latest_checkpoint returns None when the directory holds no checkpoint;
# fail with a clear message instead of passing None to load_weights.
latest_checkpoint = tf.train.latest_checkpoint(checkpoint_dir)
if latest_checkpoint is None:
    raise FileNotFoundError(
        "No checkpoint found in {!r}; run the training cell first.".format(checkpoint_dir))
model.load_weights(latest_checkpoint)
model.build(tf.TensorShape([1,None]))

In [29]:
def generate_text(model,start_string,num_generate=500,temperature=0.5):
    """Generate text by sampling one character at a time from ``model``.

    The whole ``start_string`` is fed to the model first; after that only the
    most recently sampled character is fed back in, so the model is expected
    to carry its recurrent state from one call to the next.

    Args:
        model: Model called as ``model(ids)`` on an integer batch of shape
            ``(1, length)``. Its output must have a leading batch axis of
            size 1, and it must provide ``reset_states()``.
        start_string: Non-empty seed text. Every character must be a key of
            ``char2idx``.
        num_generate: Number of characters to sample. Defaults to 500.
        temperature: Positive value the scores are divided by before
            sampling. Values below 1 sharpen the distribution (more
            predictable text); values above 1 flatten it. Defaults to 0.5.

    Returns:
        ``start_string`` followed by the ``num_generate`` sampled characters.

    Raises:
        ValueError: If ``start_string`` is empty or ``temperature`` is not
            positive.
        KeyError: If ``start_string`` contains a character that is not in
            ``char2idx``.
    """
    # Validate up front: an empty seed gives the model nothing to condition
    # on, and a non-positive temperature makes the scaled scores meaningless.
    if not start_string:
        raise ValueError("start_string must contain at least one character")
    if temperature <= 0:
        raise ValueError("temperature must be positive, got {!r}".format(temperature))

    # Encode the seed and add a batch axis -> shape (1, len(start_string)).
    input_eval = tf.expand_dims([char2idx[s] for s in start_string], 0)
    text_generated = []

    # Clear any recurrent state left over from earlier calls.
    model.reset_states()
    for _ in range(num_generate):
        # Remove the batch axis: one row of scores per input position.
        predictions = tf.squeeze(model(input_eval), 0)
        predictions = predictions / temperature
        # A sample is drawn for every position; only the one for the last
        # position (the next character) is kept.
        predicted_id = tf.random.categorical(predictions, num_samples=1)[-1, 0].numpy()

        # Feed only the new character back in on the next iteration.
        input_eval = tf.expand_dims([predicted_id], 0)
        text_generated.append(idx2char[predicted_id])
    return start_string + "".join(text_generated)

In [30]:
# Print a generated continuation seeded with "Alcohol".
print(generate_text(model,start_string=u"Alcohol"))

Alcohold him staff.

KING PHILIP:
Ay, but you do not in the sight of Norfolk live such ground
My tongue shall be brief in the dearcons a speed;
And well well in the our own officers and preether,
But never will that should be melted as you have in language.

CORIOLANUS:
Wherefore, Volscous?
Hang you to any as my husband, and out and a part, so gentle this time.

FALSTAFF:
I will challenge this good creature, and some break together:
But such a friends and presentle between; and thus army army'd to the 


In [31]:
# Print a generated continuation seeded with "Neurona".
print(generate_text(model,start_string=u"Neurona"))

Neuronath some mestly ted
And the chy fair in kind;
And despres did with a goodly ducats!

DUKE VINCENTIO:
Why, so something becomes my strength, how add
their sword of thee!
Duth the stryection and the moud of Corfess;
Prother of your hands with sight with a greet bedory my streegnt of heaven and them that begg'd it

ElvINA:
Now, for my mouth, men walk in both the sea:
On head not call thought to speak, with me.

GUKE OF YORK:
I'll praper a shormed, her couns?

Fersenter:
Ay, that it sooner saw me gon


In [34]:
# Print a generated continuation seeded with "humanidad " (trailing space included).
print(generate_text(model,start_string=u"humanidad "))

humanidad you here,
Or the lest not live a frolt of this thought
I'll slep it ander to the sun as his man:
And did thou should ever I fair.

DUCHESS:
Nay, I'll ever throw his honour.

OTHELLO:
Good morrow, amen.

CRESSIDA:
To stop the callert peace, would not then he stands, and such a fould of deaven.

CLEOPATRA:
What is 'Tis more, Brutus? They are bloody the seas:
Whiles we are ploody of her father and his same
That gleats I death in many hearts to observe
To take what they have here?

PASSIUS:
On my Lo
