In [1]:
# Set to True when running on Google Colab (checkpoints then go to the mounted
# Google Drive), False for a local run.
use_colab = True
# A membership test against [True, False] would also accept 0 and 1 because
# they compare equal to the booleans, so check the type explicitly.
assert isinstance(use_colab, bool), 'use_colab must be True or False'

In [2]:
# google.colab only exists inside a Colab runtime, so import and mount Drive
# only when the notebook is configured to run there.
if use_colab:
    from google.colab import drive
    drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
import tensorflow as tf

import numpy as np
import os
import time

In [4]:
# Fetch the Shakespeare corpus and keep the path returned by get_file.
SHAKESPEARE_URL = 'https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt'
path_to_file = tf.keras.utils.get_file('shakespeare.txt', SHAKESPEARE_URL)

In [5]:
# Read the corpus as bytes and decode it explicitly as UTF-8; the context
# manager closes the file handle instead of leaving it to the garbage collector.
with open(path_to_file, 'rb') as corpus_file:
    text = corpus_file.read().decode(encoding='utf-8')
print(f'Text length: {len(text)}')

Text length: 1115394


In [6]:
# Show the first 250 characters of the corpus.
print(text[:250])

First Citizen:
Before we proceed any further, hear me speak.

All:
Speak, speak.

First Citizen:
You are all resolved rather to die than to famish?

All:
Resolved. resolved.

First Citizen:
First, you know Caius Marcius is chief enemy to the people.



In [7]:
# Sorted list of the distinct characters that occur in the corpus.
vocab = sorted(set(text))
print(f'Unique letters: {len(vocab)}')

Unique letters: 65


In [8]:
# Lookup tables between characters and their position in the sorted vocabulary.
char2idx = dict(zip(vocab, range(len(vocab))))
idx2char = np.array(vocab)

# The whole corpus encoded as an array of vocabulary indices.
text_as_int = np.array([char2idx[ch] for ch in text])

In [9]:
# Pretty-print the first 20 entries of the character -> index table.
print('{')
for char, index in list(char2idx.items())[:20]:
    print('  {:4s}: {:3d},'.format(repr(char), index))
print('  ...\n}')

{
  '\n':   0,
  ' ' :   1,
  '!' :   2,
  '$' :   3,
  '&' :   4,
  "'" :   5,
  ',' :   6,
  '-' :   7,
  '.' :   8,
  '3' :   9,
  ':' :  10,
  ';' :  11,
  '?' :  12,
  'A' :  13,
  'B' :  14,
  'C' :  15,
  'D' :  16,
  'E' :  17,
  'F' :  18,
  'G' :  19,
  ...
}


In [10]:
# Show the first 13 characters next to their integer encoding.
print(f'{repr(text[:13])} ---- Index ---- > {text_as_int[:13]}')

'First Citizen' ---- Index ---- > [18 47 56 57 58  1 15 47 58 47 64 43 52]


In [11]:
# Number of input characters per training example.
seq_length = 100
# Every example is cut from a chunk of seq_length + 1 characters (the input
# plus the one-step-shifted target), so that chunk size is the divisor.
examples_per_epoch = len(text) // (seq_length + 1)

# Dataset that yields the corpus one character index at a time.
char_dataset = tf.data.Dataset.from_tensor_slices(text_as_int)

# Decode the first five elements back to characters as a sanity check.
for i in char_dataset.take(5):
    print(idx2char[i.numpy()])

F
i
r
s
t


In [12]:
# Group the character stream into chunks of seq_length + 1 indices, dropping
# the final chunk if it is shorter.
chunk_length = seq_length + 1
sequences = char_dataset.batch(chunk_length, drop_remainder=True)

# Decode the first five chunks back into text.
for item in sequences.take(5):
    decoded_chars = idx2char[item.numpy()]
    print(repr(''.join(decoded_chars)))

'First Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou '
'are all resolved rather to die than to famish?\n\nAll:\nResolved. resolved.\n\nFirst Citizen:\nFirst, you k'
"now Caius Marcius is chief enemy to the people.\n\nAll:\nWe know't, we know't.\n\nFirst Citizen:\nLet us ki"
"ll him, and we'll have corn at our own price.\nIs't a verdict?\n\nAll:\nNo more talking on't; let it be d"
'one: away, away!\n\nSecond Citizen:\nOne word, good citizens.\n\nFirst Citizen:\nWe are accounted poor citi'


In [13]:
def split_input_target(chunk):
    """Split a chunk into an (input, target) pair offset by one position.

    The input is every element except the last and the target is every element
    except the first, so target[i] is the element that follows input[i].
    """
    return chunk[:-1], chunk[1:]

# Turn every chunk into an (input, target) pair.
dataset = sequences.map(split_input_target)

In [14]:
# Number of sequences per training batch.
BATCH_SIZE = 64
# Size of the buffer used by Dataset.shuffle.
SHUFFLE_BUFFER_SIZE = 10000

dataset = (
    dataset
    .shuffle(SHUFFLE_BUFFER_SIZE)
    .batch(BATCH_SIZE, drop_remainder=True)
)

dataset

<BatchDataset shapes: ((64, 100), (64, 100)), types: (tf.int64, tf.int64)>

In [15]:
# Number of distinct characters; used as the Embedding input size and the
# width of the final Dense layer in build_model.
vocab_size = len(vocab)

# Output dimension of the Embedding layer.
embedding_dim = 256

# Number of units in the LSTM layer.
rnn_units = 1024

In [16]:
def build_model(vocab_size, embedding_dim, rnn_units, batch_size):
    """Assemble the Embedding -> stateful LSTM -> Dense network.

    Args:
        vocab_size: Embedding input dimension and number of units in the
            final Dense layer.
        embedding_dim: Embedding output dimension.
        rnn_units: Number of LSTM units.
        batch_size: Fixed batch dimension of the model's input shape.

    Returns:
        An uncompiled tf.keras.Sequential model.
    """
    embedding = tf.keras.layers.Embedding(
        vocab_size,
        embedding_dim,
        batch_input_shape=[batch_size, None],
    )
    recurrent = tf.keras.layers.LSTM(
        rnn_units,
        return_sequences=True,
        stateful=True,
        recurrent_initializer='glorot_uniform',
    )
    projection = tf.keras.layers.Dense(vocab_size)

    return tf.keras.Sequential([embedding, recurrent, projection])

In [17]:
# Training model: built with the training batch size.
model = build_model(vocab_size, embedding_dim, rnn_units, BATCH_SIZE)

In [18]:
# Print the layer-by-layer output shapes and parameter counts.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (64, None, 256)           16640     
_________________________________________________________________
lstm (LSTM)                  (64, None, 1024)          5246976   
_________________________________________________________________
dense (Dense)                (64, None, 65)            66625     
Total params: 5,330,241
Trainable params: 5,330,241
Non-trainable params: 0
_________________________________________________________________


In [19]:
def loss(labels, logits):
    """Sparse categorical cross-entropy between integer labels and raw logits.

    from_logits=True is passed, so `logits` is treated as unnormalized scores
    rather than probabilities.
    """
    crossentropy = tf.keras.losses.sparse_categorical_crossentropy(
        y_true=labels,
        y_pred=logits,
        from_logits=True,
    )
    return crossentropy

# Run the untrained model on a single batch as a smoke test; the loop body
# executes once because only one batch is taken.
for input_example_batch, target_example_batch in dataset.take(1):
    example_batch_predictions = model(input_example_batch)

# Loss of the untrained model on that batch (computed but not displayed here).
example_batch_loss = loss(target_example_batch, example_batch_predictions)

In [20]:
# Adam with a learning rate of 1e-3, minimizing the loss function defined above.
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)
model.compile(optimizer=optimizer, loss=loss)

In [21]:
# Where the weights are saved: on the mounted Drive when running on Colab,
# otherwise under a relative local path.
if use_colab:
    checkpoint_dir ='./drive/My Drive/train_ckpt/text_gen/exp1'
    # exist_ok makes the call a no-op when the directory is already there.
    os.makedirs(checkpoint_dir, exist_ok=True)
else:
    checkpoint_dir = 'text_gen/exp1'

# Save weights only, and only when the monitored training loss improves.
# NOTE(review): checkpoint_dir is passed as ModelCheckpoint's filepath and
# later to load_weights, so it appears to act as a checkpoint path prefix
# rather than a containing directory; confirm against the Keras docs.
cp_callback = tf.keras.callbacks.ModelCheckpoint(
    checkpoint_dir,
    save_weights_only=True,
    monitor='loss',
    mode='auto',
    save_best_only=True,
    verbose=1,
)

In [22]:
# Number of passes over the dataset during training.
EPOCHS = 20

In [23]:
# Train the model; the checkpoint callback saves weights as training proceeds.
history = model.fit(
    dataset,
    epochs=EPOCHS,
    callbacks=[cp_callback],
)

Epoch 1/20
Epoch 00001: loss improved from inf to 2.55262, saving model to ./drive/My Drive/train_ckpt/text_gen/exp1
Epoch 2/20
Epoch 00002: loss improved from 2.55262 to 1.86866, saving model to ./drive/My Drive/train_ckpt/text_gen/exp1
Epoch 3/20
Epoch 00003: loss improved from 1.86866 to 1.62340, saving model to ./drive/My Drive/train_ckpt/text_gen/exp1
Epoch 4/20
Epoch 00004: loss improved from 1.62340 to 1.49156, saving model to ./drive/My Drive/train_ckpt/text_gen/exp1
Epoch 5/20
Epoch 00005: loss improved from 1.49156 to 1.41170, saving model to ./drive/My Drive/train_ckpt/text_gen/exp1
Epoch 6/20
Epoch 00006: loss improved from 1.41170 to 1.35602, saving model to ./drive/My Drive/train_ckpt/text_gen/exp1
Epoch 7/20
Epoch 00007: loss improved from 1.35602 to 1.31092, saving model to ./drive/My Drive/train_ckpt/text_gen/exp1
Epoch 8/20
Epoch 00008: loss improved from 1.31092 to 1.27122, saving model to ./drive/My Drive/train_ckpt/text_gen/exp1
Epoch 9/20
Epoch 00009: loss improve

In [24]:
# Rebuild the same architecture with a batch size of 1 and restore the saved
# weights into it. Note that this rebinds `model`.
model = build_model(
    vocab_size=vocab_size,
    embedding_dim=embedding_dim,
    rnn_units=rnn_units,
    batch_size=1,
)

model.load_weights(checkpoint_dir)

model.build(tf.TensorShape([1, None]))

In [25]:
def generate_text(model, start_string, num_generate=1000, temperature=1.0):
    """Sample text from the model one character at a time.

    Args:
        model: Callable Keras model exposing reset_states(). It is fed inputs
            with a leading batch dimension of 1, so it is assumed to have been
            built for batch size 1.
        start_string: Seed text; every character must be a key of char2idx.
        num_generate: Number of characters to sample (default 1000).
        temperature: Positive divisor applied to the logits before sampling.
            Lower values give more predictable text, higher values more
            surprising text (default 1.0).

    Returns:
        start_string followed by the num_generate sampled characters.

    Raises:
        ValueError: If start_string is empty or temperature is not positive.
        KeyError: If start_string contains a character missing from char2idx.
    """
    if not start_string:
        raise ValueError('start_string must contain at least one character')
    if temperature <= 0:
        raise ValueError('temperature must be positive')

    # Encode the seed and add the batch dimension.
    input_eval = tf.expand_dims([char2idx[s] for s in start_string], 0)

    text_generated = []

    # Clear any recurrent state left over from earlier calls.
    model.reset_states()
    for _ in range(num_generate):
        predictions = model(input_eval)

        # Drop the batch dimension.
        predictions = tf.squeeze(predictions, 0)

        # Scale the logits, then sample one id from the last time step.
        predictions = predictions / temperature
        predicted_id = tf.random.categorical(predictions, num_samples=1)[-1, 0].numpy()

        # Only the sampled character is fed back in; this relies on the model
        # keeping its recurrent state between calls (build_model sets
        # stateful=True on the LSTM).
        input_eval = tf.expand_dims([predicted_id], 0)

        text_generated.append(idx2char[predicted_id])

    return start_string + ''.join(text_generated)

In [26]:
# Generate text seeded with "ROMEO: " using the batch-size-1 model.
print(generate_text(model, start_string=u"ROMEO: "))

ROMEO: ng your cousin, I
Should by and by her music was a mighty
Waxery and a slain of all that sins and star.

Lieven rust unworthy lord?

GLOUCESTER:
O, no! thought, easide, princely shepheddewits,
And time look hearth and dute and word Claudio
Lord Hastings' late let this land on more me? that I may nd,
That I entreat the winds of me,
But sweeter that we look'd it with me?

MIRANNA:
You will hear you?
From queen, thou liest. And he will come to thee,
When I call us,
As any man with her, and as much less
The country of the people queens for thy love again,
You head of conscience, insteemance more again
To all execution again
me?


EDWARD:
I would thou wilt be heard, or I'll have heart;
And therefore cannot he?

GREMIO:
Take my help. The king, ure not; for he is honour,
Out of her good, he kings and heaps and enter.
What m
Clarence is Baptixtales: all as mine and thee of the
sheep-broad; and 'twas in thy just?
For battle ripe and point but white
Or they dispatch: thou art too much saf