In [28]:
import tensorflow as tf
import tensorflow.keras as keras

import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import sklearn
import os
import sys
import time
import pandas as pd

%matplotlib inline

In [29]:
# Read the whole corpus into memory. The context manager closes the file
# handle as soon as the read finishes, and the explicit encoding keeps the
# result independent of the platform's default locale.
with open("shakespeare.txt", encoding = "utf-8") as corpus_file:
    text = corpus_file.read()

print(len(text))
print(text[0:10])

1115395
First Citi


In [30]:
# Character-level vocabulary: every distinct character of the corpus,
# in sorted order so that the ids assigned later are deterministic.
unique_chars = set(text)
vocab = sorted(unique_chars)

print(len(vocab))
print(vocab)

65
['\n', ' ', '!', '$', '&', "'", ',', '-', '.', '3', ':', ';', '?', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']


In [31]:
# Lookup table from each character to its position in `vocab`.
char2idx = dict(zip(vocab, range(len(vocab))))
print(char2idx)

{'\n': 0, ' ': 1, '!': 2, '$': 3, '&': 4, "'": 5, ',': 6, '-': 7, '.': 8, '3': 9, ':': 10, ';': 11, '?': 12, 'A': 13, 'B': 14, 'C': 15, 'D': 16, 'E': 17, 'F': 18, 'G': 19, 'H': 20, 'I': 21, 'J': 22, 'K': 23, 'L': 24, 'M': 25, 'N': 26, 'O': 27, 'P': 28, 'Q': 29, 'R': 30, 'S': 31, 'T': 32, 'U': 33, 'V': 34, 'W': 35, 'X': 36, 'Y': 37, 'Z': 38, 'a': 39, 'b': 40, 'c': 41, 'd': 42, 'e': 43, 'f': 44, 'g': 45, 'h': 46, 'i': 47, 'j': 48, 'k': 49, 'l': 50, 'm': 51, 'n': 52, 'o': 53, 'p': 54, 'q': 55, 'r': 56, 's': 57, 't': 58, 'u': 59, 'v': 60, 'w': 61, 'x': 62, 'y': 63, 'z': 64}


In [32]:
# Inverse lookup (id -> character). Stored as a NumPy array so that it can be
# indexed with a whole array of ids at once.
idx2char = np.asarray(vocab)
print(idx2char)

['\n' ' ' '!' '$' '&' "'" ',' '-' '.' '3' ':' ';' '?' 'A' 'B' 'C' 'D' 'E'
 'F' 'G' 'H' 'I' 'J' 'K' 'L' 'M' 'N' 'O' 'P' 'Q' 'R' 'S' 'T' 'U' 'V' 'W'
 'X' 'Y' 'Z' 'a' 'b' 'c' 'd' 'e' 'f' 'g' 'h' 'i' 'j' 'k' 'l' 'm' 'n' 'o'
 'p' 'q' 'r' 's' 't' 'u' 'v' 'w' 'x' 'y' 'z']


In [33]:
# Encode the entire corpus as integer ids, one id per character.
encoded_chars = list(map(char2idx.__getitem__, text))
text_as_int = np.array(encoded_chars)

# Show the first ten ids next to the characters they encode.
print(text_as_int[:10])
print(text[:10])

[18 47 56 57 58  1 15 47 58 47]
First Citi


In [34]:
def split_input_target(id_text):
    """Split one sequence into an (input, target) pair offset by one step.

    The input is the sequence without its last element and the target is the
    sequence without its first element, so target[i] is the element that
    follows input[i].
    """
    inputs = id_text[:-1]
    targets = id_text[1:]
    return inputs, targets

# Dataset that yields the corpus one character id at a time.
char_dataset = tf.data.Dataset.from_tensor_slices(text_as_int)

# Group the ids into windows of seq_length + 1; a trailing window that is
# shorter than that is dropped.
seq_length = 100
window_size = seq_length + 1
seq_dataset = char_dataset.batch(window_size, drop_remainder=True)

# Preview: the first two single ids with their characters ...
for ch_id in char_dataset.take(2):
    decoded_char = idx2char[ch_id.numpy()]
    print(ch_id, decoded_char)

# ... and the first two windows, both as ids and as decoded text.
for seq_id in seq_dataset.take(2):
    print(seq_id)
    decoded_window = "".join(idx2char[seq_id.numpy()])
    print(repr(decoded_window))
    

tf.Tensor(18, shape=(), dtype=int64) F
tf.Tensor(47, shape=(), dtype=int64) i
tf.Tensor(
[18 47 56 57 58  1 15 47 58 47 64 43 52 10  0 14 43 44 53 56 43  1 61 43
  1 54 56 53 41 43 43 42  1 39 52 63  1 44 59 56 58 46 43 56  6  1 46 43
 39 56  1 51 43  1 57 54 43 39 49  8  0  0 13 50 50 10  0 31 54 43 39 49
  6  1 57 54 43 39 49  8  0  0 18 47 56 57 58  1 15 47 58 47 64 43 52 10
  0 37 53 59  1], shape=(101,), dtype=int64)
'First Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou '
tf.Tensor(
[39 56 43  1 39 50 50  1 56 43 57 53 50 60 43 42  1 56 39 58 46 43 56  1
 58 53  1 42 47 43  1 58 46 39 52  1 58 53  1 44 39 51 47 57 46 12  0  0
 13 50 50 10  0 30 43 57 53 50 60 43 42  8  1 56 43 57 53 50 60 43 42  8
  0  0 18 47 56 57 58  1 15 47 58 47 64 43 52 10  0 18 47 56 57 58  6  1
 63 53 59  1 49], shape=(101,), dtype=int64)
'are all resolved rather to die than to famish?\n\nAll:\nResolved. resolved.\n\nFirst Citizen:\nFirst, you k'


In [35]:
# Turn every window into an (input, target) pair via split_input_target.
# NOTE: this rebinds seq_dataset, so re-running this cell on its own maps an
# already-mapped dataset; re-run from the cell that creates seq_dataset.
seq_dataset = seq_dataset.map(split_input_target)

# Preview the first two pairs: input ids first, then target ids.
for item_input, item_output in seq_dataset.take(2):
    for ids in (item_input, item_output):
        print(ids.numpy())

[18 47 56 57 58  1 15 47 58 47 64 43 52 10  0 14 43 44 53 56 43  1 61 43
  1 54 56 53 41 43 43 42  1 39 52 63  1 44 59 56 58 46 43 56  6  1 46 43
 39 56  1 51 43  1 57 54 43 39 49  8  0  0 13 50 50 10  0 31 54 43 39 49
  6  1 57 54 43 39 49  8  0  0 18 47 56 57 58  1 15 47 58 47 64 43 52 10
  0 37 53 59]
[47 56 57 58  1 15 47 58 47 64 43 52 10  0 14 43 44 53 56 43  1 61 43  1
 54 56 53 41 43 43 42  1 39 52 63  1 44 59 56 58 46 43 56  6  1 46 43 39
 56  1 51 43  1 57 54 43 39 49  8  0  0 13 50 50 10  0 31 54 43 39 49  6
  1 57 54 43 39 49  8  0  0 18 47 56 57 58  1 15 47 58 47 64 43 52 10  0
 37 53 59  1]
[39 56 43  1 39 50 50  1 56 43 57 53 50 60 43 42  1 56 39 58 46 43 56  1
 58 53  1 42 47 43  1 58 46 39 52  1 58 53  1 44 39 51 47 57 46 12  0  0
 13 50 50 10  0 30 43 57 53 50 60 43 42  8  1 56 43 57 53 50 60 43 42  8
  0  0 18 47 56 57 58  1 15 47 58 47 64 43 52 10  0 18 47 56 57 58  6  1
 63 53 59  1]
[56 43  1 39 50 50  1 56 43 57 53 50 60 43 42  1 56 39 58 46 43 56  1 58
 53  1 42

In [36]:
batch_size = 64
buffer_size = 10000

# Shuffle the (input, target) pairs using a buffer of buffer_size elements,
# then group them into batches of batch_size; an incomplete final batch is
# dropped so every batch has the same leading dimension.
shuffled_dataset = seq_dataset.shuffle(buffer_size)
seq_dataset = shuffled_dataset.batch(batch_size, drop_remainder=True)


In [39]:
# Model hyperparameters.
rnn_units = 1024           # number of units in the recurrent layer
embedding_dim = 256        # length of each character embedding vector
vocab_size = len(vocab)    # number of distinct characters in the corpus

def build_model(vocab_size, embedding_dim, rnn_units, batch_size):
    """Assemble the character-level model: Embedding -> SimpleRNN -> Dense.

    Args:
        vocab_size: number of distinct character ids; used both as the
            embedding input size and as the width of the output layer.
        embedding_dim: length of the embedding vector for each character.
        rnn_units: number of units in the SimpleRNN layer.
        batch_size: fixed batch dimension of the model input; the sequence
            dimension is left unspecified (None).

    Returns:
        An uncompiled keras Sequential model.
    """
    model = keras.models.Sequential()
    model.add(keras.layers.Embedding(
        vocab_size,
        embedding_dim,
        batch_input_shape=[batch_size, None],
    ))
    # return_sequences=True makes the RNN emit an output for every timestep
    # instead of only the last one, so each position gets a prediction.
    model.add(keras.layers.SimpleRNN(units=rnn_units, return_sequences=True))
    # No activation on the output layer: it produces one raw score (logit)
    # per vocabulary entry.
    model.add(keras.layers.Dense(vocab_size))
    return model

# Instantiate the training model with the hyperparameters defined above and
# print its layer-by-layer summary.
model = build_model(vocab_size, embedding_dim, rnn_units, batch_size)

model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (64, None, 256)           16640     
_________________________________________________________________
simple_rnn (SimpleRNN)       (64, None, 1024)          1311744   
_________________________________________________________________
dense (Dense)                (64, None, 65)            66625     
Total params: 1,395,009
Trainable params: 1,395,009
Non-trainable params: 0
_________________________________________________________________


In [43]:
# Sanity check: push ten batches through the model and print the shape of the
# output. The loop variables are left bound to the last batch on purpose,
# because the following cells inspect them.
example_batches = seq_dataset.take(10)
for input_example_batch, target_example_batch in example_batches:
    example_batch_predictions = model(input_example_batch)
    print(example_batch_predictions.shape)
    

(64, 100, 65)
(64, 100, 65)
(64, 100, 65)
(64, 100, 65)
(64, 100, 65)
(64, 100, 65)
(64, 100, 65)
(64, 100, 65)
(64, 100, 65)
(64, 100, 65)


In [73]:
# Random sampling.
#
# The model's last layer has no activation, so its outputs are unnormalised
# scores (logits) - the values before a softmax is applied - which is the form
# tf.random.categorical takes through its `logits` argument.
first_example_logits = example_batch_predictions[0]
sample_indices = tf.random.categorical(logits=first_example_logits,
                                       num_samples=1)
print(sample_indices)
print(example_batch_predictions[0])
# One id was drawn for each row of logits; drop the trailing length-1 axis so
# the result is a flat vector of sampled ids.
sample_indices = tf.squeeze(sample_indices, axis=-1)
print(sample_indices)

tf.Tensor(
[[24]
 [ 8]
 [16]
 [21]
 [ 7]
 [10]
 [12]
 [54]
 [ 0]
 [ 0]
 [16]
 [10]
 [31]
 [47]
 [59]
 [46]
 [60]
 [12]
 [50]
 [ 7]
 [13]
 [ 9]
 [34]
 [34]
 [21]
 [44]
 [ 3]
 [36]
 [61]
 [14]
 [21]
 [ 7]
 [27]
 [34]
 [36]
 [16]
 [20]
 [31]
 [45]
 [50]
 [23]
 [ 9]
 [ 8]
 [55]
 [18]
 [55]
 [ 1]
 [17]
 [49]
 [26]
 [51]
 [22]
 [41]
 [54]
 [ 8]
 [33]
 [27]
 [55]
 [24]
 [42]
 [10]
 [39]
 [20]
 [43]
 [ 6]
 [18]
 [25]
 [54]
 [64]
 [35]
 [10]
 [54]
 [52]
 [43]
 [30]
 [22]
 [37]
 [11]
 [32]
 [17]
 [17]
 [ 5]
 [54]
 [21]
 [26]
 [31]
 [34]
 [ 2]
 [63]
 [40]
 [ 8]
 [55]
 [30]
 [ 1]
 [53]
 [50]
 [32]
 [47]
 [28]
 [10]], shape=(100, 1), dtype=int64)
tf.Tensor(
[[-0.04589407  0.0088993  -0.03213925 ...  0.01168741 -0.02739757
   0.00533208]
 [ 0.02935425 -0.02046876  0.01239216 ... -0.03138732 -0.00182325
   0.09422386]
 [ 0.01021876 -0.00569511 -0.05366845 ... -0.04344088 -0.08553903
   0.00504644]
 ...
 [ 0.35374153 -0.00959506  0.15609202 ...  0.03760016 -0.04996396
   0.13295744]
 [-0.04082628 -0.1

In [48]:
# Decode and print, for the first example of the batch, the input ids, the
# target ids and the ids sampled from the model output.
for label, ids in (
    ("Input: ", input_example_batch[0]),
    ("Output: ", target_example_batch[0]),
    ("Preditions: ", sample_indices),
):
    print(label, repr("".join(idx2char[ids])))

Input:  "he duke my father with his power?\n\nKING RICHARD II:\nNo matter where; of comfort no man speak:\nLet's "
Output:  "e duke my father with his power?\n\nKING RICHARD II:\nNo matter where; of comfort no man speak:\nLet's t"
Preditions:  ":N?RkDMFczhNN$aiEs.r'TmSKe$t\n3:jJcGJ!UQ?Ii xQJdtK!edudllxFmIQ\nb3iQJoA,gbNn3-U3 iYl!:qpFsw!JSdiZzZh, "


In [54]:
def loss(labels, logits):
    """Sparse categorical cross-entropy between integer labels and raw logits.

    from_logits=True tells Keras that `logits` have not been passed through a
    softmax, so the normalisation is done inside the loss.
    """
    per_position_loss = keras.losses.sparse_categorical_crossentropy(
        labels, logits, from_logits=True)
    return per_position_loss

# Train with Adam against the logits-based cross-entropy defined above.
model.compile(loss=loss, optimizer="adam")

# Evaluate the loss on the example batch: print its shape and its mean value.
example_loss = loss(target_example_batch, example_batch_predictions)
print(example_loss.shape)
mean_example_loss = example_loss.numpy().mean()
print(mean_example_loss)

(64, 100)
4.1850195


In [56]:
# Directory that receives one weights checkpoint per epoch.
output_dir = "./text_generation_checkpoints"
# exist_ok=True makes the cell safely re-runnable and avoids the
# check-then-create race of a separate os.path.exists test; makedirs also
# creates missing parent directories if output_dir is ever nested.
os.makedirs(output_dir, exist_ok = True)

# "{epoch}" is filled in by ModelCheckpoint, giving ckpt_1, ckpt_2, ...
checkpoint_prefix = os.path.join(output_dir,"ckpt_{epoch}")
checkpoint_callbacks = keras.callbacks.ModelCheckpoint(
    filepath = checkpoint_prefix,
    save_weights_only = True)

epochs = 100

history = model.fit(seq_dataset,
                    epochs = epochs,
                    callbacks = [checkpoint_callbacks],
                   )


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100

KeyboardInterrupt: 

In [58]:
# Look up the latest checkpoint saved under output_dir; as the last expression
# of the cell, the returned path prefix is displayed as the cell's output.
tf.train.latest_checkpoint(output_dir)

'./text_generation_checkpoints/ckpt_11'

In [60]:
# Rebuild the same architecture with a batch size of 1 so that a single start
# string can be fed to it for generation.
model2 = build_model(vocab_size, embedding_dim, rnn_units, batch_size = 1)

# Restore the weights from the latest checkpoint written during training.
latest_ckpt = tf.train.latest_checkpoint(output_dir)
model2.load_weights(latest_ckpt)
# Input shape: batch of 1, sequence length left unspecified.
model2.build(tf.TensorShape([1, None]))

model2.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_2 (Embedding)      (1, None, 256)            16640     
_________________________________________________________________
simple_rnn_2 (SimpleRNN)     (1, None, 1024)           1311744   
_________________________________________________________________
dense_2 (Dense)              (1, None, 65)             66625     
Total params: 1,395,009
Trainable params: 1,395,009
Non-trainable params: 0
_________________________________________________________________


In [65]:
def generate_text(model,start_string,num_generate = 1000,max_context = 100):
    """Generate text one character at a time by sampling from the model.

    Args:
        model: character-level model that maps a [1, length] tensor of ids to
            [1, length, vocab_size] logits (the batch dimension must be 1
            because it is squeezed away below).
        start_string: non-empty seed text; every character must be a key of
            char2idx.
        num_generate: number of characters to generate.
        max_context: only used when the model has no stateful layer. Upper
            bound on how many of the most recent characters are fed back at
            each step; the default equals the seq_length used to build the
            training windows in this notebook. Pass None to always feed the
            full history.

    Returns:
        start_string followed by the num_generate sampled characters.

    Raises:
        ValueError: if start_string is empty or max_context is less than 1.
        KeyError: if start_string contains a character missing from char2idx.
    """
    if not start_string:
        raise ValueError("start_string must contain at least one character")
    if max_context is not None and max_context < 1:
        raise ValueError("max_context must be None or a positive integer")

    context_ids = [char2idx[c] for c in start_string]

    # A stateful RNN keeps its hidden state between calls, so only the newly
    # sampled character has to be fed at each step. A non-stateful RNN starts
    # from a blank state on every call; feeding it only the last character
    # would discard all earlier context, so the running history is fed instead.
    is_stateful = any(getattr(layer,"stateful",False) for layer in model.layers)
    if is_stateful:
        model.reset_states()

    text_generated = []
    input_eval = tf.expand_dims(context_ids,0)

    for _ in range(num_generate):
        predictions = model(input_eval)
        # predictions: [1, input_length, vocab_size] -> [input_length, vocab_size]
        predictions = tf.squeeze(predictions,0)
        # Only the last timestep predicts the next character. Sampling (rather
        # than taking the argmax) keeps the output from collapsing into a
        # repeating loop. tf.random.categorical needs a 2-D logits tensor,
        # hence the [-1:] slice instead of [-1].
        last_logits = predictions[-1:]
        predicted_id = int(tf.random.categorical(last_logits,num_samples = 1)[0,0].numpy())
        text_generated.append(idx2char[predicted_id])

        if is_stateful:
            input_eval = tf.expand_dims([predicted_id],0)
        else:
            context_ids.append(predicted_id)
            if max_context is not None:
                context_ids = context_ids[-max_context:]
            input_eval = tf.expand_dims(context_ids,0)

    return start_string + "".join(text_generated)

# Generate a sample from the restored model, seeded with "All: ".
new_text = generate_text(model = model2, start_string = "All: ")
print(new_text)

All: che tomy t aive
A:
O, ad wanoullupoofiziro totha-dir indinkevinall he y RI
TERESe fiquplokede teviloshad?
Longeth AUCh f thiverafres ss

THAsers; thel Y:
HENZA.


CAzBYot:
AMI En?
Noozeel swh th te? sejursond by thers se:
VEd mus he, Pant;
H
INESers hityokINot CHAPENe
TEd s f thallleas thirachtha! s CUS e anovere Bugnkicos usexe
F the aving mperethey ted,
Wen s,
ARd an ano?
D3NTy l indinthovedinexicha, am ll maly GISutaceckeved ocared ne

CHENGHow! bupre mas,
S, dergouthe:
QUD.
Thanqunicentuthesth ce len'theran, ho ifudof tuco on s losilfo hatunocow$O, vitRICHESCHApalyo hiourcel mas ove a s
SE
ORG my.

Cle youd bovel;


HA mondom h IO:
FFile ma cofun intowake angers Goutepa touchese mo, w-nasea!
Thanis.
SThes sceres ther hendoulis,


AUScon feve wicke beswanerthege anome.
Tho;
Tha hina,
G mer senaNTizee.
ICwalimotho al analinoce ave y shend te:
D arunanetherrs tus hous ththeshen'l utho p-ngrinkiferonck tas,
US, awans:
Fithomonemanove ous wivars, movethont heare:
AR wanontoug is my

In [72]:
# Minimal tf.random.categorical demo: draw five samples from a single
# two-class distribution whose classes are equally likely. The function takes
# log-probabilities (logits), hence the tf.math.log.
uniform_logits = tf.math.log([[0.5, 0.5]])
samples = tf.random.categorical(uniform_logits, 5)
print(samples)

tf.Tensor([[1 0 1 0 1]], shape=(1, 5), dtype=int64)
