In [3]:
import numpy as np

# Read the raw book text as UTF-8.
with open('../../python-machine-learning-book-3rd-edition-master/ch16/1268-0.txt', 'r', encoding='utf-8') as fp:
    text = fp.read()

# Locate the markers that delimit the part of the file kept for training.
start_indx = text.find('THE MYSTERIOUS ISLAND')
end_indx = text.find('End of the Project Gutenberg')

# str.find returns -1 when the marker is missing; slicing with -1 would
# silently yield a wrong (or empty) corpus, so fail loudly instead.
if start_indx == -1:
    raise ValueError("Start marker 'THE MYSTERIOUS ISLAND' not found in the text file")
if end_indx <= start_indx:
    raise ValueError(
        "End marker 'End of the Project Gutenberg' not found after the start marker"
    )

# Keep only the text between the two markers and collect its distinct characters.
text = text[start_indx:end_indx]
char_set = set(text)
print('Total Length:', len(text))
print('Unique Characters:', len(char_set))

Total Length: 1112350
Unique Characters: 80


In [4]:
# Sorted vocabulary: position in this list is the integer code of a character.
chars_sorted = sorted(char_set)

# Forward lookup (character -> code) and reverse lookup (code -> character,
# via NumPy indexing into char_array).
char2int = dict(zip(chars_sorted, range(len(chars_sorted))))
char_array = np.array(chars_sorted)

# Encode the whole text as a 1-D int32 array, one code per character.
text_encoded = np.fromiter(
    (char2int[ch] for ch in text),
    dtype=np.int32,
    count=len(text)
)

In [5]:
# Show the first 15 characters next to their integer codes.
print(' '.join([text[:15], '     == Encoding ==> ', str(text_encoded[:15])]))
# Show the next 6 codes and map them back to characters through char_array.
print(' '.join([
    str(text_encoded[15:21]),
    ' == Reverse  ==> ',
    ''.join(char_array[text_encoded[15:21]])
]))

THE MYSTERIOUS       == Encoding ==>  [44 32 29  1 37 48 43 44 29 42 33 39 45 43  1]
[33 43 36 25 38 28]  == Reverse  ==>  ISLAND


In [6]:
import tensorflow as tf

# Wrap the encoded text in a tf.data.Dataset with one element per character code.
ds_text_encoded = tf.data.Dataset.from_tensor_slices(text_encoded)

# Preview the first five codes together with the characters they decode to.
for ex in ds_text_encoded.take(5):
    code = ex.numpy()
    print(f'{code} -> {char_array[code]}')

44 -> T
32 -> H
29 -> E
1 ->  
37 -> M


In [7]:
# Each chunk is one element longer than the model input so that it can be
# split into an input sequence and a target sequence shifted by one position.
seq_length = 40
chunk_size = 1 + seq_length

# Group consecutive codes into fixed-size chunks; an incomplete final chunk is dropped.
ds_chunks = ds_text_encoded.batch(batch_size=chunk_size, drop_remainder=True)

def split_input_target(chunk):
    """Split one chunk into an (input, target) pair offset by one position.

    The input is every element of `chunk` except the last; the target is
    every element of `chunk` except the first, so target[i] is the element
    that follows input[i] in the chunk.
    """
    return chunk[:-1], chunk[1:]

# Turn every chunk into an (input_seq, target_seq) pair.
# NOTE(review): the recorded output of this cell shows an AutoGraph warning
# ("module 'gast' has no attribute 'Index'"); presumably a gast/TensorFlow
# version mismatch in the environment -- confirm before relying on this cell.
ds_sequences = ds_chunks.map(split_input_target)

Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Index'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Index'


In [8]:
# Decode and print the first two (input, target) pairs to check the one-step shift.
for input_seq, target_seq in ds_sequences.take(2):
    input_text = ''.join(char_array[input_seq.numpy()])
    target_text = ''.join(char_array[target_seq.numpy()])
    print(' Input (x):', repr(input_text))
    print('Target (y):', repr(target_text))
    print()

 Input (x): 'THE MYSTERIOUS ISLAND ***\n\n\n\n\nProduced b'
Target (y): 'HE MYSTERIOUS ISLAND ***\n\n\n\n\nProduced by'

 Input (x): ' Anthony Matonak, and Trevor Carlson\n\n\n\n'
Target (y): 'Anthony Matonak, and Trevor Carlson\n\n\n\n\n'



In [9]:
# Number of sequences per training batch and size of the shuffle buffer.
BATCH_SIZE = 64
BUFFER_SIZE = 10000

# Shuffle the (input, target) pairs, then group them into batches.
ds = (
    ds_sequences
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE)
)

In [10]:
def build_model(vocab_size, embedding_dim, rnn_units):
    """Build the (uncompiled) sequence model.

    Layers, in order:
      1. Embedding(vocab_size, embedding_dim)
      2. LSTM(rnn_units) returning its output at every time step
      3. Dense(vocab_size) with no activation

    Returns:
        The tf.keras.Sequential model.
    """
    model = tf.keras.Sequential()
    model.add(tf.keras.layers.Embedding(vocab_size, embedding_dim))
    model.add(tf.keras.layers.LSTM(rnn_units, return_sequences=True))
    model.add(tf.keras.layers.Dense(vocab_size))
    return model

In [11]:
# Fix TensorFlow's global seed before the model is created.
tf.random.set_seed(1)

# Model hyperparameters: vocabulary size comes from the character table.
charset_size = len(char_array)
embedding_dim = 256
rnn_units = 512

model = build_model(charset_size, embedding_dim, rnn_units)
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, None, 256)         20480     
_________________________________________________________________
lstm (LSTM)                  (None, None, 512)         1574912   
_________________________________________________________________
dense (Dense)                (None, None, 80)          41040     
Total params: 1,636,432
Trainable params: 1,636,432
Non-trainable params: 0
_________________________________________________________________


In [16]:
# from_logits=True because the loss receives the raw outputs of the model's
# final Dense layer rather than probabilities.
model.compile(
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    optimizer='adam'
)

# Train on the shuffled, batched dataset for 10 epochs.
model.fit(ds, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x25b366bf100>

In [18]:
# Seed the global generator before sampling.
tf.random.set_seed(1)

# Three equal logits -> softmax assigns the same probability to every class.
logits = [[1.0, 1.0, 1.0]]
print('Probabilities:', tf.nn.softmax(logits)[0].numpy())

# Draw 100 class indices from the distribution defined by the logits.
samples = tf.random.categorical(logits, 100)
tf.print(samples.numpy())

Probabilities: [0.33333334 0.33333334 0.33333334]
array([[0, 0, 1, 2, 0, 0, 0, 0, 1, 0, 0, 1, 0, 2, 0, 0, 2, 0, 1, 1, 2, 0,
        2, 0, 0, 1, 2, 2, 1, 2, 0, 1, 0, 0, 1, 2, 1, 2, 1, 2, 2, 1, 2, 0,
        2, 0, 0, 0, 1, 0, 1, 0, 2, 0, 2, 1, 0, 1, 1, 2, 1, 1, 2, 1, 0, 0,
        2, 2, 2, 2, 2, 1, 2, 1, 2, 1, 1, 2, 2, 1, 1, 2, 1, 2, 1, 2, 2, 0,
        0, 2, 2, 1, 2, 0, 1, 2, 2, 1, 2, 0]], dtype=int64)


In [19]:
# Seed the global generator before sampling.
tf.random.set_seed(1)

# One logit is much larger than the others -> softmax puts almost all
# probability mass on the last class.
logits = [[1.0, 1.0, 8.0]]
print('Probabilities:', tf.nn.softmax(logits)[0].numpy())

# Draw 100 class indices from the distribution defined by the logits.
samples = tf.random.categorical(logits, 100)
tf.print(samples.numpy())

Probabilities: [9.1022195e-04 9.1022195e-04 9.9817955e-01]
array([[2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]], dtype=int64)


In [20]:
def sample(model, starting_str, len_generated_text=500, max_input_length=40, scale_factor=1.0):
    """Generate text one character at a time by sampling from the model.

    Args:
        model: Callable model; called with an integer tensor of shape
            (1, time_steps) and expected to return logits of shape
            (1, time_steps, vocab).
        starting_str: Non-empty seed string. Every character must be a key
            of the module-level `char2int` mapping.
        len_generated_text: Number of characters to generate after the seed.
        max_input_length: Maximum number of most recent characters fed back
            into the model at each step.
        scale_factor: Multiplier applied to the logits before sampling.
            Values above 1 make the sampling distribution more peaked,
            values below 1 make it flatter.

    Returns:
        `starting_str` followed by `len_generated_text` generated characters.

    Raises:
        ValueError: If `starting_str` is empty.
        KeyError: If `starting_str` contains a character not in `char2int`.
    """
    if not starting_str:
        raise ValueError('starting_str must contain at least one character')

    encoded_input = [char2int[s] for s in starting_str]
    encoded_input = tf.reshape(encoded_input, (1, -1))

    generated_str = starting_str

    model.reset_states()
    for _ in range(len_generated_text):
        logits = model(encoded_input)
        # Drop the batch axis: (1, time_steps, vocab) -> (time_steps, vocab).
        logits = tf.squeeze(logits, 0)

        scaled_logits = logits * scale_factor
        # One draw per time step, result shape (time_steps, 1). Sampling every
        # time step (not just the last) keeps the random draws identical to
        # the previous implementation for a given seed.
        new_char_indx = tf.random.categorical(
            scaled_logits, num_samples=1)

        # Take the draw for the last time step. Indexing [-1, 0] also works
        # when there is a single time step; squeezing first would collapse
        # the tensor to a scalar that cannot be indexed.
        new_char_indx = new_char_indx[-1, 0].numpy()

        generated_str += str(char_array[new_char_indx])

        # Append the new code and keep only the most recent window.
        new_char_indx = tf.expand_dims([new_char_indx], 0)
        encoded_input = tf.concat(
            [encoded_input, new_char_indx],
            axis=1
        )
        encoded_input = encoded_input[:, -max_input_length:]

    return generated_str

In [22]:
# Reset the seed so this generation starts from the same random state
# as the other sampling cells.
tf.random.set_seed(1)

# Generate with the default scale_factor (logits used as-is).
print(sample(model, 'The island'))

The island is probable that the viewing from he smilfrals continued; but it would inhabited to get complete some condition; it was completely two cape birds. It was
agreed that
they had suffended from the mountain.

Now, he
is not the damo, them is his possessioned no gotallets, and it was necessary to give under the
Chimneys.

All these teights whomselves in
branches, during these restering pur.
If greatly cleared from a tonn his hopes, moon had disasperied eight themselves out in the baulist,
pearance, 


In [24]:
# Reset the seed so this generation starts from the same random state
# as the other sampling cells.
tf.random.set_seed(1)

# Logits are multiplied by 2.0 before sampling.
print(sample(model, 'The island', scale_factor=2.0))

The island was so as to make a sufficient to search the sailor.

“We must there!”

“That is to say, the colonists the settlers were the corral, had been able to the corral and all the river, and they had not appeared to the
corral which formed the protection of the island, which he entirely came to the corral!

“We shall be a man go the subjection, to the shore, or the corral, who had been convicts were to be done but to return to the southern plants, and the captain was of the lake. The wind had already 


In [26]:
# Reset the seed so this generation starts from the same random state
# as the other sampling cells.
tf.random.set_seed(1)

# Logits are multiplied by 0.5 before sampling.
print(sample(model, 'The island', scale_factor=0.5))

The island hanging he drefude? Top wished-from
Granfop.
PEbintiNausualavting-yig. The
Crinces: I knew solid morig, if upberreps. He saw, I hasty, a labaphirrihuly?”

When Captain
Surior,” me amugiluzage.”

Wiir torce?” mumb
all,” asked Captajt to their, broke: better certain, pieces it?”

“STance!” exited. Luid
it cod incandanc,
a misery.”

At thisfi’k,”
said
Gideon. To worting. “Cape, greshnewi8-qoight,”.

For. In cqusy griet, duriousnets,
Pencrofts wassigake,
coptaless, carmlured useff.”

Lonag?
a8rror 
