<a href="https://colab.research.google.com/github/Baeltor/PCD/blob/prog/NotebooksMLII/Generaci%C3%B3n_de_texto_con_RNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import keras
from keras import layers

import numpy as np
import random
import io

In [2]:
path = keras.utils.get_file(
    "nietzsche.txt",
    origin="https://s3.amazonaws.com/text-datasets/nietzsche.txt",
)
with io.open(path, encoding="utf-8") as f:
    text = f.read().lower()
text = text.replace("\n", " ")  # We remove newlines chars for nicer display
print("Corpus length:", len(text))

chars = sorted(list(set(text)))
print("Total chars:", len(chars))
char_indices = dict((c, i) for i, c in enumerate(chars))
indices_char = dict((i, c) for i, c in enumerate(chars))

# cut the text in semi-redundant sequences of maxlen characters
maxlen = 40
step = 3
sentences = []
next_chars = []
for i in range(0, len(text) - maxlen, step):
    sentences.append(text[i : i + maxlen])
    next_chars.append(text[i + maxlen])
print("Number of sequences:", len(sentences))

x = np.zeros((len(sentences), maxlen, len(chars)), dtype="bool")
y = np.zeros((len(sentences), len(chars)), dtype="bool")
for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        x[i, t, char_indices[char]] = 1
    y[i, char_indices[next_chars[i]]] = 1

Downloading data from https://s3.amazonaws.com/text-datasets/nietzsche.txt
[1m600901/600901[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2us/step
Corpus length: 600893
Total chars: 56
Number of sequences: 200285


In [5]:
sentences

['preface   supposing that truth is a woma',
 'face   supposing that truth is a woman--',
 'e   supposing that truth is a woman--wha',
 ' supposing that truth is a woman--what t',
 'pposing that truth is a woman--what then',
 'sing that truth is a woman--what then? i',
 'g that truth is a woman--what then? is t',
 'hat truth is a woman--what then? is ther',
 ' truth is a woman--what then? is there n',
 'uth is a woman--what then? is there not ',
 ' is a woman--what then? is there not gro',
 ' a woman--what then? is there not ground',
 'woman--what then? is there not ground fo',
 'an--what then? is there not ground for s',
 '-what then? is there not ground for susp',
 'at then? is there not ground for suspect',
 'then? is there not ground for suspecting',
 'n? is there not ground for suspecting th',
 'is there not ground for suspecting that ',
 'there not ground for suspecting that all',
 're not ground for suspecting that all ph',
 'not ground for suspecting that all philo',
 ' ground 

In [4]:
model = keras.Sequential(
    [
        keras.Input(shape=(maxlen, len(chars))),
        layers.LSTM(128),
        layers.Dense(len(chars), activation="softmax"),
    ]
)
optimizer = keras.optimizers.RMSprop(learning_rate=0.01)
model.compile(loss="categorical_crossentropy", optimizer=optimizer)

In [8]:
def sample(preds, temperature=1.0):
    # helper function to sample an index from a probability array
    preds = np.asarray(preds).astype("float64")
    preds = np.log(preds) / temperature
    exp_preds = np.exp(preds)
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

In [None]:
epochs = 40
batch_size = 128

for epoch in range(epochs):
    model.fit(x, y, batch_size=batch_size, epochs=1)
    print()
    print("Generating text after epoch: %d" % epoch)

    start_index = random.randint(0, len(text) - maxlen - 1)
    for diversity in [0.2, 0.5, 1.0, 1.2]:
        print("...Diversity:", diversity)

        generated = ""
        sentence = text[start_index : start_index + maxlen]
        print('...Generating with seed: "' + sentence + '"')

        for i in range(400):
            x_pred = np.zeros((1, maxlen, len(chars)))
            for t, char in enumerate(sentence):
                x_pred[0, t, char_indices[char]] = 1.0
            preds = model.predict(x_pred, verbose=0)[0]
            next_index = sample(preds, diversity)
            next_char = indices_char[next_index]
            sentence = sentence[1:] + next_char
            generated += next_char

        print("...Generated: ", generated)
        print("-")

[1m1565/1565[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m198s[0m 125ms/step - loss: 2.2763

Generating text after epoch: 0
...Diversity: 0.2
...Generating with seed: "roken strength of will and desire for po"
...Generated:  ssible the man and such a the can the despreations and such a struct of the the will the canster of the man and agrays of man and among the presente of the the can the presente and action in the presente of the explession the such a presentions of such a canstically the concertion the deart of such a stands the such a presente of the presence and among the one the presence and such a strenger of t
-
...Diversity: 0.5
...Generating with seed: "roken strength of will and desire for po"
...Generated:  ssing the man for the sense that it is and charce of all the plawe him of the and are and such a stand davery and agcient mane and more of a cance of the such of all still as a minder intellection tho the oncernis no such be disence of understance in a man it a the p