# Importing dependencies

In [8]:
import keras
from keras import layers

import numpy as np
import random
import io

# Prepare data

In [9]:
# download the file and get the path
path = keras.utils.get_file(
    r"D:\UsingSpace\Projects\Artificial Intelligent\NaturalLanguageProcessing\Autocomplete Sentence\Version 2\nietzsche.txt",
    origin="https://s3.amazonaws.com/text-datasets/nietzsche.txt",
)
path

'D:\\UsingSpace\\Projects\\Artificial Intelligent\\NaturalLanguageProcessing\\Autocomplete Sentence\\Version 2\\nietzsche.txt'

In [10]:
# read that file
with io.open(path, encoding="utf-8") as f:
    text = f.read().lower()
text = text.replace("\n", " ")  # We remove newlines chars for nicer display
print("Corpus length:", len(text))

Corpus length: 600893


In [11]:
text[: 500]

'preface   supposing that truth is a woman--what then? is there not ground for suspecting that all philosophers, in so far as they have been dogmatists, have failed to understand women--that the terrible seriousness and clumsy importunity with which they have usually paid their addresses to truth, have been unskilled and unseemly methods for winning a woman? certainly she has never allowed herself to be won; and at present every kind of dogma stands with sad and discouraged mien--if, indeed, it s'

In [12]:
chars = sorted(list(set(text)))  # get all the individual characters
print("Total chars:", len(chars))
char_indices = dict((c, i) for i, c in enumerate(chars))
indices_char = dict((i, c) for i, c in enumerate(chars))

Total chars: 56


In [13]:
char_indices

{' ': 0,
 '!': 1,
 '"': 2,
 "'": 3,
 '(': 4,
 ')': 5,
 ',': 6,
 '-': 7,
 '.': 8,
 '0': 9,
 '1': 10,
 '2': 11,
 '3': 12,
 '4': 13,
 '5': 14,
 '6': 15,
 '7': 16,
 '8': 17,
 '9': 18,
 ':': 19,
 ';': 20,
 '=': 21,
 '?': 22,
 '[': 23,
 ']': 24,
 '_': 25,
 'a': 26,
 'b': 27,
 'c': 28,
 'd': 29,
 'e': 30,
 'f': 31,
 'g': 32,
 'h': 33,
 'i': 34,
 'j': 35,
 'k': 36,
 'l': 37,
 'm': 38,
 'n': 39,
 'o': 40,
 'p': 41,
 'q': 42,
 'r': 43,
 's': 44,
 't': 45,
 'u': 46,
 'v': 47,
 'w': 48,
 'x': 49,
 'y': 50,
 'z': 51,
 'ä': 52,
 'æ': 53,
 'é': 54,
 'ë': 55}

In [11]:
# cut the text in semi-redundant sequences of maxlen characters
maxlen = 40
step = 3
sentences = []
next_chars = []
for i in range(0, len(text) - maxlen, step):
    sentences.append(text[i : i + maxlen])
    next_chars.append(text[i + maxlen])
print("Number of sequences:", len(sentences))

Number of sequences: 200285


In [20]:
# make it become one-hot-encoding matrix (total_sample x max length x total_chars)
x = np.zeros((len(sentences), maxlen, len(chars)), dtype="bool")
y = np.zeros((len(sentences), len(chars)), dtype="bool")
for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        x[i, t, char_indices[char]] = 1
    y[i, char_indices[next_chars[i]]] = 1

x.shape, y.shape

((200285, 40, 56), (200285, 56))

# Build the model: Single LSTM layer

In [21]:
model = keras.Sequential(
    [
        keras.Input(shape=(maxlen, len(chars))),
        layers.LSTM(128),
        layers.Dense(len(chars), activation="softmax"),
    ]
)
optimizer = keras.optimizers.RMSprop(learning_rate=0.01)
model.compile(loss="categorical_crossentropy", optimizer=optimizer)

# Prepare the text sampling function

In [22]:
def sample(preds, temperature=1.0):
    # helper function to sample an index from a probability array
    preds = np.asarray(preds).astype("float64")
    preds = np.log(preds) / temperature
    exp_preds = np.exp(preds)
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

# Train the model

In [23]:
epochs = 40
batch_size = 128

for epoch in range(epochs):
    model.fit(x, y, batch_size=batch_size, epochs=1)
    print()
    print("Generating text after epoch: %d" % epoch)

    start_index = random.randint(0, len(text) - maxlen - 1)
    for diversity in [0.2, 0.5, 1.0, 1.2]:
        print("...Diversity:", diversity)

        generated = ""
        sentence = text[start_index : start_index + maxlen]
        print('...Generating with seed: "' + sentence + '"')

        for i in range(400):
            x_pred = np.zeros((1, maxlen, len(chars)))
            for t, char in enumerate(sentence):
                x_pred[0, t, char_indices[char]] = 1.0
            preds = model.predict(x_pred, verbose=0)[0]
            next_index = sample(preds, diversity)
            next_char = indices_char[next_index]
            sentence = sentence[1:] + next_char
            generated += next_char

        print("...Generated: ", generated)
        print("-")


Generating text after epoch: 0
...Diversity: 0.2
...Generating with seed: "e that has yet been attained will in tim"
...Generated:  e that that he have that that that in the self that that it is that that it is a strung that that it is the self that the self that it is the self that that it is the self that that that that the self the self the self of the subtion that that the self the still that is that that that the self that that the self that that that is that that that that it is a strung that that that which has that is 
-
...Diversity: 0.5
...Generating with seed: "e that has yet been attained will in tim"
...Generated:  e that he has has be evelution, which morality, as the man ender of that in a scienting rearly that the such comple of the still for and that the the self in the more spirit of the sciention, that the senting that whore for that that fact has no that who the most a comple that the that the consequence that that is reals the not that is not that there first has 

# Save model

In [24]:
model.save('model.h5')

# Load model

In [2]:
from keras.models import load_model


loaded_model = load_model('model.h5')

# Test out

In [28]:
sentence = """Certainly she has never allowed herself to be won; and at present every kind of dogma stands with sad and discouraged mien--IF,"""
sentence = sentence.lower()
sentence

'certainly she has never allowed herself to be won; and at present every kind of dogma stands with sad and discouraged mien--if,'

In [29]:
len(sentence)

127

In [30]:
len(sentence[127 - 40:])

40

In [31]:
start_index = 127 - 40

In [33]:
from time import sleep

In [34]:
generated = ""
sentence_ = sentence[start_index : start_index + 40]

# generate the next 100 chars
for i in range(100):
    print('char no.' + str(i))
    x_pred = np.zeros((1, maxlen, len(chars)))
    for t, char in enumerate(sentence_):
        x_pred[0, t, char_indices[char]] = 1.0
    preds = loaded_model.predict(x_pred, verbose=0)[0]
    next_index = sample(preds, 1.0)
    next_char = indices_char[next_index]
    sentence_ = sentence_[1:] + next_char
    generated += next_char
    sentence += next_char

    print(sentence)
    print()
    sleep(0.1)

char no.0
certainly she has never allowed herself to be won; and at present every kind of dogma stands with sad and discouraged mien--if, for these may not of clumsiany, beloc teem has it saitterits of unsinnessland its democratics by th 

char no.1
certainly she has never allowed herself to be won; and at present every kind of dogma stands with sad and discouraged mien--if, for these may not of clumsiany, beloc teem has it saitterits of unsinnessland its democratics by th h

char no.2
certainly she has never allowed herself to be won; and at present every kind of dogma stands with sad and discouraged mien--if, for these may not of clumsiany, beloc teem has it saitterits of unsinnessland its democratics by th ho

char no.3
certainly she has never allowed herself to be won; and at present every kind of dogma stands with sad and discouraged mien--if, for these may not of clumsiany, beloc teem has it saitterits of unsinnessland its democratics by th how

char no.4
certainly she has never 