In [None]:
from __future__ import print_function
from keras.callbacks import LambdaCallback
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.layers import LSTM
from keras.optimizers import RMSprop
from tensorflow.keras.utils import get_file
import numpy as np
import random
import sys
import io

# Fetch the Nietzsche corpus via get_file and read it in as lowercase text.
path = get_file('nietzsche.txt', origin='https://s3.amazonaws.com/text-datasets/nietzsche.txt')
with io.open(path, encoding='utf-8') as corpus_file:
    text = corpus_file.read().lower()
print('corpus length:', len(text))

# Vocabulary: every distinct character in sorted order, plus lookup tables
# in both directions (character -> index and index -> character).
chars = sorted(set(text))
print('total chars:', len(chars))
char_indices = {c: i for i, c in enumerate(chars)}
indices_char = {i: c for i, c in enumerate(chars)}

# Slice the corpus into overlapping windows: each training example is
# `maxlen` consecutive characters and its label is the character that
# immediately follows. A new window starts every `step` characters.
maxlen = 40
step = 3
window_starts = range(0, len(text) - maxlen, step)
sentences = [text[start: start + maxlen] for start in window_starts]
next_chars = [text[start + maxlen] for start in window_starts]
print('nb sequences:', len(sentences))

print('Vectorization...')
# One-hot encode: x[n, t, k] is True when position t of window n holds
# character k; y[n, k] is True when k is the character to predict.
n_windows, n_chars = len(sentences), len(chars)
x = np.zeros((n_windows, maxlen, n_chars), dtype=bool)
y = np.zeros((n_windows, n_chars), dtype=bool)
for row, (window, target) in enumerate(zip(sentences, next_chars)):
    for pos, ch in enumerate(window):
        x[row, pos, char_indices[ch]] = 1
    y[row, char_indices[target]] = 1


# Build the model: a single LSTM layer reading the one-hot character window,
# followed by a dense softmax over the vocabulary for the next character.
print('Build model...')
model = Sequential()
model.add(LSTM(128, input_shape=(maxlen, len(chars))))
model.add(Dense(len(chars)))
model.add(Activation('softmax'))

# Use `learning_rate`: the old `lr` keyword is deprecated and newer Keras
# optimizers either warn about it or reject it outright.
optimizer = RMSprop(learning_rate=0.01)
model.compile(loss='categorical_crossentropy', optimizer=optimizer)


def sample(preds, temperature=1.0):
    """Sample an index from a probability array, reweighted by temperature.

    Args:
        preds: 1-D array-like of probabilities (e.g. a softmax output).
        temperature: Scaling applied to the log-probabilities. Values below
            1 sharpen the distribution, values above 1 flatten it, and 0 is
            treated as the greedy limit (always the most likely index).

    Returns:
        The sampled index into ``preds``.
    """
    preds = np.asarray(preds).astype('float64')

    # temperature == 0 would divide by zero and yield NaN probabilities;
    # its mathematical limit is simply the arg-max.
    if temperature == 0:
        return np.argmax(preds)

    # log(0) is a legitimate -inf here (zero probability stays zero), so
    # silence the divide warning instead of letting it flood the output.
    with np.errstate(divide='ignore'):
        logits = np.log(preds) / temperature

    # Shift by the maximum before exponentiating so that very small
    # temperatures cannot underflow every term to 0 (which gave 0/0 = NaN).
    logits -= np.max(logits)
    exp_preds = np.exp(logits)
    probs = exp_preds / np.sum(exp_preds)

    probas = np.random.multinomial(1, probs, 1)
    return np.argmax(probas)


def on_epoch_end(epoch, logs):
    """Epoch-end hook: print a short generated continuation per diversity.

    A single random seed window is taken from the corpus and reused for each
    diversity value; for each one, 10 characters are sampled from the
    module-level ``model`` and streamed to stdout.

    Args:
        epoch: Index of the epoch that just finished (only printed).
        logs: Metrics passed by the callback machinery; unused here.
    """
    print()
    print('----- Generating text after Epoch: %d' % epoch)

    seed_start = random.randint(0, len(text) - maxlen - 1)
    seed = text[seed_start: seed_start + maxlen]

    for diversity in [0.2, 0.5, 1.0, 1.2]:
        print('----- diversity:', diversity)
        print('----- Generating with seed: "' + seed + '"')
        sys.stdout.write(seed)

        # Sliding window of the last `maxlen` characters fed to the model.
        window = seed
        for _ in range(10):
            x_pred = np.zeros((1, maxlen, len(chars)))
            for pos, ch in enumerate(window):
                x_pred[0, pos, char_indices[ch]] = 1.

            probs = model.predict(x_pred, verbose=0)[0]
            next_char = indices_char[sample(probs, diversity)]

            # Drop the oldest character and append the newly sampled one.
            window = window[1:] + next_char

            sys.stdout.write(next_char)
            sys.stdout.flush()
        print()

# Hook the text sampler into training so it runs after every epoch.
print_callback = LambdaCallback(on_epoch_end=on_epoch_end)

# Train for a single epoch; the call is left as the cell's final expression.
model.fit(
    x,
    y,
    batch_size=128,
    epochs=1,
    callbacks=[print_callback],
)

Downloading data from https://s3.amazonaws.com/text-datasets/nietzsche.txt
corpus length: 600893
total chars: 57
nb sequences: 200285
Vectorization...
Build model...




----- Generating text after Epoch: 0
----- diversity: 0.2
----- Generating with seed: "is own thoughts as if they came from the"
is own thoughts as if they came from the prelis th
----- diversity: 0.5
----- Generating with seed: "is own thoughts as if they came from the"
is own thoughts as if they came from the the pored
----- diversity: 1.0
----- Generating with seed: "is own thoughts as if they came from the"
is own thoughts as if they came from the
des in wa
----- diversity: 1.2
----- Generating with seed: "is own thoughts as if they came from the"
is own thoughts as if they came from the
masstar, 


<keras.src.callbacks.History at 0x7cd83c145a80>

In [None]:
!pip install ipdb

Collecting ipdb
  Downloading ipdb-0.13.13-py3-none-any.whl (12 kB)
Collecting jedi>=0.16 (from ipython>=7.31.1->ipdb)
  Downloading jedi-0.19.1-py2.py3-none-any.whl (1.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m16.1 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: jedi, ipdb
Successfully installed ipdb-0.13.13 jedi-0.19.1


In [None]:
'''
Using 1D convolutions to generate text from Nietzsche's and Shakespeare's
writings
'''

from __future__ import print_function
from keras.callbacks import LambdaCallback
from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten, Dropout
from keras.layers import LSTM
from keras.optimizers import RMSprop
from tensorflow.keras.utils import get_file
from keras.layers import Conv1D, MaxPooling1D, GlobalAveragePooling1D
import numpy as np
import random
import sys
import io

# Primary corpus: Nietzsche's writings, fetched via get_file.
path = get_file('nietzsche.txt', origin='https://s3.amazonaws.com/text-datasets/nietzsche.txt')

# 2nd Dataset : Combined works of Shakespeare
# path = get_file('shakespeare_input.txt', origin='https://cs.stanford.edu/people/karpathy/char-rnn/shakespeare_input.txt')

# Read the selected corpus in as lowercase text.
with io.open(path, encoding='utf-8') as corpus_file:
    text = corpus_file.read().lower()
print('corpus length:', len(text))

# Vocabulary: every distinct character in sorted order, plus lookup tables
# in both directions (character -> index and index -> character).
chars = sorted(set(text))
print('total chars:', len(chars))
char_indices = {c: i for i, c in enumerate(chars)}
indices_char = {i: c for i, c in enumerate(chars)}

# Slice the corpus into overlapping windows: each training example is
# `maxlen` consecutive characters and its label is the character that
# immediately follows. A new window starts every `step` characters.
maxlen = 40
step = 3
window_starts = range(0, len(text) - maxlen, step)
sentences = [text[start: start + maxlen] for start in window_starts]
next_chars = [text[start + maxlen] for start in window_starts]
print('nb sequences:', len(sentences))

print('Vectorization...')
# One-hot encode: x[n, t, k] is True when position t of window n holds
# character k; y[n, k] is True when k is the character to predict.
n_windows, n_chars = len(sentences), len(chars)
x = np.zeros((n_windows, maxlen, n_chars), dtype=bool)
y = np.zeros((n_windows, n_chars), dtype=bool)
for row, (window, target) in enumerate(zip(sentences, next_chars)):
    for pos, ch in enumerate(window):
        x[row, pos, char_indices[ch]] = 1
    y[row, char_indices[target]] = 1

# print(x.shape[0])
# print(x.shape[1])
# print(x.shape[2])


def build_model(maxlen, chars):
    """Build and compile the 1D-convolutional next-character model.

    Args:
        maxlen: Number of characters in each input window.
        chars: The vocabulary; only its length is used, to size the one-hot
            input dimension and the softmax output.

    Returns:
        A compiled ``Sequential`` model using categorical cross-entropy loss
        and an RMSprop optimizer.
    """
    vocab_size = len(chars)

    print('Build model...')
    model = Sequential()
    model.add(Conv1D(128, 5, padding='same', activation='relu', input_shape=(maxlen, vocab_size)))
    model.add(Flatten())
    model.add(Dense(vocab_size, activation='softmax'))

    # model 2 : VGG net like architecture, 5 layers
    # model.add(Conv1D(64, 3, activation='relu',input_shape=(maxlen, len(chars))))
    # model.add(Conv1D(64, 3, activation='relu'))
    # model.add(MaxPooling1D(pool_size=2))
    # model.add(Conv1D(128, 3, activation='relu'))
    # model.add(Conv1D(128, 3, activation='relu'))
    # model.add(MaxPooling1D(pool_size=2))
    # model.add(Flatten())
    # model.add(Dense(len(chars), activation='softmax'))

    # model 3: adapted from https://keras.io/getting-started/sequential-model-guide/
    # model.add(Conv1D(64, 3, activation='relu',input_shape=(maxlen, len(chars))))
    # model.add(Conv1D(64, 3, activation='relu'))
    # model.add(MaxPooling1D(pool_size=3))
    # model.add(Conv1D(128, 3, activation='relu'))
    # model.add(Conv1D(128, 3, activation='relu'))
    # model.add(MaxPooling1D(pool_size=3))
    # model.add(GlobalAveragePooling1D())
    # model.add(Dropout(0.5))
    # # model.add(Flatten())
    # model.add(Dense(len(chars), activation='sigmoid'))

    # Use `learning_rate`: the old `lr` keyword is deprecated and newer Keras
    # optimizers either warn about it or reject it outright.
    optimizer = RMSprop(learning_rate=0.01)
    model.compile(loss='categorical_crossentropy', optimizer=optimizer)

    return model

def sample(preds, temperature=1.0):
    """Sample an index from a probability array, reweighted by temperature.

    Args:
        preds: 1-D array-like of probabilities (e.g. a softmax output).
        temperature: Scaling applied to the log-probabilities. Values below
            1 sharpen the distribution, values above 1 flatten it, and 0 is
            treated as the greedy limit (always the most likely index).

    Returns:
        The sampled index into ``preds``.
    """
    preds = np.asarray(preds).astype('float64')

    # temperature == 0 would divide by zero and yield NaN probabilities;
    # its mathematical limit is simply the arg-max.
    if temperature == 0:
        return np.argmax(preds)

    # log(0) is a legitimate -inf here (zero probability stays zero), so
    # silence the divide warning instead of letting it flood the output.
    with np.errstate(divide='ignore'):
        logits = np.log(preds) / temperature

    # Shift by the maximum before exponentiating so that very small
    # temperatures cannot underflow every term to 0 (which gave 0/0 = NaN).
    logits -= np.max(logits)
    exp_preds = np.exp(logits)
    probs = exp_preds / np.sum(exp_preds)

    probas = np.random.multinomial(1, probs, 1)
    return np.argmax(probas)


def on_epoch_end(epoch, logs):
    """Epoch-end hook: print a short generated continuation per diversity.

    A single random seed window is taken from the corpus and reused for each
    diversity value; for each one, 10 characters are sampled from the
    module-level ``model`` and streamed to stdout.

    Args:
        epoch: Index of the epoch that just finished (only printed).
        logs: Metrics passed by the callback machinery; unused here.
    """
    print()
    print('----- Generating text after Epoch: %d' % epoch)

    seed_start = random.randint(0, len(text) - maxlen - 1)
    seed = text[seed_start: seed_start + maxlen]

    for diversity in [0.2, 0.5, 1.0, 1.2]:
        print('----- diversity:', diversity)
        print('----- Generating with seed: "' + seed + '"')
        sys.stdout.write(seed)

        # Sliding window of the last `maxlen` characters fed to the model.
        window = seed
        for _ in range(10):
            x_pred = np.zeros((1, maxlen, len(chars)))
            for pos, ch in enumerate(window):
                x_pred[0, pos, char_indices[ch]] = 1.

            probs = model.predict(x_pred, verbose=0)[0]
            next_char = indices_char[sample(probs, diversity)]

            # Drop the oldest character and append the newly sampled one.
            window = window[1:] + next_char

            sys.stdout.write(next_char)
            sys.stdout.flush()
        print()

if __name__ == '__main__':
    # Generate sample text at the end of every epoch.
    print_callback = LambdaCallback(on_epoch_end=on_epoch_end)

    # `model` is intentionally module-level: on_epoch_end reads it as a global.
    model = build_model(maxlen, chars)

    # No debugger breakpoint after training: a blocking set_trace() would
    # stall the kernel and prevent the notebook from running end to end.
    model.fit(
        x,
        y,
        batch_size=128,
        epochs=1,
        callbacks=[print_callback]
    )

corpus length: 600893
total chars: 57
nb sequences: 200285
Vectorization...




Build model...
----- Generating text after Epoch: 0
----- diversity: 0.2
----- Generating with seed: "her "categorical,"
as old kant wished (c"
her "categorical,"
as old kant wished (conter and 
----- diversity: 0.5
----- Generating with seed: "her "categorical,"
as old kant wished (c"
her "categorical,"
as old kant wished (cous eat in
----- diversity: 1.0
----- Generating with seed: "her "categorical,"
as old kant wished (c"
her "categorical,"
as old kant wished (cordean and
----- diversity: 1.2
----- Generating with seed: "her "categorical,"
as old kant wished (c"
her "categorical,"
as old kant wished (cempresammy



sys.settrace() should not be used when the debugger is being used.
This may cause the debugger to stop working correctly.
If this is needed, please check: 
http://pydev.blogspot.com/2007/06/why-cant-pydev-debugger-work-with.html
to see how to restore the debug tracing back correctly.
Call Location:
  File "/usr/lib/python3.10/bdb.py", line 336, in set_trace
    sys.settrace(self.trace_dispatch)



--Return--
None
> [0;32m<ipython-input-3-0a5028eb9ca3>[0m(147)[0;36m<cell line: 133>[0;34m()[0m
[0;32m    145 [0;31m        [0mcallbacks[0m[0;34m=[0m[0;34m[[0m[0mprint_callback[0m[0;34m][0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    146 [0;31m    )
[0m[0;32m--> 147 [0;31m    [0mipdb[0m[0;34m.[0m[0mset_trace[0m[0;34m([0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m
