In [1]:
'''
MLP character model. Code adapted from https://github.com/keras-team/keras/blob/master/examples/lstm_text_generation.py
'''

from __future__ import print_function
from keras.callbacks import LambdaCallback
from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten
from keras.layers import LSTM
from keras.optimizers import RMSprop
from keras.utils.data_utils import get_file
import numpy as np
import random
import sys
import io

# Fetch the Nietzsche corpus through Keras' get_file helper and lower-case it
# so that upper/lower-case variants share one vocabulary entry.
path = get_file('nietzsche.txt', origin='https://s3.amazonaws.com/text-datasets/nietzsche.txt')
with io.open(path, encoding='utf-8') as f:
    text = f.read().lower()
print('corpus length:', len(text))

# Vocabulary: every distinct character, sorted so that the index assignment
# is deterministic, plus lookup tables in both directions.
chars = sorted(set(text))
print('total chars:', len(chars))
char_indices = {c: i for i, c in enumerate(chars)}
indices_char = {i: c for i, c in enumerate(chars)}

# cut the text in semi-redundant sequences of maxlen characters
maxlen = 40
step = 3
sentences = []
next_chars = []
# The range stops at len(text) - maxlen, so text[i + maxlen] (the target
# character following each window) is always a valid index.
for i in range(0, len(text) - maxlen, step):
    sentences.append(text[i: i + maxlen])
    next_chars.append(text[i + maxlen])
print('nb sequences:', len(sentences))

print('Vectorization...')
# One-hot encode inputs (sample, time step, character) and targets
# (sample, character).  The builtin `bool` is used as the dtype because the
# `np.bool` alias was deprecated in NumPy 1.20 and removed in 1.24.
x = np.zeros((len(sentences), maxlen, len(chars)), dtype=bool)
y = np.zeros((len(sentences), len(chars)), dtype=bool)
for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        x[i, t, char_indices[char]] = 1
    y[i, char_indices[next_chars[i]]] = 1


def build_model(maxlen, chars):
    """Build and compile the MLP next-character model.

    The one-hot input window of shape ``(maxlen, len(chars))`` is flattened,
    passed through one 128-unit hidden layer, and projected onto a softmax
    over the vocabulary.

    Args:
        maxlen: Number of characters in each input window.
        chars: The vocabulary; only its length is used.

    Returns:
        A compiled Keras ``Sequential`` model (categorical cross-entropy
        loss, RMSprop optimizer with a learning rate of 0.01).
    """
    print('Build model...')
    model = Sequential()
    model.add(Flatten(input_shape=(maxlen, len(chars))))
    # The hidden layer needs a non-linearity: Dense defaults to a linear
    # activation, and two stacked linear layers collapse into a single
    # (rank-limited) linear map, i.e. not an MLP at all.
    model.add(Dense(128, activation='relu'))
    model.add(Dense(len(chars)))
    model.add(Activation('softmax'))

    optimizer = RMSprop(lr=0.01)
    model.compile(loss='categorical_crossentropy', optimizer=optimizer)

    return model

def sample(preds, temperature=1.0):
    """Sample an index from a probability array after temperature scaling.

    The probabilities are re-weighted as ``p ** (1 / temperature)`` and
    renormalised: temperatures below 1 sharpen the distribution, temperatures
    above 1 flatten it.

    Args:
        preds: 1-D array-like of probabilities (e.g. a softmax output).
        temperature: Positive scaling factor; 1.0 leaves ``preds`` unchanged
            up to renormalisation.

    Returns:
        The sampled index, as returned by ``np.argmax``.
    """
    preds = np.asarray(preds).astype('float64')
    # log(0) == -inf is the intended "never pick this index" value, so the
    # divide-by-zero warning is silenced instead of clipping the input.
    with np.errstate(divide='ignore'):
        logits = np.log(preds) / temperature
    # Shift so the largest term becomes exp(0) == 1.  Without the shift, a
    # small temperature makes every exp() underflow to 0 and the
    # normalisation below turns into 0 / 0 (NaN probabilities).
    logits = logits - np.max(logits)
    exp_preds = np.exp(logits)
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)


def on_epoch_end(epoch, logs):
    """Epoch-end hook: print text generated by the current model.

    A single seed window of ``maxlen`` characters is drawn at random from
    ``text`` and reused for every diversity (sampling temperature) value.
    For each diversity, 400 characters are generated one at a time and
    streamed to stdout.

    Args:
        epoch: Epoch index passed in by the callback (printed as-is).
        logs: Metrics passed in by the callback; not used here.
    """
    print()
    print('----- Generating text after Epoch: %d' % epoch)

    seed_start = random.randint(0, len(text) - maxlen - 1)
    seed = text[seed_start: seed_start + maxlen]

    for diversity in (0.2, 0.5, 1.0, 1.2):
        print('----- diversity:', diversity)
        print('----- Generating with seed: "' + seed + '"')
        sys.stdout.write(seed)

        # Sliding context fed to the model; starts as the seed itself.
        window = seed
        for _ in range(400):
            # One-hot encode the current window as a batch of one sample.
            x_pred = np.zeros((1, maxlen, len(chars)))
            for pos, ch in enumerate(window):
                x_pred[0, pos, char_indices[ch]] = 1.

            preds = model.predict(x_pred, verbose=0)[0]
            next_char = indices_char[sample(preds, diversity)]

            # Drop the oldest character and append the newly sampled one.
            window = window[1:] + next_char

            sys.stdout.write(next_char)
            sys.stdout.flush()
        print()

if __name__ == '__main__':
    # Print sample generations at the end of every epoch.
    print_callback = LambdaCallback(on_epoch_end=on_epoch_end)

    # on_epoch_end reads the module-level name `model`, so the model must be
    # bound under exactly this name before training starts.
    model = build_model(maxlen, chars)

    # The leftover `import ipdb` / `ipdb.set_trace()` debugging hooks were
    # removed: they halted "Run All" at an interactive prompt after training
    # and required a third-party debugger to be installed.
    model.fit(
        x,
        y,
        batch_size=128,
        epochs=60,
        callbacks=[print_callback]
    )

Using TensorFlow backend.


corpus length: 600893
total chars: 57
nb sequences: 200285
Vectorization...
Build model...
Epoch 1/60

----- Generating text after Epoch: 0
----- diversity: 0.2
----- Generating with seed: ", that he has
acted in everything with m"
, that he has
acted in everything with mes in o athise vid is thithe this dore phis phis perate, and med insily this chithis chithis aen of the and encof in is of the sumely an as and and an thithe an in the and al the menthe and ved hich many his sicheras and aus and ind in the as of as ale med an the s male s mal ad as in med ins an as and ans and amen mas mont of ill and mesthit mof in the as of as in the as ind is the
desthis ches i
----- diversity: 0.5
----- Generating with seed: ", that he has
acted in everything with m"
, that he has
acted in everything with med miny hathof ce alo ghe pesely his the dof and aumely; and iund at ouly is his de are
at oo phis siere aly acithe forthist at of mh thiche s alides chithiss lo shis me this apself ale st
le por

expression itsenat ao here ofllaks timatry mhilltof thether usterming cems er
juct oe cawe co temptempente. at ure the hher sumalf treeramse frs ifstive ing aimols (whe be burino  ; el? gever aus ones uhlor th esmune tha thei inis of the whem thmure thes act bulkeme aed
mathereledentinol, iad on hheply ef therst hhe vir hafor. the wore hamasithet ableaition,
amporeercumanwestablingrmuther ferentirkenf.--thisod, 
----- diversity: 1.2
----- Generating with seed: "ition, without which the
expression itse"
ition, without which the
expression itseiadl mebline, ondurouf l(tek of thr ahs, alr chumrmas resod meraukent, the
what a bemprrald
cel awo iha belcremple spertfenedve, in shamurthe  ofmuntefimmevery-dof ogr -he mulkerw
ans dree: tingin thedregezaty anepuvesso-ithilur futh upcass.ays arbe ce hal as turelomeea i ss af, astthece,"bud." iess wheatag  phamos meot ta thewhwin at aris
sunt atine ofylesans cherilf revelf roulf all asts biekvog
Epoch 5/60

----- Generating text after Epoch: 4
--

s an whin the chas an f of the s an fin the bat he mon tho s an the pacho t al an on be on whing ion an on the be no serf che f an the ghe sons is an on ar an is ble in s an an the s an of an  he mang the 
----- diversity: 0.5
----- Generating with seed: "t does not require deliberation. as soon"
t does not require deliberation. as soon the go at en the
men or fot in been co silf and rers al se fereelo d se foching on mas in

f co the s of the s an in all on ha derigro for the an thos the whil  he ha phes and an on geres tho g of the beco fer in pionos ing ang at ches io bat of mon  hangitho es in ses ace be meres in whe he se fer me pras an s of mant in sals an
beleso thit an chon thon the profist he we he for thin ther and sil
----- diversity: 1.0
----- Generating with seed: "t does not require deliberation. as soon"
t does not require deliberation. as soon chow mas amer rod fove to geo
s f if noldon nhtincasise
fes pr eos of ofsctin sof en isre, one ss s btit ces micho woch aquele w

moficapenithivedupapars, ife mmeerompos fftremons chictianc, mo nawheen the arivisulo ) esimin. 

arlupties, the afeer eeffeed bekin weverios thit peliopulintwm, ofthaangutraashait sspuntoki; thas of endensscur belels eovin thaspsivelobyonlthith"lhace shithecenadivica ce the reos
or hare morag do cerancounmosasils, aan
hareva be hod hes thorigh, en beerpatly, fhe hor bet iblousrycons, bedde
Epoch 12/60

----- Generating text after Epoch: 11
----- diversity: 0.2
----- Generating with seed: "yspeptics like what
is convenient, so th"
yspeptics like what
is convenient, so the stel and mone the bele ment ef reste the
belist, the mast the sto the the belles monem ant

hally, and anlleges ant ous mas inle sos las bee
mest--be the be bes morell to mare the sempresiby beente se bet ored romestint and mathe steveredis the mant of the stee
the make wist us monk of verelt experted and mand ins as the ste tave the stever, the sallets of the stee be the sall toul the
salt ant 
----- diversity: 0.5
-

KeyboardInterrupt: 

In [2]:
'''
Conv1D character model. Code adapted from https://github.com/keras-team/keras/blob/master/examples/lstm_text_generation.py
'''

from __future__ import print_function
from keras.callbacks import LambdaCallback
from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten
from keras.layers import LSTM
from keras.optimizers import RMSprop
from keras.utils.data_utils import get_file
from keras.layers import Conv1D, MaxPooling1D
import numpy as np
import random
import sys
import io

# Fetch the Nietzsche corpus through Keras' get_file helper and lower-case it
# so that upper/lower-case variants share one vocabulary entry.
path = get_file('nietzsche.txt', origin='https://s3.amazonaws.com/text-datasets/nietzsche.txt')
with io.open(path, encoding='utf-8') as f:
    text = f.read().lower()
print('corpus length:', len(text))

# Vocabulary: every distinct character, sorted so that the index assignment
# is deterministic, plus lookup tables in both directions.
chars = sorted(set(text))
print('total chars:', len(chars))
char_indices = {c: i for i, c in enumerate(chars)}
indices_char = {i: c for i, c in enumerate(chars)}

# cut the text in semi-redundant sequences of maxlen characters
maxlen = 40
step = 3
sentences = []
next_chars = []
# The range stops at len(text) - maxlen, so text[i + maxlen] (the target
# character following each window) is always a valid index.
for i in range(0, len(text) - maxlen, step):
    sentences.append(text[i: i + maxlen])
    next_chars.append(text[i + maxlen])
print('nb sequences:', len(sentences))

print('Vectorization...')
# One-hot encode inputs (sample, time step, character) and targets
# (sample, character).  The builtin `bool` is used as the dtype because the
# `np.bool` alias was deprecated in NumPy 1.20 and removed in 1.24.
x = np.zeros((len(sentences), maxlen, len(chars)), dtype=bool)
y = np.zeros((len(sentences), len(chars)), dtype=bool)
for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        x[i, t, char_indices[char]] = 1
    y[i, char_indices[next_chars[i]]] = 1

# print(x.shape[0])
# print(x.shape[1])
# print(x.shape[2])


def build_model(maxlen, chars):
    """Build and compile the Conv1D next-character model.

    Two convolutional stages (2 x Conv1D(64) then 2 x Conv1D(128), kernel
    size 3, ReLU), each followed by max-pooling of size 3, feed a flattened
    softmax layer over the vocabulary.

    Args:
        maxlen: Number of characters in each input window.
        chars: The vocabulary; only its length is used.

    Returns:
        A compiled Keras ``Sequential`` model (categorical cross-entropy
        loss, RMSprop optimizer with a learning rate of 0.01).
    """
    print('Build model...')
    vocab_size = len(chars)
    model = Sequential()

    # Stage 1: 64-filter convolutions over the one-hot character window.
    model.add(Conv1D(64, 3, activation='relu', input_shape=(maxlen, vocab_size)))
    model.add(Conv1D(64, 3, activation='relu'))
    model.add(MaxPooling1D(pool_size=3))
    # Diagnostic: report the output shape of the first convolution.
    print(model.layers[0].output_shape)

    # Stage 2: 128-filter convolutions on the pooled feature sequence.
    for _ in range(2):
        model.add(Conv1D(128, 3, activation='relu'))
    model.add(MaxPooling1D(pool_size=3))

    # Classifier head: flatten the remaining time steps and predict the
    # next character directly.
    model.add(Flatten())
    model.add(Dense(vocab_size, activation='softmax'))

    model.compile(loss='categorical_crossentropy', optimizer=RMSprop(lr=0.01))
    return model

def sample(preds, temperature=1.0):
    """Sample an index from a probability array after temperature scaling.

    The probabilities are re-weighted as ``p ** (1 / temperature)`` and
    renormalised: temperatures below 1 sharpen the distribution, temperatures
    above 1 flatten it.

    Args:
        preds: 1-D array-like of probabilities (e.g. a softmax output).
        temperature: Positive scaling factor; 1.0 leaves ``preds`` unchanged
            up to renormalisation.

    Returns:
        The sampled index, as returned by ``np.argmax``.
    """
    preds = np.asarray(preds).astype('float64')
    # log(0) == -inf is the intended "never pick this index" value, so the
    # divide-by-zero warning is silenced instead of clipping the input.
    with np.errstate(divide='ignore'):
        logits = np.log(preds) / temperature
    # Shift so the largest term becomes exp(0) == 1.  Without the shift, a
    # small temperature makes every exp() underflow to 0 and the
    # normalisation below turns into 0 / 0 (NaN probabilities).
    logits = logits - np.max(logits)
    exp_preds = np.exp(logits)
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)


def on_epoch_end(epoch, logs):
    """Epoch-end hook: print a one-character continuation from the model.

    A single seed window of ``maxlen`` characters is drawn at random from
    ``text`` and reused for every diversity (sampling temperature) value.
    For each diversity the seed is echoed and exactly one sampled character
    is appended to it on stdout.

    Args:
        epoch: Epoch index passed in by the callback (printed as-is).
        logs: Metrics passed in by the callback; not used here.
    """
    print()
    print('----- Generating text after Epoch: %d' % epoch)

    seed_start = random.randint(0, len(text) - maxlen - 1)
    seed = text[seed_start: seed_start + maxlen]

    for diversity in (0.2, 0.5, 1.0, 1.2):
        print('----- diversity:', diversity)
        print('----- Generating with seed: "' + seed + '"')
        sys.stdout.write(seed)

        # Sliding context fed to the model; starts as the seed itself.
        window = seed
        # Only a single character is generated per diversity in this cell.
        for _ in range(1):
            # One-hot encode the current window as a batch of one sample.
            x_pred = np.zeros((1, maxlen, len(chars)))
            for pos, ch in enumerate(window):
                x_pred[0, pos, char_indices[ch]] = 1.

            preds = model.predict(x_pred, verbose=0)[0]
            next_char = indices_char[sample(preds, diversity)]

            # Drop the oldest character and append the newly sampled one.
            window = window[1:] + next_char

            sys.stdout.write(next_char)
            sys.stdout.flush()
        print()

if __name__ == '__main__':
    # Print sample generations at the end of every epoch.
    print_callback = LambdaCallback(on_epoch_end=on_epoch_end)

    # on_epoch_end reads the module-level name `model`, so the model must be
    # bound under exactly this name before training starts.
    model = build_model(maxlen, chars)

    # The leftover `import ipdb` / `ipdb.set_trace()` debugging hooks were
    # removed: they halted "Run All" at an interactive prompt after training
    # and required a third-party debugger to be installed.
    model.fit(
        x,
        y,
        batch_size=128,
        epochs=60,
        callbacks=[print_callback]
    )

corpus length: 600893
total chars: 57
nb sequences: 200285
Vectorization...
Build model...
Instructions for updating:
`NHWC` for data_format is deprecated, use `NWC` instead
(None, 38, 64)
Epoch 1/60

----- Generating text after Epoch: 0
----- diversity: 0.2
----- Generating with seed: "ges. many acts are called bad
that are o"
ges. many acts are called bad
that are o 
----- diversity: 0.5
----- Generating with seed: "ges. many acts are called bad
that are o"
ges. many acts are called bad
that are ot
----- diversity: 1.0
----- Generating with seed: "ges. many acts are called bad
that are o"
ges. many acts are called bad
that are o 
----- diversity: 1.2
----- Generating with seed: "ges. many acts are called bad
that are o"
ges. many acts are called bad
that are ou
Epoch 2/60

----- Generating text after Epoch: 1
----- diversity: 0.2
----- Generating with seed: "gris from a higher inclination--whoever "
gris from a higher inclination--whoever e
----- diversity: 0.5
----- Generating with 

in a mit
----- diversity: 1.2
----- Generating with seed: " they saw but the counterpart as
in a mi"
 they saw but the counterpart as
in a mi 
Epoch 13/60

----- Generating text after Epoch: 12
----- diversity: 0.2
----- Generating with seed: " seems to him to be as unalterable as hi"
 seems to him to be as unalterable as hi 
----- diversity: 0.5
----- Generating with seed: " seems to him to be as unalterable as hi"
 seems to him to be as unalterable as hin
----- diversity: 1.0
----- Generating with seed: " seems to him to be as unalterable as hi"
 seems to him to be as unalterable as hid
----- diversity: 1.2
----- Generating with seed: " seems to him to be as unalterable as hi"
 seems to him to be as unalterable as hi,
Epoch 14/60

----- Generating text after Epoch: 13
----- diversity: 0.2
----- Generating with seed: "rance
of napoleon. there are words of go"
rance
of napoleon. there are words of go 
----- diversity: 0.5
----- Generating with seed: "rance
of napoleon. there are words 

KeyboardInterrupt: 

In [17]:
'''Example script to generate text from Nietzsche's writings.

At least 20 epochs are required before the generated text
starts sounding coherent.

It is recommended to run this script on GPU, as recurrent
networks are quite computationally intensive.

If you try this script on new data, make sure your corpus
has at least ~100k characters. ~1M is better.
'''

from __future__ import print_function
from keras.callbacks import LambdaCallback
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.layers import LSTM
from keras.optimizers import RMSprop
from keras.utils.data_utils import get_file
import numpy as np
import random
import sys
import io

# Fetch the Nietzsche corpus through Keras' get_file helper and lower-case it
# so that upper/lower-case variants share one vocabulary entry.
path = get_file('nietzsche.txt', origin='https://s3.amazonaws.com/text-datasets/nietzsche.txt')
with io.open(path, encoding='utf-8') as f:
    text = f.read().lower()
print('corpus length:', len(text))

# Vocabulary: every distinct character, sorted so that the index assignment
# is deterministic, plus lookup tables in both directions.
chars = sorted(set(text))
print('total chars:', len(chars))
char_indices = {c: i for i, c in enumerate(chars)}
indices_char = {i: c for i, c in enumerate(chars)}

# cut the text in semi-redundant sequences of maxlen characters
maxlen = 40
step = 3
sentences = []
next_chars = []
# The range stops at len(text) - maxlen, so text[i + maxlen] (the target
# character following each window) is always a valid index.
for i in range(0, len(text) - maxlen, step):
    sentences.append(text[i: i + maxlen])
    next_chars.append(text[i + maxlen])
print('nb sequences:', len(sentences))

print('Vectorization...')
# One-hot encode inputs (sample, time step, character) and targets
# (sample, character).  The builtin `bool` is used as the dtype because the
# `np.bool` alias was deprecated in NumPy 1.20 and removed in 1.24.
x = np.zeros((len(sentences), maxlen, len(chars)), dtype=bool)
y = np.zeros((len(sentences), len(chars)), dtype=bool)
for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        x[i, t, char_indices[char]] = 1
    y[i, char_indices[next_chars[i]]] = 1


# Assemble the network: one 128-unit LSTM reads the one-hot character window,
# and a softmax over the vocabulary predicts the next character.
print('Build model...')
model = Sequential([
    LSTM(128, input_shape=(maxlen, len(chars))),
    Dense(len(chars)),
    Activation('softmax'),
])

# Targets are one-hot vectors, hence categorical cross-entropy.
optimizer = RMSprop(lr=0.01)
model.compile(optimizer=optimizer, loss='categorical_crossentropy')


def sample(preds, temperature=1.0):
    """Sample an index from a probability array after temperature scaling.

    The probabilities are re-weighted as ``p ** (1 / temperature)`` and
    renormalised: temperatures below 1 sharpen the distribution, temperatures
    above 1 flatten it.

    Args:
        preds: 1-D array-like of probabilities (e.g. a softmax output).
        temperature: Positive scaling factor; 1.0 leaves ``preds`` unchanged
            up to renormalisation.

    Returns:
        The sampled index, as returned by ``np.argmax``.
    """
    preds = np.asarray(preds).astype('float64')
    # log(0) == -inf is the intended "never pick this index" value, so the
    # divide-by-zero warning is silenced instead of clipping the input.
    with np.errstate(divide='ignore'):
        logits = np.log(preds) / temperature
    # Shift so the largest term becomes exp(0) == 1.  Without the shift, a
    # small temperature makes every exp() underflow to 0 and the
    # normalisation below turns into 0 / 0 (NaN probabilities).
    logits = logits - np.max(logits)
    exp_preds = np.exp(logits)
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)


def on_epoch_end(epoch, logs):
    """Epoch-end hook: print text generated by the current model.

    A single seed window of ``maxlen`` characters is drawn at random from
    ``text`` and reused for every diversity (sampling temperature) value.
    For each diversity, 400 characters are generated one at a time and
    streamed to stdout.

    Args:
        epoch: Epoch index passed in by the callback (printed as-is).
        logs: Metrics passed in by the callback; not used here.
    """
    print()
    print('----- Generating text after Epoch: %d' % epoch)

    seed_start = random.randint(0, len(text) - maxlen - 1)
    seed = text[seed_start: seed_start + maxlen]

    for diversity in (0.2, 0.5, 1.0, 1.2):
        print('----- diversity:', diversity)
        print('----- Generating with seed: "' + seed + '"')
        sys.stdout.write(seed)

        # Sliding context fed to the model; starts as the seed itself.
        window = seed
        for _ in range(400):
            # One-hot encode the current window as a batch of one sample.
            x_pred = np.zeros((1, maxlen, len(chars)))
            for pos, ch in enumerate(window):
                x_pred[0, pos, char_indices[ch]] = 1.

            preds = model.predict(x_pred, verbose=0)[0]
            next_char = indices_char[sample(preds, diversity)]

            # Drop the oldest character and append the newly sampled one.
            window = window[1:] + next_char

            sys.stdout.write(next_char)
            sys.stdout.flush()
        print()

# Wrap the text-generation preview in a Keras callback so that it runs at
# the end of every epoch, then train on the one-hot windows and targets.
print_callback = LambdaCallback(on_epoch_end=on_epoch_end)

model.fit(
    x,
    y,
    batch_size=128,
    epochs=60,
    callbacks=[print_callback],
)

Loading data...
Downloading data from https://s3.amazonaws.com/text-datasets/imdb.npz
25000 train sequences
25000 test sequences
Pad sequences (samples x time)
x_train shape: (25000, 400)
x_test shape: (25000, 400)
Build model...
Train on 25000 samples, validate on 25000 samples
Epoch 1/2
Epoch 2/2

KeyboardInterrupt: 