In [1]:
import sys
import io
import random
import numpy as np
from __future__ import print_function
from keras.callbacks import LambdaCallback
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.optimizers import RMSprop

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:
text = open('song_lyrics.txt', 'r').read().lower()
print('Corpus Length:', len(text))
text[:500]

Corpus Length: 196350


'"hot air balloon"\n\nwe wrote a prelude to our own fairytale\nand bought a parachute at a church rummage sale.\nand with a mean sewing machine and miles of thread\nwe sewed the day above l.a. in navy and red.\n\nwe round a racetrack through your mom\'s kitchen chairs\nand fought the shadows back down your dark basement stairs.\ni lit a match and let it catch to light up the room.\nand then you yelled as we beheld an old maroon.\n\nhot air balloon.\n\ni\'ll be out of my mind,\nand you\'ll be out of ideas pretty so'

In [3]:
chars = sorted(list(set(text)))
print('Total Chars:', len(chars))
print(chars)
char_indices = dict((c, i) for i, c in enumerate(chars))
indices_char = dict((i, c) for i, c in enumerate(chars))

Total Chars: 61
['\n', ' ', '!', '"', '&', "'", '(', ')', ',', '-', '.', '/', '0', '1', '2', '3', '4', '5', '6', '8', '9', ':', ';', '?', '[', ']', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '¢', '¦', '©', '¼', 'â', 'ã', '˜', '€', '™']


In [4]:
# Cut the text in semi-redundant sequences of maxlen characters
maxlen = 200
step = 3
sentences = []
next_chars = []
for i in range(0, len(text) - maxlen, step):
    sentences.append(text[i: i + maxlen])
    next_chars.append(text[i + maxlen])
print('Number of Sequences:', len(sentences))
print(sentences[:5])

Number of Sequences: 65437
['"hot air balloon"\n\nwe wrote a prelude to', 't air balloon"\n\nwe wrote a prelude to ou', 'ir balloon"\n\nwe wrote a prelude to our o', 'balloon"\n\nwe wrote a prelude to our own ', 'loon"\n\nwe wrote a prelude to our own fai']


In [5]:
# Vectorization
x = np.zeros((len(sentences), maxlen, len(chars)), dtype=np.bool)
y = np.zeros((len(sentences), len(chars)), dtype=np.bool)
for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        x[i, t, char_indices[char]] = 1
    y[i, char_indices[next_chars[i]]] = 1

In [6]:
# Creating the LSTM model
model = Sequential()
model.add(LSTM(128, input_shape=(maxlen, len(chars))))
model.add(Dense(len(chars), activation='softmax'))
optimizer = RMSprop(0.01)
model.compile(loss='categorical_crossentropy', optimizer=optimizer)

Instructions for updating:
Colocations handled automatically by placer.


In [7]:
def sample(preds, temperature=1.0):
    # Helper function to sample an index from a probability array
    preds = np.asarray(preds).astype('float64')
    preds = np.log(preds) / temperature
    exp_preds = np.exp(preds)
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

In [8]:
def on_epoch_end(epoch, _):
    # Function invoked at end of each epoch. Prints generated text.
    print('--------------------------------------')
    print('----- Generating text after Epoch: %d' % epoch)

    start_index = random.randint(0, len(text) - maxlen - 1)
    for diversity in [0.2, 0.5, 1.0, 1.2]:
        print('----- Diversity:', diversity)

        generated = ''
        sentence = text[start_index: start_index + maxlen]
        generated += sentence
        print('----- Generating with seed: "' + sentence + '"')
        sys.stdout.write(generated)

        for i in range(400):
            x_pred = np.zeros((1, maxlen, len(chars)))
            for t, char in enumerate(sentence):
                x_pred[0, t, char_indices[char]] = 1.

            preds = model.predict(x_pred, verbose=0)[0]
            next_index = sample(preds, diversity)
            next_char = indices_char[next_index]

            sentence = sentence[1:] + next_char

            sys.stdout.write(next_char)
            sys.stdout.flush()
        print()

print_callback = LambdaCallback(on_epoch_end=on_epoch_end)

model.fit(x, y,
          batch_size=128,
          epochs=15,
          callbacks=[print_callback])

Instructions for updating:
Use tf.cast instead.
Epoch 1/60

----- Generating text after Epoch: 0
----- diversity: 0.2
----- Generating with seed: "er pot
i'll pack my bags and off i'll go"


NameError: name 'sys' is not defined