In [9]:
from keras.callbacks import LambdaCallback
from keras.callbacks import ModelCheckpoint
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Dropout
from keras.optimizers import RMSprop
import numpy as np
import random
import sys
import io
from __future__ import print_function

In [10]:
# Load the whole corpus into memory as a single lowercased string.
# The file is decoded explicitly as UTF-8 rather than with the platform's
# default encoding, so the character set (and therefore the vocabulary built
# from it) is the same on every machine. io.open is used instead of the
# builtin so the `encoding` argument is also accepted on Python 2.
with io.open('showerthoughts.txt', 'r', encoding='utf-8') as raw_file:
    text = raw_file.read().lower()
print('total num of chars:', len(text))

total num of chars: 107073


In [11]:
# Build the vocabulary: every distinct character of the corpus in sorted
# order, plus lookup tables in both directions
# (character -> index and index -> character).
chars = sorted(set(text))
print('total unique chars:', len(chars))
char_indices = {char: index for index, char in enumerate(chars)}
indices_char = dict(enumerate(chars))

total unique chars: 73


In [12]:
# Preprocess text into training data.
# The corpus is cut into overlapping windows of `sequence_len` characters,
# starting every `step` characters. Each window is one input sample and the
# single character that immediately follows it is the value to predict.

sequence_len = 40
step = 3
# Stop early enough that every window still has a following character.
window_starts = range(0, len(text) - sequence_len, step)
sequences = [text[start:start + sequence_len] for start in window_starts]
next_chars = [text[start + sequence_len] for start in window_starts]
print('num of sequences:', len(sequences))

num of sequences: 35678


In [13]:
# Vectorization
#
# One-hot encode the samples into boolean arrays:
#   X[i, t, k] is True when character t of sequence i is chars[k]
#   y[i, k]    is True when the character following sequence i is chars[k]
#
# The builtin `bool` is used as the dtype. The `np.bool` alias was deprecated
# in NumPy 1.20 and removed in NumPy 1.24 (where it raises AttributeError);
# `bool` produces the same array dtype on every NumPy version.

X = np.zeros((len(sequences), sequence_len, len(chars)), dtype=bool)
y = np.zeros((len(sequences), len(chars)), dtype=bool)
for i, sequence in enumerate(sequences):
    for t, char in enumerate(sequence):
        X[i, t, char_indices[char]] = True
    y[i, char_indices[next_chars[i]]] = True

In [14]:
# Define the LSTM model
#
# Layers, in order: a 256-unit LSTM taking input of shape
# (sequence_len, len(chars)), a Dropout layer with rate 0.2, and a Dense
# softmax layer with one output per character in the vocabulary.

vocab_size = len(chars)
model = Sequential([
    LSTM(256, input_shape=(sequence_len, vocab_size)),
    Dropout(0.2),
    Dense(vocab_size, activation='softmax'),
])

# Targets are one-hot rows, hence categorical cross-entropy as the loss.
optimizer = RMSprop(lr=0.01)
model.compile(optimizer=optimizer, loss='categorical_crossentropy')

In [15]:
# Define callbacks
#
# Checkpoint callback: monitors the training loss and, with
# save_best_only=True, only writes a file when that loss improves on the best
# value seen so far. The epoch number and loss are formatted into the name.

checkpoint_filepath = "weights-improvement-{epoch:02d}-{loss:.4f}.hdf5"
checkpoint = ModelCheckpoint(
    checkpoint_filepath,
    monitor='loss',
    mode='min',
    save_best_only=True,
    verbose=1,
)

In [16]:
# Train model
#
# Runs 20 epochs over the one-hot arrays in mini-batches of 128 samples, with
# the checkpoint callback attached.
# NOTE(review): the output saved with this cell contains "Generating text
# after Epoch" logs, which the callback list here does not produce. It looks
# like stale output from an earlier version of the cell; re-run to refresh.

model.fit(
    X,
    y,
    batch_size=128,
    epochs=20,
    callbacks=[checkpoint],
)

Epoch 1/20

----- Generating text after Epoch: 0
----- diversity: 0.2
----- Generating with seed: "ve charged our phones in cars
airports h"
ve charged our phones in cars
airports he t the s pe the the the the the the th the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the s the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the th
----- diversity: 0.5
----- Generating with seed: "ve charged our phones in cars
airports h"
ve charged our phones in cars
airports he as ll lels te se thes the the le the se the the the are the ge all inlt ly f she the s in s an the the ble po car th ant in cle thes s in thind theas the the me the l te mor d ta n s s the wile the theinge ame the the ye as pe thet t thelg the the the ins the s the te the 
s the le w

KeyboardInterrupt: 