In [22]:
# Larger LSTM Network to Generate Text for "Men in The Sun"
import numpy
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import LSTM
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.utils import to_categorical

In [23]:
# Load the UTF-8 text and lowercase it so that upper- and lower-case
# letters share a single vocabulary entry
filename = "men_in_the_sun_edited.txt"
# A context manager closes the file handle as soon as the text is read
with open(filename, 'r', encoding='utf-8') as text_file:
    raw_text = text_file.read()
raw_text = raw_text.lower()

# Map every unique character to an integer index; sorting the characters
# makes the mapping the same on every run
chars = sorted(set(raw_text))
char_to_int = {c: i for i, c in enumerate(chars)}


In [24]:
# Report the corpus length (in characters) and the number of distinct characters
n_chars, n_vocab = len(raw_text), len(chars)
print("Total Characters: ", n_chars)
print("Total Vocab: ", n_vocab)


Total Characters:  71818
Total Vocab:  61


In [25]:
# Build the training pairs: each input is a window of seq_length consecutive
# characters (as integers) and its target is the character that follows it
seq_length = 100
# Encode the whole text once, then slice windows out of the encoded list
encoded_text = [char_to_int[ch] for ch in raw_text]
dataX, dataY = [], []
for start in range(n_chars - seq_length):
    stop = start + seq_length
    dataX.append(encoded_text[start:stop])
    dataY.append(encoded_text[stop])
n_patterns = len(dataX)
print("Total Patterns: ", n_patterns)

Total Patterns:  71718


In [26]:
# Reshape X to [samples, time steps, features] as expected by the LSTM layers
X = numpy.reshape(dataX, (n_patterns, seq_length, 1))
# Scale the integer codes into the range [0, 1)
X = X / float(n_vocab)
# One-hot encode the targets. num_classes is fixed to the vocabulary size:
# without it to_categorical infers the width from max(dataY) + 1, which is
# too small if the highest-indexed character never occurs as a target.
y = to_categorical(dataY, num_classes=n_vocab)

In [27]:
# Define the model: a stack of five LSTM layers followed by a softmax over
# the output classes
model = Sequential()
# Every LSTM except the last uses return_sequences=True so the next LSTM
# receives the full sequence of hidden states rather than only the final one
model.add(LSTM(256, input_shape=(X.shape[1], X.shape[2]), return_sequences=True))
model.add(LSTM(512, return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(512, return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(512, return_sequences=True))
model.add(Dropout(0.2))
# Last recurrent layer returns only its final state, which feeds the classifier
model.add(LSTM(512))
model.add(Dropout(0.2))
# One output unit per class in the one-hot encoded targets
model.add(Dense(y.shape[1], activation='softmax'))
# summary() prints the table itself and returns None, so it is not wrapped
# in print() (that would emit an extra "None" line)
model.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_15 (LSTM)               (None, 100, 256)          264192    
_________________________________________________________________
lstm_16 (LSTM)               (None, 100, 512)          1574912   
_________________________________________________________________
dropout_12 (Dropout)         (None, 100, 512)          0         
_________________________________________________________________
lstm_17 (LSTM)               (None, 100, 512)          2099200   
_________________________________________________________________
dropout_13 (Dropout)         (None, 100, 512)          0         
_________________________________________________________________
lstm_18 (LSTM)               (None, 100, 512)          2099200   
_________________________________________________________________
dropout_14 (Dropout)         (None, 100, 512)         

In [28]:
# Compile with the Adam optimizer and categorical cross-entropy, matching the
# one-hot encoded targets and the softmax output layer
model.compile(optimizer='adam', loss='categorical_crossentropy')

# Note: pass metrics=['accuracy'] to compile() to also report accuracy

In [29]:
# Checkpoint callback: save the model whenever the training loss reaches a new
# minimum; the epoch number and loss value are embedded in the file name
filepath = "weights-improvement-{epoch:02d}-{loss:.4f}-bigger.hdf5"
checkpoint = ModelCheckpoint(
    filepath,
    monitor='loss',
    mode='min',
    save_best_only=True,
    verbose=1,
)
callbacks_list = [checkpoint]


In [30]:
# Train for 60 epochs in batches of 512 patterns; the checkpoint callback
# writes a file each time the training loss improves
model.fit(X, y, batch_size=512, epochs=60, callbacks=callbacks_list)

Epoch 1/60
Epoch 00001: loss improved from inf to 3.20243, saving model to weights-improvement-01-3.2024-bigger.hdf5
Epoch 2/60
Epoch 00002: loss improved from 3.20243 to 3.15818, saving model to weights-improvement-02-3.1582-bigger.hdf5
Epoch 3/60
Epoch 00003: loss improved from 3.15818 to 3.15446, saving model to weights-improvement-03-3.1545-bigger.hdf5
Epoch 4/60
Epoch 00004: loss improved from 3.15446 to 3.15090, saving model to weights-improvement-04-3.1509-bigger.hdf5
Epoch 5/60
Epoch 00005: loss improved from 3.15090 to 3.14968, saving model to weights-improvement-05-3.1497-bigger.hdf5
Epoch 6/60
Epoch 00006: loss improved from 3.14968 to 3.14380, saving model to weights-improvement-06-3.1438-bigger.hdf5
Epoch 7/60
Epoch 00007: loss improved from 3.14380 to 2.95477, saving model to weights-improvement-07-2.9548-bigger.hdf5
Epoch 8/60
Epoch 00008: loss improved from 2.95477 to 2.78550, saving model to weights-improvement-08-2.7855-bigger.hdf5
Epoch 9/60
Epoch 00009: loss improve

<tensorflow.python.keras.callbacks.History at 0x7f7bfe392710>