# LSTM Training

This Jupyter notebook is used to train the LSTM. Things that we tried are:
* compare the Adam optimizer vs. the RMSprop optimizer
* change batch size
* change number of units
* change training epochs
* pre-process text data: all lowercase, remove punctuation. This is done in another Jupyter notebook.

In [2]:
from numpy import array
from pickle import load
from pickle import dump
from keras.utils import to_categorical
from keras.models import Sequential
from keras.models import load_model
from keras.layers import Dense
from keras.layers import LSTM
from keras.preprocessing.sequence import pad_sequences
from keras.preprocessing.text import text_to_word_sequence

In [3]:
def load_doc(filename):
    """Read a text file and return its entire contents as one string.

    Args:
        filename: Path of the file to read. It is opened in text mode ('r')
            with the platform's default encoding.

    Returns:
        The full contents of the file as a ``str``.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # The context manager closes the handle even when read() raises, which the
    # manual open()/close() pair did not guarantee.
    with open(filename, 'r') as file:
        return file.read()

In [5]:
# read the training corpus into a single string
text = load_doc('../data/shakespeare_LSTM_lower_nopunc.txt')

# build the character -> integer lookup from the sorted distinct characters
chars = sorted(set(text))
mapping = {c: i for i, c in enumerate(chars)}
vocab_size = len(mapping)
print('Vocabulary Size = %d' % vocab_size)

# slide a window over the text: each window holds seq_length input characters
# followed by the one character that comes next, advancing `jump` characters
# per step
seq_length = 40
jump = 1
seqs = [
    text[end - seq_length: end + 1]
    for end in range(seq_length, len(text), jump)
]

print('Number of Sequences: %d' % len(seqs))

# translate every window into its list of integer codes
sequences = [[mapping[char] for char in window] for window in seqs]

Vocabulary Size = 32
Number of Sequences: 91081


In [5]:
# separate into inputs and outputs as one-hot encoded vectors
# Intermediate results get their own names so that `sequences` (the integer-
# encoded windows) is never overwritten. Previously this cell replaced
# `sequences` with one-hot arrays, so running it a second time sliced already
# one-hot data and silently produced wrongly shaped X and y.
encoded = array(sequences)
# every column except the last is the input window; the last column is the target
X_int, y_int = encoded[:, :-1], encoded[:, -1]
X = array([to_categorical(row, num_classes=vocab_size) for row in X_int])
y = to_categorical(y_int, num_classes=vocab_size)

In [6]:
# define model: one LSTM layer over the one-hot input windows, followed by a
# softmax layer with one output per vocabulary entry
model = Sequential()
model.add(LSTM(150, input_shape=(X.shape[1], X.shape[2])))
model.add(Dense(vocab_size, activation='softmax'))
# summary() prints the table itself and returns None, so wrapping it in
# print() only added a stray "None" line after the table
model.summary()

# compile model
model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy'])

# train model
model.fit(X, y, batch_size=64, epochs=100, verbose=2)

# save the model to file
model.save('shakespeare_LSTM_model_v7.h5')

# save the mapping; the context manager flushes and closes the pickle file
# instead of leaving an unclosed handle behind
with open('shakespeare_mapping_v7.pkl', 'wb') as mapping_file:
    dump(mapping, mapping_file)

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 150)               110400    
_________________________________________________________________
dense_1 (Dense)              (None, 33)                4983      
Total params: 115,383
Trainable params: 115,383
Non-trainable params: 0
_________________________________________________________________
None
Epoch 1/100
 - 139s - loss: 2.4102 - accuracy: 0.3030
Epoch 2/100
 - 139s - loss: 2.0274 - accuracy: 0.3952
Epoch 3/100
 - 133s - loss: 1.8579 - accuracy: 0.4421
Epoch 4/100
 - 130s - loss: 1.7594 - accuracy: 0.4675
Epoch 5/100
 - 127s - loss: 1.6866 - accuracy: 0.4863
Epoch 6/100
 - 131s - loss: 1.6285 - accuracy: 0.5017
Epoch 7/100
 - 130s - loss: 1.5778 - accuracy: 0.5149
Epoch 8/100
 - 130s - loss: 1.5314 - accuracy: 0.5283
Epoch 9/100
 - 131s - loss: 1.4916 - accuracy: 0.5390
Epoch 10/100
 - 133