In [13]:
%matplotlib inline
from __future__ import print_function
import matplotlib.pyplot as plt
import numpy as np
import time
import csv
from keras.models import Sequential
from keras.layers.core import Dense, Activation, Dropout
from keras.layers.recurrent import LSTM, SimpleRNN
from keras.layers.wrappers import TimeDistributed
import keras.callbacks
from keras import optimizers
import argparse
import pickle
from RNN_utils import *



In [14]:
# --- Data source and batching ------------------------------------------------
# NOTE: despite its name, DATA_DIR holds the path of the corpus *file*.
DATA_DIR = './data/shakespeare_input.txt'
SEQ_LENGTH = 50
BATCH_SIZE = 30
num_epochs = 50

# --- Baseline architecture ---------------------------------------------------
DEFAULT_LAYERS, DEFAULT_HIDDEN, DEFAULT_DROPOUT = 2, 500, 0.0

# --- Alternative values, each swept on its own around the baseline -----------
layers = [1, 3]
hidden = [300, 700]
dropout = [0.15, 0.3]

# Build the training data and the index<->character lookups from the corpus
# (load_data is presumably supplied by the RNN_utils star-import).
X, y, VOCAB_SIZE, ix_to_char, char_to_ix = load_data(DATA_DIR, SEQ_LENGTH)

Data length: 4573338 characters
Vocabulary size: 67 characters


In [15]:
# Hold out the tail of the data for testing; everything before the cut-off
# index is kept for training/validation.
train_split = 0.8
test_ind = int(round(len(X) * train_split))
X_train, X_test = X[:test_ind], X[test_ind:]
y_train, y_test = y[:test_ind], y[test_ind:]

In [16]:
def make_lstm_model(VOCAB_SIZE, num_layers, num_hidden, dropout):
    """Assemble and compile a stacked-LSTM sequence model.

    The network is one input LSTM layer followed by ``num_layers - 1``
    further LSTM layers (all with ``num_hidden`` units, all returning full
    sequences), then a time-distributed Dense layer of width ``VOCAB_SIZE``
    and a softmax activation. It is compiled with categorical cross-entropy
    loss and the "rmsprop" optimizer.

    Args:
        VOCAB_SIZE: size of the last input axis and of the output layer.
        num_layers: total number of LSTM layers; the input layer is always
            added, so values below 1 still yield one LSTM layer.
        num_hidden: number of units in every LSTM layer.
        dropout: value passed as ``dropout`` to every LSTM layer.

    Returns:
        The compiled ``Sequential`` model.
    """
    # Settings shared by every LSTM layer in the stack.
    lstm_options = dict(return_sequences=True, dropout=dropout)

    network = Sequential()
    network.add(LSTM(num_hidden, input_shape=(None, VOCAB_SIZE), **lstm_options))
    for _ in range(num_layers - 1):
        network.add(LSTM(num_hidden, **lstm_options))

    output_layer = TimeDistributed(Dense(VOCAB_SIZE))
    network.add(output_layer)
    network.add(Activation('softmax'))

    network.compile(loss="categorical_crossentropy", optimizer="rmsprop")
    return network

In [17]:
def run_model(model, X, y, num_epochs=50, batch_size=30):
    """Fit ``model`` on ``(X, y)`` with early stopping on two monitors.

    Two ``EarlyStopping`` callbacks are installed, one monitoring
    ``val_loss`` and one monitoring ``loss``; both use ``min_delta=1e-3``
    and ``patience=2``. ``validation_split=0.2`` is passed to ``fit``.

    Args:
        model: a compiled model exposing ``fit``.
        X: training inputs passed to ``fit``.
        y: training targets passed to ``fit``.
        num_epochs: upper bound on the number of epochs.
        batch_size: batch size passed to ``fit``.

    Returns:
        ``(model, history)`` where ``history`` is the ``history`` attribute
        of the object returned by ``fit``.
    """
    # Same stopping rule for both metrics; validation loss is listed first.
    stoppers = [
        keras.callbacks.EarlyStopping(monitor=metric,
                                      min_delta=1e-3,
                                      patience=2,
                                      verbose=1,
                                      mode='auto')
        for metric in ('val_loss', 'loss')
    ]
    fit_result = model.fit(X, y,
                           epochs=num_epochs,
                           batch_size=batch_size,
                           validation_split=0.2,
                           callbacks=stoppers,
                           verbose=1)
    return model, fit_result.history

In [18]:
def save_model(model, history, num_layers, num_hidden, dropout):
    """Persist one training run: the pickled history and the model weights.

    Two files are written to the current working directory, both named after
    the run's hyper-parameters and the number of epochs actually trained
    (``len(history['loss'])``):

    * ``history_<tag>``       -- ``history`` pickled as-is
    * ``weights_<tag>.hdf5``  -- written by ``model.save_weights``

    Args:
        model: object exposing ``save_weights(path)``.
        history: dict with at least a ``'loss'`` list; pickled unchanged.
        num_layers: number of LSTM layers, used only in the file name.
        num_hidden: hidden units per layer, used only in the file name.
        dropout: dropout rate, encoded in the file name as tenths.
    """
    # Encode dropout in tenths. '{:g}' keeps whole tenths as a bare integer
    # ("0" for 0.0, "3" for 0.3) but preserves fractions such as 0.15 -> "1.5".
    # The previous int() truncation labelled 0.15 as "1", which is
    # indistinguishable from a run with dropout 0.1.
    dropout_tag = '{:g}'.format(dropout * 10)
    string = 'layers_{}_hidden_{}_dropout_{}_epoch_{}'.format(
        num_layers, num_hidden, dropout_tag, len(history['loss']))
    print('saving: ' + string)
    with open('history_' + string, 'wb') as file_pi:
        pickle.dump(history, file_pi)
    model.save_weights('weights_' + string + '.hdf5')

In [7]:
# Baseline network built from the DEFAULT_* hyper-parameters.
default_model = make_lstm_model(
    VOCAB_SIZE,
    num_layers=DEFAULT_LAYERS,
    num_hidden=DEFAULT_HIDDEN,
    dropout=DEFAULT_DROPOUT,
)

In [19]:
# Sample text from default_model; the returned string is shown as the cell
# result.
# NOTE(review): this cell's execution count (19) is later than the training
# cell below (In [9]), so the sample shown was presumably generated after
# training. On a fresh Restart & Run All it would run on the untrained model.
# NOTE(review): the meaning of the positional arguments 100, -1 and 1 is set
# by generate_text (presumably from RNN_utils) -- TODO confirm and name them.
generate_text(default_model, 100, VOCAB_SIZE, ix_to_char, -1, 1)

refore,
'Riest Soly Bricher and, a land shall,
On every piece of the off the poison.

GLOUCESTER:
Lo

"refore,\n'Riest Soly Bricher and, a land shall,\nOn every piece of the off the poison.\n\nGLOUCESTER:\nLoo"

In [9]:
# Train the baseline and save its history and weights.
# The epoch budget and batch size come from the config cell (num_epochs,
# BATCH_SIZE) rather than from run_model's built-in defaults, so editing the
# config actually changes this run.
# NOTE(review): unlike the sweep cells, no test loss is recorded for the
# baseline -- confirm whether that is intentional.
default_model, default_history = run_model(
    default_model, X_train, y_train,
    num_epochs=num_epochs, batch_size=BATCH_SIZE)
save_model(default_model, default_history, DEFAULT_LAYERS, DEFAULT_HIDDEN, DEFAULT_DROPOUT)

Train on 58538 samples, validate on 14635 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 00006: early stopping


In [10]:
# Sweep the number of LSTM layers, holding hidden size and dropout at their
# defaults. Each run is trained, scored on the held-out test split and saved.
for layer in layers:
    model = make_lstm_model(VOCAB_SIZE, layer, DEFAULT_HIDDEN, DEFAULT_DROPOUT)
    # Use the configured epoch budget / batch size instead of silently relying
    # on run_model's defaults.
    model, history = run_model(model, X_train, y_train,
                               num_epochs=num_epochs, batch_size=BATCH_SIZE)
    # Store the test loss next to the training curves, computed two ways:
    # via the evaluate_loss helper and via Keras' own evaluate().
    history['test_loss_predict'] = evaluate_loss(model, X_test)
    history['test_loss_eval'] = model.evaluate(X_test, y_test, batch_size=BATCH_SIZE)
    save_model(model, history, layer, DEFAULT_HIDDEN, DEFAULT_DROPOUT)
    

Train on 58538 samples, validate on 14635 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 00009: early stopping
Train on 58538 samples, validate on 14635 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 00007: early stopping


In [11]:
# Sweep the hidden-layer width, holding depth and dropout at their defaults.
# Each run is trained, scored on the held-out test split and saved.
for h in hidden:
    model = make_lstm_model(VOCAB_SIZE, DEFAULT_LAYERS, h, DEFAULT_DROPOUT)
    # Use the configured epoch budget / batch size instead of silently relying
    # on run_model's defaults.
    model, history = run_model(model, X_train, y_train,
                               num_epochs=num_epochs, batch_size=BATCH_SIZE)
    # Store the test loss next to the training curves, computed two ways:
    # via the evaluate_loss helper and via Keras' own evaluate().
    history['test_loss_predict'] = evaluate_loss(model, X_test)
    history['test_loss_eval'] = model.evaluate(X_test, y_test, batch_size=BATCH_SIZE)
    save_model(model, history, DEFAULT_LAYERS, h, DEFAULT_DROPOUT)
    

Train on 58538 samples, validate on 14635 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 00010: early stopping
Train on 58538 samples, validate on 14635 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 00007: early stopping


In [None]:
# Sweep the dropout rate, holding depth and hidden size at their defaults.
# Each run is trained, scored on the held-out test split and saved.
for d in dropout:
    model = make_lstm_model(VOCAB_SIZE, DEFAULT_LAYERS, DEFAULT_HIDDEN, d)
    # Use the configured epoch budget / batch size instead of silently relying
    # on run_model's defaults.
    model, history = run_model(model, X_train, y_train,
                               num_epochs=num_epochs, batch_size=BATCH_SIZE)
    # Store the test loss next to the training curves, computed two ways:
    # via the evaluate_loss helper and via Keras' own evaluate().
    history['test_loss_predict'] = evaluate_loss(model, X_test)
    history['test_loss_eval'] = model.evaluate(X_test, y_test, batch_size=BATCH_SIZE)
    save_model(model, history, DEFAULT_LAYERS, DEFAULT_HIDDEN, d)
    

Train on 58538 samples, validate on 14635 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 00010: early stopping
Train on 58538 samples, validate on 14635 samples
Epoch 1/50
 8550/58538 [===>..........................] - ETA: 10:21 - loss: 2.9554

In [None]:
# Re-run only the second dropout setting: the dropout sweep above stalled
# part-way through that run (its progress output stops mid-epoch).
# NOTE(review): on a full Run All this trains dropout[1] a second time.
model = make_lstm_model(VOCAB_SIZE, DEFAULT_LAYERS, DEFAULT_HIDDEN, dropout[1])
# Use the configured epoch budget / batch size instead of silently relying on
# run_model's defaults.
model, history = run_model(model, X_train, y_train,
                           num_epochs=num_epochs, batch_size=BATCH_SIZE)
# Store the test loss next to the training curves, computed two ways: via the
# evaluate_loss helper and via Keras' own evaluate().
history['test_loss_predict'] = evaluate_loss(model, X_test)
history['test_loss_eval'] = model.evaluate(X_test, y_test, batch_size=BATCH_SIZE)
save_model(model, history, DEFAULT_LAYERS, DEFAULT_HIDDEN, dropout[1])
    

Train on 58538 samples, validate on 14635 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
 2040/58538 [>.............................] - ETA: 11:38 - loss: 1.3885