In [1]:
import keras

Using TensorFlow backend.


In [2]:
from keras.models import Sequential

In [3]:
from keras.layers import LSTM, Dense, Dropout

In [4]:
from keras.callbacks import ModelCheckpoint

In [5]:
from keras.utils import np_utils

In [6]:
import numpy as np

In [7]:
# Number of consecutive characters in each input sequence (sliding-window width used by load_data)
SEQ_LENGTH = 100

Now that we've imported everything we need from Keras, we're all set to go!

First, we load our data.

In [8]:
def load_data(filename):
    """Read a text file and turn it into next-character training data.

    The text is lower-cased and every distinct character is assigned an
    integer index (in sorted order). A window of SEQ_LENGTH characters is
    then slid over the text one character at a time: each window is one
    input sequence and the character immediately after it is its target.

    Args:
        filename: Path of the text file to read.

    Returns:
        A tuple ``(X, Y, int_to_char, vocab_size)``:
        X           -- array of character indices, shape
                       (n_patterns, SEQ_LENGTH, 1).
        Y           -- one-hot encoded targets, shape (n_patterns, vocab_size).
        int_to_char -- dict mapping an index back to its character.
        vocab_size  -- number of distinct characters in the text.

    Raises:
        ValueError: If the text is not longer than SEQ_LENGTH, so that no
            (sequence, target) pair can be formed.
    """
    # Context manager guarantees the file handle is closed
    with open(filename) as text_file:
        data    = text_file.read()
    data        = data.lower()

    if len(data) <= SEQ_LENGTH:
        raise ValueError(
            "Text in %r has %d characters; more than SEQ_LENGTH (%d) are required"
            % (filename, len(data), SEQ_LENGTH)
        )

    # Find all the unique characters
    chars       = sorted(set(data))
    char_to_int = dict((c, i) for i, c in enumerate(chars))
    int_to_char = dict((i, c) for i, c in enumerate(chars))
    vocab_size  = len(chars)

    # Encode the whole text once; every window below is then a cheap slice
    encoded     = [char_to_int[char] for char in data]

    list_X      = []
    list_Y      = []
    # Python append is faster than numpy append
    for i in range(0, len(encoded) - SEQ_LENGTH, 1):
        list_X.append(encoded[i : i + SEQ_LENGTH])
        list_Y.append(encoded[i + SEQ_LENGTH])

    n_patterns  = len(list_X)

    X           = np.reshape(list_X, (n_patterns, SEQ_LENGTH, 1))

    # Encode output as one-hot vector. num_classes is passed explicitly so Y
    # always has vocab_size columns, even when the highest-index character
    # never occurs as a target.
    Y           = np_utils.to_categorical(list_Y, num_classes = vocab_size)

    return X, Y, int_to_char, vocab_size

This function returns an array of sequences from the input text file and the corresponding output for each sequence encoded as a one-hot vector, together with the index-to-character mapping and the vocabulary size.

Now we add a function to create our LSTM.

In [9]:
def create_model(n_hidden_layers, input_shape, hidden_dim, n_out, **kwargs):
    """Build a stacked-LSTM Sequential model ending in a Dense output layer.

    The network consists of one input LSTM layer followed by
    ``n_hidden_layers`` further LSTM layers (so ``n_hidden_layers + 1`` LSTM
    layers in total) and a Dense layer with ``n_out`` units. Every LSTM except
    the last one returns full sequences so the next LSTM can consume them; the
    last LSTM returns only its final output so the Dense layer produces one
    prediction per input sequence.

    Args:
        n_hidden_layers: Number of LSTM layers stacked on top of the input
            LSTM layer. Values <= 0 give a single LSTM layer.
        input_shape: Shape of the full input array; elements 1 and 2 are used
            as the per-sample input shape of the first LSTM.
        hidden_dim: Number of units in every LSTM layer (converted with int()).
        n_out: Number of units in the Dense output layer.
        **kwargs:
            drop_rate  -- rate passed to each Dropout layer (default 0.2).
            activation -- activation of the Dense layer (default 'softmax').
            mode       -- Dropout layers are added only when this is 'train'
                          (default 'train').

    Returns:
        The assembled (uncompiled) Sequential model.
    """
    drop        = kwargs.get('drop_rate', 0.2)
    activ       = kwargs.get('activation', 'softmax')
    mode        = kwargs.get('mode', 'train')
    hidden_dim  = int(hidden_dim)
    use_dropout = (mode == 'train')
    # The first LSTM only has to emit sequences when another LSTM follows it
    stacked     = bool(n_hidden_layers > 0)
    model       = Sequential()

    model.add( LSTM(hidden_dim, input_shape = (input_shape[1], input_shape[2]), return_sequences = stacked ) )
    if use_dropout:
        model.add( Dropout(drop) )

    # Intermediate layers keep the time dimension for the layer after them
    for _ in range(n_hidden_layers - 1):
        model.add( LSTM(hidden_dim, return_sequences = True) )
        if use_dropout:
            model.add( Dropout(drop) )

    # The last stacked LSTM collapses the sequence to a single vector. This
    # must happen for every n_hidden_layers >= 1 (not only == 1), otherwise
    # the Dense layer would be applied to a whole sequence. Its Dropout is
    # kept inside this branch so a single-LSTM model does not get two
    # consecutive Dropout layers.
    if stacked:
        model.add( LSTM(hidden_dim) )
        if use_dropout:
            model.add( Dropout(drop) )

    model.add( Dense(n_out, activation = activ) )

    return model

Now we train our model.

In [10]:
def train(model, X, Y, n_epochs, b_size, vocab_size, **kwargs):
    """Compile the model and fit it, checkpointing weights on improved loss.

    Args:
        model: Model to compile and fit.
        X: Input array of character indices; it is divided by
            float(vocab_size) before being passed to ``model.fit``.
        Y: Target array passed to ``model.fit`` unchanged.
        n_epochs: Number of epochs to train for.
        b_size: Batch size.
        vocab_size: Divisor used to scale X.
        **kwargs:
            loss      -- loss passed to compile
                         (default 'categorical_crossentropy').
            optimizer -- optimizer passed to compile (default 'adam').

    Side effects:
        Creates the ``Weights`` directory if needed; the ModelCheckpoint
        callback (monitor = 'loss', save_best_only = True) writes weight
        files into it.
    """
    import os

    loss            = kwargs.get('loss', 'categorical_crossentropy')
    opt             = kwargs.get('optimizer', 'adam')

    model.compile(loss = loss, optimizer = opt)

    filepath        = "Weights/weights-improvement-{epoch:02d}-{loss:.4f}.hdf5"
    # Ensure the checkpoint directory exists so saving cannot fail on a fresh
    # checkout (older Keras versions do not create it themselves)
    os.makedirs(os.path.dirname(filepath), exist_ok = True)

    checkpoint      = ModelCheckpoint(filepath, monitor = 'loss', verbose = 1, save_best_only = True, mode = 'min')
    callbacks_list  = [checkpoint]
    # Scale the integer character indices by the vocabulary size
    X               = X / float(vocab_size)
    model.fit(X, Y, epochs = n_epochs, batch_size = b_size, callbacks = callbacks_list)

The fit function will run over the input in batches, n_epochs times, and it will save the weights to a file whenever the loss improves. This is taken care of through the callback. <br><br>
After the training is done or once you find a loss that you are happy with, you can test how well the model generates text.

In [11]:
def generate_text(model, X, filename, ix_to_char, vocab_size, n_chars = 250):
    """Load saved weights and print text generated from a random seed sequence.

    A seed sequence is picked at random from X and printed. The model then
    predicts one character at a time: the most probable index is appended to
    the output and to the input window, and the oldest index is dropped from
    the window so its length stays constant.

    Args:
        model: Model whose architecture matches the saved weights.
        X: Array of input sequences holding (unscaled) character indices.
        filename: Path of the weights file to load.
        ix_to_char: Mapping from character index to character.
        vocab_size: Divisor used to scale the input window before predicting.
        n_chars: Number of characters to generate (default 250).

    Returns:
        None. The seed and the generated text are printed.
    """
    # Load the weights from the epoch with the least loss
    model.load_weights(filename)
    model.compile(loss = 'categorical_crossentropy', optimizer = 'adam')

    # randint's upper bound is exclusive: using len(X) makes every sequence
    # (including the last one) eligible and also works when X holds a single
    # sequence
    start   = np.random.randint(0, len(X))
    pattern = np.ravel(X[start]).tolist()

    # We seed the model with a randomly chosen sequence so it can start predicting
    print ("Seed:")
    print ("\"", ''.join([ix_to_char[value] for value in pattern]), "\"")
    output = []
    for _ in range(n_chars):
        x           = np.reshape(pattern, (1, len(pattern), 1))
        x           = x / float(vocab_size)
        prediction  = model.predict(x, verbose = 0)
        # Greedy decoding: always take the most probable character
        index       = int(np.argmax(prediction))
        output.append(index)
        # Slide the window forward by one character
        pattern.append(index)
        pattern     = pattern[1:]

    print("Predictions")
    print ("\"", ''.join([ix_to_char[value] for value in output]), "\"")

Now we're ready to either train or test our model.

In [12]:
# Load the corpus, then inspect the vocabulary size and the first
# (input sequence, one-hot target) pair
X, Y, ix_to_char, vocab_size = load_data('data/game_of_thrones.txt')
print(vocab_size)
print(X[0])
print(Y[0])

39
[[35]
 [17]
 [ 1]
 [31]
 [20]
 [27]
 [33]
 [24]
 [16]
 [ 1]
 [31]
 [32]
 [13]
 [30]
 [32]
 [ 1]
 [14]
 [13]
 [15]
 [23]
 [ 7]
 [ 1]
 [19]
 [13]
 [30]
 [17]
 [16]
 [ 1]
 [33]
 [30]
 [19]
 [17]
 [16]
 [ 1]
 [13]
 [31]
 [ 1]
 [32]
 [20]
 [17]
 [ 1]
 [35]
 [27]
 [27]
 [16]
 [31]
 [ 1]
 [14]
 [17]
 [19]
 [13]
 [26]
 [ 1]
 [32]
 [27]
 [ 1]
 [19]
 [30]
 [27]
 [35]
 [ 1]
 [16]
 [13]
 [30]
 [23]
 [ 1]
 [13]
 [30]
 [27]
 [33]
 [26]
 [16]
 [ 1]
 [32]
 [20]
 [17]
 [25]
 [ 9]
 [ 0]
 [32]
 [20]
 [17]
 [ 1]
 [35]
 [21]
 [24]
 [16]
 [24]
 [21]
 [26]
 [19]
 [31]
 [ 1]
 [13]
 [30]
 [17]
 [ 1]
 [16]
 [17]
 [13]]
[ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  1.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.]


In [13]:
# Check the shapes of the arrays returned by load_data
print("Shape of input data ", X.shape, "\nShape of output data ", Y.shape)

Shape of input data  (1567119, 100, 1) 
Shape of output data  (1567119, 39)


In [17]:
# Build the network: 256 LSTM units per layer and one output unit per column of Y.
# mode = 'test' makes create_model skip its Dropout layers.
model   = create_model(1, X.shape, 256, Y.shape[1], drop_rate = 0.1, activation = 'softmax', mode = 'test')

In [19]:
# The training call is left commented out; this cell only generates text
# from a previously saved weights file.
#train(model, X, Y, 20, 128, vocab_size)
generate_text(model, X, "Weights/weights-improvement-05-1.5560.hdf5", ix_to_char, vocab_size)

Seed:
" your attacks.
the more time he spent with them, the more jon despised them.
inside, jon hung sword a "
Predictions
" nd she had been the sears of the sears of the sears of the sears of the sears of the sears of the sears of the sears of the starks and she was a sound of the sears of the starks and she was a sound of the sears of the starks and she was a sound of th "
