In [1]:
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.utils import np_utils
from keras.layers import Reshape
from keras.layers.core import Activation
from keras.layers.normalization import BatchNormalization
from keras.layers.convolutional import UpSampling2D
from keras.layers.convolutional import Convolution2D, MaxPooling2D
from keras.layers.core import Flatten
from keras.optimizers import SGD
from keras.datasets import mnist
from keras import backend as K
# Select the 'th' image dimension ordering on the Keras backend.
K.set_image_dim_ordering('th')
import numpy as np
from PIL import Image
import argparse
import math

# Seed NumPy's global random generator with a fixed value.
# NOTE(review): this seeds NumPy only — confirm whether the Keras backend's
# own random generator also needs seeding for repeatable training runs.
np.random.seed(27)

Using TensorFlow backend.


In [2]:
# Raw dataset: the uppercase letters A-Z, in order.
alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
# Lookup tables: position (0-25) -> letter, and the inverse letter -> position.
int_to_char = dict(enumerate(alphabet))
char_to_int = {letter: position for position, letter in int_to_char.items()}

In [4]:
# Build integer-encoded (input window -> next letter) pairs, window size 1.
seq_length = 1
dataX, dataY = [], []
for i in range(len(alphabet) - seq_length):
    target_pos = i + seq_length
    seq_in, seq_out = alphabet[i:target_pos], alphabet[target_pos]
    # Encode the window letter by letter; the target is a single index.
    dataX.append([char_to_int[letter] for letter in seq_in])
    dataY.append(char_to_int[seq_out])
    print(seq_in, '->', seq_out)

A -> B
B -> C
C -> D
D -> E
E -> F
F -> G
G -> H
H -> I
I -> J
J -> K
K -> L
L -> M
M -> N
N -> O
O -> P
P -> Q
Q -> R
R -> S
S -> T
T -> U
U -> V
V -> W
W -> X
X -> Y
Y -> Z


In [8]:
# Display the integer-encoded input windows collected in dataX.
dataX


[[0],
 [1],
 [2],
 [3],
 [4],
 [5],
 [6],
 [7],
 [8],
 [9],
 [10],
 [11],
 [12],
 [13],
 [14],
 [15],
 [16],
 [17],
 [18],
 [19],
 [20],
 [21],
 [22],
 [23],
 [24]]

In [9]:
# Display the shape tuple later passed to np.reshape: (samples, time steps, features).
(len(dataX), seq_length, 1)

(25, 1, 1)

In [7]:
# Arrange the encoded windows as [samples, time steps, features] and show the result.
X = np.asarray(dataX).reshape((len(dataX), seq_length, 1))
X

array([[[ 0]],

       [[ 1]],

       [[ 2]],

       [[ 3]],

       [[ 4]],

       [[ 5]],

       [[ 6]],

       [[ 7]],

       [[ 8]],

       [[ 9]],

       [[10]],

       [[11]],

       [[12]],

       [[13]],

       [[14]],

       [[15]],

       [[16]],

       [[17]],

       [[18]],

       [[19]],

       [[20]],

       [[21]],

       [[22]],

       [[23]],

       [[24]]])

In [10]:
# Preview X divided by the alphabet size; the result is displayed, not assigned.
X / float(len(alphabet))

array([[[ 0.        ]],

       [[ 0.03846154]],

       [[ 0.07692308]],

       [[ 0.11538462]],

       [[ 0.15384615]],

       [[ 0.19230769]],

       [[ 0.23076923]],

       [[ 0.26923077]],

       [[ 0.30769231]],

       [[ 0.34615385]],

       [[ 0.38461538]],

       [[ 0.42307692]],

       [[ 0.46153846]],

       [[ 0.5       ]],

       [[ 0.53846154]],

       [[ 0.57692308]],

       [[ 0.61538462]],

       [[ 0.65384615]],

       [[ 0.69230769]],

       [[ 0.73076923]],

       [[ 0.76923077]],

       [[ 0.80769231]],

       [[ 0.84615385]],

       [[ 0.88461538]],

       [[ 0.92307692]]])

In [11]:
# Preview the one-hot encoding of the target indices; displayed, not assigned.
np_utils.to_categorical(dataY)

array([[ 0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0

In [None]:
# Arrange the encoded windows as [samples, time steps, features].
X = np.asarray(dataX).reshape((len(dataX), seq_length, 1))
# Scale the integer codes by the alphabet size.
X = X / float(len(alphabet))
# One-hot encode the target indices.
y = np_utils.to_categorical(dataY)

Naive LSTM for a Three-Char Feature Window to One-Char Mapping

In [13]:
# Build integer-encoded (input window -> next letter) pairs, window size 3.
seq_length = 3
dataX, dataY = [], []
for i in range(len(alphabet) - seq_length):
    target_pos = i + seq_length
    seq_in, seq_out = alphabet[i:target_pos], alphabet[target_pos]
    # Encode the window letter by letter; the target is a single index.
    dataX.append([char_to_int[letter] for letter in seq_in])
    dataY.append(char_to_int[seq_out])
    print(seq_in, '->', seq_out)

ABC -> D
BCD -> E
CDE -> F
DEF -> G
EFG -> H
FGH -> I
GHI -> J
HIJ -> K
IJK -> L
JKL -> M
KLM -> N
LMN -> O
MNO -> P
NOP -> Q
OPQ -> R
PQR -> S
QRS -> T
RST -> U
STU -> V
TUV -> W
UVW -> X
VWX -> Y
WXY -> Z


In [18]:
# Display the shape tuple for the feature-window layout: (samples, 1, seq_length).
(len(dataX), 1, seq_length)

(23, 1, 3)

In [19]:
# Preview dataX reshaped to (samples, 1, seq_length); displayed, not assigned.
np.reshape(dataX, (len(dataX), 1, seq_length))

array([[[ 0,  1,  2]],

       [[ 1,  2,  3]],

       [[ 2,  3,  4]],

       [[ 3,  4,  5]],

       [[ 4,  5,  6]],

       [[ 5,  6,  7]],

       [[ 6,  7,  8]],

       [[ 7,  8,  9]],

       [[ 8,  9, 10]],

       [[ 9, 10, 11]],

       [[10, 11, 12]],

       [[11, 12, 13]],

       [[12, 13, 14]],

       [[13, 14, 15]],

       [[14, 15, 16]],

       [[15, 16, 17]],

       [[16, 17, 18]],

       [[17, 18, 19]],

       [[18, 19, 20]],

       [[19, 20, 21]],

       [[20, 21, 22]],

       [[21, 22, 23]],

       [[22, 23, 24]]])

In [None]:
# Arrange X as [samples, time steps, features]: a single time step per sample,
# with the seq_length encoded letters as its features.
X = np.asarray(dataX).reshape((len(dataX), 1, seq_length))


Naive LSTM for a Three-Char Time Step Window to One-Char Mapping

In [21]:
# Preview dataX reshaped to (samples, seq_length, 1); displayed, not assigned.
np.reshape(dataX, (len(dataX), seq_length, 1))

array([[[ 0],
        [ 1],
        [ 2]],

       [[ 1],
        [ 2],
        [ 3]],

       [[ 2],
        [ 3],
        [ 4]],

       [[ 3],
        [ 4],
        [ 5]],

       [[ 4],
        [ 5],
        [ 6]],

       [[ 5],
        [ 6],
        [ 7]],

       [[ 6],
        [ 7],
        [ 8]],

       [[ 7],
        [ 8],
        [ 9]],

       [[ 8],
        [ 9],
        [10]],

       [[ 9],
        [10],
        [11]],

       [[10],
        [11],
        [12]],

       [[11],
        [12],
        [13]],

       [[12],
        [13],
        [14]],

       [[13],
        [14],
        [15]],

       [[14],
        [15],
        [16]],

       [[15],
        [16],
        [17]],

       [[16],
        [17],
        [18]],

       [[17],
        [18],
        [19]],

       [[18],
        [19],
        [20]],

       [[19],
        [20],
        [21]],

       [[20],
        [21],
        [22]],

       [[21],
        [22],
        [23]],

       [[22],
        [23],
    

In [None]:
# Train and evaluate an LSTM that maps each encoded window in dataX to the
# letter index in dataY, then print the model's prediction for every window.
# reshape X to be [samples, time steps, features]
X = np.reshape(dataX, (len(dataX), seq_length, 1))
# normalize by the alphabet size
X = X / float(len(alphabet))
# one hot encode the output variable
y = np_utils.to_categorical(dataY)
# create and fit the model: 32 LSTM units feeding a softmax over y's columns
model = Sequential()
model.add(LSTM(32, input_shape=(X.shape[1], X.shape[2])))
model.add(Dense(y.shape[1], activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# NOTE(review): `nb_epoch` is kept as written; later Keras releases name this
# argument `epochs` — confirm against the installed Keras version.
model.fit(X, y, nb_epoch=500, batch_size=1, verbose=2)
# summarize performance of the model (scores[1] is the 'accuracy' metric
# requested in compile)
scores = model.evaluate(X, y, verbose=0)
print("Model Accuracy: %.2f%%" % (scores[1]*100))
# demonstrate some model predictions
for pattern in dataX:
    # apply the same layout and scaling used for the training inputs
    # (NumPy is imported as `np`; the bare name `numpy` is not defined)
    x = np.reshape(pattern, (1, len(pattern), 1))
    x = x / float(len(alphabet))
    prediction = model.predict(x, verbose=0)
    # the highest-scoring class index maps back to a letter
    index = np.argmax(prediction)
    result = int_to_char[index]
    seq_in = [int_to_char[value] for value in pattern]
    # print as a function call, matching the other cells in this notebook
    print(seq_in, "->", result)