In [17]:
import numpy as np
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.utils import np_utils

In [2]:
# the raw dataset: the 26 uppercase letters in order
alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
# lookup tables between each character and its position (0-25), in both directions
int_to_char = dict(enumerate(alphabet))
char_to_int = {letter: position for position, letter in int_to_char.items()}

In [47]:
# build the training pairs, encoded as integers:
# every window of seq_length consecutive letters is an input,
# and the letter right after that window is its target
seq_length = 3
window_starts = range(len(alphabet) - seq_length)
dataX = [[char_to_int[ch] for ch in alphabet[s:s + seq_length]] for s in window_starts]
dataY = [char_to_int[alphabet[s + seq_length]] for s in window_starts]

In [48]:
# inspect the encoded input windows (each row holds seq_length consecutive character codes)
dataX

[[0, 1, 2],
 [1, 2, 3],
 [2, 3, 4],
 [3, 4, 5],
 [4, 5, 6],
 [5, 6, 7],
 [6, 7, 8],
 [7, 8, 9],
 [8, 9, 10],
 [9, 10, 11],
 [10, 11, 12],
 [11, 12, 13],
 [12, 13, 14],
 [13, 14, 15],
 [14, 15, 16],
 [15, 16, 17],
 [16, 17, 18],
 [17, 18, 19],
 [18, 19, 20],
 [19, 20, 21],
 [20, 21, 22],
 [21, 22, 23],
 [22, 23, 24]]

In [11]:
# inspect the encoded targets (the character code that follows each input window)
# NOTE(review): the output recorded below has 25 entries starting at 1, which is what
# seq_length = 1 would produce; with seq_length = 3 it should have 23 entries starting
# at 3 — looks like stale output from an earlier run, re-run this cell to confirm
dataY

[1,
 2,
 3,
 4,
 5,
 6,
 7,
 8,
 9,
 10,
 11,
 12,
 13,
 14,
 15,
 16,
 17,
 18,
 19,
 20,
 21,
 22,
 23,
 24,
 25]

In [59]:
# reshape the encoded windows into [samples, time steps, features]:
# one sample per window, seq_length time steps, one feature per step
# (NumPy is imported only under the alias `np`, so the bare name `numpy`
# would raise NameError on a fresh kernel)
X = np.reshape(dataX, (len(dataX), seq_length, 1))
X.shape

(23, 3, 1)

In [60]:
# inspect the reshaped array before it is normalized
X

array([[[ 0],
        [ 1],
        [ 2]],

       [[ 1],
        [ 2],
        [ 3]],

       [[ 2],
        [ 3],
        [ 4]],

       [[ 3],
        [ 4],
        [ 5]],

       [[ 4],
        [ 5],
        [ 6]],

       [[ 5],
        [ 6],
        [ 7]],

       [[ 6],
        [ 7],
        [ 8]],

       [[ 7],
        [ 8],
        [ 9]],

       [[ 8],
        [ 9],
        [10]],

       [[ 9],
        [10],
        [11]],

       [[10],
        [11],
        [12]],

       [[11],
        [12],
        [13]],

       [[12],
        [13],
        [14]],

       [[13],
        [14],
        [15]],

       [[14],
        [15],
        [16]],

       [[15],
        [16],
        [17]],

       [[16],
        [17],
        [18]],

       [[17],
        [18],
        [19]],

       [[18],
        [19],
        [20]],

       [[19],
        [20],
        [21]],

       [[20],
        [21],
        [22]],

       [[21],
        [22],
        [23]],

       [[22],
        [23],
        [24]]])

In [61]:
# normalize: divide every character code by the alphabet size so the inputs fall in [0, 1)
# NOTE: this rebinds X, so re-running this cell on its own scales the data a second time;
# re-run the reshape cell first
X = np.true_divide(X, float(len(alphabet)))

In [62]:
# one hot encode the output variable
# (each integer target in dataY becomes one row of y; y.shape[1] later sizes the softmax layer)
y = np_utils.to_categorical(dataY)

In [63]:
# create and fit the model:
# a 32-unit LSTM reads each window (time steps x features, taken from X's trailing dims),
# followed by a softmax layer with one output per one-hot column of y
model = Sequential([
    LSTM(32, input_shape=X.shape[1:]),
    Dense(y.shape[1], activation='softmax'),
])
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
# NOTE(review): `nb_epoch` is the older Keras spelling of `epochs` — confirm against the installed version
# batch_size=1 feeds one window per gradient update; verbose=0 silences the per-epoch log
model.fit(X, y, nb_epoch=500, batch_size=1, verbose=0)

<keras.callbacks.History at 0x7f3b58a0fba8>

In [64]:
# summarize performance of the model on the same data it was trained on
# (scores[0] is the loss; scores[1] is the accuracy metric requested in compile)
scores = model.evaluate(X, y, verbose=0)
accuracy_percent = scores[1] * 100
print("Model Accuracy: %.2f%%" % accuracy_percent)

Model Accuracy: 100.00%


In [68]:
# take one training window and prepare it exactly like the training data:
# reshape to (1 sample, time steps, 1 feature), then scale by the alphabet size
# (NumPy is imported only under the alias `np`, so the bare name `numpy`
# would raise NameError on a fresh kernel)
pattern = dataX[1]
x = np.reshape(pattern, (1, len(pattern), 1))
x = x / float(len(alphabet))
x

array([[[ 0.03846154],
        [ 0.07692308],
        [ 0.11538462]]])

In [69]:
# run the trained model on the single prepared window;
# the result holds one row of per-class scores from the softmax output layer
prediction = model.predict(x, verbose=0)
prediction

array([[  2.30859627e-07,   2.94673612e-07,   2.67451412e-07,
          1.34671433e-02,   9.50702250e-01,   3.11065167e-02,
          3.01549467e-03,   7.67497928e-04,   3.67985340e-05,
          6.22291700e-04,   1.58217663e-05,   2.19861322e-05,
          2.47905446e-05,   1.44458199e-05,   4.57185115e-06,
          5.96856398e-06,   6.14463988e-06,   1.25895917e-06,
          1.67889084e-07,   5.00498174e-07,   5.82675561e-08,
          7.93996335e-08,   1.61424865e-07,   1.14425836e-06,
          5.51155317e-05,   1.28848784e-04]], dtype=float32)

In [70]:
# pick the class with the highest score and map it back to its character
# (NumPy is imported only under the alias `np`, so the bare name `numpy`
# would raise NameError on a fresh kernel)
index = np.argmax(prediction)
result = int_to_char[index]
result

'E'

In [71]:
# decode the input window back into characters, to read alongside the predicted letter
seq_in = [int_to_char[code] for code in pattern]
seq_in

['B', 'C', 'D']