# Problem Description: Learn the Alphabet

In [2]:
import numpy as np

from tensorflow.keras.models import Sequential

from tensorflow.keras.layers import Dense, LSTM

import tensorflow.keras.utils as utils

In [3]:
# fix random seed for reproducibility
np.random.seed(7)

In [4]:
# define the raw dataset
alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"

# create mapping of characters to integers (0-25) and the reverse
char_to_int = dict((c, i) for i, c in enumerate(alphabet))
int_to_char = dict((i, c) for i, c in enumerate(alphabet))

In [5]:
# prepare the dataset of input to output pairs encoded as integers
seq_length = 1
dataX = []
dataY = []

for i in range(0, len(alphabet) - seq_length, 1):
    seq_in = alphabet[i:i + seq_length]
    seq_out = alphabet[i + seq_length]
    
    dataX.append([char_to_int[char] for char in seq_in])
    dataY.append(char_to_int[seq_out])
    
    print(seq_in, '->', seq_out)

A -> B
B -> C
C -> D
D -> E
E -> F
F -> G
G -> H
H -> I
I -> J
J -> K
K -> L
L -> M
M -> N
N -> O
O -> P
P -> Q
Q -> R
R -> S
S -> T
T -> U
U -> V
V -> W
W -> X
X -> Y
Y -> Z


In [6]:
# reshape X to be [samples, time steps, features]
X = np.reshape(dataX, (len(dataX), seq_length, 1))

In [7]:
# normalize
X = X / float(len(alphabet))

In [9]:
# one hot encode the output variable
y = utils.to_categorical(dataY)
y

array([[0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 

# Naive LSTM for Learning One-Char to One-Char Mapping

In [10]:
# create and fit the model
model = Sequential()

model.add(LSTM(32, input_shape=(X.shape[1], X.shape[2])))
model.add(Dense(y.shape[1], activation='softmax'))

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

model.fit(X, y, epochs=500, batch_size=1, verbose=2)

Epoch 1/500
 - 7s - loss: 3.2655 - acc: 0.0000e+00
Epoch 2/500
 - 0s - loss: 3.2579 - acc: 0.0400
Epoch 3/500
 - 0s - loss: 3.2546 - acc: 0.0400
Epoch 4/500
 - 0s - loss: 3.2517 - acc: 0.0000e+00
Epoch 5/500
 - 0s - loss: 3.2487 - acc: 0.0400
Epoch 6/500
 - 0s - loss: 3.2459 - acc: 0.0400
Epoch 7/500
 - 0s - loss: 3.2429 - acc: 0.0000e+00
Epoch 8/500
 - 0s - loss: 3.2403 - acc: 0.0400
Epoch 9/500
 - 0s - loss: 3.2367 - acc: 0.0400
Epoch 10/500
 - 0s - loss: 3.2333 - acc: 0.0400
Epoch 11/500
 - 0s - loss: 3.2301 - acc: 0.0400
Epoch 12/500
 - 0s - loss: 3.2264 - acc: 0.0400
Epoch 13/500
 - 0s - loss: 3.2226 - acc: 0.0400
Epoch 14/500
 - 0s - loss: 3.2191 - acc: 0.0400
Epoch 15/500
 - 0s - loss: 3.2147 - acc: 0.0400
Epoch 16/500
 - 0s - loss: 3.2112 - acc: 0.0400
Epoch 17/500
 - 0s - loss: 3.2061 - acc: 0.0400
Epoch 18/500
 - 0s - loss: 3.2013 - acc: 0.0400
Epoch 19/500
 - 0s - loss: 3.1959 - acc: 0.0400
Epoch 20/500
 - 0s - loss: 3.1900 - acc: 0.0400
Epoch 21/500
 - 0s - loss: 3.1846 - a

<tensorflow.python.keras.callbacks.History at 0x12e66585cf8>

In [11]:
# summarize performance of the model
scores = model.evaluate(X, y, verbose=0)
print("Model Accuracy: %.2f%%" % (scores[1]*100))

Model Accuracy: 84.00%


In [12]:
# demonstrate some model predictions
for pattern in dataX:
    x = np.reshape(pattern, (1, len(pattern), 1))
    x = x / float(len(alphabet))
    
    prediction = model.predict(x, verbose=0)
    
    index = np.argmax(prediction)
    result = int_to_char[index]
    
    seq_in = [int_to_char[value] for value in pattern]
    
    print(seq_in, "->", result)

['A'] -> B
['B'] -> C
['C'] -> D
['D'] -> E
['E'] -> F
['F'] -> G
['G'] -> H
['H'] -> I
['I'] -> J
['J'] -> K
['K'] -> L
['L'] -> M
['M'] -> N
['N'] -> O
['O'] -> P
['P'] -> Q
['Q'] -> R
['R'] -> S
['S'] -> T
['T'] -> V
['U'] -> V
['V'] -> X
['W'] -> Y
['X'] -> Z
['Y'] -> Z


# Naive LSTM for a Three-Char Feature Window to One-Char Mapping

In [19]:
# prepare the dataset of input to output pairs encoded as integers
seq_length = 3
dataX = []
dataY = []

In [20]:
for i in range(0, len(alphabet) - seq_length, 1):
    seq_in = alphabet[i:i + seq_length]
    seq_out = alphabet[i + seq_length]
    
    dataX.append([char_to_int[char] for char in seq_in])
    dataY.append(char_to_int[seq_out])
    
    print(seq_in, '->', seq_out)

ABC -> D
BCD -> E
CDE -> F
DEF -> G
EFG -> H
FGH -> I
GHI -> J
HIJ -> K
IJK -> L
JKL -> M
KLM -> N
LMN -> O
MNO -> P
NOP -> Q
OPQ -> R
PQR -> S
QRS -> T
RST -> U
STU -> V
TUV -> W
UVW -> X
VWX -> Y
WXY -> Z


In [21]:
# reshape X to be [samples, time steps, features]
X = np.reshape(dataX, (len(dataX), 1, seq_length))

In [22]:
# normalize
X = X / float(len(alphabet))

In [23]:
# one hot encode the output variable
y = utils.to_categorical(dataY)

In [24]:
# create and fit the model
model = Sequential()

model.add(LSTM(32, input_shape=(X.shape[1], X.shape[2])))
model.add(Dense(y.shape[1], activation='softmax'))

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

model.fit(X, y, epochs=500, batch_size=1, verbose=2)

Epoch 1/500
 - 1s - loss: 3.2675 - acc: 0.0435
Epoch 2/500
 - 0s - loss: 3.2564 - acc: 0.0435
Epoch 3/500
 - 0s - loss: 3.2503 - acc: 0.0435
Epoch 4/500
 - 0s - loss: 3.2444 - acc: 0.0435
Epoch 5/500
 - 0s - loss: 3.2390 - acc: 0.0435
Epoch 6/500
 - 0s - loss: 3.2326 - acc: 0.0435
Epoch 7/500
 - 0s - loss: 3.2269 - acc: 0.0435
Epoch 8/500
 - 0s - loss: 3.2207 - acc: 0.0435
Epoch 9/500
 - 0s - loss: 3.2140 - acc: 0.0435
Epoch 10/500
 - 0s - loss: 3.2078 - acc: 0.0000e+00
Epoch 11/500
 - 0s - loss: 3.2008 - acc: 0.0000e+00
Epoch 12/500
 - 0s - loss: 3.1928 - acc: 0.0000e+00
Epoch 13/500
 - 0s - loss: 3.1858 - acc: 0.0435
Epoch 14/500
 - 0s - loss: 3.1769 - acc: 0.0000e+00
Epoch 15/500
 - 0s - loss: 3.1683 - acc: 0.0435
Epoch 16/500
 - 0s - loss: 3.1596 - acc: 0.0000e+00
Epoch 17/500
 - 0s - loss: 3.1501 - acc: 0.0435
Epoch 18/500
 - 0s - loss: 3.1407 - acc: 0.0000e+00
Epoch 19/500
 - 0s - loss: 3.1315 - acc: 0.0000e+00
Epoch 20/500
 - 0s - loss: 3.1227 - acc: 0.0435
Epoch 21/500
 - 0s - 

<tensorflow.python.keras.callbacks.History at 0x12e59073e80>

In [25]:
# summarize performance of the model
scores = model.evaluate(X, y, verbose=0)
print("Model Accuracy: %.2f%%" % (scores[1]*100))

Model Accuracy: 86.96%


In [27]:
# demonstrate some model predictions
for pattern in dataX:
    x = np.reshape(pattern, (1, 1, len(pattern)))
    x = x / float(len(alphabet))
    
    prediction = model.predict(x, verbose=0)
    index = np.argmax(prediction)
    
    result = int_to_char[index]
    seq_in = [int_to_char[value] for value in pattern]
    
    print(seq_in, "->", result)

['A', 'B', 'C'] -> D
['B', 'C', 'D'] -> E
['C', 'D', 'E'] -> F
['D', 'E', 'F'] -> G
['E', 'F', 'G'] -> H
['F', 'G', 'H'] -> I
['G', 'H', 'I'] -> J
['H', 'I', 'J'] -> K
['I', 'J', 'K'] -> L
['J', 'K', 'L'] -> M
['K', 'L', 'M'] -> N
['L', 'M', 'N'] -> O
['M', 'N', 'O'] -> P
['N', 'O', 'P'] -> Q
['O', 'P', 'Q'] -> R
['P', 'Q', 'R'] -> S
['Q', 'R', 'S'] -> T
['R', 'S', 'T'] -> U
['S', 'T', 'U'] -> V
['T', 'U', 'V'] -> X
['U', 'V', 'W'] -> Z
['V', 'W', 'X'] -> Z
['W', 'X', 'Y'] -> Z


# Naive LSTM for a Three-Char Time Step Window to One-Char Mapping

In [28]:
# prepare the dataset of input to output pairs encoded as integers
seq_length = 3
dataX = []
dataY = []

In [29]:
for i in range(0, len(alphabet) - seq_length, 1):
    seq_in = alphabet[i:i + seq_length]
    seq_out = alphabet[i + seq_length]
    
    dataX.append([char_to_int[char] for char in seq_in])
    dataY.append(char_to_int[seq_out])
    
    print(seq_in, '->', seq_out)

ABC -> D
BCD -> E
CDE -> F
DEF -> G
EFG -> H
FGH -> I
GHI -> J
HIJ -> K
IJK -> L
JKL -> M
KLM -> N
LMN -> O
MNO -> P
NOP -> Q
OPQ -> R
PQR -> S
QRS -> T
RST -> U
STU -> V
TUV -> W
UVW -> X
VWX -> Y
WXY -> Z


In [30]:
# reshape X to be [samples, time steps, features]
X = np.reshape(dataX, (len(dataX), seq_length, 1))

In [31]:
# normalize
X = X / float(len(alphabet))

In [32]:
# one hot encode the output variable
y = utils.to_categorical(dataY)

In [33]:
# create and fit the model
model = Sequential()

model.add(LSTM(32, input_shape=(X.shape[1], X.shape[2])))
model.add(Dense(y.shape[1], activation='softmax'))

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

model.fit(X, y, epochs=500, batch_size=1, verbose=2)

Epoch 1/500
 - 1s - loss: 3.2733 - acc: 0.0000e+00
Epoch 2/500
 - 0s - loss: 3.2569 - acc: 0.0000e+00
Epoch 3/500
 - 0s - loss: 3.2477 - acc: 0.0000e+00
Epoch 4/500
 - 0s - loss: 3.2387 - acc: 0.0435
Epoch 5/500
 - 0s - loss: 3.2298 - acc: 0.0435
Epoch 6/500
 - 0s - loss: 3.2211 - acc: 0.0435
Epoch 7/500
 - 0s - loss: 3.2104 - acc: 0.0435
Epoch 8/500
 - 0s - loss: 3.2004 - acc: 0.0435
Epoch 9/500
 - 0s - loss: 3.1879 - acc: 0.0435
Epoch 10/500
 - 0s - loss: 3.1765 - acc: 0.0435
Epoch 11/500
 - 0s - loss: 3.1618 - acc: 0.0435
Epoch 12/500
 - 0s - loss: 3.1467 - acc: 0.0435
Epoch 13/500
 - 0s - loss: 3.1298 - acc: 0.0435
Epoch 14/500
 - 0s - loss: 3.1158 - acc: 0.0435
Epoch 15/500
 - 0s - loss: 3.0984 - acc: 0.0435
Epoch 16/500
 - 0s - loss: 3.0814 - acc: 0.0435
Epoch 17/500
 - 0s - loss: 3.0656 - acc: 0.0435
Epoch 18/500
 - 0s - loss: 3.0487 - acc: 0.0435
Epoch 19/500
 - 0s - loss: 3.0333 - acc: 0.0435
Epoch 20/500
 - 0s - loss: 3.0156 - acc: 0.0435
Epoch 21/500
 - 0s - loss: 3.0000 - a

<tensorflow.python.keras.callbacks.History at 0x12fe122f3c8>

In [34]:
# summarize performance of the model
scores = model.evaluate(X, y, verbose=0)
print("Model Accuracy: %.2f%%" % (scores[1]*100))

Model Accuracy: 95.65%


In [36]:
# demonstrate some model predictions
for pattern in dataX:
    x = np.reshape(pattern, (1, len(pattern), 1))
    x = x / float(len(alphabet))
    
    prediction = model.predict(x, verbose=0)
    index = np.argmax(prediction)
    
    result = int_to_char[index]
    seq_in = [int_to_char[value] for value in pattern]
    
    print(seq_in, "->", result)

['A', 'B', 'C'] -> D
['B', 'C', 'D'] -> E
['C', 'D', 'E'] -> F
['D', 'E', 'F'] -> G
['E', 'F', 'G'] -> H
['F', 'G', 'H'] -> I
['G', 'H', 'I'] -> J
['H', 'I', 'J'] -> K
['I', 'J', 'K'] -> L
['J', 'K', 'L'] -> M
['K', 'L', 'M'] -> N
['L', 'M', 'N'] -> O
['M', 'N', 'O'] -> P
['N', 'O', 'P'] -> Q
['O', 'P', 'Q'] -> R
['P', 'Q', 'R'] -> S
['Q', 'R', 'S'] -> T
['R', 'S', 'T'] -> U
['S', 'T', 'U'] -> V
['T', 'U', 'V'] -> W
['U', 'V', 'W'] -> X
['V', 'W', 'X'] -> Z
['W', 'X', 'Y'] -> Z


# LSTM State Within A Batch

In [41]:
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [37]:
# prepare the dataset of input to output pairs encoded as integers
seq_length = 1
dataX = []
dataY = []

In [38]:
for i in range(0, len(alphabet) - seq_length, 1):
    seq_in = alphabet[i:i + seq_length]
    seq_out = alphabet[i + seq_length]
    
    dataX.append([char_to_int[char] for char in seq_in])
    dataY.append(char_to_int[seq_out])
    
    print(seq_in, '->', seq_out)

A -> B
B -> C
C -> D
D -> E
E -> F
F -> G
G -> H
H -> I
I -> J
J -> K
K -> L
L -> M
M -> N
N -> O
O -> P
P -> Q
Q -> R
R -> S
S -> T
T -> U
U -> V
V -> W
W -> X
X -> Y
Y -> Z


In [42]:
# convert list of lists to array and pad sequences if needed
X = pad_sequences(dataX, maxlen=seq_length, dtype='float32')

In [43]:
X

array([[ 0.],
       [ 1.],
       [ 2.],
       [ 3.],
       [ 4.],
       [ 5.],
       [ 6.],
       [ 7.],
       [ 8.],
       [ 9.],
       [10.],
       [11.],
       [12.],
       [13.],
       [14.],
       [15.],
       [16.],
       [17.],
       [18.],
       [19.],
       [20.],
       [21.],
       [22.],
       [23.],
       [24.]], dtype=float32)

In [45]:
# reshape X to be [samples, time steps, features]
X = np.reshape(dataX, (X.shape[0], seq_length, 1))
X

array([[[ 0]],

       [[ 1]],

       [[ 2]],

       [[ 3]],

       [[ 4]],

       [[ 5]],

       [[ 6]],

       [[ 7]],

       [[ 8]],

       [[ 9]],

       [[10]],

       [[11]],

       [[12]],

       [[13]],

       [[14]],

       [[15]],

       [[16]],

       [[17]],

       [[18]],

       [[19]],

       [[20]],

       [[21]],

       [[22]],

       [[23]],

       [[24]]])

In [46]:
# normalize
X = X / float(len(alphabet))

In [47]:
# one hot encode the output variable
y = utils.to_categorical(dataY)

In [48]:
# create and fit the model
model = Sequential()

model.add(LSTM(16, input_shape=(X.shape[1], X.shape[2])))
model.add(Dense(y.shape[1], activation='softmax'))

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

model.fit(X, y, epochs=5000, batch_size=len(dataX), verbose=2, shuffle=False)

Epoch 1/5000
 - 1s - loss: 3.2592 - acc: 0.0400
Epoch 2/5000
 - 0s - loss: 3.2590 - acc: 0.0400
Epoch 3/5000
 - 0s - loss: 3.2587 - acc: 0.0400
Epoch 4/5000
 - 0s - loss: 3.2584 - acc: 0.0400
Epoch 5/5000
 - 0s - loss: 3.2582 - acc: 0.0400
Epoch 6/5000
 - 0s - loss: 3.2579 - acc: 0.0400
Epoch 7/5000
 - 0s - loss: 3.2576 - acc: 0.0400
Epoch 8/5000
 - 0s - loss: 3.2572 - acc: 0.0400
Epoch 9/5000
 - 0s - loss: 3.2569 - acc: 0.0400
Epoch 10/5000
 - 0s - loss: 3.2566 - acc: 0.0400
Epoch 11/5000
 - 0s - loss: 3.2563 - acc: 0.0400
Epoch 12/5000
 - 0s - loss: 3.2560 - acc: 0.0400
Epoch 13/5000
 - 0s - loss: 3.2557 - acc: 0.0400
Epoch 14/5000
 - 0s - loss: 3.2554 - acc: 0.0400
Epoch 15/5000
 - 0s - loss: 3.2551 - acc: 0.0400
Epoch 16/5000
 - 0s - loss: 3.2548 - acc: 0.0400
Epoch 17/5000
 - 0s - loss: 3.2545 - acc: 0.0400
Epoch 18/5000
 - 0s - loss: 3.2542 - acc: 0.0800
Epoch 19/5000
 - 0s - loss: 3.2538 - acc: 0.0800
Epoch 20/5000
 - 0s - loss: 3.2535 - acc: 0.0800
Epoch 21/5000
 - 0s - loss: 3

<tensorflow.python.keras.callbacks.History at 0x12fe2156358>

In [49]:
# summarize performance of the model
scores = model.evaluate(X, y, verbose=0)
print("Model Accuracy: %.2f%%" % (scores[1]*100))

Model Accuracy: 100.00%


In [50]:
# demonstrate some model predictions
for pattern in dataX:
    x = np.reshape(pattern, (1, len(pattern), 1))
    x = x / float(len(alphabet))
    
    prediction = model.predict(x, verbose=0)
    index = np.argmax(prediction)
    
    result = int_to_char[index]
    seq_in = [int_to_char[value] for value in pattern]
    
    print(seq_in, "->", result)

['A'] -> B
['B'] -> C
['C'] -> D
['D'] -> E
['E'] -> F
['F'] -> G
['G'] -> H
['H'] -> I
['I'] -> J
['J'] -> K
['K'] -> L
['L'] -> M
['M'] -> N
['N'] -> O
['O'] -> P
['P'] -> Q
['Q'] -> R
['R'] -> S
['S'] -> T
['T'] -> U
['U'] -> V
['V'] -> W
['W'] -> X
['X'] -> Y
['Y'] -> Z


In [52]:
# demonstrate predicting random patterns
print("Test a Random Pattern:")
for i in range(0,20):
    pattern_index = np.random.randint(len(dataX))
    pattern = dataX[pattern_index]
    x = np.reshape(pattern, (1, len(pattern), 1))
    x = x / float(len(alphabet))
    
    prediction = model.predict(x, verbose=0)
    index = np.argmax(prediction)
    result = int_to_char[index]
    seq_in = [int_to_char[value] for value in pattern]
    
    print(seq_in, "->", result)

Test a Random Pattern:
['W'] -> X
['V'] -> W
['M'] -> N
['O'] -> P
['Q'] -> R
['J'] -> K
['N'] -> O
['I'] -> J
['R'] -> S
['V'] -> W
['B'] -> C
['B'] -> C
['L'] -> M
['W'] -> X
['U'] -> V
['V'] -> W
['Y'] -> Z
['Y'] -> Z
['B'] -> C
['P'] -> Q


# Stateful LSTM for a One-Char to One-Char Mapping

In [56]:
# prepare the dataset of input to output pairs encoded as integers
seq_length = 1
dataX = []
dataY = []

In [57]:
for i in range(0, len(alphabet) - seq_length, 1):
    seq_in = alphabet[i:i + seq_length]
    seq_out = alphabet[i + seq_length]
    
    dataX.append([char_to_int[char] for char in seq_in])
    dataY.append(char_to_int[seq_out])
    
    print(seq_in, '->', seq_out)

A -> B
B -> C
C -> D
D -> E
E -> F
F -> G
G -> H
H -> I
I -> J
J -> K
K -> L
L -> M
M -> N
N -> O
O -> P
P -> Q
Q -> R
R -> S
S -> T
T -> U
U -> V
V -> W
W -> X
X -> Y
Y -> Z


In [58]:
# reshape X to be [samples, time steps, features]
X = np.reshape(dataX, (len(dataX), seq_length, 1))

In [59]:
# normalize
X = X / float(len(alphabet))

In [60]:
# one hot encode the output variable
y = utils.to_categorical(dataY)

In [61]:
# create and fit the model
batch_size = 1
model = Sequential()

model.add(LSTM(50, batch_input_shape=(batch_size, X.shape[1], X.shape[2]), stateful=True))
model.add(Dense(y.shape[1], activation='softmax'))

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

for i in range(300):
    model.fit(X, y, epochs=1, batch_size=batch_size, verbose=2, shuffle=False)
    model.reset_states()

Epoch 1/1
 - 1s - loss: 3.2789 - acc: 0.0000e+00
Epoch 1/1
 - 0s - loss: 3.2557 - acc: 0.0800
Epoch 1/1
 - 0s - loss: 3.2432 - acc: 0.0800
Epoch 1/1
 - 0s - loss: 3.2297 - acc: 0.0800
Epoch 1/1
 - 0s - loss: 3.2126 - acc: 0.0800
Epoch 1/1
 - 0s - loss: 3.1868 - acc: 0.0800
Epoch 1/1
 - 0s - loss: 3.1400 - acc: 0.0800
Epoch 1/1
 - 0s - loss: 3.0541 - acc: 0.0800
Epoch 1/1
 - 0s - loss: 2.9672 - acc: 0.0800
Epoch 1/1
 - 0s - loss: 2.9313 - acc: 0.1200
Epoch 1/1
 - 0s - loss: 3.0066 - acc: 0.1600
Epoch 1/1
 - 0s - loss: 3.1122 - acc: 0.1200
Epoch 1/1
 - 0s - loss: 2.8586 - acc: 0.1600
Epoch 1/1
 - 0s - loss: 2.8581 - acc: 0.1600
Epoch 1/1
 - 0s - loss: 2.8059 - acc: 0.1200
Epoch 1/1
 - 0s - loss: 2.7473 - acc: 0.1200
Epoch 1/1
 - 0s - loss: 2.6053 - acc: 0.0800
Epoch 1/1
 - 0s - loss: 2.4944 - acc: 0.2000
Epoch 1/1
 - 0s - loss: 2.4454 - acc: 0.2000
Epoch 1/1
 - 0s - loss: 2.3654 - acc: 0.2400
Epoch 1/1
 - 0s - loss: 2.2804 - acc: 0.2400
Epoch 1/1
 - 0s - loss: 2.1767 - acc: 0.3200
Epoch 

In [62]:
# summarize performance of the model
scores = model.evaluate(X, y, batch_size=batch_size, verbose=0)
model.reset_states()
print("Model Accuracy: %.2f%%" % (scores[1]*100))

Model Accuracy: 100.00%


In [63]:
# demonstrate some model predictions
seed = [char_to_int[alphabet[0]]]
for i in range(0, len(alphabet)-1):
    x = np.reshape(seed, (1, len(seed), 1))
    x = x / float(len(alphabet))
    
    prediction = model.predict(x, verbose=0)
    index = np.argmax(prediction)
    
    print(int_to_char[seed[0]], "->", int_to_char[index])
    seed = [index]
    
model.reset_states()

A -> B
B -> C
C -> D
D -> E
E -> F
F -> G
G -> H
H -> I
I -> J
J -> K
K -> L
L -> M
M -> N
N -> O
O -> P
P -> Q
Q -> R
R -> S
S -> T
T -> U
U -> V
V -> W
W -> X
X -> Y
Y -> Z


In [64]:
# demonstrate a random starting point
letter = "K"
seed = [char_to_int[letter]]
print("New start: ", letter)

for i in range(0, 5):
    x = np.reshape(seed, (1, len(seed), 1))
    x = x / float(len(alphabet))
    
    prediction = model.predict(x, verbose=0)
    index = np.argmax(prediction)
    
    print(int_to_char[seed[0]], "->", int_to_char[index])
    seed = [index]
    
model.reset_states()

New start:  K
K -> B
B -> C
C -> D
D -> E
E -> F


# LSTM with Variable-Length Input to One-Char Output

In [68]:
# prepare the dataset of input to output pairs encoded as integers
num_inputs = 1000
max_len = 5
dataX = []
dataY = []

In [69]:
for i in range(num_inputs):
    start = np.random.randint(len(alphabet)-2)
    end = np.random.randint(start, min(start+max_len,len(alphabet)-1))
    
    sequence_in = alphabet[start:end+1]
    sequence_out = alphabet[end + 1]
    
    dataX.append([char_to_int[char] for char in sequence_in])
    dataY.append(char_to_int[sequence_out])
    
    print(sequence_in, '->', sequence_out)

T -> U
GHIJ -> K
ABCDE -> F
OP -> Q
Q -> R
CDEF -> G
JK -> L
MNOP -> Q
JKL -> M
W -> X
U -> V
QRST -> U
ABCD -> E
HIJ -> K
TUVWX -> Y
T -> U
IJK -> L
TU -> V
DEF -> G
GH -> I
A -> B
STUVW -> X
IJKLM -> N
STUV -> W
GHI -> J
M -> N
AB -> C
FGHI -> J
ABC -> D
P -> Q
EFGH -> I
F -> G
OP -> Q
I -> J
XY -> Z
MNOP -> Q
W -> X
XY -> Z
HIJ -> K
XY -> Z
ST -> U
QRS -> T
QR -> S
DEFG -> H
LMNO -> P
X -> Y
MNOP -> Q
S -> T
ABC -> D
QRST -> U
JK -> L
UVWXY -> Z
PQRST -> U
LM -> N
IJKLM -> N
H -> I
ABC -> D
CDEF -> G
JKLMN -> O
QRSTU -> V
LMN -> O
EFGHI -> J
XY -> Z
ABCD -> E
KLM -> N
LMNO -> P
KL -> M
UV -> W
G -> H
BCD -> E
UV -> W
VWX -> Y
W -> X
MNOP -> Q
JKLMN -> O
FGHI -> J
XY -> Z
EF -> G
XY -> Z
EF -> G
OPQ -> R
IJKL -> M
CD -> E
BCD -> E
QRS -> T
UV -> W
UVWXY -> Z
BCDE -> F
UVWXY -> Z
BCDEF -> G
EFGHI -> J
R -> S
GHIJK -> L
OPQRS -> T
JK -> L
W -> X
STU -> V
DE -> F
N -> O
XY -> Z
MNOPQ -> R
V -> W
MN -> O
UVW -> X
TUVWX -> Y
IJKLM -> N
IJKLM -> N
EFG -> H
KLM -> N
RST -> U
HI -> J
RSTUV -

In [70]:
# convert list of lists to array and pad sequences if needed
X = pad_sequences(dataX, maxlen=max_len, dtype='float32')

In [71]:
# reshape X to be [samples, time steps, features]
X = np.reshape(X, (X.shape[0], max_len, 1))

In [72]:
# normalize
X = X / float(len(alphabet))

In [73]:
# one hot encode the output variable
y = utils.to_categorical(dataY)

In [74]:
# create and fit the model
batch_size = 1
model = Sequential()
model.add(LSTM(32, input_shape=(X.shape[1], 1)))
model.add(Dense(y.shape[1], activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(X, y, epochs=500, batch_size=batch_size, verbose=2)

Epoch 1/500
 - 9s - loss: 3.1030 - acc: 0.0620
Epoch 2/500
 - 9s - loss: 2.8481 - acc: 0.0950
Epoch 3/500
 - 9s - loss: 2.5345 - acc: 0.1780
Epoch 4/500
 - 9s - loss: 2.2544 - acc: 0.2360
Epoch 5/500
 - 9s - loss: 2.0674 - acc: 0.2900
Epoch 6/500
 - 9s - loss: 1.9200 - acc: 0.3380
Epoch 7/500
 - 9s - loss: 1.8030 - acc: 0.3980
Epoch 8/500
 - 9s - loss: 1.7070 - acc: 0.4010
Epoch 9/500
 - 9s - loss: 1.6189 - acc: 0.4370
Epoch 10/500
 - 9s - loss: 1.5371 - acc: 0.4540
Epoch 11/500
 - 9s - loss: 1.4702 - acc: 0.4820
Epoch 12/500
 - 9s - loss: 1.4120 - acc: 0.5300
Epoch 13/500
 - 9s - loss: 1.3474 - acc: 0.5580
Epoch 14/500
 - 9s - loss: 1.3012 - acc: 0.5640
Epoch 15/500
 - 9s - loss: 1.2385 - acc: 0.5920
Epoch 16/500
 - 9s - loss: 1.1887 - acc: 0.6120
Epoch 17/500
 - 9s - loss: 1.1441 - acc: 0.6280
Epoch 18/500
 - 9s - loss: 1.0964 - acc: 0.6450
Epoch 19/500
 - 9s - loss: 1.0536 - acc: 0.6680
Epoch 20/500
 - 9s - loss: 1.0219 - acc: 0.6680
Epoch 21/500
 - 9s - loss: 0.9863 - acc: 0.6970
E

<tensorflow.python.keras.callbacks.History at 0x12fe7d07400>

In [75]:
# summarize performance of the model
scores = model.evaluate(X, y, verbose=0)
print("Model Accuracy: %.2f%%" % (scores[1]*100))

Model Accuracy: 99.60%


In [76]:
# demonstrate some model predictions
for i in range(20):
    pattern_index = np.random.randint(len(dataX))
    pattern = dataX[pattern_index]
    x = pad_sequences([pattern], maxlen=max_len, dtype='float32')
    x = np.reshape(x, (1, max_len, 1))
    x = x / float(len(alphabet))
    prediction = model.predict(x, verbose=0)
    index = np.argmax(prediction)
    result = int_to_char[index]
    seq_in = [int_to_char[value] for value in pattern]
    print(seq_in, "->", result)

['W'] -> X
['U', 'V', 'W', 'X', 'Y'] -> Z
['W', 'X'] -> Y
['N', 'O', 'P', 'Q', 'R'] -> S
['C', 'D', 'E', 'F', 'G'] -> H
['R', 'S', 'T'] -> U
['U', 'V', 'W', 'X', 'Y'] -> Z
['F', 'G', 'H', 'I'] -> J
['B', 'C', 'D'] -> E
['G', 'H', 'I'] -> J
['Q'] -> R
['K', 'L', 'M', 'N'] -> O
['T', 'U'] -> V
['G', 'H', 'I'] -> J
['J'] -> K
['L', 'M', 'N'] -> O
['D', 'E', 'F'] -> G
['Q'] -> R
['K', 'L', 'M', 'N'] -> O
['J', 'K', 'L', 'M', 'N'] -> O
