In [1]:
# Naive LSTM to learn three-char time steps to one-char mapping
import numpy
import tensorflow as tf
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import to_categorical

In [3]:
# define the raw dataset
alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
# create mapping of characters to integers (0-25) and the reverse
char_to_int = {c: i for i, c in enumerate(alphabet)}
int_to_char = {i: c for i, c in enumerate(alphabet)}
# prepare the dataset of input to output pairs encoded as integers:
# every window of seq_length consecutive letters is paired with the letter that follows it
seq_length = 3
dataX = []
dataY = []
for i in range(len(alphabet) - seq_length):
	seq_in = alphabet[i:i + seq_length]
	seq_out = alphabet[i + seq_length]
	dataX.append([char_to_int[char] for char in seq_in])
	dataY.append(char_to_int[seq_out])
	print(seq_in, '->', seq_out)
# reshape X to be [samples, time steps, features]
X = numpy.reshape(dataX, (len(dataX), seq_length, 1))
# normalize the integer codes by the alphabet size
X = X / float(len(alphabet))
# one hot encode the output variable; pin the width to the alphabet size
# instead of letting it be inferred from the largest label present in dataY
y = to_categorical(dataY, num_classes=len(alphabet))
# sanity check: one one-hot row per input window, one column per letter
assert y.shape == (len(dataX), len(alphabet))

ABC -> D
BCD -> E
CDE -> F
DEF -> G
EFG -> H
FGH -> I
GHI -> J
HIJ -> K
IJK -> L
JKL -> M
KLM -> N
LMN -> O
MNO -> P
NOP -> Q
OPQ -> R
PQR -> S
QRS -> T
RST -> U
STU -> V
TUV -> W
UVW -> X
VWX -> Y
WXY -> Z


In [4]:
# create and fit the model
# seed NumPy and TensorFlow before any weights are created so that a fresh
# Restart & Run All starts from the same random state
# (NOTE: some hardware kernels may still be non-deterministic)
SEED = 7
numpy.random.seed(SEED)
tf.random.set_seed(SEED)
model = Sequential()
# LSTM with 32 units; input shape is (time steps, features) taken from X
model.add(LSTM(32, input_shape=(X.shape[1], X.shape[2])))
# one softmax output per one-hot column of y
model.add(Dense(y.shape[1], activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# bind the returned History object so the cell does not echo its repr
history = model.fit(X, y, epochs=500, batch_size=1, verbose=2)

Epoch 1/500
23/23 - 0s - loss: 3.2723 - accuracy: 0.0000e+00
Epoch 2/500
23/23 - 0s - loss: 3.2578 - accuracy: 0.0435
Epoch 3/500
23/23 - 0s - loss: 3.2490 - accuracy: 0.0435
Epoch 4/500
23/23 - 0s - loss: 3.2413 - accuracy: 0.0435
Epoch 5/500
23/23 - 0s - loss: 3.2328 - accuracy: 0.0435
Epoch 6/500
23/23 - 0s - loss: 3.2252 - accuracy: 0.0435
Epoch 7/500
23/23 - 0s - loss: 3.2159 - accuracy: 0.0435
Epoch 8/500
23/23 - 0s - loss: 3.2052 - accuracy: 0.0435
Epoch 9/500
23/23 - 0s - loss: 3.1946 - accuracy: 0.0435
Epoch 10/500
23/23 - 0s - loss: 3.1805 - accuracy: 0.0435
Epoch 11/500
23/23 - 0s - loss: 3.1650 - accuracy: 0.0435
Epoch 12/500
23/23 - 0s - loss: 3.1477 - accuracy: 0.0435
Epoch 13/500
23/23 - 0s - loss: 3.1324 - accuracy: 0.0435
Epoch 14/500
23/23 - 0s - loss: 3.1072 - accuracy: 0.0435
Epoch 15/500
23/23 - 0s - loss: 3.0871 - accuracy: 0.0435
Epoch 16/500
23/23 - 0s - loss: 3.0671 - accuracy: 0.0435
Epoch 17/500
23/23 - 0s - loss: 3.0476 - accuracy: 0.0435
Epoch 18/500
23/23 

Epoch 142/500
23/23 - 0s - loss: 1.2031 - accuracy: 0.8261
Epoch 143/500
23/23 - 0s - loss: 1.2004 - accuracy: 0.7826
Epoch 144/500
23/23 - 0s - loss: 1.1914 - accuracy: 0.8696
Epoch 145/500
23/23 - 0s - loss: 1.1858 - accuracy: 0.7391
Epoch 146/500
23/23 - 0s - loss: 1.1811 - accuracy: 0.8696
Epoch 147/500
23/23 - 0s - loss: 1.1751 - accuracy: 0.8696
Epoch 148/500
23/23 - 0s - loss: 1.1740 - accuracy: 0.7826
Epoch 149/500
23/23 - 0s - loss: 1.1629 - accuracy: 0.8261
Epoch 150/500
23/23 - 0s - loss: 1.1626 - accuracy: 0.8261
Epoch 151/500
23/23 - 0s - loss: 1.1534 - accuracy: 0.8261
Epoch 152/500
23/23 - 0s - loss: 1.1459 - accuracy: 0.9130
Epoch 153/500
23/23 - 0s - loss: 1.1442 - accuracy: 0.9130
Epoch 154/500
23/23 - 0s - loss: 1.1342 - accuracy: 0.8696
Epoch 155/500
23/23 - 0s - loss: 1.1303 - accuracy: 0.9130
Epoch 156/500
23/23 - 0s - loss: 1.1263 - accuracy: 0.8261
Epoch 157/500
23/23 - 0s - loss: 1.1143 - accuracy: 0.8696
Epoch 158/500
23/23 - 0s - loss: 1.1114 - accuracy: 0.86

Epoch 281/500
23/23 - 0s - loss: 0.5862 - accuracy: 0.9565
Epoch 282/500
23/23 - 0s - loss: 0.5805 - accuracy: 0.9565
Epoch 283/500
23/23 - 0s - loss: 0.5786 - accuracy: 0.9565
Epoch 284/500
23/23 - 0s - loss: 0.5757 - accuracy: 0.9565
Epoch 285/500
23/23 - 0s - loss: 0.5739 - accuracy: 1.0000
Epoch 286/500
23/23 - 0s - loss: 0.5740 - accuracy: 0.9565
Epoch 287/500
23/23 - 0s - loss: 0.5685 - accuracy: 0.9565
Epoch 288/500
23/23 - 0s - loss: 0.5584 - accuracy: 0.9565
Epoch 289/500
23/23 - 0s - loss: 0.5548 - accuracy: 1.0000
Epoch 290/500
23/23 - 0s - loss: 0.5604 - accuracy: 0.9565
Epoch 291/500
23/23 - 0s - loss: 0.5589 - accuracy: 0.9565
Epoch 292/500
23/23 - 0s - loss: 0.5534 - accuracy: 0.9565
Epoch 293/500
23/23 - 0s - loss: 0.5487 - accuracy: 0.9565
Epoch 294/500
23/23 - 0s - loss: 0.5461 - accuracy: 0.9130
Epoch 295/500
23/23 - 0s - loss: 0.5400 - accuracy: 0.9565
Epoch 296/500
23/23 - 0s - loss: 0.5406 - accuracy: 0.9565
Epoch 297/500
23/23 - 0s - loss: 0.5349 - accuracy: 0.95

Epoch 420/500
23/23 - 0s - loss: 0.2933 - accuracy: 0.9565
Epoch 421/500
23/23 - 0s - loss: 0.2942 - accuracy: 0.9565
Epoch 422/500
23/23 - 0s - loss: 0.2892 - accuracy: 1.0000
Epoch 423/500
23/23 - 0s - loss: 0.2846 - accuracy: 0.9565
Epoch 424/500
23/23 - 0s - loss: 0.2856 - accuracy: 0.9565
Epoch 425/500
23/23 - 0s - loss: 0.2808 - accuracy: 0.9565
Epoch 426/500
23/23 - 0s - loss: 0.2835 - accuracy: 1.0000
Epoch 427/500
23/23 - 0s - loss: 0.2803 - accuracy: 1.0000
Epoch 428/500
23/23 - 0s - loss: 0.2834 - accuracy: 0.9565
Epoch 429/500
23/23 - 0s - loss: 0.2828 - accuracy: 0.9565
Epoch 430/500
23/23 - 0s - loss: 0.2810 - accuracy: 0.9565
Epoch 431/500
23/23 - 0s - loss: 0.2771 - accuracy: 0.9565
Epoch 432/500
23/23 - 0s - loss: 0.2734 - accuracy: 1.0000
Epoch 433/500
23/23 - 0s - loss: 0.2736 - accuracy: 1.0000
Epoch 434/500
23/23 - 0s - loss: 0.2751 - accuracy: 1.0000
Epoch 435/500
23/23 - 0s - loss: 0.2666 - accuracy: 0.9565
Epoch 436/500
23/23 - 0s - loss: 0.2706 - accuracy: 0.95

<tensorflow.python.keras.callbacks.History at 0x7ff492cd2b90>

In [5]:
# summarize performance of the model
# scores is [loss, accuracy] because the model was compiled with metrics=['accuracy']
scores = model.evaluate(X, y, verbose=0)
print("Model Accuracy: %.2f%%" % (scores[1]*100))
# demonstrate some model predictions
# X already holds every pattern of dataX, reshaped and normalized in the same
# order, so predict the whole batch once instead of once per pattern
predictions = model.predict(X, verbose=0)
# most probable class index for each sample (one row of predictions per sample)
predicted_indices = numpy.argmax(predictions, axis=1)
for pattern, index in zip(dataX, predicted_indices):
	result = int_to_char[int(index)]
	seq_in = [int_to_char[value] for value in pattern]
	print(seq_in, "->", result)

Model Accuracy: 95.65%
['A', 'B', 'C'] -> D
['B', 'C', 'D'] -> E
['C', 'D', 'E'] -> F
['D', 'E', 'F'] -> G
['E', 'F', 'G'] -> H
['F', 'G', 'H'] -> I
['G', 'H', 'I'] -> J
['H', 'I', 'J'] -> K
['I', 'J', 'K'] -> L
['J', 'K', 'L'] -> M
['K', 'L', 'M'] -> N
['L', 'M', 'N'] -> O
['M', 'N', 'O'] -> P
['N', 'O', 'P'] -> Q
['O', 'P', 'Q'] -> R
['P', 'Q', 'R'] -> S
['Q', 'R', 'S'] -> T
['R', 'S', 'T'] -> U
['S', 'T', 'U'] -> V
['T', 'U', 'V'] -> W
['U', 'V', 'W'] -> X
['V', 'W', 'X'] -> Z
['W', 'X', 'Y'] -> Z
