In [1]:
# LSTM with Variable Length Input Sequences to One Character Output
import numpy
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [2]:
# define the raw dataset
alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
# create mapping of characters to integers (0-25) and the reverse
char_to_int = {c: i for i, c in enumerate(alphabet)}
int_to_char = dict(enumerate(alphabet))
# fix the NumPy seed so that Restart & Run All regenerates the same random pairs
numpy.random.seed(7)
# prepare the dataset of input to output pairs encoded as integers
num_inputs = 1000
max_len = 5
dataX = []
dataY = []
for i in range(num_inputs):
	# first letter of the input: any index from 0 up to len(alphabet)-3
	start = numpy.random.randint(len(alphabet)-2)
	# last letter of the input: the exclusive upper bound keeps the input at
	# 1..max_len characters and guarantees that end+1 is still a valid index
	end = numpy.random.randint(start, min(start+max_len,len(alphabet)-1))
	sequence_in = alphabet[start:end+1]
	# the target is the single letter that follows the input run
	sequence_out = alphabet[end + 1]
	dataX.append([char_to_int[char] for char in sequence_in])
	dataY.append(char_to_int[sequence_out])
	print(sequence_in, '->', sequence_out)

OPQR -> S
V -> W
GHIJ -> K
GHI -> J
RS -> T
DE -> F
LMNOP -> Q
MN -> O
EFGHI -> J
HIJK -> L
TUVW -> X
KLMNO -> P
C -> D
G -> H
IJK -> L
QRST -> U
MN -> O
FGH -> I
TUV -> W
JKLM -> N
NOP -> Q
UV -> W
BCDE -> F
AB -> C
ABC -> D
DEFGH -> I
NOPQ -> R
CDEFG -> H
IJ -> K
V -> W
KLMN -> O
V -> W
F -> G
KL -> M
DEF -> G
RST -> U
EFGHI -> J
X -> Y
RS -> T
XY -> Z
BC -> D
LM -> N
GHIJ -> K
GHIJ -> K
STU -> V
RS -> T
QRST -> U
F -> G
OP -> Q
JKL -> M
CDE -> F
LMNOP -> Q
BC -> D
RST -> U
B -> C
ST -> U
PQR -> S
BCDE -> F
KL -> M
VWX -> Y
LM -> N
HIJK -> L
ST -> U
OPQ -> R
VW -> X
NOPQ -> R
GH -> I
Q -> R
NO -> P
RS -> T
B -> C
QR -> S
E -> F
DE -> F
BCDEF -> G
R -> S
LMNOP -> Q
NOP -> Q
TUV -> W
DEFG -> H
OPQRS -> T
NO -> P
X -> Y
ABCDE -> F
M -> N
MNO -> P
W -> X
WX -> Y
QRS -> T
KL -> M
U -> V
OP -> Q
IJKL -> M
KLMN -> O
OPQR -> S
M -> N
STUV -> W
LMNOP -> Q
BC -> D
W -> X
NOPQ -> R
EFGHI -> J
GHIJ -> K
WX -> Y
KLMN -> O
XY -> Z
P -> Q
GHI -> J
M -> N
MNOP -> Q
CDE -> F
M -> N
STUVW -> X
VWXY ->

In [4]:
# convert list of lists to array and pad sequences if needed
# NOTE: pad_sequences fills with 0 by default, which is also the integer code
# of 'A' (see char_to_int), so padding and a leading 'A' look the same to the model
X = pad_sequences(dataX, maxlen=max_len, dtype='float32')
# reshape X to be [samples, time steps, features]
X = numpy.reshape(X, (X.shape[0], max_len, 1))
# normalize
X = X / float(len(alphabet))
# one hot encode the output variable; num_classes is pinned to the alphabet size
# so y always has one column per letter, even if the highest letter happens not
# to occur in dataY (to_categorical would otherwise infer max(dataY)+1 columns)
y = to_categorical(dataY, num_classes=len(alphabet))

In [5]:
# build the network: a single LSTM layer whose output feeds a softmax layer
# with one unit per column of y
batch_size = 1
model = Sequential([
	LSTM(units=32, input_shape=(X.shape[1], 1)),
	Dense(units=y.shape[1], activation='softmax'),
])
model.compile(
	loss='categorical_crossentropy',
	optimizer='adam',
	metrics=['accuracy'],
)
# train on the full dataset; kept as the final expression so the returned
# History object is what the cell displays
model.fit(x=X, y=y, batch_size=batch_size, epochs=500, verbose=2)

Epoch 1/500
1000/1000 - 3s - loss: 3.1036 - accuracy: 0.0680
Epoch 2/500
1000/1000 - 3s - loss: 2.8421 - accuracy: 0.1070
Epoch 3/500
1000/1000 - 2s - loss: 2.4851 - accuracy: 0.1910
Epoch 4/500
1000/1000 - 3s - loss: 2.2341 - accuracy: 0.2310
Epoch 5/500
1000/1000 - 3s - loss: 2.0645 - accuracy: 0.2720
Epoch 6/500
1000/1000 - 2s - loss: 1.9354 - accuracy: 0.3240
Epoch 7/500
1000/1000 - 2s - loss: 1.8243 - accuracy: 0.3630
Epoch 8/500
1000/1000 - 3s - loss: 1.7292 - accuracy: 0.4070
Epoch 9/500
1000/1000 - 3s - loss: 1.6453 - accuracy: 0.4530
Epoch 10/500
1000/1000 - 2s - loss: 1.5637 - accuracy: 0.4760
Epoch 11/500
1000/1000 - 3s - loss: 1.5043 - accuracy: 0.4680
Epoch 12/500
1000/1000 - 3s - loss: 1.4381 - accuracy: 0.5300
Epoch 13/500
1000/1000 - 3s - loss: 1.3723 - accuracy: 0.5550
Epoch 14/500
1000/1000 - 2s - loss: 1.3250 - accuracy: 0.5660
Epoch 15/500
1000/1000 - 2s - loss: 1.2693 - accuracy: 0.5670
Epoch 16/500
1000/1000 - 2s - loss: 1.2219 - accuracy: 0.6080
Epoch 17/500
1000

Epoch 133/500
1000/1000 - 2s - loss: 0.3152 - accuracy: 0.8930
Epoch 134/500
1000/1000 - 2s - loss: 0.3709 - accuracy: 0.8770
Epoch 135/500
1000/1000 - 2s - loss: 0.3128 - accuracy: 0.9030
Epoch 136/500
1000/1000 - 2s - loss: 0.3181 - accuracy: 0.8930
Epoch 137/500
1000/1000 - 2s - loss: 0.3264 - accuracy: 0.8970
Epoch 138/500
1000/1000 - 3s - loss: 0.4816 - accuracy: 0.8590
Epoch 139/500
1000/1000 - 3s - loss: 0.3024 - accuracy: 0.9000
Epoch 140/500
1000/1000 - 3s - loss: 0.3044 - accuracy: 0.9050
Epoch 141/500
1000/1000 - 3s - loss: 0.3547 - accuracy: 0.8750
Epoch 142/500
1000/1000 - 3s - loss: 0.3059 - accuracy: 0.9020
Epoch 143/500
1000/1000 - 2s - loss: 0.3982 - accuracy: 0.8720
Epoch 144/500
1000/1000 - 2s - loss: 0.3159 - accuracy: 0.8940
Epoch 145/500
1000/1000 - 3s - loss: 0.3020 - accuracy: 0.8980
Epoch 146/500
1000/1000 - 3s - loss: 0.3439 - accuracy: 0.8920
Epoch 147/500
1000/1000 - 2s - loss: 0.3039 - accuracy: 0.9020
Epoch 148/500
1000/1000 - 3s - loss: 0.3086 - accuracy:

1000/1000 - 3s - loss: 0.2015 - accuracy: 0.9380
Epoch 264/500
1000/1000 - 3s - loss: 0.1977 - accuracy: 0.9410
Epoch 265/500
1000/1000 - 3s - loss: 0.2307 - accuracy: 0.9180
Epoch 266/500
1000/1000 - 2s - loss: 0.2013 - accuracy: 0.9370
Epoch 267/500
1000/1000 - 2s - loss: 0.2054 - accuracy: 0.9320
Epoch 268/500
1000/1000 - 2s - loss: 0.3540 - accuracy: 0.8790
Epoch 269/500
1000/1000 - 2s - loss: 0.1965 - accuracy: 0.9430
Epoch 270/500
1000/1000 - 2s - loss: 0.1964 - accuracy: 0.9440
Epoch 271/500
1000/1000 - 3s - loss: 0.1993 - accuracy: 0.9380
Epoch 272/500
1000/1000 - 3s - loss: 0.2054 - accuracy: 0.9250
Epoch 273/500
1000/1000 - 3s - loss: 0.2026 - accuracy: 0.9310
Epoch 274/500
1000/1000 - 3s - loss: 0.3352 - accuracy: 0.9000
Epoch 275/500
1000/1000 - 2s - loss: 0.1924 - accuracy: 0.9460
Epoch 276/500
1000/1000 - 3s - loss: 0.1957 - accuracy: 0.9270
Epoch 277/500
1000/1000 - 3s - loss: 0.1990 - accuracy: 0.9340
Epoch 278/500
1000/1000 - 2s - loss: 0.1955 - accuracy: 0.9380
Epoch 

Epoch 394/500
1000/1000 - 3s - loss: 0.2490 - accuracy: 0.9350
Epoch 395/500
1000/1000 - 3s - loss: 0.1406 - accuracy: 0.9600
Epoch 396/500
1000/1000 - 2s - loss: 0.1377 - accuracy: 0.9690
Epoch 397/500
1000/1000 - 2s - loss: 0.1430 - accuracy: 0.9520
Epoch 398/500
1000/1000 - 2s - loss: 0.1402 - accuracy: 0.9590
Epoch 399/500
1000/1000 - 2s - loss: 0.1409 - accuracy: 0.9600
Epoch 400/500
1000/1000 - 3s - loss: 0.1465 - accuracy: 0.9580
Epoch 401/500
1000/1000 - 4s - loss: 0.1439 - accuracy: 0.9560
Epoch 402/500
1000/1000 - 2s - loss: 0.1402 - accuracy: 0.9530
Epoch 403/500
1000/1000 - 2s - loss: 0.2066 - accuracy: 0.9370
Epoch 404/500
1000/1000 - 2s - loss: 0.2045 - accuracy: 0.9600
Epoch 405/500
1000/1000 - 2s - loss: 0.1352 - accuracy: 0.9670
Epoch 406/500
1000/1000 - 2s - loss: 0.1364 - accuracy: 0.9600
Epoch 407/500
1000/1000 - 3s - loss: 0.1397 - accuracy: 0.9580
Epoch 408/500
1000/1000 - 2s - loss: 0.1403 - accuracy: 0.9570
Epoch 409/500
1000/1000 - 2s - loss: 0.1394 - accuracy:

<tensorflow.python.keras.callbacks.History at 0x7fef3ee60610>

In [6]:
# score the fitted model on the same X / y it was trained on
# (scores[1] is the 'accuracy' metric requested at compile time)
scores = model.evaluate(X, y, verbose=0)
print("Model Accuracy: %.2f%%" % (100 * scores[1]))
# show predictions for 20 randomly chosen training sequences
for trial in range(20):
	pattern_index = numpy.random.randint(len(dataX))
	pattern = dataX[pattern_index]
	# repeat the training preprocessing for this one sample:
	# pad to max_len, shape as (1, max_len, 1), scale by the alphabet size
	x = pad_sequences([pattern], maxlen=max_len, dtype='float32')
	x = x.reshape((1, max_len, 1)) / float(len(alphabet))
	prediction = model.predict(x, verbose=0)
	# the position of the largest softmax output is the predicted letter's code
	index = numpy.argmax(prediction)
	result = int_to_char[index]
	seq_in = [int_to_char[code] for code in pattern]
	print(seq_in, "->", result)

Model Accuracy: 98.40%
['O', 'P', 'Q', 'R', 'S'] -> T
['L', 'M'] -> N
['T', 'U', 'V', 'W'] -> X
['X', 'Y'] -> Z
['E', 'F', 'G', 'H', 'I'] -> J
['X', 'Y'] -> Z
['N', 'O', 'P', 'Q', 'R'] -> S
['W', 'X', 'Y'] -> Z
['A', 'B'] -> C
['O', 'P', 'Q'] -> R
['P', 'Q', 'R', 'S', 'T'] -> U
['G', 'H', 'I', 'J'] -> K
['G'] -> H
['N', 'O', 'P', 'Q'] -> R
['I', 'J', 'K', 'L'] -> M
['F', 'G', 'H'] -> I
['O', 'P', 'Q'] -> R
['O', 'P', 'Q', 'R', 'S'] -> T
['P'] -> Q
['W'] -> X


In [7]:
# persist the fitted model to "nto1.h5" (a relative path, so the file is
# written to the kernel's current working directory)
model.save("nto1.h5")