In [1]:
#@title Check GPU

import tensorflow as tf

# Ask TensorFlow which GPU device it reports and stop the cell with an error
# unless that device is the first GPU.
device_name = tf.test.gpu_device_name()
if device_name == '/device:GPU:0':
    print('Found GPU at: {}'.format(device_name))
else:
    raise SystemError('GPU device not found')

SystemError: GPU device not found

In [2]:
#@title Version Info
# Record the TensorFlow and tf.keras versions this notebook ran against.
for label, module in (('tf version: ', tf), ('tf.keras version:', tf.keras)):
    print(label, module.__version__)

tf version:  2.12.0
tf.keras version: 2.12.0


In [5]:
from random import randint
from numpy import array
from numpy import argmax

from tensorflow.keras import models
from numpy import array_equal
import numpy as np
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import LSTM
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras import Input
from tensorflow.keras.layers import TimeDistributed
from tensorflow.keras.layers import RepeatVector

In [6]:
#@title Generate one_hot_encoded Input & Output Sequences


def generate_sequence(length, n_unique):
    """Return a list of `length` random integers drawn from 0..n_unique-1 (inclusive)."""
    values = []
    for _ in range(length):
        values.append(randint(0, n_unique - 1))
    return values

def one_hot_encode(sequence, n_unique):
    """One-hot encode a sequence of integer class ids.

    Args:
        sequence: iterable of integers, each in the range 0..n_unique-1.
        n_unique: number of distinct classes, i.e. the width of every one-hot row.

    Returns:
        Integer array of shape (len(sequence), n_unique) holding a single 1 per
        row. An empty sequence yields an array of shape (0, n_unique).

    Raises:
        IndexError: if a value lies outside 0..n_unique-1.
    """
    encoding = []
    for value in sequence:
        # A negative value would otherwise index from the end of the row and
        # silently encode the wrong class, so reject it explicitly.
        if not 0 <= value < n_unique:
            raise IndexError(
                'value %r is outside the valid range 0..%d' % (value, n_unique - 1))
        vector = [0] * n_unique
        vector[value] = 1
        encoding.append(vector)
    # Pinning dtype and shape keeps the result a 2-D integer array even when
    # the sequence is empty (a bare array([]) would be 1-D and float).
    return array(encoding, dtype=int).reshape(len(encoding), n_unique)

def one_hot_decode(encoded_seq):
    """Map each row of `encoded_seq` to the index of its largest entry."""
    decoded = []
    for row in encoded_seq:
        decoded.append(argmax(row))
    return decoded

def get_pair(n_in, n_out, n_unique, verbose= False):
    """Build one random (X, y) training pair for the sequence-copy task.

    The input is a random sequence of `n_in` integers in 0..n_unique-1. The
    target keeps the first `n_out` of them and fills the remaining positions
    with 0. Both are one-hot encoded and given a leading batch axis of size 1.

    Args:
        n_in: number of values in the input sequence.
        n_out: number of leading input values kept in the target.
        n_unique: number of distinct values (width of each one-hot vector).
        verbose: when true, print the shapes and the generated sample.

    Returns:
        Tuple (X, y) of one-hot arrays, each reshaped to (1, timesteps, n_unique).
    """
    # Raw integer sequences: random source, then its zero-padded prefix.
    source = generate_sequence(n_in, n_unique)
    target = source[:n_out] + [0] * (n_in - n_out)

    # One-hot encode, then add the batch dimension expected by the LSTM.
    X = one_hot_encode(source, n_unique)
    y = one_hot_encode(target, n_unique)
    X = X.reshape((1, X.shape[0], X.shape[1]))
    y = y.reshape((1, y.shape[0], y.shape[1]))

    if not verbose:
        return X, y

    print('Generated sequences as follows')
    print('X.shape: ', X.shape,'y.shape: ', y.shape)
    print('\nSample X and y')
    print('\nIn raw format:')
    print('X=%s, y=%s' % (one_hot_decode(X[0]), one_hot_decode(y[0])))
    print('\nIn one_hot_encoded format:')
    print('X=%s' % (X[0]))
    return X, y

In [7]:
# Parameters of the sequence-copy task used throughout the notebook.
n_timesteps_in = 4  #@param {type:"integer"}
# Number of values (timesteps) in each input sequence.

n_features = 10   #@param {type:"integer"}
# Width of each one-hot vector; values are drawn from 0..n_features-1.
n_timesteps_out = 2  #@param {type:"integer"}
# Number of leading input values kept in the target; the remaining target
# positions are padded with 0.

# Generate one random sample pair and print it (shapes, raw values, one-hot X).
X,y = get_pair(n_timesteps_in, n_timesteps_out, n_features, verbose=True)

Generated sequences as follows
X.shape:  (1, 4, 10) y.shape:  (1, 4, 10)

Sample X and y

In raw format:
X=[8, 1, 9, 0], y=[8, 1, 0, 0]

In one_hot_encoded format:
X=[[0 0 0 0 0 0 0 0 1 0]
 [0 1 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 1]
 [1 0 0 0 0 0 0 0 0 0]]


In [8]:
# Define model
# A single LSTM layer built with the functional API. With return_sequences=True
# the layer emits one LSTMcells-wide vector per input timestep, so the output
# shape is (batch, n_timesteps_in, LSTMcells).
LSTMcells = 100

# Named lstm_input / lstm_output rather than input / output so that the builtin
# input() is not shadowed for the rest of the notebook session.
lstm_input = Input(shape=(n_timesteps_in, n_features))
lstm_output = LSTM(LSTMcells, return_sequences=True)(lstm_input)
model1 = Model(inputs=lstm_input, outputs=lstm_output)
model1.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 4, 10)]           0         
                                                                 
 lstm (LSTM)                 (None, 4, 100)            44400     
                                                                 
Total params: 44,400
Trainable params: 44,400
Non-trainable params: 0
_________________________________________________________________


In [9]:
# Encoder-decoder style stack:
#   1. the first LSTM reduces the whole input sequence to one 150-wide vector,
#   2. RepeatVector copies that vector once per timestep,
#   3. the second LSTM turns the copies back into a sequence,
#   4. a per-timestep softmax picks one of the n_features classes.
model = Sequential([
    Input(shape=(n_timesteps_in, n_features)),
    LSTM(150),
    RepeatVector(n_timesteps_in),
    LSTM(150, return_sequences=True),
    TimeDistributed(Dense(n_features, activation='softmax')),
])
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_1 (LSTM)               (None, 150)               96600     
                                                                 
 repeat_vector (RepeatVector  (None, 4, 150)           0         
 )                                                               
                                                                 
 lstm_2 (LSTM)               (None, 4, 150)            180600    
                                                                 
 time_distributed (TimeDistr  (None, 4, 10)            1510      
 ibuted)                                                         
                                                                 
Total params: 278,710
Trainable params: 278,710
Non-trainable params: 0
_________________________________________________________________


In [16]:
# Train the LSTM
# Every iteration draws a fresh random (X, y) pair and applies exactly one
# gradient update on it. train_on_batch performs that single update directly,
# avoiding the setup work fit() repeats on each call when it is handed a single
# sample. Note that each iteration is one training step, not an epoch.
for step in range(500):
    # Generate new random sequence
    X, y = get_pair(n_timesteps_in, n_timesteps_out, n_features)
    # One optimizer step on this single-sample batch
    model.train_on_batch(X, y)

In [17]:
# evaluate LSTM
# A sample counts as correct only when every decoded timestep of the prediction
# matches the decoded target.
total = 100
correct = 0
for _ in range(total):
    X, y = get_pair(n_timesteps_in, n_timesteps_out, n_features)
    yhat = model.predict(X, verbose=0)
    expected = one_hot_decode(y[0])
    predicted = one_hot_decode(yhat[0])
    if not array_equal(expected, predicted):
        continue
    correct += 1
print('Accuracy: %.2f%%' % (correct / total * 100.0))

Accuracy: 25.00%


In [18]:
# spot check some examples
# Print input, target and prediction side by side, all decoded back to integers.
for _ in range(10):
    X, y = get_pair(n_timesteps_in, n_timesteps_out, n_features)
    yhat = model.predict(X, verbose=0)
    decoded_input = one_hot_decode(X[0])
    decoded_target = one_hot_decode(y[0])
    decoded_prediction = one_hot_decode(yhat[0])
    print('Input', decoded_input, 'Expected:', decoded_target, 'Predicted', decoded_prediction)

Input [8, 6, 7, 4] Expected: [8, 6, 0, 0] Predicted [8, 7, 0, 0]
Input [8, 9, 8, 0] Expected: [8, 9, 0, 0] Predicted [8, 8, 0, 0]
Input [4, 3, 4, 3] Expected: [4, 3, 0, 0] Predicted [4, 3, 0, 0]
Input [5, 3, 2, 6] Expected: [5, 3, 0, 0] Predicted [5, 5, 0, 0]
Input [8, 3, 6, 9] Expected: [8, 3, 0, 0] Predicted [8, 8, 0, 0]
Input [7, 6, 9, 5] Expected: [7, 6, 0, 0] Predicted [7, 7, 0, 0]
Input [5, 5, 9, 8] Expected: [5, 5, 0, 0] Predicted [5, 5, 0, 0]
Input [0, 3, 0, 6] Expected: [0, 3, 0, 0] Predicted [5, 0, 0, 0]
Input [9, 7, 7, 1] Expected: [9, 7, 0, 0] Predicted [7, 7, 0, 0]
Input [1, 6, 2, 6] Expected: [1, 6, 0, 0] Predicted [6, 6, 0, 0]


In [19]:
# Stacked LSTM variant: both LSTM layers return the full sequence, so every
# output timestep is computed from the matching input timestep's hidden state
# rather than from a single repeated summary vector.
model2 = Sequential([
    Input(shape=(n_timesteps_in, n_features)),
    LSTM(150, return_sequences=True),
    LSTM(150, return_sequences=True),
    TimeDistributed(Dense(n_features, activation='softmax')),
])
model2.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
model2.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_3 (LSTM)               (None, 4, 150)            96600     
                                                                 
 lstm_4 (LSTM)               (None, 4, 150)            180600    
                                                                 
 time_distributed_1 (TimeDis  (None, 4, 10)            1510      
 tributed)                                                       
                                                                 
Total params: 278,710
Trainable params: 278,710
Non-trainable params: 0
_________________________________________________________________


In [21]:
# Train LSTM
# Every iteration draws a fresh random (X, y) pair and applies exactly one
# gradient update on it. train_on_batch performs that single update directly,
# avoiding the setup work fit() repeats on each call when it is handed a single
# sample. Note that each iteration is one training step, not an epoch.
for step in range(500):
    # generate new random sequence
    X, y = get_pair(n_timesteps_in, n_timesteps_out, n_features)
    # one optimizer step on this single-sample batch
    model2.train_on_batch(X, y)

In [23]:
# evaluate LSTM
# A sample counts as correct only when every decoded timestep of the prediction
# matches the decoded target.
total = 100
correct = 0
for _ in range(total):
    X, y = get_pair(n_timesteps_in, n_timesteps_out, n_features)
    yhat = model2.predict(X, verbose=0)
    expected = one_hot_decode(y[0])
    predicted = one_hot_decode(yhat[0])
    if not array_equal(expected, predicted):
        continue
    correct += 1
print('Accuracy: %.2f%%' % (correct / total * 100.0))

Accuracy: 100.00%


In [25]:
# Check some examples
# Print input, target and prediction side by side, all decoded back to integers.
for _ in range(10):
    X, y = get_pair(n_timesteps_in, n_timesteps_out, n_features)
    yhat = model2.predict(X, verbose=0)
    decoded_input = one_hot_decode(X[0])
    decoded_target = one_hot_decode(y[0])
    decoded_prediction = one_hot_decode(yhat[0])
    print('Input', decoded_input, 'Expected:', decoded_target, 'Predicted', decoded_prediction)

Input [9, 4, 5, 5] Expected: [9, 4, 0, 0] Predicted [9, 4, 0, 0]
Input [3, 5, 3, 9] Expected: [3, 5, 0, 0] Predicted [3, 5, 0, 0]
Input [8, 7, 0, 1] Expected: [8, 7, 0, 0] Predicted [8, 7, 0, 0]
Input [6, 0, 9, 1] Expected: [6, 0, 0, 0] Predicted [6, 0, 0, 0]
Input [0, 7, 1, 1] Expected: [0, 7, 0, 0] Predicted [0, 7, 0, 0]
Input [1, 1, 5, 9] Expected: [1, 1, 0, 0] Predicted [1, 1, 0, 0]
Input [9, 9, 1, 5] Expected: [9, 9, 0, 0] Predicted [9, 9, 0, 0]
Input [2, 9, 8, 0] Expected: [2, 9, 0, 0] Predicted [2, 9, 0, 0]
Input [3, 0, 4, 0] Expected: [3, 0, 0, 0] Predicted [3, 0, 0, 0]
Input [8, 1, 4, 8] Expected: [8, 1, 0, 0] Predicted [8, 1, 0, 0]


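So the second model (two stacked LSTM layers that both return sequences) reproduces the first two elements of every input sequence correctly, followed by the zero padding. Note that the inputs are passed to the model in one-hot-encoded format.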


Two great references for LSTM:

- https://colah.github.io/posts/2015-08-Understanding-LSTMs/
- https://towardsdatascience.com/illustrated-guide-to-lstms-and-gru-s-a-step-by-step-explanation-44e9eb85bf21