<a href="https://colab.research.google.com/github/tangQAQ/Learning_Recorder/blob/main/LSTM_layer_explained.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import tensorflow as tf

# Ask TensorFlow which GPU device (if any) it can see.
device = tf.test.gpu_device_name()

# Report success only when the first GPU is the device that was found.
if device == '/device:GPU:0':
  print(f'Found GPU at{device}')
else:
  print('GPU not find')

Found GPU at/device:GPU:0


In [None]:
from random import randint
from numpy import array
from numpy import argmax
import keras.backend as k
from tensorflow.keras import models, Input
from numpy import array_equal
import numpy as np
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import LSTM, Bidirectional, Dense, Flatten, TimeDistributed, RepeatVector 

In [None]:
def generate_sequence(length, n_unique):
  """Return a list of `length` random integers drawn from [0, n_unique - 1].

  Both bounds are inclusive because `randint` includes its upper endpoint.
  """
  sequence = []
  for _ in range(length):
    sequence.append(randint(0, n_unique - 1))
  return sequence

def one_hot_encode(sequence, n_unique):
  """Convert a sequence of integer tokens into a 2-D one-hot array.

  Args:
    sequence: iterable of integer indices. Each value is used to index a
      list of length `n_unique`, so an out-of-range value raises IndexError.
    n_unique: vocabulary size, i.e. the width of every one-hot row.

  Returns:
    Integer numpy array of shape (len(sequence), n_unique) holding a single
    1 per row. An empty `sequence` yields an array of shape (0, n_unique).
  """
  encoding = []
  for value in sequence:
    vector = [0] * n_unique
    vector[value] = 1
    encoding.append(vector)
  # Explicit dtype + reshape keep the result a 2-D integer array even when
  # `sequence` is empty (a plain array([]) would be 1-D float of shape (0,)).
  return array(encoding, dtype=int).reshape(len(encoding), n_unique)

def one_hot_decode(encoded_seq):
  """Return, for every vector in `encoded_seq`, the index of its largest entry."""
  decoded = []
  for row in encoded_seq:
    decoded.append(argmax(row))
  return decoded

def get_reversed_pairs(time_steps, vocabulary_size, verbose=False):
  """Generate one (input, target) sample whose target is the input reversed.

  Args:
    time_steps: number of tokens in the random input sequence.
    vocabulary_size: number of distinct token values; also the one-hot width.
    verbose: when True, print the raw sequence, its one-hot encoding and the
      shape of the returned input array.

  Returns:
    Tuple (X, y) of one-hot arrays, each reshaped to
    (1, time_steps, vocabulary_size), i.e. with a leading axis of size 1.
  """
  sequence_in = generate_sequence(time_steps, vocabulary_size)
  sequence_out = sequence_in[::-1]

  X = one_hot_encode(sequence_in, vocabulary_size)
  y = one_hot_encode(sequence_out, vocabulary_size)

  # Prepend an axis of size 1: (time_steps, vocab) -> (1, time_steps, vocab).
  X = X.reshape((1, X.shape[0], X.shape[1]))
  y = y.reshape((1, y.shape[0], y.shape[1]))

  if verbose:
    print('Generated sequences as follows')
    print('\nOne Sample Input Sequence in raw format:')
    print('X[0]=%s' % (one_hot_decode(X[0])))
    print('\nIn one_hot_encoded format:')
    print('X[0]=%s' % (X[0]))
    # The label names the expression actually printed: X.shape, which
    # includes the leading axis (X[0].shape would omit it).
    print('\nShape of an input to LSTM (X.shape):', X.shape)
  return X, y

def create_dataset(train_size, test_size, time_steps, vocabulary_size):
  """Build train and test batches of (sequence, reversed sequence) samples.

  Args:
    train_size: number of samples in the training split.
    test_size: number of samples in the test split.
    time_steps: length of every generated sequence.
    vocabulary_size: number of distinct token values (one-hot width).

  Returns:
    Tuple (X_train, y_train, X_test, y_test). Every array has shape
    (n_samples, time_steps, vocabulary_size); a split of size 0 yields
    empty arrays of shape (0, time_steps, vocabulary_size).
  """
  def _generate_split(n_samples):
    # Generate `n_samples` pairs and stack them along a new leading batch axis.
    pairs = [get_reversed_pairs(time_steps, vocabulary_size) for _ in range(n_samples)]
    if not pairs:
      empty = np.zeros((0, time_steps, vocabulary_size), dtype=int)
      return empty, empty.copy()
    # np.array(pairs) has shape (n_samples, 2, 1, time_steps, vocabulary_size).
    # Remove only the per-sample axis of size 1 (axis 2): a bare squeeze()
    # would also collapse n_samples, time_steps or vocabulary_size whenever
    # one of them equals 1, corrupting the slices taken below.
    pairs = np.array(pairs).squeeze(axis=2)
    return pairs[:, 0], pairs[:, 1]

  # Train pairs are generated before test pairs, as in the original order.
  X_train, y_train = _generate_split(train_size)
  X_test, y_test = _generate_split(test_size)
  print('\nShape of Input Batch to LSTM (X_train.shape):', X_train.shape)
  return X_train, y_train, X_test, y_test

In [None]:
# Problem size: sequence length and vocabulary size (one-hot width).
n_timesteps_in, n_features = 4, 10

# Print one sample verbosely so the raw and one-hot layouts are visible.
X, y = get_reversed_pairs(n_timesteps_in, n_features, verbose=True)

# Number of samples in the train and test splits.
train_size, test_size = 100, 20

X_train, y_train, X_test, y_test = create_dataset(
    train_size, test_size, n_timesteps_in, n_features)

Generated sequences as follows

One Sample Input Sequence in raw format:
X[0]=[4, 2, 7, 5]

In one_hot_encoded format:
X[0]=[[0 0 0 0 1 0 0 0 0 0]
 [0 0 1 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 1 0 0]
 [0 0 0 0 0 1 0 0 0 0]]

Shape of an input to LSTm (X[0].shape): (1, 4, 10)

Shape of Input Batch to LSTM (X_train.shape): (100, 4, 10)


In [None]:
# Number of units in the LSTM layer; this is the width of its output vector.
numberOfLSTMunits = 16

# Default LSTM configuration: the layer emits a single vector per sample.
# NOTE(review): `input` shadows the Python builtin of the same name; the name
# is kept because other cells bind and read the same global.
input = Input(shape=(n_timesteps_in, n_features))
state_h = LSTM(units=numberOfLSTMunits)(input)
model1 = Model(inputs=input, outputs=state_h)
model1.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 4, 10)]           0         
                                                                 
 lstm (LSTM)                 (None, 16)                1728      
                                                                 
Total params: 1,728
Trainable params: 1,728
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Run model1 on the training batch; no compile/fit call appears in the cells
# shown, so this only inspects the shape and content of the layer output.
result = model1.predict(X_train)
print('input shape', X_train.shape)
print('state_h shape', result.shape)
# Show the output vector produced for the first sample of the batch.
print('result for the first sample/input: \n', result[0])


input shape (100, 4, 10)
state_h shape (100, 16)
result for the first sample/input: 
 [-0.09239276 -0.09985759 -0.15129723 -0.04877616 -0.01588167 -0.05418357
 -0.08187836 -0.00935194  0.04639095  0.07869623 -0.10670013  0.02275475
 -0.04578882 -0.00931439  0.03336338 -0.03551181]


In [None]:
# Number of units in the LSTM layer.
numberOfLSTMunits = 16

# return_sequences=True: the layer output keeps the time axis, so the model
# yields one vector per time step instead of a single vector per sample.
# NOTE(review): `input` shadows the Python builtin of the same name; the name
# is kept because other cells bind and read the same global.
input = Input(shape=(n_timesteps_in, n_features))
all_state_h = LSTM(
    units=numberOfLSTMunits,
    return_sequences=True,
)(input)
model1 = Model(inputs=input, outputs=all_state_h)
model1.summary()

Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 4, 10)]           0         
                                                                 
 lstm_1 (LSTM)               (None, 4, 16)             1728      
                                                                 
Total params: 1,728
Trainable params: 1,728
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Run model1 on the training batch to inspect the per-time-step output.
result = model1.predict(X_train)

print('input shape:', X_train.shape)
print('all_state_h shape', result.shape)
# result[0] selects the first sample; result[0][1] selects its second time step.
print('\nhidden states for the first sample:\n', result[0])
print('\nhidden states for the first sample at the second time step: \n', result[0][1])

input shape: (100, 4, 10)
all_state_h shape (100, 4, 16)

hidden states for the first sample:
 [[-0.02262436 -0.04754497  0.05966783  0.05943919  0.05058412 -0.05572051
  -0.05256375 -0.05043213 -0.05960981  0.00568357  0.06567019 -0.01498767
   0.01152137  0.0574741  -0.00426618  0.01170248]
 [-0.0950058  -0.10511314 -0.00615508  0.0963183  -0.04407406 -0.06496571
  -0.09908493 -0.06814776 -0.08962796  0.03602935  0.11247353  0.00276205
  -0.00132382 -0.01584579  0.05472199 -0.03059291]
 [-0.15198945 -0.14708824 -0.05052714  0.13142194 -0.12282782 -0.05671879
  -0.12443785 -0.07534549 -0.11393125  0.06936996  0.1175988   0.01739546
  -0.00655721 -0.0759122   0.1069563  -0.05251282]
 [-0.15954188 -0.09931306 -0.07926431  0.14535303 -0.11735033 -0.06293015
  -0.01628185  0.00899775 -0.01372408  0.00669521  0.06697696  0.058496
  -0.05457247 -0.04768565  0.04309651  0.0382362 ]]

hidden states for the first sample at the second time step: 
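 [-0.0950058  -0.10511314 -0.00615508  0.0963183  -0.04407406 -0.06496571
 -0.09908493 -0.06814776 -0.08962796  0.03602935  0.11247353  0.00276205
 -0.00132382 -0.01584579  0.05472199 -0.03059291]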

In [None]:
# Number of units in the LSTM layer.
numberOfLSTMunits = 16

# return_state=True: calling the layer yields three tensors, unpacked here as
# the layer output plus the two state tensors (state_h and state_c).
# NOTE(review): `input` shadows the Python builtin of the same name; the name
# is kept because other cells bind and read the same global.
input = Input(shape=(n_timesteps_in, n_features))
LSTM_output, state_h, state_c = LSTM(
    units=numberOfLSTMunits,
    return_state=True,
)(input)
model1 = Model(inputs=input, outputs=[LSTM_output, state_h, state_c])
model1.summary()

Model: "model_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_3 (InputLayer)        [(None, 4, 10)]           0         
                                                                 
 lstm_2 (LSTM)               [(None, 16),              1728      
                              (None, 16),                        
                              (None, 16)]                        
                                                                 
Total params: 1,728
Trainable params: 1,728
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Layers are looked up by position: index 0 is the input layer, index 1 the LSTM.
# NOTE(review): `output_shape` on a layer may not exist in every Keras
# version — confirm against the version this notebook targets.
print('Input layer output shape:', model1.get_layer(index=0).output_shape)
print('LSTM layer output shape:', model1.get_layer(index=1).output_shape)
# The model has three outputs, so predict() returns three arrays; array()
# stacks them into a single array indexed as results[0..2].
results = model1.predict(X_train)
results = array(results)

print('\nWith batch of data:')
print('input shape:', X_train.shape)
print('result is 3 2D-array:', results.shape)
print('\nLSTM_output is in the first array:', results[0].shape)
# NOTE(review): the label below states that state_h equals LSTM_output, but
# that equality is never checked in this cell; the only comparison made is
# results[0] == results[2] (LSTM_output against state_c) on the next line.
print('\nstate_h which is exactly the same with LSTM_output is in the seconde array:', results[1].shape)
print('\nIs the content of LSTM_output and state_c exactly the same?\n', results[0]==results[2])
print('\nstate_c is in the third array:', results[2].shape)

Input layer output shape: [(None, 4, 10)]
LSTM layer output shape: [(None, 16), (None, 16), (None, 16)]

With batch of data:
input shape: (100, 4, 10)
result is 3 2D-array: (3, 100, 16)

LSTM_output is in the first array: (100, 16)

state_h which is exactly the same with LSTM_output is in the seconde array: (100, 16)

Is the content of LSTM_output and state_c exactly the same?
 [[False False False ... False False False]
 [False False False ... False False False]
 [False False False ... False False False]
 ...
 [False False False ... False False False]
 [False False False ... False False False]
 [False False False ... False False False]]

state_c is in the third array: (100, 16)


In [None]:
# Number of units in the LSTM layer.
numberOfLSTMunits = 16

# Both flags enabled: the first tensor keeps the time axis (one vector per
# time step) and the two state tensors are returned alongside it.
# NOTE(review): `input` shadows the Python builtin of the same name; the name
# is kept because other cells bind and read the same global.
input = Input(shape=(n_timesteps_in, n_features))
all_state_h, state_h, state_c = LSTM(
    units=numberOfLSTMunits,
    return_sequences=True,
    return_state=True,
)(input)
model1 = Model(inputs=input, outputs=[all_state_h, state_h, state_c])
model1.summary()

Model: "model_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_4 (InputLayer)        [(None, 4, 10)]           0         
                                                                 
 lstm_3 (LSTM)               [(None, 4, 16),           1728      
                              (None, 16),                        
                              (None, 16)]                        
                                                                 
Total params: 1,728
Trainable params: 1,728
Non-trainable params: 0
_________________________________________________________________


In [None]:
# The model has three outputs, so predict() returns a list of three arrays.
# They are left as a list here (no array() stacking): the first array has a
# different rank from the other two, as the printed shapes show.
results = model1.predict(X_train)
print('\nWith batch of data:')
print('input shape:', X_train.shape)
print('result is 3 2D-array len(results):',len(results))
# Order follows the `outputs` list given to Model: all_state_h, state_h, state_c.
print('\nall_state_h is in the first array:', results[0].shape)
print('\nstate_h is in the second array:', results[1].shape)
print('\nstate_c is in the third array:', results[2].shape)


With batch of data:
input shape: (100, 4, 10)
result is 3 2D-array len(results): 3

all_state_h is in the first array: (100, 4, 16)

state_h is in the second array: (100, 16)

state_c is in the third array: (100, 16)
