# Mathematical Definitions

# Graphical Illustration

# Implementation in Keras

In [1]:
from keras.models import Sequential
from keras.layers import LSTM, Dense
import numpy as np

Using TensorFlow backend.


## Setting up the model
Setting up an LSTM in `keras` is straightforward, as `keras` provides a pre-defined `LSTM` layer.

In [2]:
# Problem dimensions: one time step, two input features, a single LSTM unit.
# Kept deliberately tiny so every weight can be inspected and the forward
# pass can be recomputed by hand further down.
num_time_steps = 1
num_features = 2
num_units = 1

# The only layer of the network: an LSTM using tanh for the candidate/output
# nonlinearity and the logistic sigmoid for the gates, with bias terms enabled.
lstm_layer = LSTM(
    units=num_units,
    input_shape=(num_time_steps, num_features),
    activation='tanh',
    recurrent_activation='sigmoid',
    use_bias=True,
)

model = Sequential()
model.add(lstm_layer)

# The model is never trained in this notebook; compiling just finalises it.
model.compile(loss='MAE', optimizer='adam')

## Inspecting the model

In [3]:
# Print the layer table. The single LSTM layer reports 16 parameters:
# 4 gates x (2 input weights + 1 recurrent weight + 1 bias), matching the
# weight arrays shown in the next section.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 1)                 16        
Total params: 16
Trainable params: 16
Non-trainable params: 0
_________________________________________________________________


## Inspecting the weights

In [5]:
# Show the raw weights as a list of three arrays: the input kernel (2 x 4),
# the recurrent kernel (1 x 4) and the bias vector (4 entries). The four
# columns hold the per-gate parameters that are sliced apart below.
model.get_weights()

[array([[-0.7879    , -0.7968972 ,  0.9924617 , -0.6057646 ],
        [-0.14056396, -0.34560132,  0.9574034 ,  0.7960496 ]],
       dtype=float32),
 array([[-0.0660542 , -0.32508007, -0.8477706 ,  0.41381747]],
       dtype=float32),
 array([0., 1., 0., 0.], dtype=float32)]

## Feedforward

In [6]:
# One sample consisting of a single time step with the feature values 1 and 10.
x = np.array([[1, 10]])

# Keras processes batches, so prepend a batch axis: (1, 2) -> (1, 1, 2).
X = np.expand_dims(x, axis=0)

# Forward pass through the network for the whole (one-element) batch.
Y = model.predict(X)

# The batch holds exactly one sample, so its output is the first row.
y = Y[0]

## Reproduce the computation

In [7]:
# Fetch the parameters of the LSTM layer: input weights, recurrent weights, biases.
kernel, recurrent_kernel, bias = model.layers[0].get_weights()

# The manual computation consumes the same sample that was fed to the model.
inputs = x

# Nonlinearities matching the layer configuration: tanh for the candidate and
# output, logistic sigmoid for the gates.
activation = np.tanh


def recurrent_activation(z):
    """Logistic sigmoid 1 / (1 + exp(-z)), applied element-wise."""
    return 1/(1+np.exp(-z))


# There is no earlier time step, so the previous hidden state and the
# previous cell state both start at zero.
h_tm1, c_tm1 = 0, 0

In [8]:
# The weights of the four gates sit side by side, `num_units` columns each,
# in the order: input (i), forget (f), candidate cell (c), output (o).
# The last slice is open-ended, i.e. it takes everything from 3 * num_units on.
gate_slices = [slice(k * num_units, (k + 1) * num_units) for k in range(3)]
gate_slices.append(slice(3 * num_units, None))

# Split each parameter array into its per-gate pieces.
kernel_i, kernel_f, kernel_c, kernel_o = (
    kernel[:, s] for s in gate_slices
)
recurrent_kernel_i, recurrent_kernel_f, recurrent_kernel_c, recurrent_kernel_o = (
    recurrent_kernel[:, s] for s in gate_slices
)
bias_i, bias_f, bias_c, bias_o = (bias[s] for s in gate_slices)

# Every gate sees the same input (no per-gate masking is applied here).
inputs_i = inputs_f = inputs_c = inputs_o = inputs

# Input contribution of each gate: projection of the input plus the gate bias.
x_i = np.dot(inputs_i, kernel_i) + bias_i
x_f = np.dot(inputs_f, kernel_f) + bias_f
x_c = np.dot(inputs_c, kernel_c) + bias_c
x_o = np.dot(inputs_o, kernel_o) + bias_o

# Likewise, every gate sees the same previous hidden state.
h_tm1_i = h_tm1_f = h_tm1_c = h_tm1_o = h_tm1

# Gate activations: input contribution plus recurrent contribution, squashed.
i = recurrent_activation(x_i + np.dot(h_tm1_i, recurrent_kernel_i))
f = recurrent_activation(x_f + np.dot(h_tm1_f, recurrent_kernel_f))
# New cell state: forget-gated old state plus input-gated candidate.
c = f * c_tm1 + i * activation(x_c + np.dot(h_tm1_c, recurrent_kernel_c))
o = recurrent_activation(x_o + np.dot(h_tm1_o, recurrent_kernel_o))

# Hidden state (the layer output): output-gated, squashed cell state.
h = o * activation(c)

In [9]:
# Verify that the hand-computed hidden state agrees with the Keras prediction
# to 4 decimal places; raises AssertionError otherwise.
np.testing.assert_array_almost_equal(h,Y, decimal=4)

In [10]:
# Display the manual result next to the Keras output for a visual comparison.
h, Y

(array([[0.09993291]]), array([[0.0999329]], dtype=float32))

In [11]:
# Print the input contribution of every gate followed by the resulting gate
# values and cell state, one "name: value" line each.
#
# Each quantity is a 1x1 array in this single-unit, single-sample setting.
# `.item()` extracts the Python float explicitly instead of relying on the
# implicit array-to-scalar conversion inside `%f`, which NumPy deprecated for
# arrays with ndim > 0. It raises ValueError if an array holds more than one
# element (e.g. after increasing `num_units`).
named_values = [
    ("x_i", x_i),
    ("x_f", x_f),
    ("x_c", x_c),
    ("x_o", x_o),
    ("i", i),
    ("f", f),
    ("c", c),
    ("o", o),
]
for label, value in named_values:
    print("%s: %f" % (label, value.item()))



x_i: -2.193540
x_f: -3.252910
x_c: 10.566496
x_o: 7.354731
i: 0.100332
f: 0.037222
c: 0.100332
o: 0.999361


## References

- https://keras.io/layers/recurrent/
- https://adventuresinmachinelearning.com/keras-lstm-tutorial/
- https://stackoverflow.com/questions/42861460/how-to-interpret-weights-in-a-lstm-layer-in-keras
- https://github.com/keras-team/keras/blob/master/keras/layers/recurrent.py#L1863
- http://deeplearning.net/tutorial/lstm.html
- https://stackoverflow.com/questions/51199753/extract-cell-state-lstm-keras
- https://stats.stackexchange.com/questions/221513/why-are-the-weights-of-rnn-lstm-networks-shared-across-time