In [1]:
import numpy as np

def sigmoid(x):
    return 1/(1+np.exp(-x))

http://colah.github.io/posts/2015-08-Understanding-LSTMs/

![title](imgs/LSTM3-chain.png)

![title](imgs/LSTM2-notation.png)

In [5]:
data_x1 = np.array([[0., 1.], [1, 0], [1, 1], [0, -1], [-1, 0], [-1, -1], [-1,-1]])
data_x2 = np.array([[1., 0.], [-1, 1], [0, -2], [2, 1], [1, -1], [2, 2], [1, 1]])

data_dim =  data_x1.shape[1]
n_inputs = data_x1.shape[0]
n_nodes = 3
n_steps = 2

weights_dim = [data_dim + n_nodes, n_nodes]

lt_memory = np.zeros([n_inputs, n_nodes])
st_memory = np.zeros([n_inputs, n_nodes])

In [6]:
np.concatenate([data_x1, st_memory], axis=1)

array([[ 0.,  1.,  0.,  0.,  0.],
       [ 1.,  0.,  0.,  0.,  0.],
       [ 1.,  1.,  0.,  0.,  0.],
       [ 0., -1.,  0.,  0.,  0.],
       [-1.,  0.,  0.,  0.,  0.],
       [-1., -1.,  0.,  0.,  0.],
       [-1., -1.,  0.,  0.,  0.]])

## Forget gate layer

![title](imgs/LSTM3-focus-f.png)

In [12]:
np.random.seed(666)

Wf = 2 * np.random.rand(*weights_dim) - 1
bf = 2 * np.random.rand(n_nodes) - 1

def gateLayer(x, h, W, b):
    return sigmoid(np.matmul(np.concatenate([x, h], axis=1), W) + b)

forget_gate1 = gateLayer(data_x1, st_memory, Wf, bf)
forget_gate1

array([[ 0.73197675,  0.47829704,  0.14818873],
       [ 0.72108162,  0.42521307,  0.39621623],
       [ 0.803063  ,  0.64600098,  0.19847554],
       [ 0.52329382,  0.13093712,  0.54991046],
       [ 0.53695366,  0.15733985,  0.24465725],
       [ 0.42369298,  0.0703666 ,  0.46189409],
       [ 0.42369298,  0.0703666 ,  0.46189409]])

## Input gate layer & tanh layer

![title](imgs/LSTM3-focus-i.png)

In [16]:
Wi = 2 * np.random.rand(*weights_dim) - 1
bi = 2 * np.random.rand(n_nodes) - 1

WC = 2 * np.random.rand(*weights_dim) - 1
bC = 2 * np.random.rand(n_nodes) - 1

def tanhLayer(x, h, WC, bC):
    return np.tanh(np.matmul(np.concatenate([x, h], axis=1), WC) + bC)

input_gate1 = gateLayer(data_x1, st_memory, Wi, bi)
print(input_gate1)

tanh_gate1 = tanhLayer(data_x1, st_memory, WC, bC)
print(tanh_gate1)

[[ 0.37960833  0.36042807  0.46263574]
 [ 0.24426619  0.31878066  0.56593779]
 [ 0.20454467  0.1720362   0.54548179]
 [ 0.4915469   0.74082486  0.50399829]
 [ 0.64666322  0.77489047  0.40154326]
 [ 0.69701036  0.8857472   0.42160843]
 [ 0.69701036  0.8857472   0.42160843]]
[[ 0.04600799  0.51793142  0.52101879]
 [-0.71725169  0.84304518 -0.05423635]
 [ 0.03103179  0.76607325  0.70943963]
 [-0.94883506  0.76791217 -0.86248108]
 [-0.7023707   0.34267455 -0.58558794]
 [-0.94731803  0.52119215 -0.92334738]
 [-0.94731803  0.52119215 -0.92334738]]


## Update Long time memory
![title](imgs/LSTM3-focus-C.png)

In [17]:
lt_memory1 = forget_gate1 * lt_memory + input_gate1 * tanh_gate1
lt_memory1

array([[ 0.01746501,  0.18667702,  0.24104191],
       [-0.17520034,  0.2687465 , -0.0306944 ],
       [ 0.00634739,  0.13179233,  0.3869864 ],
       [-0.46639693,  0.56888843, -0.43468899],
       [-0.4541973 ,  0.26553524, -0.23513889],
       [-0.66029049,  0.46164449, -0.38929103],
       [-0.66029049,  0.46164449, -0.38929103]])

## Output gate and generate output

![title](imgs/LSTM3-focus-o.png)

In [20]:
Wo = 2 * np.random.rand(*weights_dim) - 1
bo = 2 * np.random.rand(n_nodes) - 1

output_gate1 = gateLayer(data_x1, st_memory, Wo, bo)
print(input_gate1)

output1 = output_gate1 * np.tanh(lt_memory1)
print(output1)

[[ 0.37960833  0.36042807  0.46263574]
 [ 0.24426619  0.31878066  0.56593779]
 [ 0.20454467  0.1720362   0.54548179]
 [ 0.4915469   0.74082486  0.50399829]
 [ 0.64666322  0.77489047  0.40154326]
 [ 0.69701036  0.8857472   0.42160843]
 [ 0.69701036  0.8857472   0.42160843]]
[[ 0.0115965   0.10223105  0.15532237]
 [-0.05044544  0.08215761 -0.01627125]
 [ 0.00313256  0.06458154  0.22006581]
 [-0.11290898  0.11036491 -0.21560403]
 [-0.26709437  0.11071561 -0.15094074]
 [-0.24031168  0.11161061 -0.21879666]
 [-0.24031168  0.11161061 -0.21879666]]
