In [28]:
import numpy as np 
from scipy.special import expit as sigmoid
import torch
from torch import nn

In [29]:
#Set Parameters for a small LSTM network
input_size  = 1 # number of features in one 'event' (one time step) of the sequence
hidden_dim  = 3 # 3 cells in the LSTM layer
output_size = 1 # size of the desired model output per time step

In [30]:
#Initialize a PyTorch LSTM for comparison to our NumPy LSTM
class LSTM(nn.Module):
    """LSTM layer followed by a fully-connected read-out layer.

    Args:
        input_size: number of features in each time step.
        output_size: number of values produced per time step by the final layer.
        hidden_dim: number of LSTM cells (size of the hidden and cell states).
        n_layers: number of stacked LSTM layers.
    """

    def __init__(self, input_size, output_size, hidden_dim, n_layers=1):
        super().__init__()
        self.hidden_dim = hidden_dim
        # batch_first=True: inputs/outputs are laid out as (batch, seq_len, features)
        self.lstm = nn.LSTM(input_size, hidden_dim, n_layers, batch_first=True) # LSTM Layer
        self.fc = nn.Linear(hidden_dim, output_size) # fully-connected layer

    def forward(self, x, hidden):
        """Run the sequence through the LSTM and the read-out layer.

        Args:
            x: input tensor laid out as (batch, seq_len, input_size).
            hidden: initial ``(h, c)`` tuple handed straight to ``nn.LSTM``,
                or ``None``.

        Returns:
            ``(model_output, (h, c))`` where ``model_output`` has shape
            (batch * seq_len, output_size) and ``(h, c)`` are the final hidden
            and cell states returned by ``nn.LSTM``.
        """
        # get LSTM outputs
        lstm_output, (h, c) = self.lstm(x, hidden)
        # shape output to be (batch_size*seq_length, hidden_dim);
        # reshape (rather than view) also copes with a non-contiguous LSTM output
        lstm_output = lstm_output.reshape(-1, self.hidden_dim)
        # get final output
        model_output = self.fc(lstm_output)

        return model_output, (h, c)
      
# Fix the RNG seed so the randomly initialised weights are the same on every run.
torch.manual_seed(42069)
torch_lstm = LSTM(input_size = input_size, 
                 hidden_dim = hidden_dim,
                 output_size = output_size,
                 )

# Grab every learnable tensor by name; the NumPy LSTM below reuses these exact values.
state = torch_lstm.state_dict()
# print(state)
# NOTE(review): np.transpose is applied to torch tensors here; the recorded output
# shows tensors coming back, but this relies on NumPy/PyTorch interop — confirm it
# still works on the installed versions.
# The 2-D weights are printed transposed; the 1-D biases print unchanged.
print(np.transpose(state['lstm.weight_ih_l0']))
print(np.transpose(state['lstm.bias_ih_l0']))
print(np.transpose(state['lstm.weight_hh_l0']))
print(np.transpose(state['lstm.bias_hh_l0']))
print(state['fc.weight'])
print(state['fc.bias'])


tensor([[ 0.3581,  0.1449, -0.1517, -0.0627,  0.2185,  0.3742,  0.1810, -0.3759,
          0.5075, -0.0862,  0.4901,  0.4955]])
tensor([-0.3353, -0.3428, -0.5422, -0.1341, -0.0048,  0.2355,  0.2891, -0.3969,
         0.2711,  0.1751, -0.0789, -0.3125])
tensor([[-0.3813, -0.0860,  0.4179,  0.5138, -0.4246, -0.0035,  0.1918, -0.5070,
          0.4756, -0.2009, -0.3760,  0.2356],
        [-0.0175,  0.3113,  0.3616, -0.3160,  0.5756, -0.4261,  0.0499, -0.1637,
          0.1121,  0.2702, -0.4745, -0.1081],
        [ 0.2550,  0.0359,  0.5455, -0.1759, -0.0060, -0.3326, -0.4534, -0.2762,
         -0.0910,  0.5070, -0.4937,  0.1096]])
tensor([-0.2699, -0.4251, -0.2621,  0.0815,  0.3582, -0.4591,  0.4775, -0.5156,
         0.5231, -0.4899,  0.2584, -0.3921])
tensor([[ 0.3681,  0.4744, -0.0601]])
tensor([-0.2165])


In [31]:
#--------------------------------------------------------------------
#Simple time series data: four time steps, each holding a single value
# Random alternative (kept for reference):
# data = np.random.rand(2, input_size)
data = [[0.0517], [0.3158], [-0.432], [0.2567]]
print(data)

[[0.0517], [0.3158], [-0.432], [0.2567]]


In [32]:
#PyTorch expects an extra dimension for batch size:
# the LSTM layer was built with batch_first=True, so the new axis goes in front.
torch_batch = torch.Tensor(data).unsqueeze(0) 
# None is passed as the initial hidden argument (nn.LSTM then starts from zero
# states per the PyTorch docs, matching the zero-initialised NumPy states below).
torch_output, (torch_hidden, torch_cell) = torch_lstm(torch_batch, None)

# Reference values the NumPy implementation is compared against.
print('\nPyTorch LSTM Output:')
print(torch_output)
print('\n','-'*40)
print(f'Torch Hidden State: {torch_hidden}')
print(f'Torch Cell State: {torch_cell}\n')


PyTorch LSTM Output:
tensor([[-0.2451],
        [-0.2719],
        [-0.2536],
        [-0.2818]], grad_fn=<AddmmBackward>)

 ----------------------------------------
Torch Hidden State: tensor([[[ 0.1665, -0.2494,  0.1377]]], grad_fn=<StackBackward>)
Torch Cell State: tensor([[[ 0.4326, -0.4673,  0.3855]]], grad_fn=<StackBackward>)



In [33]:
def forget_gate(x, h, Weights_hf, Bias_hf, Weights_xf, Bias_xf, prev_cell_state):
    """Scale the previous cell state by the forget-gate activation.

    The gate pre-activation is the sum of an affine map of the hidden state
    ``h`` and an affine map of the current event ``x``; it is printed for
    debugging, squashed with a sigmoid and multiplied element-wise with
    ``prev_cell_state``.

    Returns:
        ``sigmoid(Weights_hf·h + Bias_hf + Weights_xf·x + Bias_xf) * prev_cell_state``
    """
    hidden_term = np.dot(Weights_hf, h) + Bias_hf
    event_term = np.dot(Weights_xf, x) + Bias_xf
    pre_activation = hidden_term + event_term
    print('f_input: ', pre_activation)
    keep_fraction = sigmoid(pre_activation)
    return np.multiply(keep_fraction, prev_cell_state)

In [34]:
def input_gate(x, h, Weights_hi, Bias_hi, Weights_xi, Bias_xi, Weights_hl, Bias_hl, Weights_xl, Bias_xl):
    """Compute the gated candidate values to be added to the cell state.

    Two pre-activations are built from the hidden state ``h`` and the event
    ``x``: the "ignore" (input-gate) one goes through a sigmoid, the "learn"
    (candidate) one goes through tanh. Both pre-activations are printed for
    debugging, and the element-wise product of the two activations is returned.
    """
    # input ("ignore") gate pre-activation
    gate_pre = (np.dot(Weights_hi, h) + Bias_hi) + (np.dot(Weights_xi, x) + Bias_xi)
    # candidate ("learn") pre-activation
    candidate_pre = (np.dot(Weights_hl, h) + Bias_hl) + (np.dot(Weights_xl, x) + Bias_xl)

    print('i_input: ', gate_pre)
    print('c_input: ', candidate_pre)

    gate = sigmoid(gate_pre)
    candidate = np.tanh(candidate_pre)
    return np.multiply(gate, candidate)

In [35]:
def cell_state(forget_gate_output, input_gate_output):
    """Return the new cell state: what the forget gate kept plus what the input gate added."""
    updated_state = forget_gate_output + input_gate_output
    return updated_state

In [36]:
def output_gate(x, h, Weights_ho, Bias_ho, Weights_xo, Bias_xo, cell_state):
    """Produce the new hidden state from the updated cell state.

    The output-gate pre-activation (affine map of ``h`` plus affine map of
    ``x``) is printed for debugging, passed through a sigmoid, and multiplied
    element-wise with ``tanh(cell_state)``.
    """
    hidden_term = np.dot(Weights_ho, h) + Bias_ho
    event_term = np.dot(Weights_xo, x) + Bias_xo
    print('o_input: ', hidden_term + event_term)
    exposure = sigmoid(event_term + hidden_term)
    squashed_state = np.tanh(cell_state)
    return np.multiply(exposure, squashed_state)

In [37]:
def model_output(lstm_output, fc_Weight, fc_Bias):
    """Map the LSTM output to the desired output size with the final fully connected layer.

    Returns ``fc_Weight · lstm_output + fc_Bias``.
    """
    projected = np.dot(fc_Weight, lstm_output)
    return projected + fc_Bias

In [38]:
#Event (x) Weights and Biases for all gates
# Each PyTorch tensor holds four blocks of hidden_dim rows stacked along dim 0;
# they are sliced here in the order i (input), f (forget), l (learn/candidate), o (output).
Weights_xi = state['lstm.weight_ih_l0'][0:hidden_dim].numpy()  # shape  [h, x]
Weights_xf = state['lstm.weight_ih_l0'][hidden_dim:hidden_dim*2].numpy()  # shape  [h, x]
Weights_xl = state['lstm.weight_ih_l0'][hidden_dim*2:hidden_dim*3].numpy()  # shape  [h, x]
Weights_xo = state['lstm.weight_ih_l0'][hidden_dim*3:hidden_dim*4].numpy() # shape  [h, x]

# The bias tensors are 1-D, so each slice is a flat vector of length h.
Bias_xi = state['lstm.bias_ih_l0'][0:hidden_dim].numpy()  #shape is [h]
Bias_xf = state['lstm.bias_ih_l0'][hidden_dim:hidden_dim*2].numpy()  #shape is [h]
Bias_xl = state['lstm.bias_ih_l0'][hidden_dim*2:hidden_dim*3].numpy()  #shape is [h]
Bias_xo = state['lstm.bias_ih_l0'][hidden_dim*3:hidden_dim*4].numpy() #shape is [h]

#Hidden state (h) Weights and Biases for all gates
Weights_hi = state['lstm.weight_hh_l0'][0:hidden_dim].numpy()  #shape is [h, h]
Weights_hf = state['lstm.weight_hh_l0'][hidden_dim:hidden_dim*2].numpy()  #shape is [h, h]
Weights_hl = state['lstm.weight_hh_l0'][hidden_dim*2:hidden_dim*3].numpy()  #shape is [h, h]
Weights_ho = state['lstm.weight_hh_l0'][hidden_dim*3:hidden_dim*4].numpy() #shape is [h, h]

Bias_hi = state['lstm.bias_hh_l0'][0:hidden_dim].numpy()  #shape is [h]
Bias_hf = state['lstm.bias_hh_l0'][hidden_dim:hidden_dim*2].numpy()  #shape is [h]
Bias_hl = state['lstm.bias_hh_l0'][hidden_dim*2:hidden_dim*3].numpy()  #shape is [h]
Bias_ho = state['lstm.bias_hh_l0'][hidden_dim*3:hidden_dim*4].numpy() #shape is [h]

# Final, fully connected layer Weights and Bias
# Only row 0 / element 0 is taken, i.e. a single output unit (output_size = 1 here).
fc_Weight = state['fc.weight'][0].numpy() #shape is [h] (first row of the [output_size, h] matrix)
fc_Bias = state['fc.bias'][0].numpy() #0-d scalar (first element of the [output_size] bias)


In [39]:
#Initialize cell and hidden states with zeroes
# One value per LSTM cell; both are overwritten at every step of the loop below.
h = np.zeros(hidden_dim)
c = np.zeros(hidden_dim)

In [40]:
#Loop through the sequence one event at a time, updating the hidden and cell states each time
# Reset both states inside this cell so that re-running it reproduces the same
# trace instead of continuing from whatever h and c the previous run left behind.
h = np.zeros(hidden_dim)
c = np.zeros(hidden_dim)

print('NumPy LSTM Output:')
for eventx in data:
    print('\n', 'input:', eventx)

    # f: previous cell state scaled by the forget gate
    f = forget_gate(eventx, h, Weights_hf, Bias_hf, Weights_xf, Bias_xf, c)
    print('f:', f)

    # i: candidate values scaled by the input gate
    i = input_gate(eventx, h, Weights_hi, Bias_hi, Weights_xi, Bias_xi, Weights_hl, Bias_hl, Weights_xl, Bias_xl)
    print('i:', i)

    # new cell state = kept part of the old state + gated candidate
    c = cell_state(f,i)
    print('c:', c)

    # new hidden state; it is also the LSTM output for this time step
    h = output_gate(eventx, h, Weights_ho, Bias_ho, Weights_xo, Bias_xo, c)
    print('h:', h)
    # per-step model output, comparable to one row of the PyTorch output
    print(model_output(h, fc_Weight, fc_Bias))

print(f'np Hidden State: {h}')
print(f'np Cell State: {c}')

NumPy LSTM Output:

 input: [0.0517]
f_input:  [-0.05589518  0.36465428 -0.20423766]
f: [0. 0. 0.]
i_input:  [-0.58665261 -0.76035089 -0.81214845]
c_input:  [ 0.77588532 -0.93191654  0.82045399]
i: [ 0.23243316 -0.23302962  0.20761461]
c: [ 0.23243316 -0.23302962  0.20761461]
o_input:  [-0.3193262   0.20481827 -0.67898336]
h: [ 0.09609288 -0.12613059  0.06887314]
-0.245144551475458

 input: [0.3158]
f_input:  [ 0.00467114  0.30853368 -0.0749157 ]
f: [ 0.11648801 -0.13434794  0.09992072]
i_input:  [-0.5089514  -0.76714461 -0.82009158]
c_input:  [ 0.80458947 -1.07828439  0.97977924]
i: [ 0.25026591 -0.25131913  0.23021632]
c: [ 0.36675392 -0.38566707  0.33013705]
o_input:  [-0.3605575   0.32396719 -0.50430056]
h: [ 0.14426071 -0.21332583  0.11997917]
-0.2718528413537026

 input: [-0.432]
f_input:  [ 0.09485281  0.07422218 -0.33471362]
f: [ 0.19206735 -0.19998651  0.13769824]
i_input:  [-0.78056683 -0.90494716 -0.69014801]
c_input:  [ 0.65097977 -0.8214569   0.60878946]
i: [ 0.17982453 -0