In [1]:
-- Torch neural-network modules (nn) and the graph-style container API (nngraph).
require 'nn'
require 'nngraph'
-- iTorch plotting helper, bound to a global; not referenced by the visible cells.
Plot = require "itorch.Plot"
-- Global namespace table, created before the includes below run.
-- Presumably utils.lua / LSTM.lua attach their definitions to it
-- (lstm.LSTM is called in a later cell) -- confirm against those files.
lstm = {}
include('utils.lua')
include('LSTM.lua')

In [2]:
-- Set Torch's thread count to 1.
torch.setnumthreads(1)
-- Construct the network through lstm.LSTM with a config table:
-- 2 layers, input dimension 2, hidden dimension 3. Stored in the global `net`.
net = lstm.LSTM({num_layers=2, input_dim=2, hidden_dim=3})

In [3]:
-- Random 2x4 input. The first dimension matches input_dim=2; the second is
-- presumably the number of time steps -- TODO confirm against LSTM.lua.
inputs = torch.randn(2,4)
-- Run the forward pass and keep the output in the global `result`.
result = net:forward(inputs)

In [5]:
-- Random upstream gradient of size 3x2x4. Presumably laid out as
-- (hidden_dim, num_layers, time steps) to mirror the forward output --
-- TODO confirm against LSTM.lua.
grads = torch.randn(3,2,4)
-- Backpropagate through the network; the returned value is kept in `in_grads`.
in_grads = net:backward(inputs, grads)

In [9]:
-- Select every entry along dim 1 at index 2 of dim 2 and index 4 of dim 3.
-- Presumably the hidden vector of layer 2 at step 4 -- verify the layout in LSTM.lua.
result[{{},2,4}]

0.01 *
-3.1196
 1.3718
 3.1856
[torch.DoubleTensor of size 3]



In [6]:
-- Build a one-time-step, multi-layer LSTM cell as an nngraph gModule.
--   graph inputs : {x_t, {c_{t-1} for each layer}, {h_{t-1} for each layer}}
--   graph outputs: {{c_t for each layer}, {h_t for each layer}}
-- Modules and nodes are created in a fixed order on purpose: the nn.Linear
-- constructors draw their initial weights from the global RNG.
self = {num_layers = 2, input_dim=2, hidden_dim=3}
local n_hid = self.hidden_dim
local x_node = nn.Identity()()
local prev_cells = nn.Identity()()
local prev_hiddens = nn.Identity()()
local graph_inputs = {x_node, prev_cells, prev_hiddens}
local new_hiddens, new_cells = {}, {}

for layer = 1, self.num_layers do
    -- previous-step state of this layer
    local c_prev = nn.SelectTable(layer)(prev_cells)
    local h_prev = nn.SelectTable(layer)(prev_hiddens)

    -- The bottom layer reads x_t; every higher layer reads the hidden output
    -- that the layer directly below produced at the current step.
    local from_below
    if layer == 1 then
        from_below = nn.Linear(self.input_dim, 4*n_hid)(x_node)
    else
        from_below = nn.Linear(n_hid, 4*n_hid)(new_hiddens[layer-1])
    end
    -- recurrent contribution: W_h * h_{t-1} + b_h
    local from_prev = nn.Linear(n_hid, 4*n_hid)(h_prev)

    -- Summed pre-activations, stacked along dimension 1 as
    -- [input gate; forget gate; output gate; candidate update].
    local preacts = nn.CAddTable()({from_below, from_prev})

    -- Narrow along dimension 1 (non-batched column-vector input):
    -- the first 3*n_hid rows are the gates (Sigmoid),
    -- the last n_hid rows are the candidate update (Tanh).
    local gate_rows = nn.Narrow(1, 1, 3*n_hid)(preacts)
    local gates = nn.Sigmoid()(gate_rows)
    local update_rows = nn.Narrow(1, 3*n_hid + 1, n_hid)(preacts)
    local candidate = nn.Tanh()(update_rows)

    -- slice the activated gate block into the three individual gates
    local in_gate = nn.Narrow(1, 1, n_hid)(gates)
    local forget_gate = nn.Narrow(1, n_hid + 1, n_hid)(gates)
    local out_gate = nn.Narrow(1, 2*n_hid + 1, n_hid)(gates)

    -- c_t = f_t .* c_{t-1} + i_t .* candidate
    local kept = nn.CMulTable()({forget_gate, c_prev})
    local written = nn.CMulTable()({in_gate, candidate})
    new_cells[layer] = nn.CAddTable()({kept, written})

    -- h_t = o_t .* tanh(c_t)
    local squashed = nn.Tanh()(new_cells[layer])
    new_hiddens[layer] = nn.CMulTable()({out_gate, squashed})
end

-- Collect the per-layer nodes into two table-valued outputs: new c, then new h.
local graph_outputs = {nn.Identity()(new_cells), nn.Identity()(new_hiddens)}

cell = nn.gModule(graph_inputs, graph_outputs)


In [7]:
-- Smoke-test the cell on fixed values: a 2-element input plus, for each of
-- the two layers, a 3-element previous cell state and previous hidden state.
local x_t = torch.Tensor{1,2}
local c_prev = {torch.Tensor{1,2,3}, torch.Tensor{1,2,3}}
local h_prev = {torch.Tensor{1,2,3}, torch.Tensor{1,2,3}}
result = cell:forward({x_t, c_prev, h_prev})

In [8]:
-- Inspect the `output` field of the cell after the forward call; as printed
-- below it is a pair of two-entry tables of size-3 tensors (new c, new h).
cell.output

{
  1 : 
    {
      1 : DoubleTensor - size: 3
      2 : DoubleTensor - size: 3
    }
  2 : 
    {
      1 : DoubleTensor - size: 3
      2 : DoubleTensor - size: 3
    }
}


In [9]:
-- Backpropagate through the cell using the same fixed inputs as the forward
-- call and random gradients for the two output tables ({c per layer}, {h per layer}).
-- The displayed result holds one gradient per graph input: x, the c table, the h table.
cell:backward({torch.Tensor{1,2}, {torch.Tensor{1,2,3},torch.Tensor{1,2,3}}, {torch.Tensor{1,2,3},torch.Tensor{1,2,3}}},
    {{torch.randn(3), torch.randn(3)}, {torch.randn(3), torch.randn(3)}}
    )

{
  1 : DoubleTensor - size: 2
  2 : 
    {
      1 : DoubleTensor - size: 3
      2 : DoubleTensor - size: 3
    }
  3 : 
    {
      1 : DoubleTensor - size: 3
      2 : DoubleTensor - size: 3
    }
}
