In [1]:
require 'nn'
require 'nngraph'
Plot = require "itorch.Plot"
lstm = {}
include('utils.lua')
include('LSTM.lua')

In [2]:
-- Ask Torch to use a single thread for this session.
torch.setnumthreads(1)
-- Build the network under test via lstm.LSTM (loaded from LSTM.lua above) with
-- 2 layers, input_dim = 2 and hidden_dim = 3. The constructor itself is not
-- shown in this notebook -- see LSTM.lua for how these options are interpreted.
net = lstm.LSTM({num_layers=2, input_dim=2, hidden_dim=3})

In [3]:
-- Random 5x2x4 input. The middle dimension matches input_dim = 2; the other two
-- are presumably batch (5) and time steps (4) -- TODO confirm against LSTM.lua.
inputs = torch.randn(5,2,4)
-- NOTE(review): forget() presumably clears any remembered recurrent state before
-- the forward pass; its definition lives in LSTM.lua, not in this notebook.
net:forget()
result = net:forward(inputs)

In [4]:
-- Random 5x3x2x4 tensor used as the gradient w.r.t. the network output.
-- NOTE(review): the sizes look like (batch, hidden_dim, num_layers, time), but the
-- expected layout is defined by LSTM.lua -- confirm there.
grads = torch.randn(5,3,2,4)
-- Backward pass with the same `inputs` that were given to forward(); the returned
-- gradient w.r.t. the input is kept in in_grads (printed below as 5x2x4).
in_grads = net:backward(inputs, grads)

In [5]:
-- Display the input gradient; it has the same 5x2x4 size as `inputs`.
in_grads

(1,.,.) = 
  0.0090  0.0208 -0.0278 -0.2552
  0.0216 -0.0778 -0.0130  0.1596

(2,.,.) = 
  0.2274 -0.1074  0.2891  0.0035
  0.2124  0.0010 -0.0062  0.0005

(3,.,.) = 
  0.0225  0.0207 -0.0074  0.0059
 -0.0186  0.0901 -0.0268  0.0401

(4,.,.) = 
  0.1223  0.0559  0.0851 -0.1988
  0.1583  0.0659  0.0401  0.0882

(5,.,.) = 
  0.1387 -0.0185 -0.1944 -0.0403
  0.0097 -0.0395 -0.0684  0.0438
[torch.DoubleTensor of size 5x2x4]



In [12]:
-- Stand-alone rebuild of one time step of a stacked LSTM as an nngraph module.
-- `self` only imitates the option fields a real LSTM object would carry.
self = {num_layers = 2, input_dim = 2, hidden_dim = 3}
local H = self.hidden_dim

-- Graph inputs: x_t, plus one table of previous cell states and one table of
-- previous hidden states (one entry per layer).
local x_t = nn.Identity()()
local prev_c = nn.Identity()()
local prev_h = nn.Identity()()
local next_c, next_h = {}, {}

for layer = 1, self.num_layers do
    -- This layer's state from the previous time step.
    local c_prev = nn.SelectTable(layer)(prev_c)
    local h_prev = nn.SelectTable(layer)(prev_h)

    -- The bottom layer reads x_t; every other layer reads the hidden state that
    -- the layer directly below has just produced for the current step.
    local below, below_dim
    if layer == 1 then
        below, below_dim = x_t, self.input_dim
    else
        below, below_dim = next_h[layer - 1], H
    end

    -- One 4*H wide pre-activation holds the three gates and the candidate update:
    -- (W_x * below + b_x) + (W_h * h_prev + b_h).
    local from_below = nn.Linear(below_dim, 4 * H)(below)
    local from_prev = nn.Linear(H, 4 * H)(h_prev)
    local preacts = nn.CAddTable()({from_below, from_prev})

    -- Narrow along dimension 2, i.e. the feature axis of a (batch x features)
    -- input. The first 3*H columns are the gates (Sigmoid), the last H columns
    -- are the candidate update (Tanh).
    local gates = nn.Sigmoid()(nn.Narrow(2, 1, 3 * H)(preacts))
    local candidate = nn.Tanh()(nn.Narrow(2, 3 * H + 1, H)(preacts))
    local in_gate = nn.Narrow(2, 1, H)(gates)
    local forget_gate = nn.Narrow(2, H + 1, H)(gates)
    local out_gate = nn.Narrow(2, 2 * H + 1, H)(gates)

    -- c_t = f_t .* c_{t-1} + i_t .* candidate
    local kept = nn.CMulTable()({forget_gate, c_prev})
    local written = nn.CMulTable()({in_gate, candidate})
    next_c[layer] = nn.CAddTable()({kept, written})

    -- h_t = o_t .* tanh(c_t)
    next_h[layer] = nn.CMulTable()({out_gate, nn.Tanh()(next_c[layer])})
end

-- The module maps {x_t, {c_prev...}, {h_prev...}} to {{c_t...}, {h_t...}}.
cell = nn.gModule(
    {x_t, prev_c, prev_h},
    {nn.Identity()(next_c), nn.Identity()(next_h)}
)


In [16]:
-- Push one random step through the cell: a 5x2 input (batch of 5, input_dim 2)
-- plus previous cell / hidden states, each a table of two 5x3 tensors (one per layer).
local x = torch.randn(5,2)
local prev_c = {torch.randn(5,3), torch.randn(5,3)}
local prev_h = {torch.randn(5,3), torch.randn(5,3)}
result = cell:forward({x, prev_c, prev_h})

In [17]:
-- Display the forward output: {new cell states, new hidden states}, each a table
-- with one 5x3 tensor per layer.
result

{
  1 : 
    {
      1 : DoubleTensor - size: 5x3
      2 : DoubleTensor - size: 5x3
    }
  2 : 
    {
      1 : DoubleTensor - size: 5x3
      2 : DoubleTensor - size: 5x3
    }
}


In [18]:
-- Shape check of the backward pass: returns {grad x_t, {grad c_prev...}, {grad h_prev...}}.
-- The first input must be a 5x2 tensor like the one given to forward().
-- torch.Tensor{5,2} (table constructor) would instead build a 1-D tensor holding
-- the values 5 and 2, so torch.randn(5,2) is used here.
-- NOTE: nn expects backward() to receive the same input as the preceding
-- forward(); the random tensors below only reproduce the shapes, which is enough
-- for inspecting the sizes of the returned gradients.
cell:backward({torch.randn(5,2), {torch.randn(5,3), torch.randn(5,3)}, {torch.randn(5,3), torch.randn(5,3)}},
    {{torch.randn(5,3),torch.randn(5,3)}, {torch.randn(5,3),torch.randn(5,3)}}
    )

{
  1 : DoubleTensor - size: 5x2
  2 : 
    {
      1 : DoubleTensor - size: 5x3
      2 : DoubleTensor - size: 5x3
    }
  3 : 
    {
      1 : DoubleTensor - size: 5x3
      2 : DoubleTensor - size: 5x3
    }
}


In [38]:
-- Scratch 5x2x4 tensor (same size as the network input above) for indexing experiments.
a = torch.randn(5,2,4)

In [41]:
-- Keep dimensions 1 and 2 whole and pick index 1 along dimension 3: a 5x2 slice.
a[{{}, {}, 1}]

 1.1834  0.9655
-0.5355  0.8005
 4.3073  0.5039
 0.2041  0.0605
 0.2387 -0.3394
[torch.DoubleTensor of size 5x2]



In [42]:
-- size() returns the per-dimension sizes as a LongStorage (5, 2, 4 here).
a:size()


 5
 2
 4
[torch.LongStorage of size 3]



In [43]:
-- Length of the size storage, i.e. the number of dimensions of a (3).
#a:size()

3	


In [44]:
-- Scratch 2x4 tensor for the reshape experiment below.
b= torch.randn(2,4)

In [45]:
-- reshape() hands back a 1x2x4 tensor with the same values; the result is only
-- displayed, not assigned, and b itself is still 2x4 when printed afterwards.
b:reshape(1,2,4)

(1,.,.) = 
  0.2341 -0.6311 -1.8598 -1.4613
  0.3113  2.0795  0.7851 -1.4057
[torch.DoubleTensor of size 1x2x4]



In [46]:
-- Display b to confirm that the reshape() call above left it as a 2x4 tensor.
b

 0.2341 -0.6311 -1.8598 -1.4613
 0.3113  2.0795  0.7851 -1.4057
[torch.DoubleTensor of size 2x4]

