In [1]:
-- basic operands
a = torch.Tensor(5, 3)  -- construct a 5x3 matrix; entries are uninitialized (arbitrary memory)
print(a)

 6.9410e-310  1.4917e-154   0.0000e+00
  1.3956e-75   0.0000e+00   9.9513e-43
  0.0000e+00  7.9979e+169   0.0000e+00
 6.9534e-309   0.0000e+00   0.0000e+00
  0.0000e+00   0.0000e+00   0.0000e+00
[torch.DoubleTensor of size 5x3]



In [2]:
-- replace a with a random 5x3 matrix
a = torch.rand(5, 3)
print(a)

 0.5057  0.8245  0.7690
 0.0318  0.3256  0.8298
 0.9671  0.1232  0.2564
 0.4767  0.0911  0.3251
 0.1802  0.4645  0.0414
[torch.DoubleTensor of size 5x3]



In [3]:
-- random 3x4 matrix, shaped so that a (5x3) times b is defined
b = torch.rand(3, 4)

In [4]:
-- three ways of doing matrix multiplication
-- 1) the overloaded * operator
a*b

 1.5327  1.6127  1.0020  0.7498
 1.0899  0.8610  0.5248  0.2789
 0.5520  0.9612  0.3875  1.0297
 0.4929  0.6310  0.2775  0.5648
 0.4552  0.5796  0.4184  0.2090
[torch.DoubleTensor of size 5x4]



In [5]:
-- 2) torch.mm called as a function; the product comes back as its result
torch.mm(a, b)

 1.5327  1.6127  1.0020  0.7498
 1.0899  0.8610  0.5248  0.2789
 0.5520  0.9612  0.3875  1.0297
 0.4929  0.6310  0.2775  0.5648
 0.4552  0.5796  0.4184  0.2090
[torch.DoubleTensor of size 5x4]



In [6]:
-- 3) method form: c is preallocated as 5x4 and receives the product of a and b
c = torch.zeros(5, 4)
c:mm(a, b)

In [7]:
-- c should now match the 5x4 product produced by a*b and torch.mm(a, b)
print(c)

In [8]:
-- concatenation
-- two 1-D tensors (3 ones, 2 zeros) joined end to end into one of size 5
torch.cat(torch.ones(3), torch.zeros(2))

 1
 1
 1
 0
 0
[torch.DoubleTensor of size 5]



In [9]:
-- third argument 1: join along the first dimension (3x2 stacked on 2x2; result should be 5x2)
torch.cat(torch.ones(3, 2), torch.zeros(2, 2), 1)

In [10]:
-- third argument 2: join along the second dimension (3x2 beside 3x3 gives 3x5)
torch.cat(torch.ones(3, 2), torch.zeros(3, 3), 2)

 1  1  0  0  0
 1  1  0  0  0
 1  1  0  0  0
[torch.DoubleTensor of size 3x5]



In [11]:
-- neural networks
-- linear module
require 'nn';
-- 5 inputs -> 3 outputs: the module holds a 3x5 weight and a 3-element bias
lin = nn.Linear(5, 3)

In [12]:
-- evaluating the module on its own lists its fields and their sizes
lin

nn.Linear(5 -> 3)
{
  gradBias : DoubleTensor - size: 3
  weight : DoubleTensor - size: 3x5
  _type : torch.DoubleTensor
  output : DoubleTensor - empty
  gradInput : DoubleTensor - empty
  bias : DoubleTensor - size: 3
  gradWeight : DoubleTensor - size: 3x5
}


In [13]:
-- fields can be read by key; lin['weight'] is the same field as lin.weight
lin['weight']

 0.1784  0.2629 -0.0544 -0.3481 -0.3550
 0.2463 -0.2964  0.2082 -0.0777  0.2965
-0.3059  0.3679 -0.2043 -0.1491  0.0166
[torch.DoubleTensor of size 3x5]



In [14]:
-- forward pass
-- x is a random input with 5 elements, matching the input size of lin
x = torch.rand(5)
print(x)

In [15]:
-- run x through the linear module; y has 3 elements
y = lin:forward(x)
print(y)

-0.1520
 0.3172
-0.0344
[torch.DoubleTensor of size 3]



In [16]:
-- the same values computed by hand from the module's weight and bias
lin.weight * x + lin.bias

-0.1520
 0.3172
-0.0344
[torch.DoubleTensor of size 3]



In [17]:
-- backward pass
-- reset the parameter gradients first; gradWeight prints as all zeros afterwards
lin:zeroGradParameters()
print(lin.gradWeight)

 0  0  0  0  0
 0  0  0  0  0
 0  0  0  0  0
[torch.DoubleTensor of size 3x5]



In [18]:
-- gradient fed into backward: -1 at every position except +1 at index 2
grad = torch.Tensor({-1, 1, -1})
print(grad)

-1
 1
-1
[torch.DoubleTensor of size 3]



In [19]:
-- backpropagate grad through lin; the 5-element result is the gradient with respect to x
lin:backward(x, grad)

 0.3739
-0.9272
 0.4668
 0.4196
 0.6349
[torch.DoubleTensor of size 5]



In [20]:
-- gradWeight should no longer be all zeros after the backward call
print(lin.gradWeight)

In [21]:
-- manual gradient descent
-- NOTE(review): both updates ADD 0.1 * gradient, which moves the output along `grad`
-- (compare the next forward pass with the earlier one). Descent on a loss whose
-- gradient is `grad` would subtract instead -- confirm which is intended.
lin.weight:add(0.1*lin.gradWeight)
lin.bias:add(0.1*lin.gradBias)

In [22]:
-- forward the same x again to see the effect of the parameter update
lin:forward(x)

-0.4520
 0.6171
-0.3343
[torch.DoubleTensor of size 3]



In [23]:
-- more complicated networks
-- with package 'nn'
-- 20 -> 10 -> 10 -> 1 stack with tanh between the linear layers;
-- the layers are constructed in the same order as they are applied
net = nn.Sequential()
   :add(nn.Linear(20, 10))
   :add(nn.Tanh())
   :add(nn.Linear(10, 10))
   :add(nn.Tanh())
   :add(nn.Linear(10, 1));

In [24]:
-- feed a random 20-element input through net; the output has a single element
x = torch.rand(20)
y1 = net:forward(x)
print(y1)

-0.2079
[torch.DoubleTensor of size 1]



In [25]:
-- with package 'nngraph'
-- same 20 -> 10 -> 10 -> 1 tanh stack as the nn.Sequential version, written as a graph.
-- nngraph overloads `-`: a leading minus turns a module into a graph node, and
-- `node - module` feeds that node into the module (see the nngraph README).
require 'nngraph';
g1 = - nn.Linear(20, 10)

g2 = g1
   - nn.Tanh()
   - nn.Linear(10, 10)
   - nn.Tanh()
   - nn.Linear(10, 1)
-- g1 is the graph's input node, g2 its output node
gnet = nn.gModule({g1}, {g2})

In [26]:
-- fancy network
-- a graph with skip connections: later layers also see earlier activations
input = - nn.Identity()
-- L1: 10 -> 20, tanh
L1 = input
   - nn.Linear(10, 20)
   - nn.Tanh()
-- L2: joins input (10) with L1 (20) into 30 values, then 30 -> 60, tanh
L2 = {input, L1}
   - nn.JoinTable(1)
   - nn.Linear(30, 60)
   - nn.Tanh()
-- L3: joins L1 (20) with L2 (60) into 80 values, then 80 -> 1, tanh
L3 = {L1, L2}
   - nn.JoinTable(1)
   - nn.Linear(80, 1)
   - nn.Tanh()
g = nn.gModule({input},{L3})

In [27]:
-- draw the forward graph of g (g.fg)
-- NOTE(review): the two 'fancy' strings are presumably the title and the output
-- file name -- confirm against graph.dot
require 'pretty-nn'
graph.dot(g.fg, 'fancy', 'fancy')

In [28]:
-- training --
-- load serialized preprocessed data
-- the file holds a table of two tensors: xx is 1599x11 (one row per example,
-- used as the network input below) and yy has 1599 entries (the targets)
xx, yy = unpack(torch.load("redwine-quality.torch"))
print(xx:size())
print(yy:size())

 1599
   11
[torch.LongStorage of size 2]

 1599
[torch.LongStorage of size 1]



In [29]:
-- seed the RNG before the layers are constructed
torch.manualSeed(1234)
-- build the network
-- 11 inputs (one per column of xx) -> 11 -> 11 -> 1 output, tanh between the linear layers
g1 = - nn.Linear(11, 11)
g2 = g1
   - nn.Tanh()
   - nn.Linear(11, 11)
   - nn.Tanh()
   - nn.Linear(11, 1)
winenet = nn.gModule({g1}, {g2})
-- mean square error
loss = nn.MSECriterion()

In [30]:
-- train!
-- Per-example training of winenet with optim.adadelta, printing the average
-- loss after every epoch.
require 'optim';
n_epoches = 10
n_examples = xx:size(1)

-- W and gradW are the parameters and parameter gradients of winenet as
-- returned by getParameters(); the optimizer updates W in place.
W, gradW = winenet:getParameters()
optimState = {}

for epoch = 1, n_epoches do
   local total_loss = 0
   for i = 1, n_examples do
      -- Locals, so the per-example input/target do not overwrite the
      -- notebook-level globals x and y used by other cells.
      local x = xx[i]
      local y = torch.Tensor({yy[i]})

      -- Closure handed to the optimizer: returns the loss for this example
      -- and the gradient of that loss with respect to W.
      local function feval()
         -- Zero inside the closure so every evaluation starts from clean
         -- gradients, even if an optimizer calls feval more than once.
         winenet:zeroGradParameters()
         local predicted = winenet:forward(x)
         local L = loss:forward(predicted, y)
         total_loss = total_loss + L
         local dL_dy = loss:backward(predicted, y)
         winenet:backward(x, dL_dy) -- computes and updates gradW
         return L, gradW
      end

      optim.adadelta(feval, W, optimState)
   end
   print('at epoch', epoch, 'avg loss', total_loss/n_examples)
end

at epoch	1	avg loss	0.01004729150127	


at epoch	2	avg loss	0.0052290742312656	


at epoch	3	avg loss	0.0049271944192332	


at epoch	4	avg loss	0.0047921190128562	


at epoch	5	avg loss	0.0047070290363173	


at epoch	6	avg loss	0.0046433421289942	


at epoch	7	avg loss	0.0046036616283334	


at epoch	8	avg loss	0.0045714431047861	


at epoch	9	avg loss	0.004543280727934	


at epoch	10	avg loss	0.0045181197352359	


In [31]:
-- RNN
-- stepwise function
-- one step takes two inputs (ht1, xt), joins them into a single 20-element
-- vector, and maps it through Linear(20, 10) + Tanh to produce ht
-- (presumably ht1 is the previous hidden state and xt the current input,
-- each of size 10 as in the unrolling cell below)
ht1 = - nn.Identity()
xt = - nn.Identity()
ht = {ht1, xt} 
   - nn.JoinTable(1)
   - nn.Linear(20, 10)
   - nn.Tanh()
stepfunction = nn.gModule({ht1, xt}, {ht})

In [32]:
-- build clones

--- Make `cell` share weight, bias, gradWeight and gradBias with `src`.
-- For an nn.gModule, every forward node of `cell` that carries a module is
-- paired with the node at the same index in `src.forwardnodes`. Any other
-- nn.Module shares directly with `src`.
-- @param cell module (or graph module) that should receive the shared fields
-- @param src  module (or graph module) that owns the fields
-- Raises an error when `cell` is neither an nn.gModule nor an nn.Module.
function share_params(cell, src)
  local fields = {'weight', 'bias', 'gradWeight', 'gradBias'}

  -- Everything that is not a graph module: share directly or reject.
  if torch.type(cell) ~= 'nn.gModule' then
    if not torch.isTypeOf(cell, 'nn.Module') then
      error('parameters cannot be shared for this input')
    end
    cell:share(src, unpack(fields))
    return
  end

  -- Graph module: walk the forward nodes; nodes without a module are skipped.
  for idx = 1, #cell.forwardnodes do
    local mod = cell.forwardnodes[idx].data.module
    if mod then
      mod:share(src.forwardnodes[idx].data.module, unpack(fields))
    end
  end
end

-- Call getParameters() BEFORE cloning. It reallocates the memory behind W and
-- gradW, so clones created earlier would keep pointing at the old storage and
-- would no longer share parameters with stepfunction.
W, gradW = stepfunction:getParameters()

-- N copies of the step function, each sharing parameters and gradients with it
N = 100
clones = {}
for i = 1, N do
    local copy = stepfunction:clone()
    share_params(copy, stepfunction)
    clones[i] = copy
end

In [33]:
n = 7                        -- number of steps to unroll
h0 = torch.rand(10)          -- initial hidden state
x = torch.rand(n, 10)        -- one 10-element input per step
h = torch.zeros(n + 1, 10)   -- row 1 holds h0, row t+1 the state after step t

-- randomize the parameters shared by stepfunction and its clones
W:uniform(-1, 1)

-- forward: each step uses its own clone and reads the previous row of h
h[1] = h0
for t = 1, n do
    local step_input = {h[t], x[t]}
    h[t + 1] = clones[t]:forward(step_input)
end

In [34]:
-- 8 rows: the initial state followed by the state after each of the 7 steps
print(h)

 0.5982  0.7899  0.1065  0.3861  0.2972  0.9791  0.0584  0.2194  0.6213  0.0929
-0.8879  0.9448 -0.9973  0.2613 -0.6036  0.8370  0.9533  0.2834 -0.9841 -0.9885
-0.9585 -0.8517 -0.9440  0.9861  0.7925  0.9844 -0.4312  0.9621 -0.9957 -0.7714
-0.9906 -0.9941 -0.6195  0.9659 -0.7310  0.4731  0.5748 -0.6886 -0.9582 -0.9616
 0.2694 -0.9459  0.4245  0.9859 -0.8624  0.9984 -0.4313  0.6631 -0.6663 -0.9247
-0.9560 -0.2291  0.7429  0.7207  0.8395  0.8807  0.7682 -0.9345 -0.9656 -0.9701
-0.6979 -0.8491 -0.9684  0.5480 -0.4236  0.9153  0.9843 -0.5267 -0.4332 -0.9978
-0.4794 -0.7863 -0.6081  0.9659  0.3993  0.9734 -0.2141  0.8281 -0.9948 -0.8532
[torch.DoubleTensor of size 8x10]



In [35]:
-- before backward
-- clean grad weights
-- gradW came from stepfunction:getParameters(), so its norm should print as 0 here
stepfunction:zeroGradParameters()
print(gradW:norm())

In [36]:
-- backward
-- Walk the steps in reverse. Row t+1 of grad_h is the gradient arriving at the
-- output of step t; the step's backward call returns the gradients for its two
-- inputs, and the first of them becomes row t.

grad_h = torch.zeros(h:size())
grad_h[n + 1] = torch.rand(10)   -- random gradient injected at the last state
for t = n, 1, -1 do
    local step_input = {h[t], x[t]}
    local step_grads = clones[t]:backward(step_input, grad_h[t + 1])
    grad_h[t] = step_grads[1]
    grad_xi = step_grads[2]      -- gradient for the step's second input (not stored per step)
end

In [37]:
-- row 8 is the injected random gradient; rows 7..1 were filled in by the backward loop
print(grad_h)

 0.0485 -0.0076 -0.1426 -0.1481  0.2058 -0.3213  0.0837 -0.1005  0.1704 -0.0184
 0.1845 -0.0528 -0.0360 -0.1501 -0.2151  0.0847  0.2186  0.1488 -0.0878  0.0359
-0.2914  0.2014  0.1656  0.1997  0.3198  0.1600 -0.1599  0.0092  0.1554  0.3294
 0.2680 -0.1166 -0.4199 -0.0612 -0.2679 -0.1568  0.1995 -0.0984  0.0512 -0.2923
 0.0193  0.0260  0.4646 -0.0356 -0.1129  0.4859 -0.0190  0.4001 -0.4585  0.2868
-0.0027  0.0779 -0.3206 -0.1302  1.1846 -1.5339 -0.7444 -0.8610  0.8834 -0.2304
-0.1865  0.0984  0.7338 -0.7272 -1.1351  0.1258  1.1297  0.3235  0.4508  0.2306
 0.8840  0.9886  0.5671  0.0696  0.9945  0.0574  0.1787  0.8679  0.0122  0.8638
[torch.DoubleTensor of size 8x10]



In [38]:
-- norm of gradW after running backward through the clones
print(gradW:norm())

5.4113012786391	
