In [1]:
import random
import torch
from torch.autograd import Function
from torch.autograd import Variable
from scipy.signal import convolve2d, correlate2d
from torch.nn.modules.module import Module 
from torch.nn.parameter import Parameter


In [3]:
class DynamicNet(torch.nn.Module):
  def __init__(self, D_in, H, D_out):
    """
    In the constructor we construct three nn.Linear instances that we will use
    in the forward pass.
    """
    super(DynamicNet, self).__init__()
    self.input_linear = torch.nn.Linear(D_in, H)
    self.middle_linear = torch.nn.Linear(H, H)
    self.output_linear = torch.nn.Linear(H, D_out)

  def forward(self, x):
    """
    For the forward pass of the model, we randomly choose either 0, 1, 2, or 3
    and reuse the middle_linear Module that many times to compute hidden layer
    representations.

    Since each forward pass builds a dynamic computation graph, we can use normal
    Python control-flow operators like loops or conditional statements when
    defining the forward pass of the model.

    Here we also see that it is perfectly safe to reuse the same Module many
    times when defining a computational graph. This is a big improvement from Lua
    Torch, where each Module could be used only once.
    """
    h_relu = self.input_linear(x).clamp(min=0)
    for _ in range(random.randint(0, 3)):
      h_relu = self.middle_linear(h_relu).clamp(min=0)
      print(_)
    y_pred = self.output_linear(h_relu)
    return y_pred


# N is batch size; D_in is input dimension;
# H is hidden dimension; D_out is output dimension.
N, D_in, H, D_out = 64, 1000, 100, 10

# Create random Tensors to hold inputs and outputs, and wrap them in Variables
x = Variable(torch.randn(N, D_in))
y = Variable(torch.randn(N, D_out), requires_grad=False)

# Construct our model by instantiating the class defined above
model = DynamicNet(D_in, H, D_out)

# Construct our loss function and an Optimizer. Training this strange model with
# vanilla stochastic gradient descent is tough, so we use momentum
criterion = torch.nn.MSELoss(size_average=False)
optimizer = torch.optim.SGD(model.parameters(), lr=1e-4, momentum=0.9)
for t in range(50):
  # Forward pass: Compute predicted y by passing x to the model
  y_pred = model(x)

  # Compute and print loss
  loss = criterion(y_pred, y)
  print(t, loss.data[0])

  # Zero gradients, perform a backward pass, and update the weights.
  optimizer.zero_grad()
  loss.backward()
  optimizer.step()

0 646.1837768554688
0
1
2
1 624.7628784179688
2 563.4927978515625
0
1
3 626.2626953125
0
4 631.3937377929688
0
1
5 623.3583984375
0
1
6 620.7235107421875
0
7 610.9720458984375
0
1
8 613.8134765625
0
1
2
9 620.3606567382812
0
1
10 606.0026245117188
0
1
11 601.18603515625
0
1
12 595.4243774414062
0
13 561.39794921875
0
1
14 582.379150390625
15 283.6065673828125
16 264.99932861328125
0
1
17 560.0285034179688
0
1
18 550.5244750976562
0
1
2
19 600.944091796875
0
20 479.3553161621094
0
21 458.43206787109375
0
22 428.9304504394531
0
1
2
23 571.8104858398438
0
24 359.0577697753906
25 132.24212646484375
26 123.10090637207031
0
1
2
27 513.3256225585938
28 97.43161010742188
29 83.44444274902344
30 67.561767578125
31 51.845882415771484
0
32 194.72825622558594
0
1
2
33 432.345703125
0
34 160.26953125
0
1
2
35 387.0917053222656
0
1
2
36 358.75018310546875
37 58.29304504394531
0
1
2
38 295.6631774902344
0
39 123.5028076171875
0
1
40 183.4274139404297
41 74.0772933959961
0
42 105.82907104492188
0
1
43

In [3]:
class ScipyConv2dFunction(Function):

    def forward(self, input, filter):
        result = correlate2d(input.numpy(), filter.numpy(), mode='valid')
        self.save_for_backward(input, filter)
        return torch.FloatTensor(result)

    def backward(self, grad_output):
        input, filter = self.saved_tensors
        grad_input = convolve2d(grad_output.numpy(), filter.t().numpy(), mode='full')
        grad_filter = convolve2d(input.numpy(), grad_output.numpy(), mode='valid')
        return torch.FloatTensor(grad_input), torch.FloatTensor(grad_filter)


class ScipyConv2d(Module):

    def __init__(self, kh, kw):
        super(ScipyConv2d, self).__init__()
        self.filter = Parameter(torch.randn(kh, kw))

    def forward(self, input):
        return ScipyConv2dFunction()(input, self.filter)

In [4]:
module = ScipyConv2d(3, 3)
print(list(module.parameters()))
input = Variable(torch.randn(10, 10), requires_grad=True)
output = module(input)
print(output)
output.backward(torch.randn(8, 8))
print(input.grad)

[Parameter containing:
-0.1020  1.5605 -1.0686
-0.2980  2.5237  1.7542
-0.4122 -1.3402  0.7986
[torch.FloatTensor of size 3x3]
]
Variable containing:
 -4.6548  -4.6930   6.2275   3.7410   0.9282   8.3057  -7.3934   0.9007
  2.3386  -0.6485  -3.1071   6.0794   0.9995  -0.5390   3.7227  -6.3905
 -5.2148   2.6129   0.3813   3.4625   6.2553   0.2443   1.3901   3.9625
  6.3458   3.4977   3.0102  -1.8656  -1.8380  -4.3040  -6.7021   2.1303
 -3.1746   4.6386   2.4804   3.4398   8.2589  -0.8090  -2.3347   0.3112
-12.5187  -1.3936  -2.9841  -5.7624   4.4910   4.3556   2.6807  -3.4560
 -2.0804  -1.7245  -3.4513  -3.2356   0.5486   1.5356   5.3615   3.6591
  0.6862   8.9628  -1.9815   0.8887  -0.2660  -4.4332  -3.6838  -1.5195
[torch.FloatTensor of size 8x8]

Variable containing:

Columns 0 to 7 
 -0.0151  -0.1755  -0.3303  -0.3054   0.1388  -0.4923  -0.0809  -0.0476
  0.3176   2.6824   1.8807  -2.3290   4.9381  -0.4209  -0.1795  -1.0871
 -1.4960  -3.9210   2.2281  -5.6227  -0.4100   2.0443   1.2