
PyTorch: Custom nn Modules
--------------------------

A fully-connected ReLU network with one hidden layer, trained to predict y from x
by minimizing squared Euclidean distance.

<strong style="color:red">This implementation defines the model as a custom Module subclass.</strong>
Whenever you want a model more complex than a simple sequence of existing Modules, you will need to define your model this way.



In [1]:
%matplotlib inline

<h1 style="background-image: linear-gradient( 135deg, #ABDCFF 10%, #0396FF 100%);"> Original Tutorial code</h1>

In [2]:
import torch
from torch.autograd import Variable


class TwoLayerNet(torch.nn.Module):
    """Fully-connected network: affine layer, ReLU (via clamp), affine layer."""

    def __init__(self, D_in, H, D_out):
        """
        Create the two nn.Linear layers and store them as attributes.

        The first layer maps D_in features to H hidden units; the second maps
        those H hidden units to D_out outputs.
        """
        super().__init__()
        self.linear1 = torch.nn.Linear(D_in, H)
        self.linear2 = torch.nn.Linear(H, D_out)

    def forward(self, x):
        """
        Run the input through both layers and return the prediction.

        The hidden pre-activation is clamped at zero from below, which acts as
        the ReLU non-linearity between the two linear layers.
        """
        hidden = self.linear1(x)
        activated = hidden.clamp(min=0)
        return self.linear2(activated)


# N is batch size; D_in is input dimension;
# H is hidden dimension; D_out is output dimension.
N, D_in, H, D_out = 64, 1000, 100, 10

# Create random Tensors to hold inputs and targets. Plain tensors take part in
# autograd directly (the Variable wrapper is deprecated) and do not require
# gradients unless asked to, so no explicit requires_grad=False is needed.
x = torch.randn(N, D_in)
y = torch.randn(N, D_out)

# Construct our model by instantiating the class defined above
model = TwoLayerNet(D_in, H, D_out)

# Construct our loss function and an Optimizer. The call to model.parameters()
# in the SGD constructor will contain the learnable parameters of the two
# nn.Linear modules which are members of the model.
# reduction='sum' is the supported spelling of the deprecated
# size_average=False: the squared errors are summed rather than averaged.
criterion = torch.nn.MSELoss(reduction='sum')
optimizer = torch.optim.SGD(model.parameters(), lr=1e-4)
for t in range(500):
    # Forward pass: Compute predicted y by passing x to the model
    y_pred = model(x)

    # Compute and print loss. The loss is a 0-dim tensor, so it cannot be
    # indexed with [0]; .item() extracts it as a Python float.
    loss = criterion(y_pred, y)
    print(t, loss.item())

    # Zero gradients, perform a backward pass, and update the weights.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

0 709.3311767578125
1 654.7893676757812
2 608.1807250976562
3 567.7784423828125
4 531.8323364257812
5 499.66741943359375
6 470.4566345214844
7 443.8223571777344
8 419.05169677734375
9 396.0458679199219
10 374.43109130859375
11 354.20361328125
12 335.1230773925781
13 317.0469970703125
14 299.84332275390625
15 283.5288391113281
16 268.0382995605469
17 253.37509155273438
18 239.4454345703125
19 226.2010498046875
20 213.5829620361328
21 201.6224365234375
22 190.28240966796875
23 179.51385498046875
24 169.25572204589844
25 159.55300903320312
26 150.33572387695312
27 141.61219787597656
28 133.38975524902344
29 125.64966583251953
30 118.36223602294922
31 111.47442626953125
32 104.9749984741211
33 98.85650634765625
34 93.07905578613281
35 87.63988494873047
36 82.53087615966797
37 77.69802856445312
38 73.15835571289062
39 68.889404296875
40 64.88677215576172
41 61.11894989013672
42 57.58547592163086
43 54.261871337890625
44 51.14573669433594
45 48.219093322753906
46 45.471214294433594
47 42.886

476 8.11725840321742e-06
477 7.885952072683722e-06
478 7.65842469263589e-06
479 7.440140507242177e-06
480 7.225977242342196e-06
481 7.0188575591600966e-06
482 6.818445399403572e-06
483 6.622934051847551e-06
484 6.4338132688135374e-06
485 6.249259513424477e-06
486 6.06979165240773e-06
487 5.896843504160643e-06
488 5.727140433009481e-06
489 5.5636855904594995e-06
490 5.403326667874353e-06
491 5.249598871159833e-06
492 5.09859864905593e-06
493 4.952773451805115e-06
494 4.8109823183040135e-06
495 4.673816874856129e-06
496 4.540507688943762e-06
497 4.4102980609750375e-06
498 4.283742782718036e-06
499 4.1610874177422374e-06
