
PyTorch: nn
-----------

A fully-connected ReLU network with one hidden layer, trained to predict y from x
by minimizing squared Euclidean distance.

This implementation uses the nn package from PyTorch to build the network.
PyTorch autograd makes it easy to define computational graphs and take gradients,
<p style="color:red">but raw autograd can be a bit too low-level for defining complex neural networks;
this is where the nn package can help.</p> The nn package defines a set of Modules,
each of which you can think of as <strong style="color:red">a neural network layer that produces output from
input and may have some trainable weights.</strong>

Source Link: http://pytorch.org/tutorials/beginner/examples_nn/two_layer_net_nn.html


In [1]:
%matplotlib inline

<h1 style="background-image: linear-gradient( 135deg, #ABDCFF 10%, #0396FF 100%);"> Original Tutorial Code</h1>

In [2]:
import torch
from torch.autograd import Variable

# N is batch size; D_in is input dimension;
# H is hidden dimension; D_out is output dimension.
N, D_in, H, D_out = 64, 1000, 100, 10

# Create random Tensors to hold inputs and outputs. Since PyTorch 0.4, Tensors
# carry autograd state themselves, so wrapping them in Variable is a no-op and
# is omitted here. Plain tensors default to requires_grad=False, which is what
# we want for both the inputs and the targets.
x = torch.randn(N, D_in)
y = torch.randn(N, D_out)

# Use the nn package to define our model as a sequence of layers. nn.Sequential
# is a Module which contains other Modules, and applies them in sequence to
# produce its output. Each Linear Module computes output from input using a
# linear function, and holds internal Tensors for its weight and bias.
model = torch.nn.Sequential(
    torch.nn.Linear(D_in, H),
    torch.nn.ReLU(),
    torch.nn.Linear(H, D_out),
)

# The nn package also contains definitions of popular loss functions; in this
# case we will use Mean Squared Error (MSE) as our loss function.
# reduction='sum' replaces the deprecated size_average=False: the squared
# errors are summed rather than averaged, matching the "squared Euclidean
# distance" objective.
loss_fn = torch.nn.MSELoss(reduction='sum')

learning_rate = 1e-4
for t in range(500):
    # Forward pass: compute predicted y by passing x to the model. Module objects
    # override the __call__ operator so you can call them like functions. When
    # doing so you pass a Tensor of input data to the Module and it produces
    # a Tensor of output data.
    y_pred = model(x)

    # Compute and print loss. We pass Tensors containing the predicted and true
    # values of y, and the loss function returns a 0-dim Tensor containing the
    # loss. A 0-dim Tensor cannot be indexed (loss.data[0] raises IndexError on
    # current PyTorch), so use .item() to extract the Python float.
    loss = loss_fn(y_pred, y)
    print(t, loss.item())

    # Zero the gradients before running the backward pass, because backward()
    # accumulates into .grad rather than overwriting it.
    model.zero_grad()

    # Backward pass: compute gradient of the loss with respect to all the learnable
    # parameters of the model. Internally, the parameters of each Module are stored
    # in Tensors with requires_grad=True, so this call will compute gradients for
    # all learnable parameters in the model.
    loss.backward()

    # Update the weights using gradient descent. The update itself must not be
    # recorded by autograd, so it is done in-place under torch.no_grad() instead
    # of going through the legacy .data attribute.
    with torch.no_grad():
        for param in model.parameters():
            param -= learning_rate * param.grad

0 732.9182739257812
1 680.7958374023438
2 635.081787109375
3 594.8429565429688
4 558.852783203125
5 526.3436889648438
6 496.7332458496094
7 469.5028381347656
8 444.3468933105469
9 421.0668029785156
10 399.3390197753906
11 378.8065185546875
12 359.3966064453125
13 340.9505920410156
14 323.32647705078125
15 306.4247131347656
16 290.30389404296875
17 274.99212646484375
18 260.3421630859375
19 246.3540802001953
20 233.0052032470703
21 220.28074645996094
22 208.1381378173828
23 196.60569763183594
24 185.6157989501953
25 175.1521759033203
26 165.13548278808594
27 155.63172912597656
28 146.5999298095703
29 137.99583435058594
30 129.85894775390625
31 122.16447448730469
32 114.89279174804688
33 108.02578735351562
34 101.55492401123047
35 95.45748138427734
36 89.71915435791016
37 84.30313873291016
38 79.21401977539062
39 74.43704223632812
40 69.9603042602539
41 65.76519775390625
42 61.8353271484375
43 58.152671813964844
44 54.695621490478516
45 51.45276641845703
46 48.41431427001953
47 45.570137

495 5.0136750360252336e-06
496 4.8696601879782975e-06
497 4.729776719614165e-06
498 4.594629899656866e-06
499 4.461036496650195e-06


<h1 style="background-image: linear-gradient( 135deg, #ABDCFF 10%, #0396FF 100%);"> Version Without Detailed # Annotations</h1>

In [4]:
import torch
from torch.autograd import Variable

# N is batch size; D_in is input dimension;
# H is hidden dimension; D_out is output dimension.
N, D_in, H, D_out = 64, 1000, 100, 10

# Create random Tensors to hold inputs and outputs (no Variable wrapper is
# needed on PyTorch >= 0.4; requires_grad defaults to False).
x = torch.randn(N, D_in)
y = torch.randn(N, D_out)

# Use the nn package to define our model as a sequence of layers.
model = torch.nn.Sequential(
    torch.nn.Linear(D_in, H),
    torch.nn.ReLU(),
    torch.nn.Linear(H, D_out),
)

# Summed squared error; reduction='sum' replaces the deprecated size_average=False.
loss_fn = torch.nn.MSELoss(reduction='sum')

learning_rate = 1e-4
for t in range(500):
    # Forward pass: compute predicted y by passing x to the model.
    y_pred = model(x)

    # Compute and print loss. The loss is a 0-dim Tensor, so read it with
    # .item(); indexing it with [0] raises IndexError on current PyTorch.
    loss = loss_fn(y_pred, y)
    print(t, loss.item())

    # Zero the gradients before running the backward pass.
    model.zero_grad()

    # Backward pass
    loss.backward()

    # Update the weights using gradient descent, outside of autograd tracking.
    with torch.no_grad():
        for param in model.parameters():
            param -= learning_rate * param.grad

0 643.7648315429688
1 594.0890502929688
2 551.3575439453125
3 513.9000244140625
4 480.87255859375
5 451.5255126953125
6 424.8398132324219
7 400.4261169433594
8 377.9382019042969
9 357.271728515625
10 338.1226501464844
11 320.20574951171875
12 303.2518310546875
13 287.2070007324219
14 271.9260559082031
15 257.4627685546875
16 243.72425842285156
17 230.61788940429688
18 218.16448974609375
19 206.342041015625
20 195.05458068847656
21 184.2813720703125
22 174.0470733642578
23 164.29884338378906
24 155.0045928955078
25 146.16156005859375
26 137.74839782714844
27 129.75119018554688
28 122.14928436279297
29 114.9527816772461
30 108.11542510986328
31 101.63982391357422
32 95.50341033935547
33 89.70565032958984
34 84.22037506103516
35 79.05567169189453
36 74.1976547241211
37 69.62667083740234
38 65.328369140625
39 61.296504974365234
40 57.507511138916016
41 53.94853591918945
42 50.605770111083984
43 47.464569091796875
44 44.520782470703125
45 41.767940521240234
46 39.18711853027344
47 36.768417

415 3.602004289859906e-05
416 3.5039814974879846e-05
417 3.4086002415278926e-05
418 3.3158474252559245e-05
419 3.2257161365123466e-05
420 3.1381292501464486e-05
421 3.053232649108395e-05
422 2.9703087420784868e-05
423 2.8898906748509035e-05
424 2.8116905014030635e-05
425 2.735624548222404e-05
426 2.6618710762704723e-05
427 2.589946052466985e-05
428 2.5200133677572012e-05
429 2.4521566956536844e-05
430 2.3861784939072095e-05
431 2.3221768060466275e-05
432 2.259662505821325e-05
433 2.1989380911691114e-05
434 2.139976277248934e-05
435 2.0825544197577983e-05
436 2.026962101808749e-05
437 1.972840618691407e-05
438 1.9201726900064386e-05
439 1.868485014711041e-05
440 1.8187707610195503e-05
441 1.77027250174433e-05
442 1.7231966921826825e-05
443 1.677339787420351e-05
444 1.6325964679708704e-05
445 1.589220300957095e-05
446 1.5469682693947107e-05
447 1.505804084445117e-05
448 1.4659090084023774e-05
449 1.4270526662585326e-05
450 1.3891894923290238e-05
451 1.3524968380806968e-05
452 1.316844645