In [1]:
%matplotlib inline


PyTorch: nn
-----------

A fully-connected ReLU network with one hidden layer, trained to predict y from x
by minimizing squared Euclidean distance.

This implementation uses the nn package from PyTorch to build the network.
PyTorch autograd makes it easy to define computational graphs and take gradients,
but raw autograd can be a bit too low-level for defining complex neural networks;
this is where the nn package can help. The nn package defines a set of Modules,
each of which you can think of as a neural network layer that produces output
from input and may have some trainable weights.



In [2]:
import torch

# Train a one-hidden-layer fully-connected ReLU network on random data using
# the torch.nn package, updating the weights with hand-written gradient descent.

# Seed PyTorch's global random number generator before anything random is
# drawn, so the random data below (and therefore the printed losses) are the
# same on every Restart Kernel -> Run All.
torch.manual_seed(0)

# N is batch size; D_in is input dimension;
# H is hidden dimension; D_out is output dimension.
N, D_in, H, D_out = 64, 1000, 100, 10

# Create random Tensors to hold inputs and outputs
x = torch.randn(N, D_in)
y = torch.randn(N, D_out)

# Use the nn package to define our model as a sequence of layers. nn.Sequential
# is a Module which contains other Modules, and applies them in sequence to
# produce its output. Each Linear Module computes output from input using a
# linear function, and holds internal Tensors for its weight and bias.
model = torch.nn.Sequential(
    torch.nn.Linear(D_in, H),
    torch.nn.ReLU(),
    torch.nn.Linear(H, D_out),
)

# The nn package also contains definitions of popular loss functions. Note that
# MSELoss with reduction='sum' adds up the squared errors over all elements
# instead of averaging them, so the value printed below is the *sum* of squared
# errors (the squared Euclidean distance between y_pred and y), not the mean.
loss_fn = torch.nn.MSELoss(reduction='sum')

learning_rate = 1e-4
for t in range(500):
    # Forward pass: compute predicted y by passing x to the model. Module objects
    # override the __call__ operator so you can call them like functions. When
    # doing so you pass a Tensor of input data to the Module and it produces
    # a Tensor of output data.
    y_pred = model(x)

    # Compute and print loss. We pass Tensors containing the predicted and true
    # values of y, and the loss function returns a Tensor containing the
    # loss; .item() extracts it as a plain Python number for printing.
    loss = loss_fn(y_pred, y)
    print(t, loss.item())

    # Zero the gradients before running the backward pass, so that gradients
    # from the previous iteration are not added to the new ones.
    model.zero_grad()

    # Backward pass: compute gradient of the loss with respect to all the learnable
    # parameters of the model. Internally, the parameters of each Module are stored
    # in Tensors with requires_grad=True, so this call will compute gradients for
    # all learnable parameters in the model.
    loss.backward()

    # Update the weights using gradient descent. Each parameter is a Tensor whose
    # gradient is available as param.grad. The update is wrapped in
    # torch.no_grad() because the in-place weight change itself must not be
    # recorded by autograd.
    with torch.no_grad():
        for param in model.parameters():
            param -= learning_rate * param.grad

0 661.4231567382812
1 605.1870727539062
2 557.556396484375
3 516.494873046875
4 480.4280090332031
5 448.2779846191406
6 419.43023681640625
7 393.6768798828125
8 370.36187744140625
9 348.9566650390625
10 329.0791320800781
11 310.60247802734375
12 293.3876953125
13 277.13568115234375
14 261.831298828125
15 247.3898162841797
16 233.70269775390625
17 220.748046875
18 208.47169494628906
19 196.81666564941406
20 185.78880310058594
21 175.31869506835938
22 165.38424682617188
23 155.98497009277344
24 147.0850372314453
25 138.6507110595703
26 130.6742706298828
27 123.11933898925781
28 115.95930480957031
29 109.19719696044922
30 102.81108856201172
31 96.77349853515625
32 91.07728576660156
33 85.72389221191406
34 80.70289611816406
35 75.98494720458984
36 71.5309829711914
37 67.34344482421875
38 63.4041862487793
39 59.69305419921875
40 56.20387268066406
41 52.92414855957031
42 49.83964920043945
43 46.94134521484375
44 44.219181060791016
45 41.656192779541016
46 39.24619674682617
47 36.980037689208