
PyTorch: Defining new autograd functions
----------------------------------------

A fully-connected ReLU network with one hidden layer and no biases, trained to
predict y from x by minimizing squared Euclidean distance.

This implementation computes the forward pass using operations on PyTorch
Variables, and uses PyTorch autograd to compute gradients.

Here we define our own custom autograd function that computes the ReLU
nonlinearity, supplying both its forward and its backward pass.

Source Link: http://pytorch.org/tutorials/beginner/examples_autograd/two_layer_net_custom_function.html



In [1]:
%matplotlib inline

<h1 style="background-image: linear-gradient( 135deg, #ABDCFF 10%, #0396FF 100%);"> Original Tutorial code </h1>

In [2]:
import torch
from torch.autograd import Variable


class MyReLU(torch.autograd.Function):
    """
    ReLU written as a custom autograd Function.

    A new differentiable operation is defined by subclassing
    torch.autograd.Function and providing static forward and backward
    methods, both of which work directly on Tensors.
    """

    @staticmethod
    def forward(ctx, input):
        """
        Compute the output Tensor for the given input Tensor.

        ctx is the context object shared with backward. The input is stored
        on it through ctx.save_for_backward so that backward can tell which
        entries were negative. The result is the input with every value
        below zero replaced by zero.
        """
        ctx.save_for_backward(input)
        return torch.clamp(input, min=0)

    @staticmethod
    def backward(ctx, grad_output):
        """
        Turn the gradient of the loss with respect to the output into the
        gradient of the loss with respect to the input.

        The incoming gradient is passed through unchanged wherever the saved
        input was not negative, and replaced by zero wherever it was.
        """
        (saved_input,) = ctx.saved_tensors
        was_negative = saved_input < 0
        # masked_fill returns a new Tensor, so grad_output itself is untouched.
        return grad_output.masked_fill(was_negative, 0)


dtype = torch.FloatTensor
# dtype = torch.cuda.FloatTensor # Uncomment this to run on GPU

# N is batch size; D_in is input dimension;
# H is hidden dimension; D_out is output dimension.
N, D_in, H, D_out = 64, 1000, 100, 10

# Create random Tensors to hold input and outputs, and wrap them in Variables.
# requires_grad=False: no gradient is needed with respect to the data.
x = Variable(torch.randn(N, D_in).type(dtype), requires_grad=False)
y = Variable(torch.randn(N, D_out).type(dtype), requires_grad=False)

# Create random Tensors for weights, and wrap them in Variables.
# requires_grad=True: loss.backward() fills in w1.grad and w2.grad.
w1 = Variable(torch.randn(D_in, H).type(dtype), requires_grad=True)
w2 = Variable(torch.randn(H, D_out).type(dtype), requires_grad=True)

# To apply our Function, we use Function.apply method. We alias this as 'relu'.
# The alias is the same on every iteration, so it is bound once, before the loop.
relu = MyReLU.apply

learning_rate = 1e-6
for t in range(500):
    # Forward pass: compute predicted y using operations on Variables; we compute
    # ReLU using our custom autograd operation.
    y_pred = relu(x.mm(w1)).mm(w2)

    # Compute and print loss (sum of squared errors over the whole batch).
    loss = (y_pred - y).pow(2).sum()
    # loss is a scalar; .item() extracts it as a Python number. Indexing it
    # with loss.data[0] raises IndexError on 0-dim tensors in current PyTorch.
    print(t, loss.item())

    # Use autograd to compute the backward pass.
    loss.backward()

    # Update weights using gradient descent. The update goes through .data so
    # that the step itself is not recorded by autograd.
    w1.data -= learning_rate * w1.grad.data
    w2.data -= learning_rate * w2.grad.data

    # Manually zero the gradients after updating weights; backward()
    # accumulates into .grad, so stale values would leak into the next step.
    w1.grad.data.zero_()
    w2.grad.data.zero_()

0 33014076.0
1 30972098.0
2 29985770.0
3 26333562.0
4 19902930.0
5 12834515.0
6 7556588.5
7 4390074.5
8 2710298.75
9 1830699.625
10 1347037.0
11 1055569.625
12 861751.0625
13 721523.4375
14 613950.875
15 527829.1875
16 457215.1875
17 398364.21875
18 348696.21875
19 306496.15625
20 270402.875
21 239380.890625
22 212616.8125
23 189370.125
24 169119.125
25 151422.421875
26 135919.578125
27 122262.109375
28 110216.265625
29 99577.8046875
30 90141.953125
31 81741.2109375
32 74248.6484375
33 67554.71875
34 61567.79296875
35 56200.31640625
36 51373.94921875
37 47025.05859375
38 43100.0234375
39 39546.1875
40 36327.53125
41 33409.03515625
42 30757.60546875
43 28346.85546875
44 26151.0390625
45 24147.720703125
46 22317.681640625
47 20642.767578125
48 19108.57421875
49 17702.66015625
50 16413.119140625
51 15229.0400390625
52 14140.33984375
53 13138.51171875
54 12215.564453125
55 11364.8115234375
56 10579.89453125
57 9854.7900390625
58 9184.501953125
59 8564.5986328125
60 7990.62646484375
61 7459

460 0.00021031788492109627
461 0.0002061791019514203
462 0.00020195210527163
463 0.00019812598475255072
464 0.00019394831906538457
465 0.00019033234275411814
466 0.00018701166845858097
467 0.0001830069231800735
468 0.0001798015582608059
469 0.00017638066492509097
470 0.00017323627253063023
471 0.00016992753080558032
472 0.00016698843683116138
473 0.00016396869614254683
474 0.0001605374418431893
475 0.00015738226647954434
476 0.0001546780113130808
477 0.00015191755665000528
478 0.00014942808775231242
479 0.00014680263120681047
480 0.00014404243847820908
481 0.0001417940075043589
482 0.00013912047143094242
483 0.00013693823711946607
484 0.00013444662909023464
485 0.00013254550867713988
486 0.0001297335111303255
487 0.00012766171130351722
488 0.00012503366451710463
489 0.00012348050950095057
490 0.00012129628157708794
491 0.00011923334386665374
492 0.00011711245315382257
493 0.00011535739758983254
494 0.00011370572610758245
495 0.00011150680074933916
496 0.00010982639651047066
497 0.00010