# Defining New Autograd Functions

In [4]:
%%time
import torch
from torch.autograd import Variable

class MyRelu(torch.autograd.Function):
    """
    ReLU implemented as a custom autograd Function.

    ``forward`` clamps negative entries to zero. ``backward`` lets the
    upstream gradient through wherever the saved input was not negative
    and zeroes it everywhere else.
    """

    @staticmethod
    def forward(ctx, input):
        """Return ``input`` with every negative entry replaced by zero.

        ``ctx`` is the per-call context object; the input is saved on it so
        that ``backward`` can see which entries were negative.
        """
        ctx.save_for_backward(input)
        return torch.clamp(input, min=0)

    @staticmethod
    def backward(ctx, grad_output):
        """Return the gradient with respect to ``input``.

        ``grad_output`` is the gradient flowing back from the output of
        ``forward``; entries whose saved input was negative are zeroed.
        """
        (saved_input,) = ctx.saved_tensors
        was_negative = saved_input < 0
        # masked_fill builds a new tensor, so grad_output itself is not
        # modified.
        return grad_output.masked_fill(was_negative, 0)
    

# Two-layer network trained by plain gradient descent on random data,
# using MyRelu as the hidden non-linearity.

# Layer widths: hidden units, input features, output features.
H_size, In_size, Out_size = 4, 5, 2

# Weights are leaf tensors tracked by autograd; creating them with
# requires_grad=True replaces the deprecated Variable wrapper.
W1 = torch.rand(H_size, In_size, requires_grad=True)
W2 = torch.rand(Out_size, H_size, requires_grad=True)

M = 1000  # no of training examples

# Inputs and targets are laid out one example per column; they are plain
# tensors because no gradient is needed with respect to them.
x_in = torch.rand(In_size, M)
y = torch.rand(Out_size, M)

learning_rate = 1e-6

# A custom autograd Function is called through its .apply attribute; the
# binding does not change between iterations, so do it once.
relu = MyRelu.apply

for t in range(1000):
    # forward pass
    out = W2.mm(relu(W1.mm(x_in)))

    # calculate loss (sum of squared errors over all outputs and examples)
    loss = (out - y).pow(2).sum()

    if t % 100 == 0:
        # loss is a 0-dim tensor: indexing it with [0] raises on current
        # PyTorch, so read the Python scalar with .item() instead.
        print("loss {0} at run {1}".format(loss.item(), t))

    # backprop: fills W1.grad and W2.grad
    loss.backward()

    # The update and the gradient reset must not be recorded in the graph,
    # so they run under no_grad rather than reaching through .data.
    with torch.no_grad():
        # param update
        W1 -= learning_rate * W1.grad
        W2 -= learning_rate * W2.grad

        # backward() accumulates into .grad, so clear it for the next run
        W1.grad.zero_()
        W2.grad.zero_()
    # repeat

loss 8228.85644531 at run 0
loss 863.779418945 at run 100
loss 354.865844727 at run 200
loss 277.007415771 at run 300
loss 260.123199463 at run 400
loss 254.526748657 at run 500
loss 251.433700562 at run 600
loss 249.037063599 at run 700
loss 246.916259766 at run 800
loss 244.953201294 at run 900
CPU times: user 232 ms, sys: 8 ms, total: 240 ms
Wall time: 233 ms
