# Autograd 
## PyTorch: Tensors and autograd 

To automate the forward and backward propagation passes in neural networks, we can use automatic differentiation. The autograd package provides PyTorch with this functionality. The forward pass defines a computational graph: nodes in the graph are Tensors, and edges are functions that produce output Tensors from input Tensors. Backpropagating through this graph then allows you to easily compute gradients.



In [3]:
import torch 
dtype = torch.float
device = torch.device("cpu")

# Problem sizes: number of samples per batch and the width of each layer.
batch_size = 64
input_dimension = 1000
hidden_dimension = 100
output_dimension = 10

# Random input and target tensors. They are created without requires_grad
# (which defaults to False), so autograd does not compute gradients with
# respect to them during the backward pass.
x = torch.randn(batch_size, input_dimension, device=device, dtype=dtype)
y = torch.randn(batch_size, output_dimension, device=device, dtype=dtype)

# Random initial weights for the two layers. requires_grad=True asks autograd
# to compute gradients with respect to these tensors during the backward pass.
weight1 = torch.randn(
    input_dimension, hidden_dimension,
    device=device, dtype=dtype, requires_grad=True,
)
weight2 = torch.randn(
    hidden_dimension, output_dimension,
    device=device, dtype=dtype, requires_grad=True,
)

learning_rate = 1e-6
for n in range(500):
    # Forward pass: linear layer -> ReLU (clamp at zero) -> linear layer.
    # The backward pass is not written by hand; autograd derives it from the
    # graph recorded while these operations run.
    hidden = torch.mm(x, weight1)
    hidden_relu = torch.clamp(hidden, min=0)
    y_pred = torch.mm(hidden_relu, weight2)

    # Sum-of-squares loss. The full reduction yields a scalar (0-dimensional)
    # Tensor; loss.item() extracts the Python number it holds.
    residual = y_pred - y
    loss = torch.sum(torch.pow(residual, 2))
    print(n, loss.item())

    # Backpropagate: afterwards weight1.grad and weight2.grad hold the
    # gradient of the loss with respect to weight1 and weight2 respectively.
    loss.backward()

    # Plain gradient-descent step, done by hand. The update runs under
    # torch.no_grad() because the weights have requires_grad=True but the
    # update itself must not be tracked by autograd. (torch.optim.SGD is the
    # library-provided way to perform this kind of step.)
    with torch.no_grad():
        for weight in (weight1, weight2):
            weight.sub_(learning_rate * weight.grad)
            # Clear the gradient after the update: backward() accumulates
            # into .grad, so it must be zeroed before the next iteration.
            weight.grad.zero_()


0 52343052.0
1 62104132.0
2 64333412.0
3 44568704.0
4 18772802.0
5 6148066.5
6 2702086.0
7 1763478.625
8 1370367.0
9 1123478.25
10 939798.0625
11 795082.3125
12 678384.625
13 582988.4375
14 504195.28125
15 438517.78125
16 383282.0
17 336574.84375
18 296847.53125
19 262806.0625
20 233469.71875
21 208054.234375
22 185951.359375
23 166641.75
24 149727.609375
25 134852.6875
26 121731.015625
27 110149.6640625
28 99894.2890625
29 90755.5859375
30 82590.453125
31 75276.125
32 68714.3046875
33 62813.80078125
34 57497.40234375
35 52699.0703125
36 48358.3125
37 44426.83203125
38 40858.5390625
39 37616.5546875
40 34672.57421875
41 31993.349609375
42 29547.08984375
43 27312.873046875
44 25273.064453125
45 23406.07421875
46 21694.986328125
47 20124.40625
48 18681.275390625
49 17354.759765625
50 16134.103515625
51 15011.765625
52 13979.0087890625
53 13025.23046875
54 12143.1845703125
55 11327.1083984375
56 10571.8056640625
57 9872.275390625
58 9223.591796875
59 8621.6806640625
60 8062.98095703125
61

389 0.001833415706641972
390 0.0017718354938551784
391 0.001712558325380087
392 0.0016538570635020733
393 0.0015992210246622562
394 0.0015480640577152371
395 0.0014974677469581366
396 0.0014489905443042517
397 0.001401303568854928
398 0.0013557055499404669
399 0.0013124904362484813
400 0.0012692618183791637
401 0.0012288331054151058
402 0.0011907286243513227
403 0.0011523641878739
404 0.0011179442517459393
405 0.001082771341316402
406 0.0010500844800844789
407 0.0010182132245972753
408 0.0009867600165307522
409 0.0009563190978951752
410 0.0009297666256316006
411 0.0009021004079841077
412 0.0008751965360715985
413 0.0008483387064188719
414 0.0008225315250456333
415 0.0007995629566721618
416 0.0007764758192934096
417 0.0007547899731434882
418 0.0007324991747736931
419 0.0007134071784093976
420 0.0006926545174792409
421 0.0006724224658682942
422 0.0006537787849083543
423 0.0006364690489135683
424 0.0006198143237270415
425 0.0006023072637617588
426 0.0005864078411832452
427 0.0005708509124