In [1]:
import torch
import torch.nn as nn
from torch.autograd import Variable

In [2]:
# Leaf tensor that autograd tracks; its gradient is read back from x.grad.
x = torch.ones(2, 2, requires_grad=True)

# Forward pass: out = mean(3 * (x + 2)^2).
y = x.add(2)
z = y.mul(y).mul(3)
out = torch.mean(z)

# Seed the backward pass with d(out)/d(out) = 1 and keep the graph so that
# backward() can be called on it again.
out.backward(gradient=torch.ones_like(out), retain_graph=True)

In [3]:
# Show the input, the intermediate tensors, the scalar result and the gradient
# accumulated on x, one per line.
print('\n'.join(str(item) for item in (x, y, z, out, x.grad)))

tensor([[ 1.,  1.],
        [ 1.,  1.]])
tensor([[ 3.,  3.],
        [ 3.,  3.]])
tensor([[ 27.,  27.],
        [ 27.,  27.]])
tensor(27.)
tensor([[ 4.5000,  4.5000],
        [ 4.5000,  4.5000]])


In [4]:
import torch
from torch.autograd import Variable

In [8]:
# Single training example for the toy model y = x * w1 * w2.  The input and the
# target are constants, so neither of them tracks gradients.
x = torch.tensor([3.0], dtype=torch.float)
y_true = torch.tensor([250.0], dtype=torch.float)

# Trainable weights.  A plain tensor created with requires_grad=True is tracked
# by autograd directly, so the deprecated Variable wrapper is not needed.
w1 = torch.tensor([200.0], dtype=torch.float, requires_grad=True)
w2 = torch.tensor([300.0], dtype=torch.float, requires_grad=True)

# Step size for the hand-written gradient-descent updates.  It has to be tiny
# because the initial gradients are on the order of 1e8.
learning_rate = 3e-7

In [6]:
# Target next to the prediction of the untrained model, both shown as integers.
print(
    'Summary:\n y_true = {}\n y_pred = {}'.format(
        int(y_true),
        int(x * w1 * w2.data),
    )
)

Summary:
 y_true = 250
 y_pred = 180000


`y = x * w1 * w2`  
`loss = (y - y_true)^2`  
`dLoss/dy = 2 * (y - y_true)`

`dLoss/d(w1) = dLoss/dy * dy / d(w1)`  
`dLoss/d(w1) = 2 * (y - y_true) * dy / d(w1)`  
`dLoss/d(w1) = 2 * (y - y_true) * x * w2`  
`dLoss/d(w1) |x=3, w2 = 300| = 2 * (y - y_true) * 3 * 300`

`dLoss/d(w2) = dLoss/dy * dy / d(w2)`  
`dLoss/d(w2) = 2 * (y - y_true) * dy / d(w2)`  
`dLoss/d(w2) = 2 * (y - y_true) * x * w1`  
`dLoss/d(w2) |x=3, w1 = 200| = 2 * (y - y_true) * 3 * 200`  

`dy/d(w1) = x * w2`  
`dy/d(w2) = x * w1`

In [7]:
# Two steps of hand-written gradient descent on loss = (y - y_true)^2, printing
# the analytically derived gradients next to the ones autograd computes.
for i in range(2):
    # Forward pass.
    y = x * w1 * w2

    # dLoss/dy is fed to backward() as the upstream gradient, so w.grad ends up
    # holding dLoss/dw.  The graph is rebuilt on every iteration, so it does
    # not need to be retained.
    dLoss = 2 * (y - y_true)
    y.backward(dLoss.detach())

    # The manual values use x itself rather than a hard-coded copy of its
    # value, so they stay correct if the input is changed.
    print('''Manual:
       dy/d(w1) = {}
       dLoss/d(w1) = {}
       dy/d(w2) = {}
       dLoss/d(w2) = {}
      '''.format(float(x * w2), float(dLoss * x * w2),
                 float(x * w1), float(dLoss * x * w1)))
    print('Autograd:\n\tdLoss/d(w1) = {}\n\tdLoss/d(w2) = {}\n'.format(float(w1.grad), float(w2.grad)))

    # Gradient-descent step.  The update must not be recorded by autograd,
    # hence no_grad() instead of reaching into `.data`.
    with torch.no_grad():
        delta_w1 = learning_rate * w1.grad
        delta_w2 = learning_rate * w2.grad
        w1 -= delta_w1
        w2 -= delta_w2
        # Prediction with the freshly updated weights.
        y_pred = x * w1 * w2

    # dLoss is the value from before the update; y_pred is from after it.
    print('Summary:\n dLoss = {}\n delta W1 = {}\n delta W2 = {}\n y_true = {}\n y_pred = {}\n'.format(
        float(dLoss), float(delta_w1), float(delta_w2), float(y_true), float(y_pred)))

    # backward() accumulates into .grad, so clear it before the next step.
    w1.grad.zero_()
    w2.grad.zero_()

Manual:
       dy/d(w1) = 900.0
       dLoss/d(w1) = 323550016.0
       dy/d(w2) = 600.0
       dLoss/d(w2) = 215700000.0
      
Autograd:
	dLoss/d(w1) = 323550016.0
	dLoss/d(w2) = 215700000.0

Summary:
 dLoss = 359500.0
 delta W1 = 97.06501007080078
 delta W2 = 64.70999908447266
 y_true = 250.0
 y_pred = 72658.71875

Manual:
       dy/d(w1) = 705.8699951171875
       dLoss/d(w1) = 102222288.0
       dy/d(w2) = 308.8049621582031
       dLoss/d(w2) = 44720344.0
      
Autograd:
	dLoss/d(w1) = 102222288.0
	dLoss/d(w2) = 44720344.0

Summary:
 dLoss = 144817.4375
 delta W1 = 30.66668701171875
 delta W2 = 13.41610336303711
 y_true = 250.0
 y_pred = 48103.34765625



In [9]:
# Same optimisation as the manual loop, but with nn.MSELoss computing the loss
# and autograd differentiating it end to end.
#
# Start again from the initial weights (same values as in the setup cell).  The
# output recorded for this cell was produced after the weights had been
# re-initialised; without this reset a top-to-bottom run would continue from
# wherever the previous loop left them and would not reproduce that output.
w1 = torch.tensor([200.0], dtype=torch.float, requires_grad=True)
w2 = torch.tensor([300.0], dtype=torch.float, requires_grad=True)

criterion = nn.MSELoss()

for i in range(10):
    # Forward pass and loss.
    y = x * w1 * w2
    loss = criterion(y, y_true)

    # The graph is rebuilt on every iteration, so it does not need to be
    # retained after the backward pass.
    loss.backward()

    # Gradient-descent step.  The update must not be recorded by autograd,
    # hence no_grad() instead of reaching into `.data`.
    with torch.no_grad():
        delta_w1 = learning_rate * w1.grad
        delta_w2 = learning_rate * w2.grad
        w1 -= delta_w1
        w2 -= delta_w2
        # Prediction with the freshly updated weights.
        y_pred = x * w1 * w2

    # The loss is the value from before the update; y_pred is from after it.
    print('Summary:\n Loss = {}\n delta W1 = {}\n delta W2 = {}\n y_true = {}\n y_pred = {}\n'.format(
        float(loss), float(delta_w1), float(delta_w2), float(y_true), float(y_pred)))

    # backward() accumulates into .grad, so clear it before the next step.
    w1.grad.zero_()
    w2.grad.zero_()

Summary:
 Loss = 32310063104.0
 delta W1 = 97.06501007080078
 delta W2 = 64.70999908447266
 y_true = 250.0
 y_pred = 72658.71875

Summary:
 Loss = 5243022336.0
 delta W1 = 30.66668701171875
 delta W2 = 13.41610336303711
 y_true = 250.0
 y_pred = 48103.34765625

Summary:
 Loss = 2289942784.0
 delta W1 = 19.111337661743164
 delta W2 = 6.2249040603637695
 y_true = 250.0
 y_pred = 34389.7421875

Summary:
 Loss = 1165522048.0
 delta W1 = 13.251962661743164
 delta W2 = 3.2665774822235107
 y_true = 250.0
 y_pred = 25425.3671875

Summary:
 Loss = 633799104.0
 delta W1 = 9.624250411987305
 delta W2 = 1.8083217144012451
 y_true = 250.0
 y_pred = 19129.03125

Summary:
 Loss = 356417824.0
 delta W1 = 7.155783653259277
 delta W2 = 1.029008388519287
 y_true = 250.0
 y_pred = 14537.17578125

Summary:
 Loss = 204123392.0
 delta W1 = 5.388854026794434
 delta W2 = 0.5947030782699585
 y_true = 250.0
 y_pred = 11117.9072265625

Summary:
 Loss = 118111408.0
 delta W1 = 4.087536811828613
 delta W2 = 0.34695