## PyTorch Tutorial 03 - Gradient Calculation With Autograd

In [None]:
import torch

In [3]:
x = torch.randn(3, requires_grad=True)
print(x)

tensor([ 0.4499, -1.2868, -2.2760], requires_grad=True)


In [5]:
y = x+2

### Forward Backward Prop

When we calculate y above it leads to the creation of a computational graph.

--> Forward pass calcualtes y
x
  + y
2

<-- Backward pass gives us dy/dx

In [6]:
print(y) # We can see that grad_fn is <AddBackward0> as we performed + on x to get y


tensor([ 2.4499,  0.7132, -0.2760], grad_fn=<AddBackward0>)


In [14]:
# Let's experiment with more, we can see different gradient functions being picked automatically by PyTorch
z = y**2
print(z)
z = y*y
# z = z.mean() # If we dont apply mean our z will be vector and z.backward() will throw error
print(z)
# To fix that we will need to give z a gradient vector
v = torch.tensor([0.1,1.0,0.001], dtype=torch.float32)
z.backward(v) # Here we pass v to z.backward()
x.grad

tensor([6.0022, 0.5086, 0.0762], grad_fn=<PowBackward0>)
tensor([6.0022, 0.5086, 0.0762], grad_fn=<MulBackward0>)


RuntimeError: grad can be implicitly created only for scalar outputs

In [23]:
# How to prevent x from being updated by gradient descent
x = torch.randn(3, requires_grad=True)
print(x)
# We can set 
x.requires_grad_(False) # Sets inplace
print(x)
# OR
x.detach() # Use with trailing _ to set in place
print(x)

tensor([ 0.4614, -2.2165, -0.2881], requires_grad=True)
tensor([ 0.4614, -2.2165, -0.2881])
tensor([ 0.4614, -2.2165, -0.2881])


### Training Example

In [25]:
weights = torch.ones(4, requires_grad=True)

for epoch in range(3):
    model_output = (weights*3).sum()

    model_output.backward()

    print(weights.grad)

    # We can see below that weights get accumulated hence we must bring them back to 0


tensor([3., 3., 3., 3.])
tensor([6., 6., 6., 6.])
tensor([9., 9., 9., 9.])


In [27]:
weights = torch.ones(4, requires_grad=True)

for epoch in range(3):
    model_output = (weights*3).sum()

    model_output.backward()

    print(weights.grad)

    weights.grad.zero_() # Here we can see the difference weights remain the same

tensor([3., 3., 3., 3.])
tensor([3., 3., 3., 3.])
tensor([3., 3., 3., 3.])


In [28]:
# Next we will look at built in optimizer

weights = torch.ones(4, requires_grad=True)

optimizer = torch.optim.SGD(weights, lr = 0.01) # Throws error

optimizer.step()
optimizer.zero_grad() # Same thing as above we did for weights

TypeError: params argument given to the optimizer should be an iterable of Tensors or dicts, but got torch.FloatTensor