In [None]:
# torch.rand: This function generates random numbers from a uniform distribution 
# over the range [0, 1). It takes in dimensions as arguments and returns a 
# tensor with values sampled from a uniform distribution between 0 and 1.

# torch.randn: This function generates random numbers from a standard normal (Gaussian) 
# distribution with mean 0 and standard deviation 1. 
# It also takes in dimensions as arguments and returns a 
# tensor with values sampled from this distribution.

In [3]:
import torch

# Leaf tensor [0., 1., 2., 3.]; switch gradient tracking on so autograd
# records every operation applied to it.
x = torch.arange(4.0).requires_grad_()
x

tensor([0., 1., 2., 3.], requires_grad=True)

In [7]:
# On a freshly created x no backward pass has run yet, so .grad is None.
print(x.grad)

# y = <x, x> = sum_i x_i^2 is a scalar built from x.
y = torch.dot(x, x)
print(y)

# Back-propagate from the scalar; autograd stores dy/dx in x.grad.
y.backward()
print(x.grad)

# Sanity check against the analytic gradient dy/dx = 2x.
x.grad == 2 * x

None
tensor(14., grad_fn=<DotBackward0>)
tensor([0., 2., 4., 6.])


tensor([True, True, True, True])

In [1]:
import torch
# The autograd package provides automatic differentiation
# for all operations on Tensors.

# Seed the global RNG so the random tensor below (and every value derived
# from it) is identical on each Restart & Run All.
torch.manual_seed(0)

# requires_grad=True -> autograd tracks all operations on the tensor.
x = torch.randn(3, requires_grad=True)
y = x + 2

# y was created as the result of an operation, so it has a grad_fn attribute.
# grad_fn references the Function that created the tensor.
print(x)  # created by the user -> grad_fn is None
print(y)
print(y.grad_fn)

tensor([-1.0349,  0.9648, -0.5830], requires_grad=True)
tensor([0.9651, 2.9648, 1.4170], grad_fn=<AddBackward0>)
<AddBackward0 object at 0x7bc7ab2cbc10>


In [2]:
# Extend the graph: square y element-wise and scale by 3 ...
z = 3 * (y * y)
print(z)
# ... then reduce to a single scalar so backward() needs no arguments.
z = z.mean()
print(z)

# Back-propagation: once the forward computation is finished, calling
# .backward() computes all gradients automatically.
# The gradient for each tracked tensor is accumulated into its .grad attribute;
# it is the partial derivative of the function w.r.t. that tensor.

z.backward()
print(x.grad)  # dz/dx

tensor([ 2.7944, 26.3710,  6.0238], grad_fn=<MulBackward0>)
tensor(11.7297, grad_fn=<MeanBackward0>)
tensor([1.9303, 5.9297, 2.8340])


In [3]:
# Keep in mind: calling .backward() without arguments only works when the tensor holds a single (scalar) value.

# Generally speaking, torch.autograd is an engine for computing vector-Jacobian products.
# It computes partial derivatives while applying the chain rule.



# Model with non-scalar output:
# If a tensor is non-scalar (has more than one element), backward() needs an explicit argument:
# pass a gradient tensor whose shape matches the output.
# This is the vector used in the vector-Jacobian product.

In [7]:
# Fresh leaf tensor: three ones with gradient tracking enabled.
x = torch.ones(3, requires_grad=True)

# Double once, then ten more times: y = 2**11 * x = 2048 * x.
y = 2 * x
for _ in range(10):
    y = 2 * y

print(y)
print(y.shape)

# y is a vector, so backward() needs a same-shaped vector v; x.grad then
# receives the vector-Jacobian product of v with dy/dx.
v = torch.ones(3, dtype=torch.float32)
y.backward(gradient=v)
print(x.grad)

tensor([2048., 2048., 2048.], grad_fn=<MulBackward0>)
torch.Size([3])
tensor([2048., 2048., 2048.])


In [9]:
weights = torch.ones(4, requires_grad=True)

for epoch in range(3):
    # Dummy "model": the scalar output is 3 * sum(weights).
    model_output = (3 * weights).sum()
    model_output.backward()

    print(weights.grad)

    # Gradient-descent step; done under no_grad so the in-place update
    # of the leaf tensor is not tracked by autograd.
    with torch.no_grad():
        weights -= weights.grad * 0.1

    # Important: reset the gradient. backward() accumulates into .grad, so
    # skipping this would change the final weights and output.
    weights.grad.zero_()

print(weights)
print(model_output)


# Optimizer has zero_grad() method
# optimizer = torch.optim.SGD([weights], lr=0.1)
# During training:
# optimizer.step()
# optimizer.zero_grad()

tensor([3., 3., 3., 3.])
tensor([3., 3., 3., 3.])
tensor([3., 3., 3., 3.])
tensor([0.1000, 0.1000, 0.1000, 0.1000], requires_grad=True)
tensor(4.8000, grad_fn=<SumBackward0>)
