In [1]:
import torch

In [2]:
# Build a 1-D floating-point tensor directly from Python numbers.
x = torch.tensor((5.5, 3.0))
x

tensor([5.5000, 3.0000])

In [3]:
# Rebind x to a 5x3 tensor of ones in double precision (float64).
x = x.new_ones((5, 3), dtype=torch.float64)
x

tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]], dtype=torch.float64)

In [4]:
# Random values with the same shape as x, but with the dtype overridden to float32.
x = torch.rand_like(x, dtype=torch.float32)
x, x.dtype

(tensor([[0.8635, 0.7094, 0.7579],
         [0.0830, 0.0700, 0.7829],
         [0.0738, 0.2411, 0.8852],
         [0.7942, 0.1388, 0.0436],
         [0.8622, 0.6788, 0.8502]]),
 torch.float32)

In [5]:
# .size() and .shape report the same torch.Size for this tensor.
x.size(), x.shape

(torch.Size([5, 3]), torch.Size([5, 3]))

In [6]:
# add_ (note the trailing underscore) adds x into y in place.
y = torch.rand((5, 3))
y.add_(x)

tensor([[1.6555, 1.2021, 0.7745],
        [0.1729, 0.4441, 1.2499],
        [0.6010, 0.7910, 1.0088],
        [1.7368, 0.3662, 0.9158],
        [0.8807, 1.4894, 1.5519]])

In [7]:
# Show the tensor next to its NumPy conversion.
x, x.numpy()

(tensor([[0.8635, 0.7094, 0.7579],
         [0.0830, 0.0700, 0.7829],
         [0.0738, 0.2411, 0.8852],
         [0.7942, 0.1388, 0.0436],
         [0.8622, 0.6788, 0.8502]]),
 array([[0.863468  , 0.70938444, 0.75786984],
        [0.08300567, 0.07003778, 0.7829294 ],
        [0.07380503, 0.24108535, 0.8852315 ],
        [0.7942133 , 0.13883114, 0.04357749],
        [0.86220884, 0.67878234, 0.8501622 ]], dtype=float32))

In [8]:
# Ask PyTorch whether CUDA is available in this environment.
torch.cuda.is_available()

False

### Autograd

- `autograd` package provides automatic differentiation to all operations in PyTorch
- `.requires_grad` is an **attribute** of `torch.Tensor`; when it is set to `True`, autograd tracks all operations on that tensor.
- After finishing the computation we can call `.backward()` and all the gradients will be calculated automatically.
- The gradient will be accumulated to `.grad` attribute
- `.detach()` returns a tensor that is detached from the computation history, so future computations on it are not tracked.

- Each Tensor has an attribute `.grad_fn` that references a `Function` that has created the Tensor.
- For Tensors created directly by the user (rather than produced by an operation), `.grad_fn` is `None`.

In [9]:
# requires_grad=True asks autograd to record the operations applied to x.
x = torch.ones((2, 2), requires_grad=True)
x

tensor([[1., 1.],
        [1., 1.]], requires_grad=True)

In [10]:
# y is produced by an operation on x, so it carries a grad_fn (an add node).
y = torch.add(x, 2)
y

tensor([[3., 3.],
        [3., 3.]], grad_fn=<AddBackward0>)

In [11]:
# y has a grad_fn because it was computed from x; its .grad is None at this point.
y.grad_fn, y.grad

(<AddBackward0 at 0x7f861e8f7050>, None)

In [12]:
# z = 3 * y^2 element-wise; out reduces z to a single scalar via the mean.
z = (y * y) * 3
out = torch.mean(z)
z, out

(tensor([[27., 27.],
         [27., 27.]], grad_fn=<MulBackward0>),
 tensor(27., grad_fn=<MeanBackward0>))

- **`.requires_grad_()`**

In [13]:
# A tensor created without requires_grad is not tracked, and neither is
# anything computed from it.
a = torch.rand((2, 2))
a = torch.div(a * 3, a - 1)
a.requires_grad

False

In [14]:
# requires_grad_() switches the flag on the existing tensor in place.
a.requires_grad_(True)
a.requires_grad

True

In [15]:
# Now that a requires grad, results computed from it get a grad_fn.
b = torch.sum(a * a)
b.grad_fn

<SumBackward0 at 0x7f861e8f7650>

#### Gradients

In [16]:
# out is a scalar (the mean of z), so backward() is called without a gradient argument.
out.backward()

In [17]:
# out = mean(3 * (x + 2)^2) over 4 elements, so d(out)/dx = 6 * (x + 2) / 4 = 4.5 at x = 1.
x.grad # d(out)/dx

tensor([[4.5000, 4.5000],
        [4.5000, 4.5000]])

In [23]:
# torch.autograd??

![title](/Users/mrigank/Desktop/Screenshots/autograd.png)

- *vector*-**Jacobian** product

In [27]:
# Fresh 3-element random tensor that autograd will track.
x = torch.rand((3,), requires_grad=True)
x

tensor([0.4775, 0.0596, 0.3169], requires_grad=True)

In [30]:
y = x * 2
# Compare the built-in L2 (Euclidean) norm with the same quantity computed by hand.
# .detach() is used instead of the legacy .data attribute: both give an untracked
# view of the values, but .detach() is the form recommended by PyTorch because
# in-place changes made through it are still checked by autograd.
y, y.detach(), y.detach().norm(), torch.sqrt(torch.sum(torch.pow(y, 2)))

(tensor([0.9549, 0.1192, 0.6338], grad_fn=<MulBackward0>),
 tensor([0.9549, 0.1192, 0.6338]),
 tensor(1.1523),
 tensor(1.1523, grad_fn=<SqrtBackward>))

In [32]:
# Keep doubling y until its L2 norm reaches 1000. The norm is read from a
# detached view (rather than legacy .data) so the loop condition is not
# tracked by autograd; only the y * 2 steps are.
while y.detach().norm() < 1000:
    y = y * 2
y

tensor([977.8286, 122.0110, 648.9987], grad_fn=<MulBackward0>)

In [33]:
# Vector used to weight each component of y in the backward pass.
v = torch.tensor((0.1, 1.0, 0.0001), dtype=torch.float32)

In [34]:
# y is a vector, not a scalar, so backward() is given v and autograd
# computes the vector-Jacobian product with it.
y.backward(v)

In [35]:
# This is the vector-Jacobian product v^T · (dy/dx), not the full Jacobian:
# each entry is the matching entry of v scaled by the total doubling factor applied to x.
x.grad # v^T · dy/dx

tensor([2.0480e+02, 2.0480e+03, 2.0480e-01])

In [44]:
# Ways of stopping AUTOGRAD from tracking history
# Baseline first: x is tracked, and so is a result computed from it.

x.requires_grad, (x**2).requires_grad

(True, True)

In [47]:
# Why False, then True? no_grad() only affects tensors *produced* inside the
# block: the result of x**2 is created with requires_grad=False, while x is an
# existing tensor whose own flag is left untouched.
with torch.no_grad():
    print((x**2).requires_grad)
    print(x.requires_grad)

False
True


In [48]:
# x still requires grad here, outside any no_grad block.
x.requires_grad

True

In [49]:
# detach() gives a tensor that no longer requires grad, unlike x.
y = x.detach()
y.requires_grad

False

In [52]:
x.eq(y).all()

# From help(torch.eq):
#     eq(input, other, out=None) -> Tensor
#     Computes element-wise equality.
#     The second argument can be a number or a tensor whose shape is
#     broadcastable with the first argument.
# y was produced by x.detach(), and the comparison reports every element as equal.

tensor(True)

In [53]:
# help(torch.eq)