# PyTorch Basics

## Init, helpers, utils, ...

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision

In [2]:
import matplotlib.pyplot as plt
from pprint import pprint
import numpy as np

from IPython.core.debugger import set_trace

%matplotlib inline

# Tensors
tensors - the atoms of machine learning

## Tensors in numpy and pytorch

In [3]:
import numpy as np
from numpy.linalg import inv
from numpy.linalg import multi_dot as mdot

In [4]:
import torch

In [5]:
# numpy: 3x3 identity matrix (ones on the diagonal, zeros elsewhere)
np.eye(3)

array([[ 1.,  0.,  0.],
       [ 0.,  1.,  0.],
       [ 0.,  0.,  1.]])

In [6]:
# torch: same call as in numpy, but the result is a torch tensor
torch.eye(3)

tensor([[ 1.,  0.,  0.],
        [ 0.,  1.,  0.],
        [ 0.,  0.,  1.]])

In [7]:
# numpy: 5x3 array of random floats
# (no seed is set in this notebook, so the values change on every run)
X = np.random.random((5, 3))
X

array([[ 0.37962098,  0.73750421,  0.68270773],
       [ 0.34290434,  0.98897845,  0.72829118],
       [ 0.95294912,  0.01920797,  0.90197184],
       [ 0.08353149,  0.4070058 ,  0.42778951],
       [ 0.49699863,  0.81444519,  0.22918089]])

In [8]:
# pytorch: 5x3 tensor of random floats
# (no seed is set in this notebook, so the values change on every run)
Y = torch.rand((5, 3))
Y

tensor([[ 0.8000,  0.5938,  0.8309],
        [ 0.8169,  0.8796,  0.5764],
        [ 0.5383,  0.8992,  0.7900],
        [ 0.9567,  0.0071,  0.6409],
        [ 0.6075,  0.5452,  0.9313]])

In [9]:
# numpy reports the shape as a plain tuple
X.shape

(5, 3)

In [10]:
# torch reports the shape as a torch.Size object
Y.shape

torch.Size([5, 3])

In [11]:
# numpy: transpose via the .T attribute, (5, 3) becomes (3, 5)
X.T 

array([[ 0.37962098,  0.34290434,  0.95294912,  0.08353149,  0.49699863],
       [ 0.73750421,  0.98897845,  0.01920797,  0.4070058 ,  0.81444519],
       [ 0.68270773,  0.72829118,  0.90197184,  0.42778951,  0.22918089]])

In [12]:
# torch: transpose via the .t() method, (5, 3) becomes (3, 5)
Y.t() 

tensor([[ 0.8000,  0.8169,  0.5383,  0.9567,  0.6075],
        [ 0.5938,  0.8796,  0.8992,  0.0071,  0.5452],
        [ 0.8309,  0.5764,  0.7900,  0.6409,  0.9313]])

## More on PyTorch Tensors

Operations are also available as methods.

In [13]:
A = torch.eye(3)
# add() (no trailing underscore) returns a new tensor; A itself is left unchanged
A.add(1)

tensor([[ 2.,  1.,  1.],
        [ 1.,  2.,  1.],
        [ 1.,  1.,  2.]])

In [14]:
# A still holds the identity matrix: add(1) did not modify it
A

tensor([[ 1.,  0.,  0.],
        [ 0.,  1.,  0.],
        [ 0.,  0.,  1.]])

Any operation that mutates a tensor in-place is suffixed with an underscore (`_`). For example, `x.copy_(y)` and `x.t_()` will change `x`.

## Indexing and broadcasting
It works as expected:

In [15]:
# single element at row 0, column 0; the result is still a tensor, not a Python float
A[0, 0]

tensor(1.)

In [16]:
# first row
A[0]

tensor([ 1.,  0.,  0.])

In [17]:
# rows 0 and 1 (the end of the slice is exclusive)
A[0:2]

tensor([[ 1.,  0.,  0.],
        [ 0.,  1.,  0.]])

In [18]:
# all rows, columns 1 and 2
A[:, 1:3]

tensor([[ 0.,  0.],
        [ 1.,  0.],
        [ 0.,  1.]])

## Converting

In [19]:
# fresh identity matrix used by the conversion examples that follow
A = torch.eye(3)
A

tensor([[ 1.,  0.,  0.],
        [ 0.,  1.,  0.],
        [ 0.,  0.,  1.]])

In [20]:
# torch --> numpy (the tensor's float32 dtype carries over to the array)
A.numpy()

array([[ 1.,  0.,  0.],
       [ 0.,  1.,  0.],
       [ 0.,  0.,  1.]], dtype=float32)

In [21]:
# numpy --> torch (np.eye yields float64, and the resulting tensor keeps that dtype)
torch.from_numpy(np.eye(3))

tensor([[ 1.,  0.,  0.],
        [ 0.,  1.,  0.],
        [ 0.,  0.,  1.]], dtype=torch.float64)

# But what about the GPU?
How do I use the GPU?


Check https://pytorch.org/ for details.

In [22]:
# Use the first CUDA device when one is available; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
device

device(type='cuda', index=0)

If you have a GPU you should get something like: 
`device(type='cuda', index=0)`

You can move data to the GPU by doing `.to(device)`.

In [23]:
data = torch.eye(3)
# Tensor.to() does not move `data` in place: it returns the tensor on the
# target device, so the result must be bound to a name to be used later.
data_on_device = data.to(device)
data_on_device

tensor([[ 1.,  0.,  0.],
        [ 0.,  1.,  0.],
        [ 0.,  0.,  1.]], device='cuda:0')

Note: before `v0.4` one had to use `.cuda()` and `.cpu()` to move stuff to and from the GPU.
This littered the code with many:
```python
if CUDA:
    model = model.cuda()
```

![](dynamic_graph.gif)

# Computational Graph

b = w1 * a

c = w2 * a 

d = (w3 * b) + (w4 * c)

L = f(d)



![](images/cg.png)

* The computation graph is simply a data structure that allows you to efficiently apply the chain rule to compute gradients for all of your parameters.

![](images/cgb.png)

# Autograd
Prior to `v0.4` PyTorch used the class `Variable` to record gradients. You had to wrap `Tensor`s in `Variable`s.
`Variable`s behaved like `Tensor`s.

With `v0.4`, a `Tensor` can record gradients directly if you tell it to do so, e.g. `torch.ones(3, requires_grad=True)`.
There is no need for `Variable` anymore.

Ref:
- https://pytorch.org/docs/stable/autograd.html
- https://pytorch.org/tutorials/beginner/blitz/autograd_tutorial.html

In [24]:
from torch import autograd  # you rarely use it directly

In [25]:
# a tensor created without requires_grad=True does not track gradients
w = torch.ones(1)
w.requires_grad

False

In [26]:
# z is computed from a tensor that does not require grad, so it does not either
z = torch.ones(1) * 2
z.requires_grad

False

In [27]:
# element-wise addition: 1 + 2
total = w + z
total

tensor([ 3.])

In [28]:
# What is going to happen here?
# Neither w nor z requires grad, so there is nothing to differentiate and
# backward() raises. The error is caught and printed so that
# "Restart & Run All" does not stop at this cell.
try:
    total.backward()
except RuntimeError as err:
    print("RuntimeError:", err)

RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn

In [32]:
# re-create w, this time asking autograd to track operations on it
w = torch.ones(1, requires_grad=True)
w.requires_grad

True

In [33]:
# rebuild total from the new w: requires_grad propagates to results computed from w
total = w + z
total.requires_grad

True

In [34]:
# total now depends on a tensor that requires grad, so backward() runs and fills in w.grad
total.backward()

In [35]:
# gradient of total = w + z with respect to w, which is 1
w.grad

tensor([ 1.])