# PyTorch Basics

## Init, helpers, utils, ...

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision

In [4]:
import matplotlib.pyplot as plt
from pprint import pprint
import numpy as np

from IPython.core.debugger import set_trace

%matplotlib inline

In [5]:
from ppt.utils import attr

ImportError: No module named ppt.utils

# Tensors
tensors - the atoms of machine learning

## Tensors in numpy and pytorch

In [None]:
import numpy as np
from numpy.linalg import inv
from numpy.linalg import multi_dot as mdot

In [None]:
import torch

In [None]:
# numpy
np.eye(3)

In [None]:
# torch
torch.eye(3)

In [None]:
# numpy
X = np.random.random((5, 3))
X

In [None]:
# pytorch
Y = torch.rand((5, 3))
Y

In [None]:
X.shape

In [None]:
Y.shape

In [None]:
# numpy
X.T @ X

In [None]:
# torch
Y.t() @ Y

In [None]:
# numpy
inv(X.T @ X)

In [None]:
# torch
torch.inverse(Y.t() @ Y)

## More on PyTorch Tensors

Operations are also available as methods.

In [None]:
A = torch.eye(3)
A.add(1)

In [None]:
A

Any operation that mutates a tensor in-place is post-fixed with an `_`. For example, `x.copy_(y)` and `x.t_()` will change `x`.

In [None]:
A.add_(1)
A

## Indexing and broadcasting
It works as expected:

In [None]:
A[0, 0]

In [None]:
A[0]

In [None]:
A[0:2]

In [None]:
A[:, 1:3]

## Converting

In [None]:
A = torch.eye(3)
A

In [None]:
# torch --> numpy
A.numpy()

In [None]:
# numpy --> torch
torch.from_numpy(np.eye(3))

# Autograd
Prior to `v0.4` PyTorch used the class `Variable` to record gradients. You had to wrap `Tensor`s in `Variable`s.
`Variable`s behaved like `Tensor`s.

With `v0.4`, a `Tensor` can record gradients directly if you tell it to do so, e.g. `torch.ones(3, requires_grad=True)`.
There is no need for `Variable` anymore.

Ref:
- https://pytorch.org/docs/stable/autograd.html
- https://pytorch.org/tutorials/beginner/blitz/autograd_tutorial.html

In [None]:
from torch import autograd  # you rarely use it directly

In [None]:
w = torch.ones(1)
w.requires_grad

In [None]:
z = torch.ones(1) * 2
z.requires_grad

In [None]:
total = w + z
total

In [None]:
# What is going to happen here?
total.backward()

In [None]:
w = torch.ones(1, requires_grad=True)
w.requires_grad

In [None]:
total = w + z
total.requires_grad

In [None]:
total.backward()

In [None]:
w.grad

In [None]:
with torch.no_grad():
    total = w + z

total.requires_grad

# But what about the GPU?
How do I use the GPU?

If you have a GPU make sure that the right pytorch is installed

```
conda install pytorch torchvision cuda91 -c pytorch
```
Check https://pytorch.org/ for details.

In [None]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
device

If you have a GPU you should get something like: 
`device(type='cuda', index=0)`

You can move data to the GPU by doing `.to(device)`.

In [None]:
data = torch.eye(3)
data.to(device)

Note: before `v0.4` one had to use `.cuda()` and `.cpu()` to move stuff to and from the GPU.
This littered the code with many:
```python
if CUDA:
    model = model.cuda()
```

# LinReg with PyTorch, Gradient Descent, and GPU

In [None]:
from sklearn.datasets import make_regression

# Problem size: a single input feature, so the data can be drawn as a 2-D scatter plot.
n_features = 1
n_samples = 100

# Synthetic regression data; `noise` is passed straight to `make_regression`
# (per the scikit-learn docs: std of the Gaussian noise added to the targets).
X, y = make_regression(
    n_samples=n_samples,
    n_features=n_features,
    noise=10,
)

# Scatter plot of the raw data. The figure handle is named `fig` (it was
# misspelled `fix`), matching the plotting code in the training cell.
fig, ax = plt.subplots()
ax.plot(X, y, ".")

In [None]:
# numpy --> torch. `.float()` casts to float32, the default dtype of `nn.Linear` parameters.
X = torch.from_numpy(X).float()
# Targets become a column, one row per sample, to line up with the model's single
# output. The column count is 1 regardless of `n_features`: reshaping to
# (n_samples, n_features) would fail as soon as n_features != 1, because `y`
# holds exactly one value per sample.
y = torch.from_numpy(y.reshape(-1, 1)).float()

In [None]:
from torch import nn
from torch import optim


class LinReg(nn.Module):
    """Linear regression expressed as a single fully connected layer.

    Args:
        input_dim: number of input features per sample.
    """

    def __init__(self, input_dim):
        super().__init__()
        # Weights and bias of the regression live in one linear layer
        # mapping `input_dim` features to a single output.
        self.beta = nn.Linear(in_features=input_dim, out_features=1)

    def forward(self, X):
        """Apply the linear layer to ``X`` and return the result."""
        prediction = self.beta(X)
        return prediction

In [None]:
# Move everything to GPU
# Use the first CUDA device when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# The model is moved to `device` first, so the optimizer is built from the
# parameters returned by the already-moved model.
model = LinReg(n_features).to(device)  # <-- here
optimizer = optim.SGD(model.parameters(), lr=0.001)  # SGD with learning rate 0.001
criterion = nn.MSELoss()  # loss applied to (prediction, target) in the training cell

# Inputs and targets are moved to the same device as the model.
X, y = X.to(device), y.to(device)  # <-- here

In [None]:
# Train step
# One full-batch gradient-descent update; re-run this cell to take further steps.
model.train()
optimizer.zero_grad()  # clear gradients left over from the previous run of this cell

y_ = model(X)  # forward pass on the whole dataset
loss = criterion(y_, y)  # loss of the parameters *before* this step's update

loss.backward()  # populate .grad on the model parameters
optimizer.step()  # apply the SGD update

# Eval
# Recompute predictions with the updated parameters, without recording gradients.
model.eval()
with torch.no_grad():
    y_ = model(X)    

# Vis
# Tensors are copied back to the CPU before conversion to numpy for plotting.
fig, ax = plt.subplots()
ax.plot(X.cpu().numpy(), y_.cpu().numpy(), ".", label="pred")
ax.plot(X.cpu().numpy(), y.cpu().numpy(), ".", label="data")
# NOTE: the title shows the loss computed before optimizer.step(), while the
# plotted predictions come from the parameters after the step.
ax.set_title(f"MSE: {loss.item():0.1f}")
ax.legend();

# Debugging

**Q: "No debugger for your code. What do you think?"**

**A: "I would NOT be able to code!"**

- Who does "print-line-debugging"?
- Who likes debugging in tensorflow?
- What is the intersection of those two groups?


## IPDB cheatsheet
IPython Debugger

Taken from http://frid.github.io/blog/2014/06/05/python-ipdb-cheatsheet/

- h(help): Print help

- n(ext): Continue execution until the next line in the current function is reached or it returns.
- s(tep): Execute the current line, stop at the first possible occasion (either in a function that is called or in the current function).
- r(eturn): Continue execution until the current function returns.

- c(ontinue): Continue execution, only stop when a breakpoint is encountered.
- a(rgs): Print the argument list of the current function.

In [None]:
from IPython.core.debugger import set_trace

In [None]:
def my_function(x):
    """Return ``42 + x``, stopping in the IPython debugger before the addition.

    Args:
        x: value added to the constant 42.

    Returns:
        The sum ``42 + x``.
    """
    base = 42
    # Breakpoint: execution pauses here so the local state can be inspected.
    set_trace()
    total = base + x
    return total

my_function(12)

## Example: debugging a NN

In [None]:
X = torch.rand((5, 3))
X

In [None]:
class MyModule(nn.Module):
    """Toy module for the debugging walkthrough: a single ``nn.Linear(3, 1)`` layer."""

    def __init__(self):
        super().__init__()
        # Linear layer taking 3 input features and producing 1 output.
        self.lin = nn.Linear(3, 1)
    
    def forward(self, X):
        # Uncomment the next line to stop in the IPython debugger on each forward call.
        # set_trace()
        x = self.lin(X)
        # NOTE(review): this returns the input `X`, not the layer output `x`, so the
        # shape assert that follows the class fails. This looks like the deliberate
        # bug the debugging example is built around -- confirm before changing it.
        return X

    
model = MyModule()
y_ = model(X)

assert y_.shape == (5, 1), y_.shape

# Recap - what we learned so far
- Tensor like numpy
- No need to calculate derivatives - automatic differentiation!
- Use `nn.Module` to create your own networks
- `set_trace` is your friend!