# PyTorch Basics

## Init, helpers, utils, ...

In [2]:
%matplotlib inline

In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision

In [4]:
from pprint import pprint

import matplotlib.pyplot as plt
import numpy as np
from IPython.core.debugger import set_trace

# Tensors
tensors - the atoms of machine learning

## Tensors in numpy and pytorch

In [5]:
import numpy as np
from numpy.linalg import inv
from numpy.linalg import multi_dot as mdot

In [6]:
# numpy
np.eye(3)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [7]:
# torch
torch.eye(3)

tensor([[1., 0., 0.],
        [0., 1., 0.],
        [0., 0., 1.]])

In [8]:
# numpy: a random 5x3 matrix
X = np.random.random(size=(5, 3))
X

array([[0.00312978, 0.165447  , 0.03888296],
       [0.60600685, 0.59181157, 0.75366668],
       [0.45594334, 0.02011842, 0.87961916],
       [0.05319178, 0.8845105 , 0.61772618],
       [0.07897388, 0.31597133, 0.69900588]])

In [17]:
# pytorch: a random 5x3 matrix (sizes passed as separate arguments)
Y = torch.rand(5, 3)
Y

tensor([[0.7300, 0.7018, 0.2601],
        [0.1506, 0.4892, 0.9874],
        [0.9033, 0.3847, 0.9157],
        [0.7148, 0.9781, 0.9281],
        [0.9621, 0.1599, 0.4295]])

In [18]:
X.shape

(5, 3)

In [19]:
Y.shape

torch.Size([5, 3])

In [20]:
# numpy: Gram matrix X^T X; X is (5, 3), so the product is (3, 3)
X.T @ X

array([[0.58420467, 0.44033471, 0.94596653],
       [0.44033471, 1.26021511, 1.23740939],
       [0.94596653, 1.23740939, 2.21345008]])

In [21]:
# torch: the same product; Y.t() is the transpose of the 2-D tensor Y
Y.t() @ Y

tensor([[2.8080, 1.7865, 2.2423],
        [1.7865, 1.8621, 1.9942],
        [2.2423, 1.9942, 2.9268]])

In [22]:
# numpy: inverse of the 3x3 Gram matrix (`inv` is numpy.linalg.inv, imported above)
inv(X.T @ X)

array([[ 6.01876353,  0.93702967, -3.09608886],
       [ 0.93702967,  1.90504317, -1.4654575 ],
       [-3.09608886, -1.4654575 ,  2.59421586]])

In [23]:
# torch: torch.inverse plays the role of numpy.linalg.inv
torch.inverse(Y.t() @ Y)

tensor([[ 1.0446, -0.5369, -0.4345],
        [-0.5369,  2.2630, -1.1306],
        [-0.4345, -1.1306,  1.4449]])

## More on PyTorch Tensors

Operations are also available as methods.

In [24]:
A = torch.eye(3)
# .add() is out-of-place: it returns a new tensor and leaves A untouched
A.add(1)

tensor([[2., 1., 1.],
        [1., 2., 1.],
        [1., 1., 2.]])

In [25]:
A

tensor([[1., 0., 0.],
        [0., 1., 0.],
        [0., 0., 1.]])

Any operation that mutates a tensor in-place has a `_` suffix.

In [26]:
# add_ (trailing underscore) modifies A itself, so A now holds the updated values
A.add_(1)
A

tensor([[2., 1., 1.],
        [1., 2., 1.],
        [1., 1., 2.]])

## Indexing and broadcasting
It works as expected/like numpy:

In [27]:
A[0, 0]

tensor(2.)

In [28]:
A[0]

tensor([2., 1., 1.])

In [29]:
A[0:2]

tensor([[2., 1., 1.],
        [1., 2., 1.]])

In [30]:
A[:, 1:3]

tensor([[1., 1.],
        [2., 1.],
        [1., 2.]])

## Converting

In [31]:
A = torch.eye(3)
A

tensor([[1., 0., 0.],
        [0., 1., 0.],
        [0., 0., 1.]])

In [32]:
# torch --> numpy
# B is backed by the same memory as A (no copy), so later in-place
# changes to A are visible through B
B = A.numpy()
B

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]], dtype=float32)

Note: torch and numpy can share the same memory / zero-copy

In [33]:
# in-place update of A; B (created via A.numpy()) reflects it as well
A.add_(.5)
A

tensor([[1.5000, 0.5000, 0.5000],
        [0.5000, 1.5000, 0.5000],
        [0.5000, 0.5000, 1.5000]])

In [34]:
B

array([[1.5, 0.5, 0.5],
       [0.5, 1.5, 0.5],
       [0.5, 0.5, 1.5]], dtype=float32)

In [35]:
# numpy --> torch
# the dtype carries over: np.eye(3) is float64, so the tensor is torch.float64
torch.from_numpy(np.eye(3))

tensor([[1., 0., 0.],
        [0., 1., 0.],
        [0., 0., 1.]], dtype=torch.float64)

## Much more

In [36]:
# public (non-underscore) names exposed by the torch module
[name for name in dir(torch) if not name.startswith("_")]

['AVG',
 'AggregationType',
 'Argument',
 'ArgumentSpec',
 'Block',
 'BoolStorage',
 'BoolTensor',
 'BoolType',
 'ByteStorage',
 'ByteTensor',
 'CharStorage',
 'CharTensor',
 'Code',
 'CompilationUnit',
 'CompleteArgumentSpec',
 'DictType',
 'DoubleStorage',
 'DoubleTensor',
 'ExecutionPlanState',
 'ExtraFilesMap',
 'FatalError',
 'FileCheck',
 'FloatStorage',
 'FloatTensor',
 'FloatType',
 'Function',
 'FunctionSchema',
 'Future',
 'Generator',
 'Gradient',
 'Graph',
 'GraphExecutorState',
 'HalfStorage',
 'HalfStorageBase',
 'HalfTensor',
 'IODescriptor',
 'IntStorage',
 'IntTensor',
 'IntType',
 'JITException',
 'ListType',
 'LockingLogger',
 'LoggerBase',
 'LongStorage',
 'LongTensor',
 'Node',
 'NoopLogger',
 'NumberType',
 'OptionalType',
 'PyTorchFileReader',
 'PyTorchFileWriter',
 'SUM',
 'ScriptMethod',
 'ScriptModule',
 'ShortStorage',
 'ShortTensor',
 'Size',
 'Storage',
 'StringType',
 'Tensor',
 'TensorType',
 'TracingState',
 'TupleType',
 'Type',
 'Use',
 'Value',
 'abs'

In [37]:
# public (non-underscore) attributes and methods of the tensor A
[name for name in dir(A) if not name.startswith("_")]

['abs',
 'abs_',
 'acos',
 'acos_',
 'add',
 'add_',
 'addbmm',
 'addbmm_',
 'addcdiv',
 'addcdiv_',
 'addcmul',
 'addcmul_',
 'addmm',
 'addmm_',
 'addmv',
 'addmv_',
 'addr',
 'addr_',
 'all',
 'allclose',
 'any',
 'apply_',
 'argmax',
 'argmin',
 'argsort',
 'as_strided',
 'as_strided_',
 'asin',
 'asin_',
 'atan',
 'atan2',
 'atan2_',
 'atan_',
 'backward',
 'baddbmm',
 'baddbmm_',
 'bernoulli',
 'bernoulli_',
 'bincount',
 'bmm',
 'btrifact',
 'btrifact_with_info',
 'btrisolve',
 'byte',
 'cauchy_',
 'ceil',
 'ceil_',
 'char',
 'cholesky',
 'cholesky_inverse',
 'cholesky_solve',
 'chunk',
 'clamp',
 'clamp_',
 'clamp_max',
 'clamp_max_',
 'clamp_min',
 'clamp_min_',
 'clone',
 'coalesce',
 'contiguous',
 'copy_',
 'cos',
 'cos_',
 'cosh',
 'cosh_',
 'cpu',
 'cross',
 'cuda',
 'cumprod',
 'cumsum',
 'data',
 'data_ptr',
 'dense_dim',
 'dequantize',
 'det',
 'detach',
 'detach_',
 'device',
 'diag',
 'diag_embed',
 'diagflat',
 'diagonal',
 'digamma',
 'digamma_',
 'dim',
 'dist',
 

# But what about the GPU?
How do I use the GPU?

If you have a GPU make sure that the right pytorch is installed
(check https://pytorch.org/ for details).

In [38]:
# Prefer the first CUDA device when one is available, otherwise stay on the CPU.
device_name = "cuda:0" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)
device

device(type='cpu')

If you have a GPU you should get something like: 
`device(type='cuda', index=0)`

You can move data to the GPU by doing `.to(device)`.

In [39]:
data = torch.eye(3)
# Tensor.to() returns a new tensor rather than moving `data` in place,
# so the result has to be assigned back for later cells to use it.
data = data.to(device)
data

tensor([[1., 0., 0.],
        [0., 1., 0.],
        [0., 0., 1.]])

Now the computation happens on whichever device the tensors live on (the GPU, if one is available and the data was moved there).

In [40]:
res = data + data
res

tensor([[2., 0., 0.],
        [0., 2., 0.],
        [0., 0., 2.]])

In [41]:
res.device

device(type='cpu')

Note: before `v0.4` one had to use `.cuda()` and `.cpu()` to move stuff to and from the GPU.
This littered the code with many:
```python
if CUDA:
    model = model.cuda()
```

# Automatic differentiation with `autograd`
Prior to `v0.4` PyTorch used the class `Variable` to record gradients. You had to wrap `Tensor`s in `Variable`s.
`Variable`s behaved exactly like `Tensor`s.

With `v0.4` `Tensor` can record gradients directly if you tell it to do so, e.g. `torch.ones(3, requires_grad=True)`.
There is no need for `Variable` anymore.
Many tutorials still use `Variable`, be aware!

Ref:
- https://pytorch.org/docs/stable/autograd.html
- https://pytorch.org/tutorials/beginner/blitz/autograd_tutorial.html

You rarely use `torch.autograd` directly.
Pretty much everything is part of `torch.Tensor` now.
Simply add `requires_grad=True` to the tensors you want to calculate the gradients for.
`nn.Module`s track gradients automatically.

In [62]:
from torch import autograd

In [63]:
# a plain scalar tensor; gradient tracking is not requested here
x = torch.tensor(2.0)
x

tensor(2.)

In [64]:
# same scalar, but now flagged so that gradients are recorded for it
x = torch.tensor(2.0, requires_grad=True)
x

tensor(2., requires_grad=True)

In [65]:
print(x.requires_grad)

True


In [66]:
print(x.grad)

None


In [67]:
# Building y only records the computation; no gradient exists yet,
# so x.grad is still None at this point.
y = x ** 2

print("Grad of x:", x.grad)

Grad of x: None


In [68]:
y = x ** 2
# backward() computes dy/dx = 2x at x = 2 and stores the result in x.grad
y.backward()

print("Grad of x:", x.grad)

Grad of x: tensor(4.)


In [69]:
# What is going to happen here?
# x does not require grad, so backward() raises a RuntimeError. The error is
# caught and printed so the notebook can still be run top to bottom.
x = torch.tensor(2.)
try:
    x.backward()
except RuntimeError as err:
    print(f"RuntimeError: {err}")

RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn

In [73]:
# Don't record the gradient
# Useful for inference

params = torch.tensor(2., requires_grad=True)

with torch.no_grad():
    # Inside no_grad no graph is recorded, so the result has no grad_fn
    # even though `params` itself requires grad.
    y = params * params
    print(y.grad_fn)

None


`nn.Module` and `nn.Parameter` keep track of gradients for you.

In [74]:
# a linear layer mapping 2 inputs to 1 output; inspect its weight
lin = nn.Linear(in_features=2, out_features=1, bias=True)
lin.weight

Parameter containing:
tensor([[ 0.6877, -0.4699]], requires_grad=True)

In [75]:
type(lin.weight)

torch.nn.parameter.Parameter

In [76]:
# A Parameter is a Tensor subclass. Check against torch.Tensor rather than the
# legacy torch.FloatTensor class, so the result does not depend on the
# parameter's dtype or device.
isinstance(lin.weight, torch.Tensor)

True

# Exercise
- Do you remember the analytical solution to solve for the parameters of linear regression? Implement it.