
PyTorch?
================

An open-source, Python-based framework for machine learning. It's one of the most widely used frameworks within the ML community because of its flexibility and speed.

- Documentation: https://pytorch.org/docs/stable/index.html
- GitHub: https://github.com/pytorch/pytorch


Important Modules for this course
----------------------------------------
- torch.Tensor
- torch.autograd
- torch.nn
- torch.optim
- torch.utils.data




### Start by importing ``torch``

In [1]:
import torch

# Tensors
- primary data structures in PyTorch
- can contain entries of only one datatype

Full documentation: https://pytorch.org/docs/stable/tensors.html

## Constructions 

There is an essentially infinite number of ways to create tensors in PyTorch; the most common ones are shown below.

The API lists even more possibilities:
https://pytorch.org/docs/stable/torch.html#tensor-creation-ops

### uninitialized tensors

In [2]:
# an uninitialized tensor (its contents are whatever happened to be in memory)
x = torch.empty(5, 3)
x

tensor([[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]])

###  randomly initialized tensors



In [3]:
x = torch.rand(5, 3)
x

tensor([[0.9175, 0.9810, 0.5501],
        [0.5881, 0.7703, 0.6173],
        [0.0240, 0.7135, 0.5862],
        [0.4087, 0.6107, 0.2073],
        [0.6974, 0.0728, 0.9829]])

### zero tensors

In [4]:
x = torch.zeros(5, 3, dtype=torch.int64)
x

tensor([[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]])

### constant tensors

In [5]:
x = torch.full((5, 3), fill_value=7)
x

tensor([[7, 7, 7],
        [7, 7, 7],
        [7, 7, 7],
        [7, 7, 7],
        [7, 7, 7]])

### tensors directly from data

In [6]:
x = torch.tensor([5.5, 3])
x

tensor([5.5000, 3.0000])

### tensors based on existing tensors

In [7]:
x = x.new_ones(5, 3)     # new_* methods take in sizes
x

tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]])

In [8]:
x = torch.randn_like(x, dtype=torch.float32)    # override dtype!
x                          # result has the same size

tensor([[-2.0625, -1.3974, -0.6102],
        [ 0.2951,  2.1645,  0.5595],
        [-1.4108, -0.4200,  0.6974],
        [-1.4622,  0.1092, -0.3447],
        [ 0.9946,  0.7268,  0.3315]])

In [9]:
x = torch.randn_like(x)
x.dtype

torch.float32

### changing data type

In [10]:
x = x.to(torch.float32)
x.dtype

torch.float32

In [11]:
x = x.to(torch.float16)
x.dtype

torch.float16

### copying tensors

In [12]:
y = x.clone()
y

tensor([[ 0.4028, -2.4766, -0.1737],
        [ 1.5254, -0.1382,  1.9590],
        [ 1.1875,  0.5259,  1.2021],
        [ 0.1289, -0.4368,  0.1846],
        [ 0.2649, -0.1390,  0.7344]], dtype=torch.float16)

### tensors from NumPy ndarrays

In [13]:
import numpy as np
a = np.ones(5)
a

array([1., 1., 1., 1., 1.])

In [14]:
b = torch.from_numpy(a)
b

tensor([1., 1., 1., 1., 1.], dtype=torch.float64)

In [15]:
c = torch.tensor(a)
c

tensor([1., 1., 1., 1., 1.], dtype=torch.float64)

## Arithmetic Operations on Tensors


In [16]:
x.size()

torch.Size([5, 3])

In [17]:
y = torch.rand(5, 3)
x + y

tensor([[ 1.2932, -2.2113,  0.3786],
        [ 2.1093,  0.3529,  2.1325],
        [ 1.5165,  0.7086,  1.9312],
        [ 0.1606, -0.3188,  0.1866],
        [ 0.2861,  0.3016,  1.1852]])

In [18]:
torch.add(x, y)

tensor([[ 1.2932, -2.2113,  0.3786],
        [ 2.1093,  0.3529,  2.1325],
        [ 1.5165,  0.7086,  1.9312],
        [ 0.1606, -0.3188,  0.1866],
        [ 0.2861,  0.3016,  1.1852]])

In [19]:
y.add(x)

tensor([[ 1.2932, -2.2113,  0.3786],
        [ 2.1093,  0.3529,  2.1325],
        [ 1.5165,  0.7086,  1.9312],
        [ 0.1606, -0.3188,  0.1866],
        [ 0.2861,  0.3016,  1.1852]])

In [20]:
y

tensor([[0.8904, 0.2653, 0.5523],
        [0.5840, 0.4911, 0.1735],
        [0.3290, 0.1827, 0.7290],
        [0.0317, 0.1179, 0.0020],
        [0.0212, 0.4406, 0.4508]])

In [21]:
# adds x to y in place (the trailing underscore marks an in-place operation)
y.add_(x)

tensor([[ 1.2932, -2.2113,  0.3786],
        [ 2.1093,  0.3529,  2.1325],
        [ 1.5165,  0.7086,  1.9312],
        [ 0.1606, -0.3188,  0.1866],
        [ 0.2861,  0.3016,  1.1852]])

In [22]:
y

tensor([[ 1.2932, -2.2113,  0.3786],
        [ 2.1093,  0.3529,  2.1325],
        [ 1.5165,  0.7086,  1.9312],
        [ 0.1606, -0.3188,  0.1866],
        [ 0.2861,  0.3016,  1.1852]])

## **All the other arithmetic operations (subtraction, multiplication, division) have interfaces analogous to the addition operation**

## Matrix Multiplication in PyTorch

 Use ``tensor.mm`` for matrix multiplications: The shapes/sizes must be compatible

In [23]:
a = torch.randn(3, 2)
b = torch.randn(2, 4)
c = a.mm(b)
c

tensor([[ 0.1514,  0.7039,  0.3198,  1.1356],
        [-2.4708,  0.2800, -6.5669, -3.3122],
        [ 1.0727, -0.8314,  2.9322,  0.5198]])

## Indexing Tensors


**Indexing or slicing PyTorch tensors is very similar to indexing or slicing NumPy ndarrays**



## e.g.,  This retrieves the item at column `1` of every row in ``x``

In [24]:
x

tensor([[ 0.4028, -2.4766, -0.1737],
        [ 1.5254, -0.1382,  1.9590],
        [ 1.1875,  0.5259,  1.2021],
        [ 0.1289, -0.4368,  0.1846],
        [ 0.2649, -0.1390,  0.7344]], dtype=torch.float16)

In [25]:
x[:, 1]

tensor([-2.4766, -0.1382,  0.5259, -0.4368, -0.1390], dtype=torch.float16)

In [26]:
x[..., 1]

tensor([-2.4766, -0.1382,  0.5259, -0.4368, -0.1390], dtype=torch.float16)

Use ``tensor.item()`` to retrieve the value of a scalar (or any one-element) tensor as a plain Python number

In [27]:
a = torch.tensor(20)
a.item()

20

In [28]:
a = torch.tensor([20])
a.item()

20

In [29]:
b = torch.empty(2, 3)
b.dtype

torch.float32

## Reshaping or re-arranging the dimension of Tensors
Use ``Tensor.view`` for reshaping/resizing tensors



In [30]:
x.size()

torch.Size([5, 3])

In [31]:
x.numel()

15

In [32]:
x = torch.randn(4, 4)
y = x.view(16)
z = x.view(-1, 8)  # the size -1 is inferred from other dimensions
x.size(), y.size(), z.size()

(torch.Size([4, 4]), torch.Size([16]), torch.Size([2, 8]))

### swapping axes

In [33]:
x = torch.randn(2, 3, 4)
x.size()

torch.Size([2, 3, 4])

In [34]:
x = x.permute(1, 2, 0)
x.size()

torch.Size([3, 4, 2])

### transpose

In [35]:
x = torch.randn(5, 6)
x.size()

torch.Size([5, 6])

In [36]:
x.T.size()

torch.Size([6, 5])

In [37]:
x.t().size()

torch.Size([6, 5])

## More functions on Tensors
The full list of tensor functions is available [here](https://pytorch.org/docs/stable/torch.html).




# Autograd: Automatic Differentiation

Central to all neural networks in PyTorch is the ``autograd`` package. The package provides automatic differentiation for all operations on Tensors.

If a tensor's
- ``requires_grad`` attribute is on, or 
- it is the result of an operation which involved a tensor whose ``requires_grad`` is on, 

the tensor tracks all operations on it.

At the end of the computations, a call to ``.backward()`` will automatically compute the gradients for all the tensors involved in the computations whose ``requires_grad`` attribute is on. 

## Construction of gradient-enabled Tensors

In [38]:
# set the requires_grad attribute to True
x = torch.ones(2, 2, requires_grad=True)
x

tensor([[1., 1.],
        [1., 1.]], requires_grad=True)

In [39]:
z = torch.rand(2, 3)
z.requires_grad

False

In [40]:
z.requires_grad = True
z.requires_grad

True

In [41]:
z = z.detach()
z.requires_grad

False

In [42]:
# a tensor operation involving a gradient activated tensor
y = 3 * x
y.requires_grad

True

## Checking whether a tensor requires a gradient

In [43]:
# inspect the requires_grad attribute
y.requires_grad

True

In [44]:
# inspect whether the tensor has a gradient function
y.grad_fn

<MulBackward0 at 0x10352a760>

## Backprop

In [45]:
x = torch.tensor(20.0, requires_grad=True)
y = 3 * x 
y.backward()

In [46]:
# use the grad attribute to retrieve the gradient
x.grad

tensor(3.)

## Autograd Functions

- automatic differentiation functions
- Essential to the `nn` package

In [47]:
# A custom autograd function must inherit from torch.autograd.Function
# and must override the static ``forward`` and ``backward`` methods.
class ExampleAutogradFunction(torch.autograd.Function):
    """Affine map ``y = w.T @ x + b`` with a hand-written backward pass.

    Invoke it through ``ExampleAutogradFunction.apply(x, w, b)``.

    Shapes (``mm`` requires 2-D operands):
        x: ``(n, k)``  -- a column vector when ``k == 1``
        w: ``(n, m)``  -- a column vector when ``m == 1``
        b: anything broadcastable to ``(m, k)``, e.g. a scalar or shape ``(1,)``
        y: ``(m, k)``
    """

    @staticmethod
    def forward(ctx, x, w, b):
        """Compute ``w.T @ x + b``.

        Args:
            ctx: context object used to pass information to ``backward``.
            x: input matrix/column vector of shape ``(n, k)``.
            w: weight matrix/column vector of shape ``(n, m)``.
            b: bias, broadcast against the ``(m, k)`` product.

        Returns:
            Tensor of shape ``(m, k)``.
        """
        # Only x and w are needed to form the gradients; for the bias we
        # just need to remember its shape.
        ctx.save_for_backward(x, w)
        ctx.bias_shape = b.shape
        return w.T.mm(x) + b

    @staticmethod
    def backward(ctx, dzdy):
        """Propagate the upstream gradient ``dzdy`` (shape ``(m, k)``).

        Returns:
            ``(dzdx, dzdw, dzdb)`` -- one gradient per ``forward`` input, in
            the order the inputs were passed to ``forward``; each has the
            shape of the corresponding input.
        """
        x, w = ctx.saved_tensors

        # y = w.T @ x + b  =>  dz/dx = w @ dz/dy  and  dz/dw = x @ (dz/dy).T
        # (the transpose is a no-op for a 1x1 output but is required for
        # anything larger).
        dzdx = w.mm(dzdy)
        dzdw = x.mm(dzdy.T)

        # b was broadcast in forward, so its gradient is dz/dy summed back
        # down to b's own shape.
        dzdb = dzdy.sum_to_size(ctx.bias_shape)

        return dzdx, dzdw, dzdb

## Backprop-continued

In [48]:
# Exercise the custom autograd function on column-vector inputs.
# float64 is used so the same tensors can be passed to gradcheck afterwards.
x = torch.rand(10, 1, dtype=torch.float64, requires_grad=True)
w = torch.rand(10, 1,  dtype=torch.float64, requires_grad=True)
b = torch.rand(1,  dtype=torch.float64, requires_grad=True)

# Custom Functions are invoked through ``.apply`` rather than by calling
# ``forward`` directly
autograd_fun = ExampleAutogradFunction.apply

# forward prop: (1, 10) @ (10, 1) + b gives a tensor of shape (1, 1)
y = autograd_fun(x, w, b)

# backward prop: y holds a single element, so ``backward`` needs no explicit
# gradient argument; the results land in x.grad, w.grad and b.grad
y.backward()

In [49]:
# accessing the gradients
x.grad

tensor([[0.8631],
        [0.4217],
        [0.5236],
        [0.2684],
        [0.5103],
        [0.2453],
        [0.3035],
        [0.5123],
        [0.4138],
        [0.7739]], dtype=torch.float64)

In [50]:
x.size()

torch.Size([10, 1])

In [51]:
w.grad

tensor([[0.2189],
        [0.4729],
        [0.1458],
        [0.4982],
        [0.3065],
        [0.1710],
        [0.8184],
        [0.9151],
        [0.3508],
        [0.8478]], dtype=torch.float64)

## Gradcheck

In [52]:
# Finite-difference step and absolute tolerance for the numerical check.
# These match gradcheck's documented defaults and are passed explicitly below;
# a much smaller step (e.g. 1e-10) would be dominated by float64 round-off.
eps = 1e-6
atol = 1e-5

# The inputs must be in double precision; otherwise the finite-difference
# estimate is too imprecise and the check will fail.
is_correct = torch.autograd.gradcheck(autograd_fun, (x, w, b), eps=eps, atol=atol)

is_correct

True