# PyTorch Tutorial

PyTorch is a Python-based scientific computing package.

The fundamental PyTorch concept is the __Tensor__, which is conceptually identical to a numpy array, i.e. an $n$-dimensional array. 

> __Like numpy array__ a PyTorch Tensor is a generic tool for scientific computing.

> __Unlike numpy array__ a PyTorch Tensor can use GPU to accelerate numeric computations. 

Moreover, PyTorch is a deep learning research platform that provides maximum flexibility and speed.

## Tensor
For further details, see https://pytorch.org/tutorials/beginner/blitz/tensor_tutorial.html#sphx-glr-beginner-blitz-tensor-tutorial-py

In [3]:
import torch

In [4]:
##Construct a 5x3 matrix, uninitialized
m = torch.empty(5, 3)
print(m)

tensor([[-5.9265e-26,  4.5905e-41,  1.7983e-37],
        [ 0.0000e+00, -2.2659e+22,  4.5904e-41],
        [-1.5273e-31,  4.5905e-41, -2.4139e+22],
        [ 4.5904e-41, -2.2880e+22,  4.5904e-41],
        [-2.2658e+22,  4.5904e-41,  0.0000e+00]])


In [6]:
##Construct a 5x3 matrix, randomly initialized
m = torch.rand(5, 3)
print(m)

tensor([[ 0.3014,  0.1197,  0.2546],
        [ 0.6978,  0.6679,  0.8752],
        [ 0.9055,  0.3034,  0.7594],
        [ 0.3435,  0.0665,  0.6568],
        [ 0.7785,  0.8107,  0.6746]])


In [8]:
##Construct a 5x3 matrix filled with zeros and of dtype Long (i.e. integers)
m = torch.zeros(5, 3, dtype=torch.long)
print(m)

tensor([[ 0,  0,  0],
        [ 0,  0,  0],
        [ 0,  0,  0],
        [ 0,  0,  0],
        [ 0,  0,  0]])


In [10]:
##Construct a 2x2 tensor directly from data (1:4 int)
x = torch.tensor([[1, 2],[3, 4]])
print(x)

tensor([[ 1,  2],
        [ 3,  4]])


In [12]:
##Create a 5x3 tensor of 1s, dtype double, from an existing one
x = x.new_ones(5, 3, dtype=torch.double) ##new_* method
print(x)
##Transform the tensor above in a random tensor of dtype = float
x = torch.rand_like(x, dtype = torch.float)
print(x)

tensor([[ 1.,  1.,  1.],
        [ 1.,  1.,  1.],
        [ 1.,  1.,  1.],
        [ 1.,  1.,  1.],
        [ 1.,  1.,  1.]], dtype=torch.float64)
tensor([[ 0.2629,  0.8436,  0.1544],
        [ 0.9891,  0.6040,  0.5990],
        [ 0.5468,  0.8771,  0.3769],
        [ 0.2685,  0.9077,  0.8635],
        [ 0.0102,  0.7071,  0.9677]])


In [7]:
##get a tensor size
print(x.size())

torch.Size([5, 3])


In [22]:
##sum two tensors x and y of size 5x3
##Syntax 1
x = torch.ones(5, 3)
y = torch.rand(5, 3)
x + y

tensor([[ 1.8603,  1.9029,  1.7428],
        [ 1.2551,  1.2597,  1.4212],
        [ 1.3318,  1.0015,  1.6525],
        [ 1.1707,  1.3352,  1.2722],
        [ 1.8411,  1.3074,  1.0169]])

In [23]:
##Syntax 2
torch.add(x, y)
###save the result in a new tensor (previously defined)
z = torch.empty(5, 3)
torch.add(x, y, out=z)
print(z)

tensor([[ 1.8603,  1.9029,  1.7428],
        [ 1.2551,  1.2597,  1.4212],
        [ 1.3318,  1.0015,  1.6525],
        [ 1.1707,  1.3352,  1.2722],
        [ 1.8411,  1.3074,  1.0169]])


In [24]:
##In-place addition, substitute y with x+y
y.add_(x)
print(y) ##in-places operations are post-fixed with _ (e.g. x.t_() transposes tensor x)

tensor([[ 1.8603,  1.9029,  1.7428],
        [ 1.2551,  1.2597,  1.4212],
        [ 1.3318,  1.0015,  1.6525],
        [ 1.1707,  1.3352,  1.2722],
        [ 1.8411,  1.3074,  1.0169]])


In [18]:
##Indexing Tensors
x = torch.rand(5, 3)
print(x[:, 1]) ##returns the first column

tensor([ 0.6207,  0.1739,  0.5064,  0.2268,  0.1664])


In [19]:
##Tensor RESIZING
x = torch.randn(4, 4)
##(4x4) --> (16)
y = x.view(16)
##(4x4) --> (2x8)
z = x.view(2, -1) ##once the first dimension is fixed, the second is automatically given if -1
print(x.size(), y.size(), z.size())
print(x, y, z)

torch.Size([4, 4]) torch.Size([16]) torch.Size([2, 8])
tensor([[-0.3076, -1.3366,  0.1111, -0.3400],
        [ 0.7326, -0.5380,  0.0312, -1.5047],
        [-0.7740,  0.0863,  0.1955,  0.7107],
        [ 0.0028, -0.4003, -0.4130,  0.0655]]) tensor([-0.3076, -1.3366,  0.1111, -0.3400,  0.7326, -0.5380,  0.0312,
        -1.5047, -0.7740,  0.0863,  0.1955,  0.7107,  0.0028, -0.4003,
        -0.4130,  0.0655]) tensor([[-0.3076, -1.3366,  0.1111, -0.3400,  0.7326, -0.5380,  0.0312,
         -1.5047],
        [-0.7740,  0.0863,  0.1955,  0.7107,  0.0028, -0.4003, -0.4130,
          0.0655]])


In [2]:
##return the value of a Tensor of dimension 1 as a Python number
x = torch.randn(1)
print(x)
print(x.item())

tensor([ 0.9504])
0.9503579139709473


In [26]:
##Convert a tensor to a numpy array
a = torch.ones(5, 3)
b = a.numpy()
print(b)

[[1. 1. 1.]
 [1. 1. 1.]
 [1. 1. 1.]
 [1. 1. 1.]
 [1. 1. 1.]]


In [27]:
##Add 1 to the tensor a
a.add_(1)
print(a)
print(b) ##observe how the numpy array changed in value

tensor([[ 2.,  2.,  2.],
        [ 2.,  2.,  2.],
        [ 2.,  2.,  2.],
        [ 2.,  2.,  2.],
        [ 2.,  2.,  2.]])
[[2. 2. 2.]
 [2. 2. 2.]
 [2. 2. 2.]
 [2. 2. 2.]
 [2. 2. 2.]]


In [35]:
##convert NumPy array to PyTorch tensor
import numpy as np
a = np.ones(5)
b = torch.from_numpy(a)
##add 1 to a
np.add(a, 1, out=a)
print(a)
print(b) ##watch how the Tensor changes

[2. 2. 2. 2. 2.]
tensor([ 2.,  2.,  2.,  2.,  2.], dtype=torch.float64)


### CUDA Tensor

In [19]:
##check if the CUDA device is available 
if torch.cuda.is_available():
    device = torch.device('cuda:2') 
    y = torch.ones_like(x, device=device) ##move x and y to cuda
    x = x.to(device) ##tensors can be moved onto any device using the .to method
    z = x + y ##z is stored in cuda
    print(z)
    print(z.to("cpu", torch.double)) ##move z to the cpu

tensor([[ 0.9335,  1.1431,  1.7024],
        [ 0.8835,  1.4194,  2.8523],
        [ 1.3923,  1.0960,  1.6698],
        [-0.2573,  0.6276,  0.6845],
        [ 0.9801,  0.8895,  2.4119]], device='cuda:2')
tensor([[ 0.9335,  1.1431,  1.7024],
        [ 0.8835,  1.4194,  2.8523],
        [ 1.3923,  1.0960,  1.6698],
        [-0.2573,  0.6276,  0.6845],
        [ 0.9801,  0.8895,  2.4119]], dtype=torch.float64)


In [39]:
##useful methods that can be called to inspect the devices
print(torch.cuda.current_device())
print(torch.cuda.device(0))
print(torch.cuda.device_count())
print(torch.cuda.get_device_name(3))

0
<torch.cuda.device object at 0x7ff6e49306d8>
4
Graphics Device


## Autograd: automatic differentiation
For further details, see https://pytorch.org/tutorials/beginner/blitz/autograd_tutorial.html

The `autograd` package provides automatic differentiation for all the operations on tensors.

`requires_grad` is an attribute of the class `torch.Tensor`. If `requires_grad=True`, then all the operations on the tensor are tracked. Once the computation is finished, the method `.backward()` can be called, and the gradients are automatically computed. The gradient for the tensor is accumulated into `.grad` attribute.

A very important class for autograd implementation is `Function`. The classes `Tensor` and `Function` are interconnected, and build up an acyclic graph that encodes a complete hystory of computations. The variable that results from computation has an attribute `grad_fn`, that references a `Function` that has created the `Tensor`.

In [3]:
import torch

In [8]:
##Create a tensor and track the computations
x = torch.rand(5, 3, requires_grad=True)
##Do an operation on tensors
y = torch.ones(5, 3)
z = x+y
print(z)

##z is the result of an operation between tensors, hence it has the attribute grad_fn
print(z.grad_fn)

tensor([[ 0.3303,  0.7276,  0.7925],
        [ 0.2868,  0.1420,  0.4408],
        [ 0.3899,  0.2825,  0.2112],
        [ 0.8884,  0.9656,  0.3476],
        [ 0.9990,  0.7865,  0.2785]])
tensor([[ 1.3303,  1.7276,  1.7925],
        [ 1.2868,  1.1420,  1.4408],
        [ 1.3899,  1.2825,  1.2112],
        [ 1.8884,  1.9656,  1.3476],
        [ 1.9990,  1.7865,  1.2785]])
<AddBackward1 object at 0x7f42ebb9a438>


In [10]:
##requires_grad defaults to True, change the flag in-place for tensor y
y.requires_grad_(False)
h = y + 2
print(h.grad_fn) ##the attribute is None cause no operation has been tracked

None


In [11]:
##Execute a whole block without tracking the operations on tensors
with torch.no_grad():
    x = torch.ones(5, 3)
    y = x + 2
    print(y.grad_fn)

None


In [23]:
##Compute the gradient of a scalar
x = torch.ones(5, 3, requires_grad=True)
y = x + 2
z = y * y * 3

out = z.mean()
print(z) ##z is a scalar

out.backward() ##out = sum_i[(x_i+2)^2 * 3]/15, we are computing d(out)/dx_i for each i
print(x.grad) ##result stored in x's attribute .grad

tensor([[ 27.,  27.,  27.],
        [ 27.,  27.,  27.],
        [ 27.,  27.,  27.],
        [ 27.,  27.,  27.],
        [ 27.,  27.,  27.]])
tensor([[ 1.2000,  1.2000,  1.2000],
        [ 1.2000,  1.2000,  1.2000],
        [ 1.2000,  1.2000,  1.2000],
        [ 1.2000,  1.2000,  1.2000],
        [ 1.2000,  1.2000,  1.2000]])


In [27]:
##Compute the gradient of a tensor
x = torch.rand(5, requires_grad=True)
y = x * 2 + x ** 2

print(x)
print(y) ##y = [2x_1, 2x_2, ..., 2x_5]
gradient = torch.tensor([1., 2., 1., 1., 1.]) ##we need to specify a gradient argument

y.backward(gradient) ## dy/dx = [1*(2*0.299+2), 2*(2*0.64+2),...]
print(x.grad)

tensor([ 0.2991,  0.6440,  0.8020,  0.8198,  0.9514])
tensor([ 0.6877,  1.7027,  2.2470,  2.3116,  2.8078])
tensor([ 2.5982,  6.5760,  3.6039,  3.6395,  3.9027])
