# Introduction to PyTorch
Based on the [60 min blitz](https://pytorch.org/tutorials/beginner/deep_learning_60min_blitz.html).

In [1]:
from __future__ import print_function

import numpy as np
import torch

In [2]:
# torch.empty allocates a 5x3 tensor WITHOUT initialising it: the displayed
# values are whatever was already in memory, so they vary from run to run.
x = torch.empty((5, 3))
x

tensor([[                    0.0000,                    -0.0000,
                             0.0000],
        [-36902477754487799808.0000,                     0.0000,
                             0.0000],
        [                    0.0000,                     0.0000,
                             0.0000],
        [                    0.0000,                     0.0000,
                             0.0000],
        [                    0.0000,                     0.0000,
                             0.0000]])

In [3]:
# A 5x3 tensor of random samples drawn uniformly from [0, 1).
x = torch.rand((5, 3))
x

tensor([[0.5494, 0.1876, 0.9626],
        [0.5428, 0.3290, 0.5339],
        [0.8605, 0.3591, 0.3442],
        [0.4536, 0.2631, 0.7344],
        [0.7293, 0.2673, 0.2126]])

In [4]:
# A 5x3 tensor of zeros with an integer dtype (torch.long is torch.int64).
x = torch.zeros((5, 3), dtype=torch.int64)
x

tensor([[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]])

In [6]:
# Build a tensor directly from Python data; the dtype is inferred from it.
x = torch.tensor((5.5, 3.0))
x

tensor([5.5000, 3.0000])

In [7]:
# new_* methods take the desired size and create a fresh tensor from an
# existing one; here the dtype is explicitly set to 64-bit float.
x = x.new_ones((5, 3), dtype=torch.float64)
print(x)

# *_like functions reuse the size of their argument; the dtype is overridden
# again, this time back to 32-bit float.
x = torch.randn_like(x, dtype=torch.float32)
print(x)

tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]], dtype=torch.float64)
tensor([[-1.2997, -1.6398, -0.7977],
        [-1.6208,  0.4727, -0.4422],
        [ 0.9468, -0.2420, -0.0022],
        [ 1.4043, -1.3100,  0.0150],
        [-0.2064,  0.6055,  0.9561]])


In [9]:
# .shape is the attribute form of .size(); both give a torch.Size, which is a
# tuple subclass and therefore supports all tuple operations.
print(x.shape)

torch.Size([5, 3])


In [11]:
# Element-wise addition, spelled two equivalent ways: operator and function.
x, y = torch.rand((5, 3)), torch.rand((5, 3))

print(x + y)
print(torch.add(x, y))

tensor([[0.3333, 1.2014, 1.3156],
        [1.0366, 1.5201, 0.7575],
        [0.3058, 1.5707, 0.2606],
        [0.5500, 1.5863, 1.2054],
        [0.8386, 0.6073, 0.8742]])
tensor([[0.3333, 1.2014, 1.3156],
        [1.0366, 1.5201, 0.7575],
        [0.3058, 1.5707, 0.2606],
        [0.5500, 1.5863, 1.2054],
        [0.8386, 0.6073, 0.8742]])


In [12]:
# Pre-allocate the destination tensor, then have torch.add write the sum
# into it through the `out` argument instead of returning a new tensor.
result = torch.empty((5, 3))
torch.add(x, y, out=result)
result

tensor([[0.3333, 1.2014, 1.3156],
        [1.0366, 1.5201, 0.7575],
        [0.3058, 1.5707, 0.2606],
        [0.5500, 1.5863, 1.2054],
        [0.8386, 0.6073, 0.8742]])

Any operation that mutates a tensor in-place is suffixed with an underscore (`_`). For example, `x.copy_(y)` and `x.t_()` both change `x`.

In [14]:
# In-place addition: y is overwritten with y + x. add_() returns y itself, so
# leaving the call as the cell's last expression displays the updated tensor.
# NOTE(review): because y is mutated, re-running this cell adds x again; the
# recorded output looks like the result of more than one execution -- confirm
# with Restart Kernel -> Run All.
y.add_(x)

tensor([[0.5132, 1.9347, 2.1626],
        [1.2527, 2.0847, 0.9709],
        [0.5318, 2.2602, 0.4692],
        [0.8590, 2.4415, 1.9832],
        [1.5564, 1.0223, 1.6195]])

In [15]:
# NumPy-style indexing works on tensors: every row, second column.
second_column = x[:, 1]
print(second_column)

tensor([0.7333, 0.5646, 0.6895, 0.8552, 0.4150])


In [22]:
# view() reinterprets the same data with a different shape, without copying.
x = torch.randn((4, 4))
y = x.view(16)       # flatten to 16 elements
z = x.view(-1, 8)    # -1 is inferred from the other dimensions (16 / 8 = 2)
print('Sizes:', x.shape, y.shape, z.shape)

print('Modifying x in-place')
x[0, 0] = 1.0
# The write through x is visible through y as well, since y is a view of x.
# The labels are abbreviated: the values printed are x[0, 0] and y[0].
first_of_x, first_of_y = x[0, 0], y[0]
print('x[0]: {:.2f}, y: {:.2f}'.format(first_of_x, first_of_y))

Sizes: torch.Size([4, 4]) torch.Size([16]) torch.Size([2, 8])
Modifying x in-place
x[0]: 1.00, y: 1.00


In [24]:
# .item() extracts the value of a one-element tensor as a plain Python number.
x = torch.rand((1,))
print(x.item())

0.4636152982711792


In [27]:
# Tensor.numpy() returns an ndarray that refers to the same memory as the
# tensor, so an in-place update of the tensor shows up in the array as well.
a = torch.ones((5, 1))
b = a.numpy()

a += 1   # in-place on the tensor, same effect as a.add_(1)
print(a)
print(b)

tensor([[2.],
        [2.],
        [2.],
        [2.],
        [2.]])
[[2.]
 [2.]
 [2.]
 [2.]
 [2.]]


In [28]:
# torch.from_numpy() wraps the ndarray without copying: the tensor and the
# array share memory, so updating the array in place also changes the tensor.
# (numpy is imported as `np` in the notebook's import cell at the top.)
a = np.ones(5)
b = torch.from_numpy(a)
np.add(a, 1, out=a)   # in-place add: the result is written back into `a`
print(a)
print(b)

[2. 2. 2. 2. 2.]
tensor([2., 2., 2., 2., 2.], dtype=torch.float64)


In [35]:
# requires_grad=True asks autograd to record the operations applied to x so
# that gradients with respect to it can be computed later.
x = torch.ones((2, 2), requires_grad=True)
print(x)

tensor([[1., 1.],
        [1., 1.]], requires_grad=True)


In [36]:
# y is the result of an operation on x, so it carries a grad_fn that links it
# back to the computation that produced it.
y = x.add(2)
print(y)

tensor([[3., 3.],
        [3., 3.]], grad_fn=<AddBackward>)


In [37]:
# z = 3 * y^2 element-wise; out reduces z to a single scalar by averaging.
z = (y * y) * 3
out = torch.mean(z)
print(z, out)

tensor([[27., 27.],
        [27., 27.]], grad_fn=<MulBackward>) tensor(27., grad_fn=<MeanBackward1>)


In [38]:
# Gradient tracking is off unless requested; requires_grad_() switches the
# flag on in place for an existing tensor.
a = torch.randn((2, 2))
a = 3 * a / (a - 1)
print(a.requires_grad)    # not tracked yet
a.requires_grad_(True)
print(a.requires_grad)    # tracked from here on
b = torch.sum(a * a)
print(b.grad_fn)          # b was produced by an operation, so it has a grad_fn

False
True
<SumBackward0 object at 0x106a812e8>


Compute gradients by backpropagation:

In [39]:
# Back-propagate from the scalar `out` (the mean of z); autograd accumulates
# d(out)/dx into x.grad, since x was created with requires_grad=True.
out.backward()

In [40]:
# x.grad holds d(out)/dx as populated by the preceding out.backward() call.
print(x.grad)

tensor([[4.5000, 4.5000],
        [4.5000, 4.5000]])


Where does this come from? Let's call the `out` tensor $o$. We have

$$
o = \frac{1}{4}\sum_i z_i,
$$

$$
z_i = 3(x_i+2)^2,
$$

and

$$
z_i\big|_{x_i=1} = 27.
$$

Therefore, by the chain rule,

$$
\frac{\partial o}{\partial x_i} = \frac{1}{4}\cdot 6(x_i+2) = \frac{3}{2}(x_i+2),
$$

hence

$$
\frac{\partial o}{\partial x_i}\bigg|_{x_i=1} = \frac{9}{2} = 4.5.
$$

In [62]:
# A non-scalar example: y is a 3-element vector function of x.
x = torch.randn((3,), requires_grad=True)

# A single doubling (rather than repeatedly doubling y until its norm exceeds
# 1000) keeps the relationship simple: every element of y is exactly 2 * x.
y = 2 * x

print(y)

tensor([-0.0845,  1.7869, -2.4562], grad_fn=<MulBackward>)


In [64]:
# y is not a scalar, so backward() needs a vector to contract with: it computes
# the vector-Jacobian product of `gradients` with dy/dx and adds it to x.grad.
gradients = torch.tensor([0.1, 1.0, 0.0001], dtype=torch.float32)
y.backward(gradients)

# NOTE(review): with y equal to twice x, one run from a fresh kernel should
# give 2 * gradients. The recorded output shows 4 * gradients, presumably
# because .grad accumulated over an earlier backward pass -- confirm with
# Restart Kernel -> Run All.
print(x.grad)

tensor([0.4000, 4.0000, 0.0004])


In [42]:
# Results computed from a tensor that requires grad are tracked too ...
print(x.requires_grad)
print(x.pow(2).requires_grad)

# ... unless the computation runs inside torch.no_grad(), which turns
# gradient tracking off for everything in the block.
with torch.no_grad():
    print(x.pow(2).requires_grad)

True
True
False
