The goals of this notebook are:
1. Understand basic tensors in PyTorch and how they relate to neural networks.
2. Train a simple neural network using PyTorch.

# What is Pytorch

1. A replacement for NumPy
2. A deep learning platform

## Tensors

In [4]:
import torch


Construct an uninitialized matrix (its values are whatever was already in the allocated memory):

In [5]:
# Allocate a 3x5 tensor without initializing it; the printed values are
# whatever happened to be in that memory.
x = torch.empty((3, 5))
print(x)

tensor([[ 0.0000, -0.0000,  0.0000, -0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  0.0000, -0.0000],
        [ 0.0000, -0.0000,  0.0000,  0.0000,  0.0000]])


random matrix

In [7]:
# 5x3 tensor of samples drawn uniformly from [0, 1)
x = torch.rand((5, 3))
print(x)

tensor([[0.1805, 0.4055, 0.8403],
        [0.2339, 0.8852, 0.8360],
        [0.6637, 0.1865, 0.2590],
        [0.0250, 0.8807, 0.4434],
        [0.6746, 0.9548, 0.0405]])


In [11]:
# 5x3 tensor of zeros with an integer (int64) dtype.
x = torch.zeros((5, 3), dtype=torch.long)
# Indexing down to a single element still yields a (0-dim) torch.Tensor,
# not a Python number.
print(x, type(x), type(x[0, 0]), sep="\n")

tensor([[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]])
<class 'torch.Tensor'>
<class 'torch.Tensor'>


construct directly from data

In [12]:
# Build a tensor directly from Python data. torch.tensor() is the documented
# factory for this; torch.Tensor(...) is the legacy constructor that silently
# forces the default float type. The dtype is spelled out so the result is
# still a float tensor.
x = torch.tensor([3, 4, 5], dtype=torch.float)
print(x)

tensor([3., 4., 5.])


In [13]:
# .shape is the attribute form of .size(); both give a torch.Size
print(x.shape)

torch.Size([3])


construct new tensor based on old one

In [14]:
# new_* methods create a tensor from an existing one; the dtype is
# overridden here to float64.
x = x.new_ones((3, 3), dtype=torch.float64)
print(x)

tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]], dtype=torch.float64)


In [16]:
# *_like functions copy the shape of their input; the dtype is overridden to int32
x = torch.ones_like(x, dtype=torch.int)
print(x)

tensor([[1, 1, 1],
        [1, 1, 1],
        [1, 1, 1]], dtype=torch.int32)


In [18]:
# Standard-normal samples with the same shape as x. randn needs a floating
# dtype, so the int32 dtype is overridden.
x = torch.randn_like(x, dtype=torch.float)
print(x)
print(x.shape)

tensor([[-0.0185,  1.4717,  0.4568],
        [-0.8998,  0.4444,  0.6651],
        [-0.4391, -0.5540, -1.6694]])
torch.Size([3, 3])


In [20]:
# A 3-dimensional tensor: three 4x4 matrices of uniform random values
x = torch.rand((3, 4, 4))
print(x)
print(x.shape)

tensor([[[0.0856, 0.4866, 0.6926, 0.0367],
         [0.2352, 0.0184, 0.8140, 0.3392],
         [0.5096, 0.7534, 0.9133, 0.3604],
         [0.7197, 0.1477, 0.4538, 0.9677]],

        [[0.5368, 0.7856, 0.5766, 0.5369],
         [0.0594, 0.3615, 0.9465, 0.6057],
         [0.0638, 0.0212, 0.2269, 0.1415],
         [0.2107, 0.9636, 0.0297, 0.2266]],

        [[0.3703, 0.8431, 0.2217, 0.4317],
         [0.6540, 0.0782, 0.2226, 0.3643],
         [0.9554, 0.2680, 0.5717, 0.5713],
         [0.1130, 0.4102, 0.2661, 0.0365]]])
torch.Size([3, 4, 4])


## operations

In [25]:
# Element-wise addition with the + operator on two tensors of the same shape
x = torch.rand((5, 3))
y = torch.ones((5, 3))
print(x, y, x + y, sep="\n")

tensor([[0.4075, 0.1965, 0.5514],
        [0.8019, 0.9419, 0.6622],
        [0.6037, 0.7357, 0.7967],
        [0.1963, 0.7603, 0.6708],
        [0.4799, 0.8101, 0.3445]])
tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]])
tensor([[1.4075, 1.1965, 1.5514],
        [1.8019, 1.9419, 1.6622],
        [1.6037, 1.7357, 1.7967],
        [1.1963, 1.7603, 1.6708],
        [1.4799, 1.8101, 1.3445]])


In [26]:
# Alternatively, use the functional form torch.add (same result as x + y above)
print(torch.add(x,y))

tensor([[1.4075, 1.1965, 1.5514],
        [1.8019, 1.9419, 1.6622],
        [1.6037, 1.7357, 1.7967],
        [1.1963, 1.7603, 1.6708],
        [1.4799, 1.8101, 1.3445]])


output to some variable

In [27]:
# Pre-allocate the destination tensor, then have torch.add write the sum
# into it through the `out=` argument.
result = torch.empty((5, 3))
torch.add(x, y, out=result)
print(result)

tensor([[1.4075, 1.1965, 1.5514],
        [1.8019, 1.9419, 1.6622],
        [1.6037, 1.7357, 1.7967],
        [1.1963, 1.7603, 1.6708],
        [1.4799, 1.8101, 1.3445]])


In [28]:
# Another way is to add in place.
# Any operation that mutates a tensor in place has a trailing underscore in its name.
# For example, x.copy_(y) and x.t_() both change x itself.
# Example (left commented out, so this cell does not modify y):
#y.add_(x)
#print(y)


get part of tensor. similar to numpy

In [30]:
# Second column of x: all rows (:), column index 1 -- same slicing syntax as NumPy
print(x[:, 1])

tensor([0.1965, 0.9419, 0.7357, 0.7603, 0.8101])


reshape the tensor

In [32]:
# view() reinterprets the same 16 elements under a different shape.
x = torch.rand((4, 4))
y = x.view(1, 16)   # one row of 16 elements
z = x.view(-1, 2)   # -1 lets PyTorch infer that dimension from the others (16 / 2 = 8)
print(x, y, z, sep="\n")
print(x.shape, y.shape, z.shape)

tensor([[0.3921, 0.5822, 0.2177, 0.1633],
        [0.4591, 0.6607, 0.7859, 0.7058],
        [0.9665, 0.6715, 0.9708, 0.5682],
        [0.3508, 0.9410, 0.0879, 0.8516]])
tensor([[0.3921, 0.5822, 0.2177, 0.1633, 0.4591, 0.6607, 0.7859, 0.7058, 0.9665,
         0.6715, 0.9708, 0.5682, 0.3508, 0.9410, 0.0879, 0.8516]])
tensor([[0.3921, 0.5822],
        [0.2177, 0.1633],
        [0.4591, 0.6607],
        [0.7859, 0.7058],
        [0.9665, 0.6715],
        [0.9708, 0.5682],
        [0.3508, 0.9410],
        [0.0879, 0.8516]])
torch.Size([4, 4]) torch.Size([1, 16]) torch.Size([8, 2])


get the value of tensor

In [35]:
# Indexing a single element gives a 0-dim tensor; .item() converts it to a
# plain Python number.
print(x[1, 1])
print(x[1, 1].item())
# print(x.item()) # fails: only tensors with exactly one element can be converted to Python scalars

tensor(0.6607)
0.6606928706169128


## Numpy bridge

A torch tensor and the NumPy array converted from it (or vice versa) share the same underlying memory, so changing one in place also changes the other.

In [36]:
# Build the float tensor with the documented torch.tensor() factory rather
# than the legacy torch.Tensor(...) constructor. The dtype is spelled out so
# the result is still float32.
x = torch.tensor([1, 2, 3, 4], dtype=torch.float)
print(x)

tensor([1., 2., 3., 4.])


In [37]:
# Convert the tensor to a NumPy array; the next cell's output shows the two
# still refer to the same data.
y = x.numpy()
print(y)

[1. 2. 3. 4.]


In [38]:
# In-place add on the tensor; the NumPy array y obtained from x.numpy()
# reflects the change as well.
x.add_(1)
print(x, y, sep="\n")

tensor([2., 3., 4., 5.])
[2. 3. 4. 5.]


In [42]:
import numpy as np
# Wrap a NumPy array as a tensor; the float64 dtype of np.ones carries over
y = np.ones((4,))
x = torch.from_numpy(y)
print(x)

tensor([1., 1., 1., 1.], dtype=torch.float64)


In [43]:
y = np.add(y, 1) # Pitfall: np.add returns a NEW array and the name y is rebound to it,
                 # so y no longer shares memory with x -- x keeps its old values.
print(y)
print(x)

[2. 2. 2. 2.]
tensor([1., 1., 1., 1.], dtype=torch.float64)


In [44]:
# This add is in place, but y was rebound to a new array in the previous cell,
# so it is no longer the array backing x and x still does not change.
np.add(y, 1, out=y)
print(y)
print(x)

[3. 3. 3. 3.]
tensor([1., 1., 1., 1.], dtype=torch.float64)


In [45]:
import numpy as np
# The correct way to see the sharing: modify the ORIGINAL array in place
# (out=a), so the tensor created from it shows the same new values.
a = np.ones((5,))
b = torch.from_numpy(a)
np.add(a, 1, out=a)
print(a, b, sep="\n")

[2. 2. 2. 2. 2.]
tensor([2., 2., 2., 2., 2.], dtype=torch.float64)


## CUDA Tensors

In [46]:
# Use a ``torch.device`` object to create tensors on the GPU or move them there.
# The whole cell is skipped on machines without CUDA.
if torch.cuda.is_available():
    device = torch.device('cuda')          # CUDA device object
    y = torch.ones(3, 3, device=device)    # create the tensor directly on the GPU
    x = torch.ones(3, 3)                   # created on the CPU
    # .to() is NOT in place: it returns a new tensor on the target device, so
    # the result must be assigned back. Without the assignment x stays on the
    # CPU and x + y mixes CPU and CUDA tensors, which raises an error.
    x = x.to(device)
    z = x + y
    print(z)
    print(z.to('cpu', torch.double))       # .to() can also change the dtype while moving
    

# Autograd automatic differentiation

## Tensor

When you set a tensor's `.requires_grad` attribute to `True`, PyTorch tracks the operations performed on it, and when you call `.backward()` it computes the gradients automatically. The gradient is accumulated in the tensor's `.grad` attribute. `.grad_fn` does not hold the gradient: it references the function that created the tensor (e.g. `AddBackward`).

To stop tracking gradients for a tensor, use `.detach()`.

When evaluating a model you do not need gradients, so wrap the code in `with torch.no_grad():`.

In [3]:
import torch

In [4]:
# requires_grad=True asks autograd to record operations performed on x
x = torch.ones((3, 3), requires_grad=True)
print(x)

tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]], requires_grad=True)


In [5]:
# y is the result of an operation on a tracked tensor, so it carries a
# grad_fn that records how it was created.
y = x + 2
print(y, y.grad_fn, sep="\n")

tensor([[3., 3., 3.],
        [3., 3., 3.],
        [3., 3., 3.]], grad_fn=<AddBackward>)
<AddBackward object at 0x11c9c8d30>


In [6]:
# z = 3 * y^2 element-wise; out is the scalar mean of z. Both carry a grad_fn
# (see the printed output) because they derive from a tensor that requires grad.
z = y*y*3
out = z.mean()
print(z, out)

tensor([[27., 27., 27.],
        [27., 27., 27.],
        [27., 27., 27.]], grad_fn=<MulBackward>) tensor(27., grad_fn=<MeanBackward1>)


In [7]:
# Tensors do not require grad by default; requires_grad_() (note the trailing
# underscore) flips the flag in place.
a = torch.rand((3, 3))
print(a.requires_grad)
a.requires_grad_(True)
print(a.requires_grad)

False
True


## Gradient

In [8]:
# out = mean(3 * (x + 2)^2) over 6 elements, so d(out)/dx = (x + 2) = 3
# for every element when x is all ones.
x = torch.ones((2, 3), requires_grad=True)
y = x + 2
z = y * y * 3
out = z.mean()
out.backward()   # fills x.grad with d(out)/dx
print(x.grad)

tensor([[3., 3., 3.],
        [3., 3., 3.]])


In [9]:
# Results computed from a tracked tensor are tracked too...
print(x.requires_grad)
print((x ** 2).requires_grad)

# ...unless the computation happens inside torch.no_grad(), where tracking is off.
with torch.no_grad():
    print((x ** 2).requires_grad)

True
True
False
