# Tutorial 1 of DDM5005
Yaowen ZHANG(yaowen.zhang@connect.ust.hk)

This tutorial covers basic applications of PyTorch. [PyTorch](https://pytorch.org/tutorials/) is a deep learning library developed by Meta AI. It uses computational graphs and automatic differentiation (autodiff) to build and train neural networks.

`device` specifies where the model is trained and where the data is loaded. PyTorch offers three common device options: CPU, GPU (CUDA), and MPS.

GPU and CPU are the two most common devices. MPS is designed for macOS. If you have multiple GPUs, you can distinguish them by index, for example:

'cuda:0', 'cuda:1', 'cuda:2'......

In [42]:
import torch
print('torch version:', torch.__version__)

# Pick the device used for training, in order of preference:
# CUDA GPU, then Apple MPS, then CPU.
# `torch.backends.mps` does not exist on older torch builds, so it is looked up
# defensively to keep this cell runnable there.
mps_backend = getattr(torch.backends, "mps", None)
if torch.cuda.is_available():
    device = "cuda"
elif mps_backend is not None and mps_backend.is_available():
    device = "mps"
else:
    device = "cpu"
print('Using device:', device)


torch version: 1.12.1
Using device: cpu


### concept ###
A tensor can be seen as a multidimensional array. For instance, a vector is a tensor with 1 dimension, and a matrix is a tensor with 2 dimensions.

In [43]:
# Sample a 2x3 tensor; every entry is drawn from the standard normal distribution.
a = torch.randn((2, 3))
print('tensor a:', a)
# Inspect the metadata of the tensor: shape, element type and device.
for label, value in (('a.shape:', a.size()), ('a.dtype:', a.dtype), ('a.device', a.device)):
    print(label, value)

# Convert the same values to double precision.
a = a.double()
print('tensor a:', a)
print('a.dtype:', a.dtype)

# The dtype can also be requested directly at creation time.
a = torch.randn((2, 3), dtype=torch.float64)
# Each statistic is available both as a torch function and as a tensor method.
print('a.mean:', torch.mean(a), a.mean())
print('a.var:', torch.var(a), a.var())

tensor a: tensor([[1.2631, 0.4896, 0.6071],
        [2.2538, 1.4626, 0.8342]])
a.shape: torch.Size([2, 3])
a.dtype: torch.float32
a.device cpu
tensor a: tensor([[1.2631, 0.4896, 0.6071],
        [2.2538, 1.4626, 0.8342]], dtype=torch.float64)
a.dtype: torch.float64
a.mean: tensor(-0.3491, dtype=torch.float64) tensor(-0.3491, dtype=torch.float64)
a.var: tensor(0.9889, dtype=torch.float64) tensor(0.9889, dtype=torch.float64)


### Initialize a tensor ###


In [44]:
# Create a tensor whose entries are all zero (default dtype).
a = torch.zeros((2,3))
print('a:', a)
print('a.dtype', a.dtype)

# Create a tensor whose entries are all one, with an explicit dtype.
a = torch.ones((2, 2), dtype=torch.float16)
print('a:', a)

# zeros_like copies the shape and dtype of `a` but fills the result with zeros.
b = torch.zeros_like(a)
print('b:', b)

# new_empty(0) is the documented replacement for the legacy Tensor.new():
# it creates an empty tensor with the same dtype and device as `a`.
b = a.new_empty(0)
print('b.shape', b.shape)
print('b:', b)      # b is empty, but has the same properties (dtype, device) as a


a: tensor([[0., 0., 0.],
        [0., 0., 0.]])
a.dtype torch.float32
a: tensor([[1., 1.],
        [1., 1.]], dtype=torch.float16)
b: tensor([[0., 0.],
        [0., 0.]], dtype=torch.float16)
b.shape torch.Size([0])
b: tensor([], dtype=torch.float16)


In [45]:
import numpy as np
# Round trip: NumPy array -> torch tensor -> NumPy array.
a = np.array([[1, 2], [3, 4]])
b = torch.from_numpy(a)
print(a, b, b.numpy(), sep='\n')

# Five evenly spaced points on [0, 1] in NumPy and in torch.
a = np.linspace(0, 1, 5)
b = torch.linspace(0, 1, 5)
print(a, b, sep='\n')

# Integer range 0..4 in NumPy and in torch.
a = np.arange(0, 5)
b = torch.arange(0, 5)
print(a, b, sep='\n')

[[1 2]
 [3 4]]
tensor([[1, 2],
        [3, 4]])
[[1 2]
 [3 4]]
[0.   0.25 0.5  0.75 1.  ]
tensor([0.0000, 0.2500, 0.5000, 0.7500, 1.0000])
[0 1 2 3 4]
tensor([0, 1, 2, 3, 4])


In [50]:
a = torch.tensor([[1, 2, 3], [4, 5, 6]])
print('a.shape', a.shape,'\n a:', a)
# view() gives the same data a new shape.
b = a.view(3, 2)
print('a:', a)
print('b:', b)
# Writing through the view also changes `a`, as the prints below show.
b[0, 0] = 0
print('b:', b)
print('a:', a)

# After a transpose, the tensor has to be made contiguous before view() works.
b= (a.t().contiguous()).view(1, 6)
print(b)
# Calling view() directly on the transposed tensor is expected to fail.
# The error is caught and displayed so that "Run All" can continue past this cell.
try:
    b = (a.t()).view(1, 6)
except RuntimeError as err:
    print(f'{type(err).__name__}: {err}')

a.shape torch.Size([2, 3]) 
 a: tensor([[1, 2, 3],
        [4, 5, 6]])
a: tensor([[1, 2, 3],
        [4, 5, 6]])
b: tensor([[1, 2],
        [3, 4],
        [5, 6]])
b: tensor([[0, 2],
        [3, 4],
        [5, 6]])
a: tensor([[0, 2, 3],
        [4, 5, 6]])
tensor([[0, 4, 2, 5, 3, 6]])


RuntimeError: view size is not compatible with input tensor's size and stride (at least one dimension spans across two contiguous subspaces). Use .reshape(...) instead.

`view`: the two variables share the same memory. `view` also requires the tensor's memory layout to be compatible with the new shape (the contiguity constraint).


In [52]:
a = torch.tensor([[1, 2, 3], [4, 5, 6]])
print('a.shape', a.size(),'\n a:', a)

# Reshape 2x3 -> 3x2 and show both tensors.
b = torch.reshape(a, (3, 2))
print('a:', a)
print('b:', b)

# Overwrite the first entry of the reshaped tensor and compare with `a`.
b[0][0] = 0
print('b:', b)
print('a:', a)

# Reshape the transposed tensor into a single row.
b = torch.reshape(a.transpose(0, 1), (1, 6))
print('b:', b)
print('a:', a)

# Overwrite the first entry again and compare with `a` once more.
b[0][0] = 1
print('b:', b)
print('a:', a)

a.shape torch.Size([2, 3]) 
 a: tensor([[1, 2, 3],
        [4, 5, 6]])
a: tensor([[1, 2, 3],
        [4, 5, 6]])
b: tensor([[1, 2],
        [3, 4],
        [5, 6]])
b: tensor([[0, 2],
        [3, 4],
        [5, 6]])
a: tensor([[0, 2, 3],
        [4, 5, 6]])
b: tensor([[0, 4, 2, 5, 3, 6]])
a: tensor([[0, 2, 3],
        [4, 5, 6]])
b: tensor([[1, 4, 2, 5, 3, 6]])
a: tensor([[0, 2, 3],
        [4, 5, 6]])


## operation ##
element-wise product and matrix product

In [56]:
a = torch.tensor([[1, 2], [3, 4]])
b = torch.tensor([[1, 1], [2, 2]])
for label, value in (('a:', a), ('b:', b)):
    print(label, value)
# Entry-by-entry multiplication.
print('element-wise product:\n', torch.mul(a, b))
# Matrix multiplication (rows of a times columns of b).
print('matrix product:\n', a @ b)

a: tensor([[1, 2],
        [3, 4]])
b: tensor([[1, 1],
        [2, 2]])
element-wise product:
 tensor([[1, 2],
        [6, 8]])
matrix product:
 tensor([[ 5,  5],
        [11, 11]])


broadcast mechanism

In [57]:
a = torch.tensor([[1, 2], [3, 4]])
b = torch.tensor([1, 2])
print(a.size(), b.size())
print(a, b)
# The 1-D tensor b is multiplied against every row of a.
print(torch.mul(a, b))

torch.Size([2, 2]) torch.Size([2])
tensor([[1, 2],
        [3, 4]]) tensor([1, 2])
tensor([[1, 4],
        [3, 8]])


min, max, sum

In [64]:
a = torch.randn((2, 3))
print(a)
# Largest entry, via the torch function and via the tensor method.
print(torch.max(a), a.max())
# Sum over the first dimension, then over the last dimension.
print(torch.sum(a, dim=0), torch.sum(a, dim=-1))

tensor([[ 0.0787,  1.7426, -0.7115],
        [-0.3720,  1.1657,  1.6526]])
tensor(1.7426) tensor(1.7426)
tensor([-0.2934,  2.9083,  0.9410]) tensor([1.1097, 2.4463])


In [70]:
a = torch.randn((2, 3))
print(a)
print(a[0][0])                    # single entry
print(a[1])                       # second row
print(a.flatten())                # flatten the tensor
print(a.unsqueeze(0))             # add a leading dimension -> shape (1, 2, 3)

# Stack extra rows below `a` by concatenating along dimension 0.
b = torch.randn((1, 3))
print(torch.cat([a, b], dim=0))
print(torch.cat([a, b, b], dim=0))

tensor([[ 0.2780, -2.5604,  0.3979],
        [-1.2499,  1.0249, -1.0744]])
tensor(0.2780)
tensor([-1.2499,  1.0249, -1.0744])
tensor([ 0.2780, -2.5604,  0.3979, -1.2499,  1.0249, -1.0744])
tensor([[[ 0.2780, -2.5604,  0.3979],
         [-1.2499,  1.0249, -1.0744]]])
tensor([[ 0.2780, -2.5604,  0.3979],
        [-1.2499,  1.0249, -1.0744],
        [ 1.4849, -0.8208,  0.7947]])
tensor([[ 0.2780, -2.5604,  0.3979],
        [-1.2499,  1.0249, -1.0744],
        [ 1.4849, -0.8208,  0.7947],
        [ 1.4849, -0.8208,  0.7947]])


## copy problem ##
The key question is whether new memory is allocated for the result.



In [73]:
a = torch.tensor([[1, 2, 3], [4, 5, 6]])
print(a.size(), a)
# Take the first row of a.
b = a[0]
print(b.size(), b)
# Writing into b also changes a: this is a shallow copy, both share the same memory.
b[0] = 0
print(b)
print(a)

torch.Size([2, 3]) tensor([[1, 2, 3],
        [4, 5, 6]])
torch.Size([3]) tensor([1, 2, 3])
tensor([0, 2, 3])
tensor([[0, 2, 3],
        [4, 5, 6]])


In [74]:
a = torch.tensor([[1, 2, 3], [4, 5, 6]])
print(a.size(), a)
# clone() makes a copy of the first row, so writing into b leaves a untouched.
b = torch.clone(a[0])
print(b.size(), b)
b[0] = 0
print(b)
print(a)

torch.Size([2, 3]) tensor([[1, 2, 3],
        [4, 5, 6]])
torch.Size([3]) tensor([1, 2, 3])
tensor([0, 2, 3])
tensor([[1, 2, 3],
        [4, 5, 6]])


An in-place operation is an operation that directly changes the content of a given tensor (vector, matrix, ...) without making a copy.

Avoid in-place operations on tensors that require gradients in PyTorch, since they can break the computation graph.


In [80]:
from torch import autograd
# Tensor that autograd tracks gradients for.
a = torch.tensor([[1, 2], [3, 4]], dtype=torch.float32, requires_grad=True)
y = a.sum()

# autograd.grad returns one gradient per input; there is a single input here.
(grads,) = autograd.grad(y, a)
print(grads)


tensor([[1., 1.],
        [1., 1.]])


Using an in-place operation to replace one entry of `a`:

In [81]:
a = torch.tensor([[1, 2], [3, 4]], dtype=torch.float32,  requires_grad=True)
# Overwriting an entry of a tensor created with requires_grad=True is expected to
# raise. The error is caught and displayed so that "Run All" can continue.
try:
    a[0, 0] = 0
except RuntimeError as err:
    print(f'{type(err).__name__}: {err}')
else:
    # Only reached if the in-place write was accepted.
    y = torch.sum(a)
    grads = autograd.grad(outputs=y, inputs=a)[0]
    print(grads)

RuntimeError: a view of a leaf Variable that requires grad is being used in an in-place operation.

In [82]:
a = torch.tensor([[1, 2], [3, 4]], dtype=torch.float32, requires_grad=True)
# Rather than overwriting a[0, 0], multiply by a mask that is zero at that position.
mask = torch.ones_like(a)
mask[0][0] = 0
y = (a * mask).sum()

# The masked entry no longer contributes to y, so its gradient is zero.
(grads,) = autograd.grad(y, a)
print(grads)

tensor([[0., 1.],
        [1., 1.]])


## Broadcast ##

In [83]:
# Shapes (4,) and (1,): print both input shapes and the shape of their product.
a = torch.randn((4,))
b = torch.randn((1,))
print(a.size(), b.size(), torch.mul(a, b).size())

torch.Size([4]) torch.Size([1]) torch.Size([4])


In [85]:
# Shapes (4, 3) and (3,): print both input shapes and the shape of their product.
a = torch.randn((4, 3))
b = torch.randn((3,))
print(a.size(), b.size(), torch.mul(a, b).size())

torch.Size([4, 3]) torch.Size([3]) torch.Size([4, 3])


In [88]:
a = torch.randn(4, 1)
b = torch.randn(3,1)
# Shapes (4, 1) and (3, 1) cannot be multiplied element-wise: dimension 0 differs
# and neither size is 1. The expected error is caught and displayed so that
# "Run All" can continue.
try:
    print(a.shape, b.shape, (a*b).shape)
except RuntimeError as err:
    print(f'{type(err).__name__}: {err}')

RuntimeError: The size of tensor a (4) must match the size of tensor b (3) at non-singleton dimension 0

In [87]:
# Shapes (1, 4, 1) and (3, 1, 1): print both input shapes and the shape of their product.
a = torch.randn((1, 4, 1))
b = torch.randn((3, 1, 1))
print(a.size(), b.size(), torch.mul(a, b).size())

torch.Size([1, 4, 1]) torch.Size([3, 1, 1]) torch.Size([3, 4, 1])


## Permutation and Transpose ##

In [91]:
a = torch.randn((4, 3, 2))
print(a.size())

# Reorder the dimensions: old dims (1, 2, 0) become new dims (0, 1, 2).
print(a.permute((1, 2, 0)).size())

torch.Size([4, 3, 2])
torch.Size([3, 2, 4])


In [93]:
# Swap the two dimensions of a matrix.
a = torch.randn((2, 3))
print(torch.transpose(a, 0, 1).size())

# Swap dimensions 1 and 3 of a 4-D tensor; the other dimensions stay in place.
a = torch.randn((1, 2, 3, 4))
print(torch.transpose(a, 1, 3).size())

torch.Size([3, 2])
torch.Size([1, 4, 3, 2])


## Special Matrix ##

identity matrix


In [97]:
# 3x3 identity matrix: ones on the diagonal, zeros elsewhere.
a = torch.eye(n=3)
print(a)

tensor([[1., 0., 0.],
        [0., 1., 0.],
        [0., 0., 1.]])


diagonal matrix

In [99]:
# Build a square matrix with the entries of the 1-D tensor on its diagonal.
a = torch.tensor([1, 2, 3, 4])
print(a.diag())

tensor([[1, 0, 0, 0],
        [0, 2, 0, 0],
        [0, 0, 3, 0],
        [0, 0, 0, 4]])


`torch.triu()` returns the upper triangular part of a matrix.
`torch.tril()` returns the lower triangular part of a matrix.

In [103]:
a = torch.randn((4, 4))
print(a)
# Upper triangle starting at the main diagonal (offset 0), then at the first
# and second diagonals above it.
for offset in (0, 1, 2):
    print(torch.triu(a, diagonal=offset))
# Lower triangle including the main diagonal.
print(torch.tril(a))

tensor([[-0.6811,  0.0451, -2.2809, -0.2531],
        [ 2.6170, -0.4712,  1.1841, -0.9455],
        [-0.6117,  0.1086, -0.1518,  0.3432],
        [-0.9744,  0.1601, -0.2506, -1.2778]])
tensor([[-0.6811,  0.0451, -2.2809, -0.2531],
        [ 0.0000, -0.4712,  1.1841, -0.9455],
        [ 0.0000,  0.0000, -0.1518,  0.3432],
        [ 0.0000,  0.0000,  0.0000, -1.2778]])
tensor([[ 0.0000,  0.0451, -2.2809, -0.2531],
        [ 0.0000,  0.0000,  1.1841, -0.9455],
        [ 0.0000,  0.0000,  0.0000,  0.3432],
        [ 0.0000,  0.0000,  0.0000,  0.0000]])
tensor([[ 0.0000,  0.0000, -2.2809, -0.2531],
        [ 0.0000,  0.0000,  0.0000, -0.9455],
        [ 0.0000,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  0.0000]])
tensor([[-0.6811,  0.0000,  0.0000,  0.0000],
        [ 2.6170, -0.4712,  0.0000,  0.0000],
        [-0.6117,  0.1086, -0.1518,  0.0000],
        [-0.9744,  0.1601, -0.2506, -1.2778]])
