## PyTorch Basics

Phai Phongthiengtham

See https://pytorch.org/get-started/locally/ for installation

In [1]:
!python3 --version

Python 3.10.10


In [2]:
# Import PyTorch and report the version of the installed package.
import torch
print(torch.__version__)

1.13.1


### 1. Initializing tensors

In [3]:
# Factory functions: each takes the desired shape and, optionally, a dtype.
x1 = torch.empty((2, 3))                       # allocated but NOT initialized
x2 = torch.rand((2, 2))                        # random values
x3 = torch.zeros((2, 2))                       # all zeros
x4 = torch.ones((2, 2), dtype=torch.float64)   # explicit dtype (torch.double is the same type)
x5 = torch.ones((2, 2), dtype=torch.half)      # torch.half is the same type as torch.float16

# Show the last tensor and its shape, one per line.
print(x5, x5.size(), sep='\n')

tensor([[1., 1.],
        [1., 1.]], dtype=torch.float16)
torch.Size([2, 2])


### 2. Basic operations

In [4]:
# Two random 2x2 operands (x is drawn first, then y) used by every example below.
x, y = torch.rand(2, 2), torch.rand(2, 2)
for label, operand in (('x =', x), ('y =', y)):
    print(label, operand)

# Each arithmetic operator has a functional twin in the torch namespace.
# z1 always holds the operator form, z2 the function form.

# element-wise addition
z1, z2 = x + y, torch.add(x, y)
print('z1 =', z1)
print('z2 =', z2)
print('\ncheck if z1 == z2:\n')
print(z1 == z2)

# element-wise subtraction
z1, z2 = x - y, torch.sub(x, y)
print(z1 == z2)

# element-wise multiplication
z1, z2 = x * y, torch.mul(x, y)
print(z1 == z2)

# element-wise division
z1, z2 = x / y, torch.div(x, y)
print(z1 == z2)

x = tensor([[0.5792, 0.4390],
        [0.8570, 0.2436]])
y = tensor([[0.0793, 0.3000],
        [0.0034, 0.2840]])
z1 = tensor([[0.6585, 0.7390],
        [0.8603, 0.5276]])
z2 = tensor([[0.6585, 0.7390],
        [0.8603, 0.5276]])

check if z1 == z2:

tensor([[True, True],
        [True, True]])
tensor([[True, True],
        [True, True]])
tensor([[True, True],
        [True, True]])
tensor([[True, True],
        [True, True]])


### 3. In-place operations

In [5]:
# Draw the operands (x first, then y) and show them before anything is mutated.
x, y = torch.rand(2, 2), torch.rand(2, 2)
for label, value in (('x =', x), ('y =', y)):
    print(label, value)
print('\nany function with "_" will do inplace operation\n')

# add_ (note the trailing underscore) stores y + x back into y and returns y,
# so the updated tensor can be printed straight from the call.
print('y =', y.add_(x))

x = tensor([[0.7245, 0.7369],
        [0.4216, 0.4606]])
y = tensor([[0.0717, 0.5942],
        [0.2380, 0.3937]])

any function with "_" will do inplace operation

y = tensor([[0.7963, 1.3311],
        [0.6596, 0.8543]])


### 4. Slicing

In [6]:
x = torch.rand(5, 3)

# (label, value) pairs, printed in order; each is followed by a blank line.
examples = (
    ('x =', x),
    ('x[:, 0] =', x[:, 0]),                # first column
    ('x[1, :] =', x[1, :]),                # second row
    ('x[1, 1] =', x[1, 1]),                # one element (still a tensor)
    ('x[1, 1].item() =', x[1, 1].item()),  # converted to a plain number
)
for label, value in examples:
    print(label, value, '\n')

print('.item() only works when there is only one element in the tensor.')

x = tensor([[0.3912, 0.5674, 0.3688],
        [0.1471, 0.5974, 0.8289],
        [0.2684, 0.4225, 0.8702],
        [0.9961, 0.8886, 0.5931],
        [0.8074, 0.2100, 0.5713]]) 

x[:, 0] = tensor([0.3912, 0.1471, 0.2684, 0.9961, 0.8074]) 

x[1, :] = tensor([0.1471, 0.5974, 0.8289]) 

x[1, 1] = tensor(0.5974) 

x[1, 1].item() = 0.597387969493866 

.item() only works when there is only one element in the tensor.


### 5. Reshaping tensors
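- Two ways to change a tensor's shape: `tensor.view()` and `tensor.reshape()`.
- `tensor.view()` never copies: it returns a tensor that shares memory with the original, and raises an error when the requested shape is incompatible with the tensor's memory layout (e.g. some non-contiguous tensors).
- `tensor.reshape()` is more robust and will work on any tensor: it returns a view when possible and a copy otherwise.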

In [15]:
x = torch.rand(4, 3)
print('x =', x, '\n')

print('----- reshape tensors -----\n')
# Same 12 elements, rearranged into 2 rows of 6.
y = torch.reshape(x, (2, 6))
print('y = x.reshape(2, 6) = ', y, '\n')

# A size of -1 is inferred from the element count: here, a flat vector of 12.
z = torch.reshape(x, (-1,))
print('z = x.reshape(-1) = ', z, '\n')

print('----- view tensors -----\n')
x_as_2x6 = x.view((2, 6))
print('x.view(2, 6) = ', x_as_2x6)

x = tensor([[0.2487, 0.6643, 0.3853],
        [0.2413, 0.0839, 0.6120],
        [0.8982, 0.2242, 0.1700],
        [0.6750, 0.6319, 0.6323]]) 

----- reshape tensors -----

y = x.reshape(2, 6) =  tensor([[0.2487, 0.6643, 0.3853, 0.2413, 0.0839, 0.6120],
        [0.8982, 0.2242, 0.1700, 0.6750, 0.6319, 0.6323]]) 

z = x.reshape(-1) =  tensor([0.2487, 0.6643, 0.3853, 0.2413, 0.0839, 0.6120, 0.8982, 0.2242, 0.1700,
        0.6750, 0.6319, 0.6323]) 

----- view tensors -----

x.view(2, 6) =  tensor([[0.2487, 0.6643, 0.3853, 0.2413, 0.0839, 0.6120],
        [0.8982, 0.2242, 0.1700, 0.6750, 0.6319, 0.6323]])


### 6. Converting to and from numpy

In [8]:
import numpy as np

# --- Tensor -> NumPy ---
# For a CPU tensor, .numpy() does not copy: the array and the tensor are two
# handles on the same buffer, so an in-place change to one shows up in the other.
x = torch.ones(5)
y = x.numpy() # convert tensor to numpy array
print('x =', x, '\n')
print('y =', y, '\n')
# Fixed: the message used to mention "a and b", which do not exist in this cell.
print("BE CAREFUL - on CPU, x and y share the same memory.\n")
x.add_(1) # in-place update of the tensor only ...
print('x =', x, ' (+1 to all elements in x)\n')
print('y =', y, ' (y also changed)\n') # ... yet the array shows the new values too

print('------------\n')

# --- NumPy -> Tensor ---
# from_numpy keeps the array's dtype, so np.ones (float64) yields a float64 tensor.
x = np.ones(5)
y = torch.from_numpy(x) # create tensor from numpy array
print('y =', y, '\n')

print('In most cases, numpy arrays are converted to or from tensors only once.')

x = tensor([1., 1., 1., 1., 1.]) 

y = [1. 1. 1. 1. 1.] 

BE CAREFUL - if used in CPU, a and b will use the same memory.

x = tensor([2., 2., 2., 2., 2.])  (+1 to all element in x)

y = [2. 2. 2. 2. 2.]  (y also changed)

------------

y = tensor([1., 1., 1., 1., 1.], dtype=torch.float64) 

In most cases, numpy arrays are converted to or from tensors only once.


### 7. Computing gradient

In [9]:
# Create a tensor with requires_grad=True; the flag is echoed in the printed repr.
x = torch.ones(5, requires_grad=True) # tell pytorch to calculate gradient
print(x)

tensor([1., 1., 1., 1., 1.], requires_grad=True)


In [10]:
import torch

# Input we differentiate with respect to.
x = torch.randn(3, requires_grad=True)
print('x = ', x, '\n')

# Forward pass: z = 2 * (x + 2)^2, built in two steps.
y = x + 2
z = y * y * 2

# z is a vector, so backward() is given a vector of the same shape;
# all ones weights every component of z equally.
v = torch.ones(3, dtype=torch.float32)

# Hand derivation used as a cross-check:
#   z = 2*y^2 = 2*(x + 2)^2 = 2*x^2 + 8*x + 8   =>   dz/dx = 4*x + 8
#   (the chain rule gives the same result: dz/dx = dz/dy * dy/dx)

print('z = ', z, '\n')
z.backward(v) # fills x.grad with dz/dx
print('dz/dx =', x.grad, '\n')

# The closed-form gradient should match what autograd stored in x.grad.
analytic_grad = 4*x + 8
print('dz/dx =', analytic_grad, '\n')

x =  tensor([-0.4319, -0.2821, -0.7478], requires_grad=True) 

z =  tensor([4.9176, 5.9022, 3.1361], grad_fn=<MulBackward0>) 

dz/dx = tensor([6.2723, 6.8715, 5.0089]) 

dz/dx = tensor([6.2723, 6.8715, 5.0089], grad_fn=<AddBackward0>) 



In [11]:
# Getting a copy of the values WITHOUT requires_grad=True: detach()
x = torch.randn(3, requires_grad=True)
x_detached = x.detach()

print('x =', x)
print('x.detach() =', x_detached)

# Telling PyTorch NOT to compute gradients for an operation on a tensor that
# has requires_grad=True: wrap the operation in torch.no_grad().
with torch.no_grad():
    x += 1  # in-place add, same effect as x.add_(1)

x = tensor([-1.1067, -1.9615,  1.2362], requires_grad=True)
x.detach() = tensor([-1.1067, -1.9615,  1.2362])


In [12]:
# Every backward() call ADDS to .grad instead of replacing it, so the stored
# gradient has to be set back to zero after each pass.

# --- without a reset ---
weights = torch.ones(4, requires_grad=True)

for epoch in range(2):
    model_output = (3 * weights).sum()
    model_output.backward()
    # Second epoch is WRONG: it prints 6s because the first pass is still in .grad.
    print(weights.grad)

print('--------')

# --- with a reset ---
weights = torch.ones(4, requires_grad=True)

for epoch in range(2):
    model_output = (3 * weights).sum()
    model_output.backward()
    print(weights.grad)  # now correct on every epoch
    weights.grad.zero_()  # required: clear the accumulated gradient in place
    # now the gradient is correct.

tensor([3., 3., 3., 3.])
tensor([6., 6., 6., 6.])
--------
tensor([3., 3., 3., 3.])
tensor([3., 3., 3., 3.])
