## PyTorch Basics

Phai Phongthiengtham

See https://pytorch.org/get-started/locally/ for installation instructions.

In [1]:
# IPython shell escape: runs `python3 --version` in a subshell and shows its output.
# NOTE(review): this reports the `python3` found on the shell PATH, which is
# presumably (but not necessarily) the interpreter backing this kernel.
!python3 --version

Python 3.10.10


In [2]:
import torch

# Show which PyTorch release this notebook is running against.
torch_version = torch.__version__
print(torch_version)

1.13.1


In [3]:
# Several ways to construct a tensor (the shape is passed as a tuple here).
x1 = torch.empty((2, 3))                       # allocated but NOT initialized: values are arbitrary
x2 = torch.rand((2, 2))                        # random values
x3 = torch.zeros((2, 2))                       # filled with 0
x4 = torch.ones((2, 2), dtype=torch.float64)   # explicit data type (torch.double is the same dtype)
x5 = torch.ones((2, 2), dtype=torch.half)      # torch.half is the same dtype as torch.float16

# Inspect the last tensor and its shape.
print(x5)
print(x5.shape)

tensor([[1., 1.],
        [1., 1.]], dtype=torch.float16)
torch.Size([2, 2])


In [4]:
# Basic arithmetic: each Python operator is compared with the torch.*
# function of the same meaning; every pair below acts element by element.
x = torch.rand(2, 2)
y = torch.rand(2, 2)
print('x =', x)
print('y =', y)

# Addition -- print both results once, then compare them.
z1, z2 = x + y, torch.add(x, y)
print('z1 =', z1)
print('z2 =', z2)
print('\ncheck if z1 == z2:\n')
print(z1 == z2)

# Subtraction
z1, z2 = x - y, torch.sub(x, y)
print(z1 == z2)

# Multiplication (element-wise)
z1, z2 = x * y, torch.mul(x, y)
print(z1 == z2)

# Division (element-wise)
z1, z2 = x / y, torch.div(x, y)
print(z1 == z2)

x = tensor([[0.2202, 0.9227],
        [0.4301, 0.8067]])
y = tensor([[0.5956, 0.8771],
        [0.7849, 0.7276]])
z1 = tensor([[0.8158, 1.7999],
        [1.2150, 1.5343]])
z2 = tensor([[0.8158, 1.7999],
        [1.2150, 1.5343]])

check if z1 == z2:

tensor([[True, True],
        [True, True]])
tensor([[True, True],
        [True, True]])
tensor([[True, True],
        [True, True]])
tensor([[True, True],
        [True, True]])


In [5]:
# In-place operations: the tensor the method is called on is itself modified.
x = torch.rand(2, 2)
y = torch.rand(2, 2)
print('x =', x)
print('y =', y)
print('\nany function with "_" will do inplace operation\n')

# add_ (note the trailing underscore) overwrites y with y + x.
y.add_(x)
print('y =', y)

x = tensor([[0.7262, 0.8929],
        [0.6821, 0.9628]])
y = tensor([[0.0648, 0.2877],
        [0.2868, 0.9138]])

any function with "_" will do inplace operation

y = tensor([[0.7910, 1.1806],
        [0.9689, 1.8767]])


In [6]:
# Indexing and slicing a 5x3 tensor
x = torch.rand(5, 3)
print('x =', x, '\n')

first_column = x[:, 0]   # every row, column 0
second_row = x[1, :]     # row 1, every column
one_element = x[1, 1]    # a single entry, still wrapped in a tensor
print('x[:, 0] =', first_column, '\n')
print('x[1, :] =', second_row, '\n')
print('x[1, 1] =', one_element, '\n')

# .item() converts the tensor to a plain Python number; this only works
# when the tensor holds exactly one element.
print('x[1, 1].item() =', one_element.item(), '\n')

x = tensor([[0.7717, 0.8119, 0.8034],
        [0.5650, 0.3801, 0.0972],
        [0.4583, 0.3972, 0.3675],
        [0.4423, 0.7420, 0.6171],
        [0.3318, 0.5620, 0.8335]]) 

x[:, 0] = tensor([0.7717, 0.5650, 0.4583, 0.4423, 0.3318]) 

x[1, :] = tensor([0.5650, 0.3801, 0.0972]) 

x[1, 1] = tensor(0.3801) 

x[1, 1].item() = 0.38012492656707764 



In [7]:
# Reshaping with view(): the 16 elements of x are kept, only the shape changes.
x = torch.rand(4, 4)
print('x =', x, '\n')
print('reshape tensors')

# 4x4 -> 8x2
y = x.view(8, 2)
print('y =', y, '\n')

# -1 lets PyTorch infer that dimension from the others (16 / 8 = 2 rows).
y = x.view(-1, 8)
print('y =', y, '\n')

x = tensor([[0.3042, 0.3503, 0.1163, 0.5121],
        [0.6443, 0.4806, 0.4824, 0.5423],
        [0.1783, 0.0313, 0.4046, 0.2636],
        [0.8417, 0.9313, 0.0168, 0.1571]]) 

reshape tensors
y = tensor([[0.3042, 0.3503],
        [0.1163, 0.5121],
        [0.6443, 0.4806],
        [0.4824, 0.5423],
        [0.1783, 0.0313],
        [0.4046, 0.2636],
        [0.8417, 0.9313],
        [0.0168, 0.1571]]) 

y = tensor([[0.3042, 0.3503, 0.1163, 0.5121, 0.6443, 0.4806, 0.4824, 0.5423],
        [0.1783, 0.0313, 0.4046, 0.2636, 0.8417, 0.9313, 0.0168, 0.1571]]) 



In [8]:
import numpy as np

# --- Tensor -> NumPy array ---------------------------------------------
x = torch.ones(5)
y = x.numpy() # convert tensor to numpy array
print('x =', x, '\n')
print('y =', y, '\n')
# For a CPU tensor, .numpy() does not copy: the array y is backed by the
# same memory as the tensor x, so the in-place update of x below is also
# visible through y. (The message previously referred to non-existent
# variables "a and b"; the objects involved are x and y.)
print("BE CAREFUL - if used in CPU, x and y will use the same memory.\n")
x.add_(1)
print('x =', x, ' (+1 to all element in x)\n')
print('y =', y, ' (y also changed)\n')
print('------------')
# --- NumPy array -> tensor ---------------------------------------------
# np.ones defaults to float64, which is why the tensor below reports
# dtype=torch.float64. torch.from_numpy shares memory with the array too.
x = np.ones(5)
y = torch.from_numpy(x) # create tensor from numpy array
print('y =', y, '\n')

x = tensor([1., 1., 1., 1., 1.]) 

y = [1. 1. 1. 1. 1.] 

BE CAREFUL - if used in CPU, a and b will use the same memory.

x = tensor([2., 2., 2., 2., 2.])  (+1 to all element in x)

y = [2. 2. 2. 2. 2.]  (y also changed)

------------
y = tensor([1., 1., 1., 1., 1.], dtype=torch.float64) 



In [9]:
# requires_grad=True tells PyTorch to track operations on x so that
# gradients with respect to x can be computed later.
x = torch.ones(5, requires_grad=True)
print(x)

tensor([1., 1., 1., 1., 1.], requires_grad=True)


In [10]:
import torch

# Autograd check: differentiate z = 2 * (x + 2)^2 and compare the result
# with the derivative worked out by hand.
x = torch.randn(3, requires_grad=True)
print('x = ', x, '\n')

# Forward pass
y = x + 2
z = y * y * 2

# Derivation by hand:
#   z = 2*y^2 = 2*(x + 2)^2 = 2*x^2 + 8*x + 8   =>   dz/dx = 4*x + 8
# Equivalently, by the chain rule: dz/dx = dz/dy * dy/dx = 4*y * 1
print('z = ', z, '\n')

# z is a vector, not a scalar, so backward() needs a vector of the same
# shape to weight the outputs. Using all ones yields the element-wise dz/dx.
v = torch.ones(3, dtype=torch.float32)
z.backward(v) # dz/dx

# Autograd result first, then the closed-form value for comparison.
print('dz/dx =', x.grad, '\n')
print('dz/dx =', 4*x + 8, '\n')

x =  tensor([ 0.3397, -0.0672,  0.5080], requires_grad=True) 

z =  tensor([10.9480,  7.4718, 12.5798], grad_fn=<MulBackward0>) 

dz/dx = tensor([ 9.3586,  7.7314, 10.0319]) 

dz/dx = tensor([ 9.3586,  7.7314, 10.0319], grad_fn=<AddBackward0>) 



In [11]:
# Each backward() call ADDS the new gradient to .grad instead of replacing
# it, so .grad has to be reset to zero after every iteration.

# Case 1: no reset -- the gradients pile up.
weights = torch.ones(4, requires_grad=True)

for epoch in range(2):
    model_output = torch.sum(weights * 3)
    model_output.backward()
    print(weights.grad) # the second epoch shows 6 instead of 3: WRONG.

print('--------')

# Case 2: reset after every backward pass -- each epoch reports the true gradient.
weights = torch.ones(4, requires_grad=True)

for epoch in range(2):
    model_output = torch.sum(weights * 3)
    model_output.backward()
    print(weights.grad)
    # Required: clear the accumulated gradient in place before the next epoch.
    weights.grad.zero_()

tensor([3., 3., 3., 3.])
tensor([6., 6., 6., 6.])
--------
tensor([3., 3., 3., 3.])
tensor([3., 3., 3., 3.])


In [12]:
# One training example (x, y) and a single trainable weight w.
x, y = torch.tensor(1.0), torch.tensor(2.0)
w = torch.tensor(1.0, requires_grad=True)

# Forward pass: prediction, then squared-error loss.
y_hat = w * x
residual = y_hat - y
loss = residual**2

print('loss =', loss)

# Backward pass. Expected value by hand (chain rule):
#   dloss/dw = dloss/dy_hat * dy_hat/dw = 2*(y_hat - y) * x
#            = 2*w*x*x - 2*y*x
#            = 2*1*1*1 - 2*2*1 = -2
loss.backward()
print('dloss/dw =', w.grad)

loss = tensor(1., grad_fn=<PowBackward0>)
dloss/dw = tensor(-2.)
