Learning the basics of PyTorch by following Aladdin Persson's tutorials on YouTube. This file was created by me.

In [None]:
import torch

# Run on the GPU when CUDA is usable; otherwise stay on the CPU.
device = 'cpu'
if torch.cuda.is_available():
    device = 'cuda'
device

'cpu'

In [None]:
# Build a 2x3 float tensor on the selected device and track gradients for it.
my_tensor = torch.tensor(
    [[1, 2, 3], [4, 5, 6]],
    dtype=torch.float32,
    device=device,
    requires_grad=True,
)

# Show the tensor followed by its basic attributes, one per line.
print(
    my_tensor,
    my_tensor.device,
    my_tensor.shape,
    my_tensor.dtype,
    my_tensor.requires_grad,
    sep='\n',
)

tensor([[1., 2., 3.],
        [4., 5., 6.]], requires_grad=True)
cpu
torch.Size([2, 3])
torch.float32
True


In [None]:
# Common ways to build a tensor without listing its values by hand.
X = torch.empty(3, 3)  # Uninitialized memory: the contents are arbitrary.
print(X)
A = torch.zeros(3, 3)  # All zeros.
print(A)
B = torch.ones(3, 3)  # All ones.
print(B)
C = torch.rand(3, 3)  # Samples from the uniform distribution on [0, 1).
print(C)
D = torch.randn(3, 3)  # Samples from the standard normal distribution.
print(D)
I = torch.eye(3)  # 3x3 identity matrix.
print(I)
J = torch.arange(0, 5, 1)  # 0, 1, ..., 4; the end is exclusive, like numpy.arange.
print(J)
K = torch.linspace(0.1, 1, steps=10)  # 10 evenly spaced values; both endpoints are included.
print(K)
L = torch.empty(1, 5).normal_(mean=0, std=1)  # In-place fill with normal samples (mean 0, std 1).
print(L)
M = torch.empty(1, 5).uniform_(0, 1)  # In-place fill with uniform samples between 0 and 1.
print(M)
N = torch.diag(torch.ones(3))  # Square matrix with the given 1-D tensor on its diagonal.
print(N)

tensor([[1.0974e-35, 0.0000e+00, 4.4842e-44],
        [0.0000e+00,        nan, 1.5554e-43],
        [1.0948e+21, 9.2868e-04, 1.3556e-19]])
tensor([[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]])
tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]])
tensor([[0.9227, 0.8553, 0.9620],
        [0.7838, 0.2566, 0.7265],
        [0.1440, 0.7198, 0.4988]])
tensor([[ 0.0348, -0.9829, -1.3596],
        [ 0.4698,  0.1119, -2.2748],
        [-0.6074,  0.5785, -0.3979]])
tensor([[1., 0., 0.],
        [0., 1., 0.],
        [0., 0., 1.]])
tensor([0, 1, 2, 3, 4])
tensor([0.1000, 0.2000, 0.3000, 0.4000, 0.5000, 0.6000, 0.7000, 0.8000, 0.9000,
        1.0000])
tensor([[-0.2607,  1.1993, -0.0735, -0.4172,  0.4657]])
tensor([[0.6828, 0.2833, 0.2449, 0.6477, 0.1417]])
tensor([[1., 0., 0.],
        [0., 1., 0.],
        [0., 0., 1.]])


In [None]:
# Casting an integer tensor to other dtypes with the shortcut methods.
tensor = torch.arange(4)
tensor.dtype  # int64 by default; a bare expression mid-cell is evaluated but not displayed.

print(tensor)

# Each shortcut returns the same values in the dtype noted beside it.
print(
    tensor.bool(),    # torch.bool: zero -> False, non-zero -> True
    tensor.short(),   # torch.int16
    tensor.long(),    # torch.int64
    tensor.half(),    # torch.float16
    tensor.float(),   # torch.float32
    tensor.double(),  # torch.float64
    sep='\n',
)

tensor([0, 1, 2, 3])
tensor([False,  True,  True,  True])
tensor([0, 1, 2, 3], dtype=torch.int16)
tensor([0, 1, 2, 3])
tensor([0., 1., 2., 3.], dtype=torch.float16)
tensor([0., 1., 2., 3.])
tensor([0., 1., 2., 3.], dtype=torch.float64)


In [None]:
import numpy as np

# Round trip: NumPy array -> tensor -> NumPy array.
np_array = np.zeros(shape=(5, 5))
tensor = torch.from_numpy(np_array)  # keeps NumPy's float64 dtype and shares the array's memory
numpy_array_back = tensor.numpy()

print(tensor, numpy_array_back, sep='\n')
numpy_array_back

tensor([[0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.]], dtype=torch.float64)
[[0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]]


array([[0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.]])

In [None]:
# Mathematical Operations on Tensors

x = torch.tensor([1,2,3])
y = torch.tensor([1,4,9])

# Addition
z1 = torch.empty(3)
torch.add(x,y,out=z1)

z2 = torch.add(x,y)

z = x + y

# Subtraction
z = x - y

# Division (element by element)
torch.true_divide(y, x)

# Inplace operations --> A copy of the tensor is not created. The original tensor is modified.
t = torch.zeros(3)
t.add_(x) # Function names ending with _ means it's an inplace function (modifying original tensor)
t += x  # This is also an inplace operation
# t = t + x is not an inplace operation.

# Exponent
z = x.pow(2)
z = x ** 2 # Does the same thing.

# Matrix multiplication: (2 x 5) times (5 x 3) gives (2 x 3).
x1 = torch.rand(2, 5)
x2 = torch.rand(5, 3)
# torch.mm only accepts 2-D matrices and never broadcasts, while torch.matmul
# also handles batched / broadcast inputs. For 2-D inputs both give the same product.
x3 = torch.matmul(x1, x2)
x4 = torch.mm(x1, x2)

print(x1, '\n', x2, '\n', x3, '\n', x4)

print(x4.t())  # Transpose of the 2-D result.

# Matrix power: the matrix multiplied by itself three times.
# This is neither an element-wise power nor the matrix exponential.
matrix_exp = torch.rand(5, 5)
print(matrix_exp.matrix_power(3))

# Element-wise product: each entry of x times the matching entry of y.
z = torch.mul(x, y)
print(x, '\n', y, '\n', z)

# Dot product of the two 1-D tensors: the sum of the element-wise products.
z = x.dot(y)
print(z)

# Batch matrix multiplication: one (n x m) @ (m x p) product per batch entry.
batch, n, m, p = 32, 10, 20, 30

tensor1 = torch.rand(batch, n, m)
tensor2 = torch.rand(batch, m, p)
# The shared inner dimension m is contracted, so the result is (batch, n, p).
out_bmm = torch.bmm(tensor1, tensor2)
out_bmm.shape

tensor([[0.3984, 0.8466, 0.6585, 0.0307, 0.5979],
        [0.6459, 0.6159, 0.8868, 0.0447, 0.9781]]) 
 tensor([[0.8692, 0.5521, 0.2668],
        [0.3772, 0.5255, 0.5738],
        [0.1803, 0.6629, 0.3628],
        [0.1252, 0.0497, 0.8808],
        [0.7752, 0.3891, 0.5463]]) 
 tensor([[1.2518, 1.3355, 1.1847],
        [1.7175, 1.6509, 1.4212]]) 
 tensor([[1.2518, 1.3355, 1.1847],
        [1.7175, 1.6509, 1.4212]])
tensor([[1.2518, 1.7175],
        [1.3355, 1.6509],
        [1.1847, 1.4212]])
tensor([[0.9920, 1.3687, 0.6635, 1.4593, 1.6396],
        [1.4326, 1.8476, 0.8803, 1.9202, 2.0829],
        [1.7427, 1.8776, 0.9999, 2.3117, 2.0955],
        [0.5143, 0.5410, 0.3461, 0.8853, 0.7472],
        [1.6845, 1.9022, 1.0203, 2.3684, 2.2226]])
tensor([1, 2, 3]) 
 tensor([1, 4, 9]) 
 tensor([ 1,  8, 27])
tensor(36)


torch.Size([32, 10, 30])

In [None]:
# Broadcasting: combining tensors whose shapes differ but are compatible.
x1 = torch.rand(5, 5)
x2 = torch.rand(1, 5)

# The shapes (5, 5) and (1, 5) do not match, yet the operation is legal:
# the single row of x2 is treated as if it were repeated for every row of x1,
# and the subtraction is then done element by element.
z = x1 - x2

print(x1, '\n', x2, '\n', z)

# The same rule applies to other element-wise operators, e.g. power.
z = x1 ** x2
print(x1, '\n', x2, '\n', z)

tensor([[0.1247, 0.1560, 0.0544, 0.8480, 0.8953],
        [0.7455, 0.5158, 0.6269, 0.4760, 0.0146],
        [0.0252, 0.2297, 0.9252, 0.7960, 0.2702],
        [0.0472, 0.1001, 0.1464, 0.8781, 0.9974],
        [0.3176, 0.8570, 0.2827, 0.9043, 0.6792]]) 
 tensor([[0.7381, 0.9003, 0.3970, 0.1288, 0.4519]]) 
 tensor([[-0.6134, -0.7442, -0.3426,  0.7191,  0.4434],
        [ 0.0074, -0.3845,  0.2298,  0.3472, -0.4373],
        [-0.7129, -0.6706,  0.5282,  0.6672, -0.1817],
        [-0.6909, -0.8002, -0.2506,  0.7492,  0.5455],
        [-0.4206, -0.0433, -0.1143,  0.7755,  0.2272]])
tensor([[0.1247, 0.1560, 0.0544, 0.8480, 0.8953],
        [0.7455, 0.5158, 0.6269, 0.4760, 0.0146],
        [0.0252, 0.2297, 0.9252, 0.7960, 0.2702],
        [0.0472, 0.1001, 0.1464, 0.8781, 0.9974],
        [0.3176, 0.8570, 0.2827, 0.9043, 0.6792]]) 
 tensor([[0.7381, 0.9003, 0.3970, 0.1288, 0.4519]]) 
 tensor([[0.2151, 0.1878, 0.3148, 0.9790, 0.9512],
        [0.8051, 0.5510, 0.8307, 0.9088, 0.1481],
        [0.0