In [1]:
import torch
import numpy as np

In [2]:
torch.__version__

'2.5.1'

In [3]:
# a scalar is a 0-dimensional tensor holding a single value
scalar = torch.tensor(7)
scalar

tensor(7)

In [4]:
scalar.ndim

0

In [5]:
scalar.item()

7

In [6]:
# a vector is a 1-dimensional tensor; this one holds two elements
vector = torch.tensor([7, 7])
vector

tensor([7, 7])

In [7]:
vector.ndim

1

In [8]:
vector.shape

torch.Size([2])

In [9]:
# a matrix is a 2-dimensional tensor; this one is 2 rows x 2 columns
MATRIX = torch.tensor([[7, 8],
                       [9, 10]])
MATRIX

tensor([[ 7,  8],
        [ 9, 10]])

In [10]:
MATRIX.ndim

2

In [11]:
MATRIX.shape

torch.Size([2, 2])

In [12]:
# three levels of nested brackets -> a 3-dimensional tensor
TENSOR = torch.tensor([[[1, 2, 3],
                        [4, 5, 6],
                        [7, 8, 9]]])
TENSOR

tensor([[[1, 2, 3],
         [4, 5, 6],
         [7, 8, 9]]])

In [13]:
TENSOR.ndim

3

In [14]:
# shape lists the sizes from the outermost bracket to the innermost:
# 1 outer block that holds 3 rows of 3 columns each
TENSOR.shape

torch.Size([1, 3, 3])

In [15]:
# 3 x 4 tensor of random values; the shape is given positionally
random_tensor = torch.rand(3, 4)
random_tensor, random_tensor.dtype

(tensor([[0.9862, 0.4405, 0.2347, 0.2396],
         [0.2631, 0.0125, 0.0134, 0.3498],
         [0.2843, 0.2265, 0.7612, 0.6368]]),
 torch.float32)

In [16]:
# random tensor with an image-like shape (presumably height, width, colour channels)
random_image_size_tensor = torch.rand(224, 224, 3)
random_image_size_tensor.shape, random_image_size_tensor.ndim

(torch.Size([224, 224, 3]), 3)

In [17]:
# 3 x 4 tensor filled with zeros
zeros = torch.zeros(3, 4)
zeros, zeros.dtype

(tensor([[0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.]]),
 torch.float32)

In [18]:
# 3 x 4 tensor filled with ones
ones = torch.ones(3, 4)
ones, ones.dtype

(tensor([[1., 1., 1., 1.],
         [1., 1., 1., 1.],
         [1., 1., 1., 1.]]),
 torch.float32)

In [19]:
# integers 0..9 (the end value 10 is exclusive); start=0 and step=1 are the defaults
zero_to_ten = torch.arange(10)
zero_to_ten

tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [20]:
# *_like builds a tensor of zeros / ones with the same shape and dtype as `input`
ten_zeros = torch.zeros_like(input=zero_to_ten)
ten_ones = torch.ones_like(input=zero_to_ten)
ten_zeros, ten_ones

(tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0]),
 tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1]))

In [21]:
# precision trade-off ("performance" here presumably means quality of results, not speed):
# higher-precision dtypes -> better performance but more compute needed
# lower-precision dtypes  -> faster computation, may sacrifice performance
float_32_tensor = torch.tensor([3.0, 6.0, 9.0],
                               dtype=None, # None -> dtype is inferred from the data; float values give torch.float32
                               device=None, # None -> default device (CPU in this run, see .device below)
                               requires_grad=False) # True records operations performed on the tensor

float_32_tensor.shape, float_32_tensor.dtype, float_32_tensor.device

(torch.Size([3]), torch.float32, device(type='cpu'))

Common errors come from shape mismatches, inconsistent data types, and tensors that are not on the same device (CPU vs. GPU); check `.shape`, `.dtype`, and `.device` when debugging.

In [22]:
# half precision: torch.half is an alias of torch.float16
float_16_tensor = torch.tensor([3.0, 6.0, 9.0], dtype=torch.half)
float_16_tensor.dtype

torch.float16

In [23]:
# the three attributes worth checking first when debugging: shape, dtype, device
some_tensor = torch.rand(size=(3, 4))
print(
    some_tensor,
    f'Shape of tensor: {some_tensor.shape}',
    f'Data type of tensor: {some_tensor.dtype}',
    f'Device tensor is stored on: {some_tensor.device}',
    sep='\n',
)

tensor([[0.6015, 0.3919, 0.1304, 0.6772],
        [0.1002, 0.2800, 0.4646, 0.3981],
        [0.1365, 0.2164, 0.8187, 0.3231]])
Shape of tensor: torch.Size([3, 4])
Data type of tensor: torch.float32
Device tensor is stored on: cpu


In [24]:
tensor = torch.tensor([1, 2, 3])
# adding a Python number is applied to every element; the result is a new tensor
# and `tensor` keeps its values unless the result is assigned back
tensor + 10

tensor([11, 12, 13])

In [25]:
tensor * 10

tensor([10, 20, 30])

In [26]:
# assign the result back to keep it; note that re-running this cell subtracts 10 again
tensor = tensor - 10
tensor

tensor([-9, -8, -7])

In [27]:
# undo the subtraction so that tensor is [1, 2, 3] again for the cells below
tensor = tensor + 10
tensor

tensor([1, 2, 3])

In [28]:
torch.multiply(tensor, 10)

tensor([10, 20, 30])

In [29]:
# "*" multiplies element by element: [1*1, 2*2, 3*3]
print(tensor, '*', tensor)
print('Equals:', tensor * tensor)

tensor([1, 2, 3]) * tensor([1, 2, 3])
Equals: tensor([1, 4, 9])


In [30]:
# matmul shape rule: (a, b) @ (b, c) -> (a, c); the inner dimension b must be the same
# prefer torch.matmul / @ over hand-written Python for-loops, which are computationally expensive
# for two 1-D tensors matmul reduces to the dot product: 1*1 + 2*2 + 3*3 = 14
torch.matmul(tensor, tensor), tensor @ tensor

(tensor(14), tensor(14))

In [31]:
# two (3, 2) float32 matrices used by the matrix-multiplication cells below
tensor_A = torch.tensor([[1, 2],
                         [3, 4],
                         [5, 6]], dtype=torch.float32)
tensor_B = torch.tensor([[7, 8],
                         [9, 10],
                         [11, 12]], dtype=torch.float32)

In [32]:
# tensor_A and tensor_B are both (3, 2), so their inner dimensions do not match for matmul;
# transposing tensor_B with .T (or torch.transpose(input, dim0, dim1)) gives (2, 3),
# and (3, 2) @ (2, 3) -> (3, 3)
print(f"Original shapes: tensor_A = {tensor_A.shape}, tensor_B = {tensor_B.shape}\n")
print(f"New shapes: tensor_A = {tensor_A.shape} (same as above), tensor_B.T = {tensor_B.T.shape}\n")
print(f"Multiplying: {tensor_A.shape} * {tensor_B.T.shape} <- inner dimensions match\n")
print("Output:\n")
output = torch.matmul(tensor_A, tensor_B.T)
print(output) 
print(f"\nOutput shape: {output.shape}")

Original shapes: tensor_A = torch.Size([3, 2]), tensor_B = torch.Size([3, 2])

New shapes: tensor_A = torch.Size([3, 2]) (same as above), tensor_B.T = torch.Size([2, 3])

Multiplying: torch.Size([3, 2]) * torch.Size([2, 3]) <- inner dimensions match

Output:

tensor([[ 23.,  29.,  35.],
        [ 53.,  67.,  81.],
        [ 83., 105., 127.]])

Output shape: torch.Size([3, 3])


In [33]:
# seed first so the layer's randomly initialised parameters are the same on every run
torch.manual_seed(42)
linear = torch.nn.Linear(in_features=2,
                         out_features=6)
x = tensor_A
# output = x @ W.T + b, where x is the input and W.T has shape (in_features, out_features);
# here (3, 2) @ (2, 6) -> (3, 6)
# NOTE: this rebinds `output`, which the previous matmul cell also assigned
output = linear(x)
print(f"Input shape: {x.shape}\n")
print(f"Output:\n{output}\n\nOutput shape: {output.shape}")

Input shape: torch.Size([3, 2])

Output:
tensor([[2.2368, 1.2292, 0.4714, 0.3864, 0.1309, 0.9838],
        [4.4919, 2.1970, 0.4469, 0.5285, 0.3401, 2.4777],
        [6.7469, 3.1648, 0.4224, 0.6705, 0.5493, 3.9716]],
       grad_fn=<AddmmBackward0>)

Output shape: torch.Size([3, 6])


In [34]:
# 0, 10, ..., 90 (the end value 100 is exclusive)
x = torch.arange(start=0, end=100, step=10)
x

tensor([ 0, 10, 20, 30, 40, 50, 60, 70, 80, 90])

In [35]:
# basic aggregations over all elements of x
print(f"Minimum: {x.min()}")
print(f"Maximum: {x.max()}")
print(f"Mean: {x.type(torch.float32).mean()}") # mean() needs a floating-point dtype; x holds integers, so cast first
print(f"Sum: {x.sum()}")

Minimum: 0
Maximum: 90
Mean: 45.0
Sum: 450


In [36]:
# argmax / argmin return the position of the extreme value, not the value itself
print(f'Index of max: {x.argmax()}')
print(f'Index of min: {x.argmin()}')

Index of max: 9
Index of min: 0


In [37]:
# float arguments make arange produce a float32 tensor: 10., 20., ..., 90.
tensor = torch.arange(start=10., end=100., step=10.)
tensor.dtype

torch.float32

In [38]:
# cast to half precision; the result is a new float16 tensor
tensor_float16 = tensor.to(torch.float16)
tensor_float16

tensor([10., 20., 30., 40., 50., 60., 70., 80., 90.], dtype=torch.float16)

In [39]:
# 1., 2., ..., 7. as a 1-D float tensor (the end value 8. is exclusive)
x = torch.arange(start=1., end=8.)
x, x.shape

(tensor([1., 2., 3., 4., 5., 6., 7.]), torch.Size([7]))

In [40]:
# add a leading dimension of size 1: (7,) -> (1, 7); the number of elements must stay the same
x_reshaped = x.reshape(1, 7)
x_reshaped, x_reshaped.shape

(tensor([[1., 2., 3., 4., 5., 6., 7.]]), torch.Size([1, 7]))

In [41]:
# view() exposes the same data under a new shape; z shares memory with x
# (the next two cells demonstrate this)
z = x.view(1, 7)
z, z.shape

(tensor([[1., 2., 3., 4., 5., 6., 7.]]), torch.Size([1, 7]))

In [42]:
# writing through x also changes z, because z is a view of x's data
x[0] = 10
x, z

(tensor([10.,  2.,  3.,  4.,  5.,  6.,  7.]),
 tensor([[10.,  2.,  3.,  4.,  5.,  6.,  7.]]))

In [43]:
# the sharing works both ways: writing through z changes x (this also restores the original value)
z[:, 0] = 1
x, z

(tensor([1., 2., 3., 4., 5., 6., 7.]), tensor([[1., 2., 3., 4., 5., 6., 7.]]))

In [44]:
# stack four copies of x along a new leading dimension: (7,) -> (4, 7)
x_stacked = torch.stack([x, x, x, x], dim=0)
x_stacked

tensor([[1., 2., 3., 4., 5., 6., 7.],
        [1., 2., 3., 4., 5., 6., 7.],
        [1., 2., 3., 4., 5., 6., 7.],
        [1., 2., 3., 4., 5., 6., 7.]])

In [45]:
# squeeze() with no arguments removes the dimensions of size 1: (1, 7) -> (7,)
print(f"Previous tensor: {x_reshaped}")
print(f"Previous shape: {x_reshaped.shape}")
x_squeezed = x_reshaped.squeeze()
print(f"\nNew tensor: {x_squeezed}")
print(f"New shape: {x_squeezed.shape}")

Previous tensor: tensor([[1., 2., 3., 4., 5., 6., 7.]])
Previous shape: torch.Size([1, 7])

New tensor: tensor([1., 2., 3., 4., 5., 6., 7.])
New shape: torch.Size([7])


In [46]:
# unsqueeze(dim=0) inserts a new dimension of size 1 at position 0: (7,) -> (1, 7)
print(f"Previous tensor: {x_squeezed}")
print(f"Previous shape: {x_squeezed.shape}")
x_unsqueezed = x_squeezed.unsqueeze(dim=0)
print(f"\nNew tensor: {x_unsqueezed}")
print(f"New shape: {x_unsqueezed.shape}")

Previous tensor: tensor([1., 2., 3., 4., 5., 6., 7.])
Previous shape: torch.Size([7])

New tensor: tensor([[1., 2., 3., 4., 5., 6., 7.]])
New shape: torch.Size([1, 7])


In [47]:
# reorder the axes: old axis 2 becomes axis 0, old axes 0 and 1 shift to 1 and 2
x_original = torch.rand(224, 224, 3)
x_permuted = torch.permute(x_original, (2, 0, 1))
print(
    f"Previous shape: {x_original.shape}",
    f"New shape: {x_permuted.shape}",
    sep="\n",
)

Previous shape: torch.Size([224, 224, 3])
New shape: torch.Size([3, 224, 224])


In [48]:
# the integers 1..9 arranged as one 3 x 3 block
x = torch.reshape(torch.arange(start=1, end=10), (1, 3, 3))
x, x.shape

(tensor([[[1, 2, 3],
          [4, 5, 6],
          [7, 8, 9]]]),
 torch.Size([1, 3, 3]))

In [49]:
# ":" selects all values in that dimension
# x[:, 0]    -> row 0 of every 3 x 3 block    -> shape (1, 3)
# x[:, :, 1] -> column 1 of every 3 x 3 block -> shape (1, 3)
x[:, 0], x[:, :, 1]

(tensor([[1, 2, 3]]), tensor([[2, 5, 8]]))

In [50]:
# NumPy array -> tensor
array = np.arange(1.0, 8.0)
# the NumPy array is float64 by default, so cast the resulting tensor down to float32
tensor = torch.from_numpy(array).to(torch.float32)
array, tensor

(array([1., 2., 3., 4., 5., 6., 7.]), tensor([1., 2., 3., 4., 5., 6., 7.]))

In [51]:
# array + 1 creates a new array and rebinds the name `array`; `tensor` is left unchanged
array = array + 1
array, tensor

(array([2., 3., 4., 5., 6., 7., 8.]), tensor([1., 2., 3., 4., 5., 6., 7.]))

In [52]:
# tensor to NumPy array
# numpy() keeps the tensor's dtype: the resulting array is float32, not NumPy's default float64
tensor = torch.ones(7)
numpy_tensor = tensor.numpy()
tensor, numpy_tensor

(tensor([1., 1., 1., 1., 1., 1., 1.]),
 array([1., 1., 1., 1., 1., 1., 1.], dtype=float32))

In [53]:
# without re-seeding, two calls to torch.rand produce different values
random_tensor_A = torch.rand(3, 4)
random_tensor_B = torch.rand(3, 4)
random_tensor_A, random_tensor_B

(tensor([[0.8016, 0.3649, 0.6286, 0.9663],
         [0.7687, 0.4566, 0.5745, 0.9200],
         [0.3230, 0.8613, 0.0919, 0.3102]]),
 tensor([[0.9536, 0.6002, 0.0351, 0.6826],
         [0.3743, 0.5220, 0.1336, 0.9666],
         [0.9754, 0.8474, 0.8988, 0.1105]]))

In [54]:
RANDOM_SEED = 314

# re-seed immediately before each draw so both tensors start from the same generator state
torch.manual_seed(RANDOM_SEED)
random_tensor_C = torch.rand(size=(3, 4))
torch.manual_seed(RANDOM_SEED)
random_tensor_D = torch.rand(size=(3, 4))

print(
    f"Tensor C:\n{random_tensor_C}\n",
    f"Tensor D:\n{random_tensor_D}\n",
    sep="\n",
)

Tensor C:
tensor([[0.7196, 0.6295, 0.6667, 0.3385],
        [0.8522, 0.3126, 0.5006, 0.4643],
        [0.0083, 0.4469, 0.8029, 0.5512]])

Tensor D:
tensor([[0.7196, 0.6295, 0.6667, 0.3385],
        [0.8522, 0.3126, 0.5006, 0.4643],
        [0.0083, 0.4469, 0.8029, 0.5512]])



In [59]:
# device-agnostic setup: use the GPU when one is available, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
device

'cuda'

In [60]:
torch.cuda.device_count()

1

In [61]:
# tensor to GPU (or whatever `device` was resolved to earlier in the notebook)
tensor = torch.tensor([1, 2, 3])
print(tensor, tensor.device)
# .to(device) returns the tensor on the target device; keep the returned value
tensor_on_gpu = tensor.to(device)
print(tensor_on_gpu)

tensor([1, 2, 3]) cpu
tensor([1, 2, 3], device='cuda:0')


In [63]:
# tensor from GPU to CPU to NumPy: call .cpu() first, then .numpy()
# note: despite its name, tensor_back_on_cpu is a NumPy array (see the output below)
tensor_back_on_cpu = tensor_on_gpu.cpu().numpy()
tensor_back_on_cpu

array([1, 2, 3])

Exercises

In [77]:
# exercise: random tensor of shape (7, 7); no seed is set here, so values differ between runs
rand_tensor_A = torch.rand(7, 7)
rand_tensor_A

tensor([[0.4820, 0.8198, 0.9971, 0.6984, 0.5675, 0.8352, 0.2056],
        [0.5932, 0.1123, 0.1535, 0.2417, 0.7262, 0.7011, 0.2038],
        [0.6511, 0.7745, 0.4369, 0.5191, 0.6159, 0.8102, 0.9801],
        [0.1147, 0.3168, 0.6965, 0.9143, 0.9351, 0.9412, 0.5995],
        [0.0652, 0.5460, 0.1872, 0.0340, 0.9442, 0.8802, 0.0012],
        [0.5936, 0.4158, 0.4177, 0.2711, 0.6923, 0.2038, 0.6833],
        [0.7529, 0.8579, 0.6870, 0.0051, 0.1757, 0.7497, 0.6047]])

In [78]:
# (7, 7) @ (7, 1): transpose the (1, 7) tensor so that the inner dimensions match
rand_tensor_B = torch.rand(1, 7)
print(torch.matmul(rand_tensor_A, rand_tensor_B.T))

tensor([[2.2563],
        [0.9122],
        [1.5943],
        [2.1509],
        [0.9287],
        [1.0734],
        [1.2800]])


In [83]:
# Seed the global generator. torch.manual_seed seeds the CPU generator as well as
# every CUDA device. The tensors below are sampled on the CPU by torch.rand and only
# then copied to `device`, so seeding CUDA alone (torch.cuda.manual_seed) would leave
# them unseeded and the result would change on every run.
torch.manual_seed(1234)
rand_tensor_C = torch.rand(2, 3).to(device)
rand_tensor_D = torch.rand(2, 3).to(device)
# (2, 3) @ (3, 2) -> (2, 2); transpose D so that the inner dimensions match
result = torch.matmul(rand_tensor_C, rand_tensor_D.T)
result

tensor([[0.5200, 0.9671],
        [0.7139, 1.3280]], device='cuda:0')

In [84]:
result.max(), result.min()

(tensor(1.3280, device='cuda:0'), tensor(0.5200, device='cuda:0'))

In [85]:
# with no dim argument, argmax / argmin index into the flattened tensor (0..3 for a 2 x 2 result)
result.argmax(), result.argmin()

(tensor(3, device='cuda:0'), tensor(0, device='cuda:0'))

In [86]:
# exercise: seed, draw a (1, 1, 1, 10) tensor, then drop every dimension of size 1
torch.manual_seed(7)
rand_tensor = torch.rand(size=(1, 1, 1, 10))
squeezed_tensor = torch.squeeze(rand_tensor)
print(
    f'original tensor: {rand_tensor}',
    f'original tensor shape: {rand_tensor.shape}',
    f'squeezed tensor: {squeezed_tensor}',
    f'squeezed tensor shape: {squeezed_tensor.shape}',
    sep='\n',
)

original tensor: tensor([[[[0.5349, 0.1988, 0.6592, 0.6569, 0.2328, 0.4251, 0.2071, 0.6297,
           0.3653, 0.8513]]]])
original tensor shape: torch.Size([1, 1, 1, 10])
squeezed tensor: tensor([0.5349, 0.1988, 0.6592, 0.6569, 0.2328, 0.4251, 0.2071, 0.6297, 0.3653,
        0.8513])
squeezed tensor shape: torch.Size([10])
