# 00. PyTorch Fundamentals

In [1]:
import torch
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

print(torch.__version__)

2.2.1+cu121


## Introduction to Tensors in PyTorch

### Creating tensors

In [2]:
# scalar
scalar = torch.tensor(3)
scalar

tensor(3)

In [3]:
type(scalar)

torch.Tensor

In [4]:
scalar.ndim

0

In [5]:
# Get tensor back as Python int
# Converts element to scalar. Cannot be used on higher ndmim objects
scalar.item()

3

In [6]:
# Vector
vector = torch.tensor([1,2])
vector

tensor([1, 2])

In [7]:
# Number of dimension (ndim) corresponds to number of square brackets
vector.ndim

1

In [8]:
# Slicing
vector[0:2]

tensor([1, 2])

In [9]:
# Matrix
# For simplicity use numpy to create a 3x3 array for torch.tensor to use
arr = np.arange(0,9).reshape(3,3)
matrix = torch.tensor(arr)
matrix

tensor([[0, 1, 2],
        [3, 4, 5],
        [6, 7, 8]])

In [10]:
matrix.ndim

2

In [11]:
matrix[1]

tensor([3, 4, 5])

In [12]:
matrix[0:2]

tensor([[0, 1, 2],
        [3, 4, 5]])

In [13]:
matrix.shape

torch.Size([3, 3])

In [14]:
# Tensor
arr2 = np.arange(0,18).reshape(2,3,3)
tensor = torch.tensor(arr2)
tensor

tensor([[[ 0,  1,  2],
         [ 3,  4,  5],
         [ 6,  7,  8]],

        [[ 9, 10, 11],
         [12, 13, 14],
         [15, 16, 17]]])

In [15]:
tensor.shape

torch.Size([2, 3, 3])

In [16]:
tensor.ndim

3

In [17]:
test = torch.tensor([[[
        [[0,  1,  2],
         [ 3,  4,  5],
         [ 6,  7,  8]],

        [[ 9, 10, 11],
         [12, 13, 14],
         [15, 16, 17]]]]])

test.shape

torch.Size([1, 1, 2, 3, 3])

### Random tensors

Why random tensors? 

Random tensors are important because the way many neural networks learn is that they start with tensors full of random numbers and then adjust those random numbers to better represent the data.

In [18]:
# Create a random tensor of size (3,4)
random_tensor = torch.rand(3,4)
random_tensor

tensor([[0.2816, 0.4907, 0.4271, 0.1842],
        [0.9431, 0.0707, 0.3918, 0.3404],
        [0.5603, 0.7571, 0.7964, 0.5056]])

In [19]:
random_tensor.ndim

2

In [20]:
random_tensor.shape

torch.Size([3, 4])

In [21]:
# Create a random tensor with similar shape to an image tensor 
random_image_size_tensor = torch.rand(size=(3, 224, 224)) # colour channels, height, width
random_image_size_tensor.shape, random_image_size_tensor.ndim

(torch.Size([3, 224, 224]), 3)

### Zeros and ones

In [22]:
# Create a tensor of all zeros
zeroes = torch.zeros(size=(3,4))
zeroes

tensor([[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]])

In [23]:
# Create a tensor of all ones
ones = torch.ones(size=(3,4))
ones

tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]])

In [24]:
zeroes.dtype, ones.dtype

(torch.float32, torch.float32)

### Creating a range of tensors and tensors-like

In [25]:
# Use torch.arange(), returns a 1-D tensor
torch.arange(0, 10)

tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [26]:
torch.arange(start=0, end=101, step=2)

tensor([  0,   2,   4,   6,   8,  10,  12,  14,  16,  18,  20,  22,  24,  26,
         28,  30,  32,  34,  36,  38,  40,  42,  44,  46,  48,  50,  52,  54,
         56,  58,  60,  62,  64,  66,  68,  70,  72,  74,  76,  78,  80,  82,
         84,  86,  88,  90,  92,  94,  96,  98, 100])

In [27]:
# Creating tensors like
# Often, when you’re performing operations on two or more tensors, they will need to be of the same shape - that is, having the same number of dimensions and the same number of cells in each dimension. For that, we have the torch.*_like() methods:
x = torch.empty(2, 2, 3)
print(x.shape)
print(x)

empty_like_x = torch.empty_like(x)
print(empty_like_x.shape)
print(empty_like_x)

zeros_like_x = torch.zeros_like(x)
print(zeros_like_x.shape)
print(zeros_like_x)

ones_like_x = torch.ones_like(x)
print(ones_like_x.shape)
print(ones_like_x)

rand_like_x = torch.rand_like(x)
print(rand_like_x.shape)
print(rand_like_x)

torch.Size([2, 2, 3])
tensor([[[6.9193e+24, 3.0915e-41, 1.0000e+00],
         [1.0000e+00, 1.0000e+00, 1.0000e+00]],

        [[1.0000e+00, 1.0000e+00, 1.0000e+00],
         [1.0000e+00, 1.0000e+00, 1.0000e+00]]])
torch.Size([2, 2, 3])
tensor([[[2.7253e+20, 1.7728e+28, 1.4226e-13],
         [6.2608e+22, 4.7428e+30, 7.7781e+31]],

        [[1.8515e+28, 6.8794e+11, 2.7253e+20],
         [3.0866e+29, 1.1824e+22, 7.0976e+22]]])
torch.Size([2, 2, 3])
tensor([[[0., 0., 0.],
         [0., 0., 0.]],

        [[0., 0., 0.],
         [0., 0., 0.]]])
torch.Size([2, 2, 3])
tensor([[[1., 1., 1.],
         [1., 1., 1.]],

        [[1., 1., 1.],
         [1., 1., 1.]]])
torch.Size([2, 2, 3])
tensor([[[0.4361, 0.8379, 0.4258],
         [0.6417, 0.2052, 0.8083]],

        [[0.3727, 0.6507, 0.3971],
         [0.5283, 0.5204, 0.1976]]])


### Tensor datatypes

In [28]:
# Float 32 tensor
float_32_tensor = torch.tensor([3.0, 6.0, 9.0],
                               dtype=None, # what datatype is the tensor (e.g. float16 or float32)
                               device=None, # what device the tensor is on (default is cpu; cuda is gpu)
                               requires_grad=False # whether or not to track gradients with this tensors operations
                              )

float_32_tensor, float_32_tensor.dtype
# dtype is float32 even though dtype=None -> float32 is default

(tensor([3., 6., 9.]), torch.float32)

In [29]:
# Float 16 tensor: sacrifice some detail of how the numbers are represented <-> calculate faster and take up less space in memory as compared to float 32
float_16_tensor = torch.tensor([3.0, 6.0, 9.0], dtype=torch.float16)
float_16_tensor, float_16_tensor.dtype

(tensor([3., 6., 9.], dtype=torch.float16), torch.float16)

The available datatypes in PyTorch are listed in https://pytorch.org/docs/stable/tensors.html

**Note**: Tensor datatypes is one of the 3 big errors encountered in PyTorch and deep learning:
1. Tensor is not right datatype
2. Tensor is not right shape
3. Tensor is not on the right device

Precision in computing: https://en.wikipedia.org/wiki/Precision_(computer_science)

In [30]:
float_32_tensor*float_16_tensor # not all operations throw an error

tensor([ 9., 36., 81.])

In [31]:
int_32_tensor = torch.tensor([3, 6, 9], dtype=torch.int32)
int_32_tensor

tensor([3, 6, 9], dtype=torch.int32)

In [32]:
float_32_tensor*int_32_tensor

tensor([ 9., 36., 81.])

### Getting attributes from tensors

1. Tensor is not right datatype - to get datatype from a tensor, can use `tensor.dtype`
2. Tensor is not right shape - to get shape from a tensor, can use `tensor.shape`
3. Tensor is not on the right device - to get device from a tensor, can use `tensor.device`

In [33]:
some_tensor = torch.rand(3,4)
some_tensor

tensor([[0.2211, 0.6066, 0.8054, 0.2971],
        [0.6062, 0.1424, 0.9666, 0.8330],
        [0.2086, 0.4768, 0.5447, 0.2628]])

In [34]:
# Get  attributes from tensor
print(some_tensor)
print(f"Datatype of tensor: {some_tensor.dtype}")
print(f"Shape of tensor: {some_tensor.shape}")
print(f"Device tensor is on: {some_tensor.device}")

tensor([[0.2211, 0.6066, 0.8054, 0.2971],
        [0.6062, 0.1424, 0.9666, 0.8330],
        [0.2086, 0.4768, 0.5447, 0.2628]])
Datatype of tensor: torch.float32
Shape of tensor: torch.Size([3, 4])
Device tensor is on: cpu


### Manipulating tensors (tensor operations)

Tensor operations include:
* Addition
* Subtraction
* Multiplication (element-wise)
* Division
* Matrix multiplication

In [35]:
# Create a tensor and add 10 to it
tensor = torch.tensor([1,2,3])
tensor + 10

tensor([11, 12, 13])

In [36]:
# Multiply tensor by 10 (element-wise)
tensor = torch.tensor([1,2,3])
tensor * 10

tensor([10, 20, 30])

In [37]:
# Subtract 10
tensor - 10

tensor([-9, -8, -7])

In [38]:
# PyTorch has built-in functions
print(torch.mul(tensor, 10))
print(torch.add(tensor, 10))
print(torch.subtract(tensor, 10))

tensor([10, 20, 30])
tensor([11, 12, 13])
tensor([-9, -8, -7])


### Matrix multiplication

Two main ways of performing multiplication in neural networks and deep learning:

1. Element-wise operation (scalar multiplication)
2. Matrix multiplication

There are two main rules that performing matrix multiplication needs to satisfy:
1. The **inner dimensions** must match:
* `(3,2) @ (3,2)` won't work
* `(2,3) @ (3,2)` will work
* `(3,2) @ (2,3)` will work
2. The resulting matrix has the shape of the **outer dimensions**:
* `(2,3) @ (3,2)` -> `(2,2)`
* `(3,2) @ (2,3)` -> `(3,3)`

In [39]:
# Element-wise multiplication
print(tensor, "*", tensor)
print(f"Equals: {tensor * tensor}")

tensor([1, 2, 3]) * tensor([1, 2, 3])
Equals: tensor([1, 4, 9])


In [45]:
%%time
torch.matmul(tensor, tensor)

CPU times: user 541 µs, sys: 0 ns, total: 541 µs
Wall time: 412 µs


tensor(14)

In [47]:
%%time
tensor @ tensor

CPU times: user 315 µs, sys: 62 µs, total: 377 µs
Wall time: 313 µs


tensor(14)

In [46]:
%%time

dot_product = 0

for i in range(tensor.shape[0]):
    dot_product += tensor[i] * tensor[i]

dot_product


CPU times: user 1.97 ms, sys: 0 ns, total: 1.97 ms
Wall time: 1.95 ms


tensor(14)

### One of the most common errors in deep learning: shape errors

In [52]:
# Shapes for matrix multiplication
tensor_A = torch.tensor([[1,2],
                        [3,4],
                        [5,6]])

tensor_B = torch.tensor([[7,10],
                        [8,11],
                        [9,12]])

torch.matmul(tensor_A, tensor_B)
torch.mm(tensor_A, tensor_B) # same as above; torch.mm is an alias for torch.matmul

RuntimeError: mat1 and mat2 shapes cannot be multiplied (3x2 and 3x2)

In [53]:
tensor_A.shape, tensor_B.shape

(torch.Size([3, 2]), torch.Size([3, 2]))

To fix our tensor shape issues, we can use the **transpose** `.T` to change the shape of tensor_B.

In [56]:
tensor_B, tensor_B.shape

(tensor([[ 7, 10],
         [ 8, 11],
         [ 9, 12]]),
 torch.Size([3, 2]))

In [57]:
tensor_B.T, tensor_B.T.shape

(tensor([[ 7,  8,  9],
         [10, 11, 12]]),
 torch.Size([2, 3]))

In [62]:
# The matrix multiplication works when tensor_B is transposed
print(f"Original shapes: tensor_A = {tensor_A.shape}, tensor_B = {tensor_B.T.shape}")
print(f"New shapes: tensor_A = {tensor_A.shape} (same as above), tensor_B = {tensor_B.T.shape}")
print(f"Multiplying: {tensor_A.shape} @ {tensor_B.T.shape} <- inner dimensions must match")
print("Output:\n")
output = torch.mm(tensor_A, tensor_B.T)
print(output)
print(f"\nShape of output: {output.shape}")

Original shapes: tensor_A = torch.Size([3, 2]), tensor_B = torch.Size([2, 3])
New shapes: tensor_A = torch.Size([3, 2]) (same as above), tensor_B = torch.Size([2, 3])
Multiplying: torch.Size([3, 2]) @ torch.Size([2, 3]) <- inner dimensions must match
Output:

tensor([[ 27,  30,  33],
        [ 61,  68,  75],
        [ 95, 106, 117]])

Shape of output: torch.Size([3, 3])


### Finding the min, max, mean, sum, etc (tensor aggregation)

In [63]:
# Create a tensor
x = torch.arange(0, 100, 10)
x

tensor([ 0, 10, 20, 30, 40, 50, 60, 70, 80, 90])

In [64]:
# Find the min
torch.min(x)

tensor(0)

In [65]:
# Find the max
torch.max(x)

tensor(90)

In [68]:
# Find the mean - note: the torch.mean() function requires a tensor of float32 or float64 dtype to work
torch.mean(x.type(torch.float32))

tensor(45.)

In [69]:
# Find the sum
torch.sum(x)

tensor(450)

### Finding the positional min and max

In [72]:
# Find the position in tensor that has the minimum value with argmin() -> returns index position of target tensor where the minimum value occurs
torch.argmin(x)

tensor(0)

In [71]:
# Positional maximum
torch.argmax(x)

tensor(9)

### Reshaping, stacking, squeezing and unsqueezing tensors

* Reshaping - reshapes an input tensor to a defined shape
* View - Return a view of an input tensor of certain shape but keep the same memory as the original tensor
* Stacking - combine multiple tensors on top of each other (vstack) or side-by-side (hstack)
* Squeeze - removes all `1` dimensions from a tensor
* Unsqueeze - ass a `1` dimension to a target tensor 
* Permute - Return a view of the input with dimensions permuted (swapped) in a certain way

In [80]:
# Create a tensor
x = torch.arange(1., 10.)
x, x.shape

(tensor([1., 2., 3., 4., 5., 6., 7., 8., 9.]), torch.Size([9]))

In [81]:
# Reshape (shape must be compatible with number of elements)
x_reshaped = x.reshape(1,9)
x_reshaped, x_reshaped.shape

(tensor([[1., 2., 3., 4., 5., 6., 7., 8., 9.]]), torch.Size([1, 9]))

In [85]:
# Change the view
z = x.view(3,3)
z, z.shape

(tensor([[1., 2., 3.],
         [4., 5., 6.],
         [7., 8., 9.]]),
 torch.Size([3, 3]))

Changing `z` changes `x` and vice versa (because a view of a tensor shares the same memory as the original tensor)

In [86]:
z[:, 0] = 5
z, x

(tensor([[5., 2., 3.],
         [5., 5., 6.],
         [5., 8., 9.]]),
 tensor([5., 2., 3., 5., 5., 6., 5., 8., 9.]))

In [87]:
x[3] = 99
z, x

(tensor([[ 5.,  2.,  3.],
         [99.,  5.,  6.],
         [ 5.,  8.,  9.]]),
 tensor([ 5.,  2.,  3., 99.,  5.,  6.,  5.,  8.,  9.]))

In [89]:
# Stack tensors on top of each other
x_stacked = torch.stack([x, x, x])
x_stacked, x_stacked.shape

(tensor([[ 5.,  2.,  3., 99.,  5.,  6.,  5.,  8.,  9.],
         [ 5.,  2.,  3., 99.,  5.,  6.,  5.,  8.,  9.],
         [ 5.,  2.,  3., 99.,  5.,  6.,  5.,  8.,  9.]]),
 torch.Size([3, 9]))

In [90]:
# Stack tensors next to each other
x_stacked = torch.stack([x, x, x], dim=1)
x_stacked, x_stacked.shape

(tensor([[ 5.,  5.,  5.],
         [ 2.,  2.,  2.],
         [ 3.,  3.,  3.],
         [99., 99., 99.],
         [ 5.,  5.,  5.],
         [ 6.,  6.,  6.],
         [ 5.,  5.,  5.],
         [ 8.,  8.,  8.],
         [ 9.,  9.,  9.]]),
 torch.Size([9, 3]))

In [None]:
# Squeeze and unsqueeze