# 00. PyTorch Fundamentals

First, we import the necessary libraries.

In [None]:
import torch
import numpy as np

print(torch.__version__)

2.7.0


## Introduction to Tensors

### Creating tensors

PyTorch tensors are created using `torch.Tensor()`

In [5]:
# scalar
scalar = torch.tensor(7)
scalar

tensor(7)

In [6]:
scalar.ndim

0

In [7]:
# get tensor back as Python int
scalar.item()

7

In [9]:
# Vector
vector = torch.tensor([7,7])
vector

tensor([7, 7])

In [10]:
vector.ndim

1

In [11]:
vector.shape

torch.Size([2])

In [None]:
# Matrix 
MATRIX = torch.tensor([[7,8], [9,10]])
MATRIX

tensor([[ 7,  8],
        [ 9, 10]])

In [None]:
MATRIX.ndim

2

In [None]:
MATRIX[0][1]

tensor(8)

In [None]:
MATRIX.shape

torch.Size([2, 2])

In [28]:
# Tensor 
TENSOR = torch.tensor([[[1,2,3], 
                        [4,5,6], 
                        [6,7,8]]])
TENSOR

tensor([[[1, 2, 3],
         [4, 5, 6],
         [6, 7, 8]]])

In [29]:
TENSOR.ndim

3

In [30]:
TENSOR.shape

torch.Size([1, 3, 3])

In [31]:
TENSOR[0]

tensor([[1, 2, 3],
        [4, 5, 6],
        [6, 7, 8]])

Scalars and vectors are usually named by lower case variables and matrices and tensors are usually named by an upper case variable.

### Random Tensors

Why random tensors? Random tensors are important because the way many neural networks learn is that they start with tensors full of random numbers and then adjust those random numbers to better represent the data.

In [36]:
# Create a random tensor of shape (3,4). Returns a tensor filled with random numbers from a uniform distribution on the interval 

random_tensor = torch.rand(3,4)
random_tensor

tensor([[0.8605, 0.5427, 0.6301, 0.9966],
        [0.4097, 0.1481, 0.7933, 0.9278],
        [0.9383, 0.8417, 0.5006, 0.9078]])

In [38]:
# Create a random tensor with similar shape to an image tensor

random_image_size_tensor = torch.rand(size=(224,224,3)) # height, width, colour channel
random_image_size_tensor.shape, random_image_size_tensor.ndim

(torch.Size([224, 224, 3]), 3)

### Zeros and ones

In [None]:
# Create a tensor of all zeros
zeros = torch.zeros(size=(3,4))
zeros

tensor([[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]])

In [40]:
# Create a tensor of all ones
ones = torch.ones(size=(3,4))
ones

tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]])

In [None]:
# Default data type in PyTorch is torch.float32
ones.dtype

torch.float32

### Create a range of tensors and tensors-like

In [None]:
# Use torch.range() -> deprecated
torch.range(0,10)

  torch.range(0,10)


tensor([ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10.])

In [51]:
# Better to use torch.arange()
one_to_ten = torch.arange(start=1,end=11,step=2)
one_to_ten

tensor([1, 3, 5, 7, 9])

In [52]:
# Creating tensors like -> takes the shape of the input
ten_zeros = torch.zeros_like(input=one_to_ten)
ten_zeros

tensor([0, 0, 0, 0, 0])

### Tensor datatypes

**Note:** Tensor datastypes is one of the 3 big errors you'll run into with PyTorch & deep learning:
1. Tensors not right datatype
2. Tensors not right shape
3. Tensors not on the right device

In [None]:
# Float32 tensor -> Default is float32
float_32_tensor = torch.tensor([3.0, 6.0, 9.0], 
                               dtype=None,              # datatype of float tensors
                               device=None,             # What device is your tensor on. By default cpu, operations between two tensors that don't live on the same device yields an error
                               requires_grad=False      # Whether or not to track gradients with this tensors operations
                               )
float_32_tensor.dtype

torch.float32

In [56]:
# Float16 tensor 
float_16_tensor = float_32_tensor.type(torch.float16)
float_16_tensor

tensor([3., 6., 9.], dtype=torch.float16)

In [57]:
float_16_tensor * float_32_tensor

tensor([ 9., 36., 81.], dtype=torch.float16)

In [61]:
int_32_tensor = torch.tensor([3,6,9], dtype=torch.int64)
int_32_tensor * float_32_tensor

tensor([ 9., 36., 81.], dtype=torch.float16)

Pretty robust regarding multiplications of tensors of different datatypes.

### Getting information from tensors (tensor attributes)

1. Datatype - use `tensor.dtype`
2. Shape - use `tensor.shape`
3. Device - use `tensor.device`

In [62]:
some_tensor = torch.rand(3,4)
some_tensor.dtype, some_tensor.shape, some_tensor.device

(torch.float32, torch.Size([3, 4]), device(type='cpu'))

### Manipulating Tensors (tensor operations)

Tensor operations include:
* Addition
* Subtraction
* Multiplication
* Division
* Matrix multiplication

In [79]:
# Addition
tensor = torch.tensor([1,2,3])
tensor + 100

tensor([101, 102, 103])

In [66]:
# Subtraction 
tensor - 10

tensor([-9, -8, -7])

In [65]:
# Multiplication
tensor * 10

tensor([10, 20, 30])

In [67]:
# Division
tensor / 10

tensor([0.1000, 0.2000, 0.3000])

In [71]:
# Try out PyTorch in-built functions
torch.mul(tensor,10)        # multiplication
torch.add(tensor, 10)       # addition
torch.subtract(tensor, 10)  # subtraction

tensor([-9, -8, -7])

In [74]:
# Elementwise matrix multiplication
print(tensor, "*", tensor)
print(f"Equals: {tensor*tensor}")

tensor([1, 2, 3]) * tensor([1, 2, 3])
Equals: tensor([1, 4, 9])


In [75]:
# Matrix multiplication
torch.matmul(tensor,tensor)

tensor(14)

In [76]:
# Matrix multiplication 2
tensor @ tensor

tensor(14)

### One of the most common errors in deep learning: shape errors

In [221]:
# Shapes for matrix multiplication
tensor_A = torch.tensor([[1,2],
                        [3,4], 
                        [5,6]])

tensor_B = torch.tensor([[7,10],
                         [8,11],
                         [9,12]])

# matrix multiplication 3 -> gives in error since shapes are not compatible for matrix multiplication
# torch.mm(tensor_A, tensor_B)

To fix our tensor shape issues, we can manipulate the shape of one of our tensors using a **transpose**.

In [83]:
torch.mm(tensor_A, tensor_B.T)

tensor([[ 27,  30,  33],
        [ 61,  68,  75],
        [ 95, 106, 117]])

### Finding the min, max, mean, sum, etc (tensor aggregation)

In [92]:
# Minimum
x = torch.arange(0,100,10)
print(x)
x.min(), torch.min(x)

tensor([ 0, 10, 20, 30, 40, 50, 60, 70, 80, 90])


(tensor(0), tensor(0))

In [93]:
# Maximum 
torch.max(x), x.max()

(tensor(90), tensor(90))

In [None]:
# Argmax
torch.argmax(x), x.argmax()

(tensor(9), tensor(9))

In [103]:
# Argmin
torch.argmin(x), x.argmin()

(tensor(0), tensor(0))

### Reshaping, stacking, squeezing and unsqueezing tensors

* Reshaping - Reshapes an input tensor to a defined shape
* View - Return a view of an input tensor of certain shape but keep the same memory as the original tensor
* Stacking - Combine multiple tensors on top of each other (vstack) or side by side (hstack)
* Squeezing - Removes all `1` dimensions from a a tensor
* Unsqueezing - Add a `1` dimension to a target tensor
* Permute - Return a view of the input with dimensions permuted in a certain way

In [124]:
x = torch.arange(1,11)
x, x.shape

(tensor([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10]), torch.Size([10]))

In [125]:
# Add an extra deimension
x_reshaped = x.reshape(1,10)
x_reshaped, x_reshaped.shape

(tensor([[ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10]]), torch.Size([1, 10]))

In [None]:
# Change the view, x remains in its original shape!
z = x.view(10,1)
z, z.shape

(tensor([[ 1],
         [ 2],
         [ 3],
         [ 4],
         [ 5],
         [ 6],
         [ 7],
         [ 8],
         [ 9],
         [10]]),
 torch.Size([10, 1]))

In [128]:
# Changing z changes x (because a view of a tensor shares the same memory as the original input)
z[0] = 5
z,x

(tensor([[ 5],
         [ 2],
         [ 3],
         [ 4],
         [ 5],
         [ 6],
         [ 7],
         [ 8],
         [ 9],
         [10]]),
 tensor([ 5,  2,  3,  4,  5,  6,  7,  8,  9, 10]))

In [134]:
# Stack tensors on top of each other
x_stacked = torch.stack([x,x,x,x])
x_stacked

tensor([[ 5,  2,  3,  4,  5,  6,  7,  8,  9, 10],
        [ 5,  2,  3,  4,  5,  6,  7,  8,  9, 10],
        [ 5,  2,  3,  4,  5,  6,  7,  8,  9, 10],
        [ 5,  2,  3,  4,  5,  6,  7,  8,  9, 10]])

In [130]:
# Stack tensors on top of each other
x_stacked = torch.stack([x,x,x,x], dim=1)
x_stacked

tensor([[ 5,  5,  5,  5],
        [ 2,  2,  2,  2],
        [ 3,  3,  3,  3],
        [ 4,  4,  4,  4],
        [ 5,  5,  5,  5],
        [ 6,  6,  6,  6],
        [ 7,  7,  7,  7],
        [ 8,  8,  8,  8],
        [ 9,  9,  9,  9],
        [10, 10, 10, 10]])

In [150]:
# Squeeze - removes all the dimensions of size 1
x = torch.arange(10)
x_reshaped = x.reshape(1,10)
print(f"Reshaped Tensor: {x_reshaped}")
print(f"Previous shape: {x_reshaped.shape}")
x_squeezed = x_reshaped.squeeze()
print(f"New tensor: {x_squeezed}")
print(f"New shape: {x_squeezed.shape}")

Reshaped Tensor: tensor([[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]])
Previous shape: torch.Size([1, 10])
New tensor: tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
New shape: torch.Size([10])


In [156]:
# Unsqueeze - Add a single dimension to a target tensor at a specific dimension
x_unsqueezed0 = x_squeezed.unsqueeze(0)
x_unsqueezed1 = x_squeezed.unsqueeze(1)
x_unsqueezed0, x_unsqueezed1

(tensor([[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]]),
 tensor([[0],
         [1],
         [2],
         [3],
         [4],
         [5],
         [6],
         [7],
         [8],
         [9]]))

In [161]:
# permute - rearrange the dimensions of a target tensor in a specified order
x_original = torch.rand(size=(224,224,3))   # height, width, colour_channels

# Permute the origina tensor to rearrange the axis order
x_permuted = x_original.permute(2, 0, 1)    # colour_channels, height, width
print(f"Previous shape: {x_original.shape}")
print(f"New shape: {x_permuted.shape}")

Previous shape: torch.Size([224, 224, 3])
New shape: torch.Size([3, 224, 224])


In [167]:
# Changing x_original also changes x_pernmuted
x_original[0,0,0] = 728
x_permuted[0,0,0]

tensor(728.)

### Indexing (selecting data from tensors)

Indexing with PyTorch is similar to indexing with NumPy.

In [172]:
x = torch.arange(1,10).reshape(1,3,3)
x, x.shape

(tensor([[[1, 2, 3],
          [4, 5, 6],
          [7, 8, 9]]]),
 torch.Size([1, 3, 3]))

In [177]:
# Indexing our new tensor
x[0], x[0][0], x[0,0], x[0,0,2]

(tensor([[1, 2, 3],
         [4, 5, 6],
         [7, 8, 9]]),
 tensor([1, 2, 3]),
 tensor([1, 2, 3]),
 tensor(3))

In [181]:
# You can also use ":" to select all of a target dimension
x[:, 0], x[0,0,:], x[:,:,1]

(tensor([[1, 2, 3]]), tensor([1, 2, 3]), tensor([[2, 5, 8]]))

### PyTorch tensors and NumPy

NumPy is a popular scientific Python numerical computing library. ANd because of this, PyTorch has functionality to interact with it.
* Data in NumPy, want in PyTorch tensor - `torch.from_numpy(ndarray)`
* PyTorch tensor to NumPy - ` torch.Tensor.numpy()`

In [None]:
# NumPy array to tensor - Be aware that the NumPy default datatype is float64 and torch reflects that datatype
array = np.arange(1.,8.)
tensor = torch.from_numpy(array)
array, tensor

(array([1., 2., 3., 4., 5., 6., 7.]),
 tensor([1., 2., 3., 4., 5., 6., 7.], dtype=torch.float64))

In [None]:
# Change the value of array, what will this do to tensor? - Nothing
array = array + 1
array, tensor

(array([2., 3., 4., 5., 6., 7., 8.]),
 tensor([1., 2., 3., 4., 5., 6., 7.], dtype=torch.float64))

In [192]:
# Tensor to NumPy array - Default datatype of PyTorch is reflected by NumPy
tensor = torch.ones(7)
numpy_tensor = tensor.numpy()
tensor, numpy_tensor

(tensor([1., 1., 1., 1., 1., 1., 1.]),
 array([1., 1., 1., 1., 1., 1., 1.], dtype=float32))

In [194]:
# Change the tensor, what happens to numpy_tensor? - Again nothing
tensor = tensor + 1
tensor, numpy_tensor

(tensor([3., 3., 3., 3., 3., 3., 3.]),
 array([1., 1., 1., 1., 1., 1., 1.], dtype=float32))

### Reproducibility (trying to take random out of random)

In short, how a neural network learns is start with random numbers, perform tensor operations, update random numbers to try and make them better representations of the data, again, again, ...  
To reduce the randomness in neural networks and PyTorch comes the concept of a **random seed**.  
Essentially what the random seed does is "flavour" the randomness.

In [206]:
# Create two random tensors
random_tensor_A = torch.rand(3,4)
random_tensor_B = torch.rand(3,4)
print(random_tensor_A == random_tensor_B)

tensor([[False, False, False, False],
        [False, False, False, False],
        [False, False, False, False]])


In [None]:
# Let's make some random but reproducible tensors - manual_seed only works for one block of code!!!
RANDOM_SEED = 42
torch.manual_seed(RANDOM_SEED)
random_tensor_C = torch.rand(3,4)

torch.manual_seed(RANDOM_SEED)
random_tensor_D = torch.rand(3,4)

random_tensor_C == random_tensor_D

tensor([[True, True, True, True],
        [True, True, True, True],
        [True, True, True, True]])

## Running tensors and PyTorch objects on the GPUs (and making computations faster)

GPUs = faster computation on numbers, thanks to CUDA + NVIDIA hardware + PyTorch working behind the scenes to make everything hunky dory.

### 1. Getting a GPU

1. Easiest - Use Google Colab for a free GPU (options to upgrade as well)
2. Use your own GPU - takes a little bit of setup and requires the investmenet of purchasing a GPU, there's lots of options...
3. Use cloud computing - GCP (Google), AWS (Amazon), Azure (Microsoft), ...

In [None]:
# Following works in Colab after selecting GPU - gives information about GPU
!nvidia-smi

### 2. Check for GPU access with PyTorch

In [None]:
# Check for GPU access with PyTorch
torch.cuda.is_available()

In [214]:
# Setup device agnostic code
device = "cuda" if torch.cuda.is_available() else "cpu"
# On mac we can use
device = torch.accelerator.current_accelerator().type if torch.accelerator.is_available() else "cpu"
print(f"Using {device} device")

Using mps device


## 3. Putting tensors (and models) on the GPU

The reason we want our tensors/models on the GPU is because using a GPU results in faster computations

In [None]:
# Create a tensor (default on the CPU)
tensor = torch.tensor([1,2,3])
tensor.device

device(type='cpu')

In [215]:
# Move tensor to GPU (if available)
tensor_on_gpu = tensor.to(device)
tensor_on_gpu.device

device(type='mps', index=0)

### 4. Move tensors back to the CPU

In [None]:
# if tensor is on GPU, can't transform it to NumPy, since NumPy only works on cpu -> this gives an error
#tensor_on_gpu.numpy()

In [None]:
# To fix this issue, we can first set it to the CPU (both ways possible)
tensor_on_cpu = tensor_on_gpu.to("cpu").numpy()
tensor_on_cpu = tensor_on_gpu.cpu().numpy()

tensor_on_cpu

array([1, 2, 3])

## Exercises

In [222]:
# 2.Create a random tensor with shape (7, 7).
t1 = torch.rand(7,7)

# 3. Perform a matrix multiplication on the tensor from 2 with another random tensor with shape (1, 7) (hint: you may have to transpose the second tensor).
t2 = torch.rand(1,7)
t1 @ t2.T

tensor([[1.9625],
        [1.0950],
        [0.9967],
        [1.8910],
        [1.9205],
        [1.0674],
        [1.6949]])

In [227]:
# 4. Set the random seed to 0 and do exercises 2 & 3 over again.
torch.manual_seed(RANDOM_SEED)
t1 = torch.rand(7,7)
t1

tensor([[0.8823, 0.9150, 0.3829, 0.9593, 0.3904, 0.6009, 0.2566],
        [0.7936, 0.9408, 0.1332, 0.9346, 0.5936, 0.8694, 0.5677],
        [0.7411, 0.4294, 0.8854, 0.5739, 0.2666, 0.6274, 0.2696],
        [0.4414, 0.2969, 0.8317, 0.1053, 0.2695, 0.3588, 0.1994],
        [0.5472, 0.0062, 0.9516, 0.0753, 0.8860, 0.5832, 0.3376],
        [0.8090, 0.5779, 0.9040, 0.5547, 0.3423, 0.6343, 0.3644],
        [0.7104, 0.9464, 0.7890, 0.2814, 0.7886, 0.5895, 0.7539]])

In [228]:
# 6. Create two random tensors of shape (2, 3) and send them both to the GPU (you'll need access to a GPU for this). Set torch.manual_seed(1234) when creating the tensors (this doesn't have to be the GPU random seed).

torch.manual_seed(1234)
t1 = torch.rand(2,3).to(device)
t2 = torch.rand(2,3).to(device)

In [231]:
# 7. Perform a matrix multiplication on the tensors you created in 6 (again, you may have to adjust the shapes of one of the tensors).
t3 = t1 @ t2.T

In [232]:
# 8. Find the maximum and minimum values of the output of 7
t3.max(), t3.min()

(tensor(0.5617, device='mps:0'), tensor(0.3647, device='mps:0'))

In [233]:
# 9. Find the maximum and minimum index values of the output of 7
t3.argmin(), t3.argmax()

(tensor(0, device='mps:0'), tensor(3, device='mps:0'))

In [235]:
# 10. Make a random tensor with shape (1, 1, 1, 10) and then create a new tensor with all the 1 dimensions removed to be left with a tensor of shape (10). Set the seed to 7 when you create it and print out the first tensor and it's shape as well as the second tensor and it's shape.

torch.manual_seed(7)
random_tensor = torch.rand(1,1,1,10)
print(random_tensor.shape)
random_tensor_squeezed = random_tensor.squeeze()
random_tensor_squeezed.shape

torch.Size([1, 1, 1, 10])


torch.Size([10])