In [1]:
!nvidia-smi

Mon Oct 28 20:50:16 2024       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.89.02    Driver Version: 525.89.02    CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA GeForce ...  Off  | 00000000:1A:00.0 Off |                  N/A |
| 30%   24C    P8    24W / 350W |      5MiB / 24576MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
|   1  NVIDIA GeForce ...  Off  | 00000000:1B:00.0 Off |                  N/A |
| 30%   26C    P8    19W / 350W |      5MiB / 24576MiB |      0%      Default |
|       

In [1]:
import torch
torch.__version__

'2.3.1'

In [None]:
tensor_A = torch.tensor([[1, 2],
                        [3, 4],
                        [5, 6]])

tensor_B = torch.tensor([[7, 10],
                        [8, 11],
                        [9, 12]])
tensor_A.shape

torch.Size([3, 2])

In [None]:
tensor_B.T

tensor([[ 7,  8,  9],
        [10, 11, 12]])

In [None]:
tensor_B

tensor([[ 7, 10],
        [ 8, 11],
        [ 9, 12]])

In [None]:
tensor_B.T , tensor_B.T.shape

(tensor([[ 7,  8,  9],
         [10, 11, 12]]),
 torch.Size([2, 3]))

In [None]:
# The matrix multiplication operation works when tensor B is transposed
print(f"Original shapes: tensor_A = {tensor_A.shape}, tensor_B = {tensor_B.shape}\n")
print(f"New shapes: tensor_A = {tensor_A.shape} (same as above), tensor_B.T = {tensor_B.T.shape}\n")
print(f"Multiplying: {tensor_A.shape} * {tensor_B.T.shape} <- inner dimensions")
print("Output:\n")
output = torch.matmul(tensor_A, tensor_B.T)
print(output)
print(f"\nOutput shape: {output.shape}")


Original shapes: tensor_A = torch.Size([3, 2]), tensor_B = torch.Size([3, 2])

New shapes: tensor_A = torch.Size([3, 2]) (same as above), tensor_B.T = torch.Size([2, 3])

Multiplying: torch.Size([3, 2]) * torch.Size([2, 3]) <- inner dimensions
Output:

tensor([[ 27,  30,  33],
        [ 61,  68,  75],
        [ 95, 106, 117]])

Output shape: torch.Size([3, 3])


## Finding the min, max, mean, sum e.t.c (tensor aggregation)

In [None]:
# create a tensor
x = torch.arange(0, 100, 10)
x

tensor([ 0, 10, 20, 30, 40, 50, 60, 70, 80, 90])

In [None]:
# Find the min
torch.min(x)

tensor(0)

In [None]:
x.min()

tensor(0)

In [None]:
# Find the max
torch.max(x)

tensor(90)

In [None]:
x.max()

tensor(90)

In [None]:
# Find the mean
torch.mean(x)

RuntimeError: mean(): could not infer output dtype. Input dtype must be either a floating point or complex dtype. Got: Long

In [None]:
torch.mean(x.type(torch.float32))

tensor(45.)

In [None]:
torch.mean(x, dtype = torch.float32)

tensor(45.)

In [None]:
x.mean(dtype = torch.float32)

tensor(45.)

In [None]:
x.type(torch.float32).mean()

tensor(45.)

In [None]:
# Find the sum
torch.sum(x)

tensor(450)

In [None]:
x.sum()

## finding the positional min and max

In [None]:
x

tensor([ 0, 10, 20, 30, 40, 50, 60, 70, 80, 90])

In [None]:
# Find the position in the tensor that ha s the minimum value with argmin -> returns index value of target tensor where minimum value occurs
x.argmin()

tensor(0)

In [None]:
y = torch.tensor([6, 8, 9, 7, 16, 3, 8, 9, 10])
y.argmin()

tensor(5)

In [None]:
# Find the position in the tensor that has the maximum value with argmax() -> returns index value of target tensor where maximum value occurs
y.argmax()

tensor(4)

In [None]:
torch.argmax(y)

tensor(4)

## Reshaping, stacking, squeezing and unsqueezing tensors
* Reshaping - reshapes an input tensor to a defined shape
* View - Return a view of an input tensor of certain shape but keep the same memory as the original tensor
* Stacking - combine multiple tensors on top of each other (vstack or side by side h stack )
* Squeezing - removes all '1' dimension from a target tensor
* Unsqueeze - adds a '1' dimension to a target tensor
* Permute - return a view of the input with dimensions permuted (swapped) in a certain way

In [1]:
import torch
x = torch.arange(1., 10.)
x, x.shape

(tensor([1., 2., 3., 4., 5., 6., 7., 8., 9.]), torch.Size([9]))

In [2]:
# Add an extra dimension
# dimensions should be compatible with original dimensions
x_reshaped = x.reshape(1, 7)
x_reshaped, x_reshaped.shape

RuntimeError: shape '[1, 7]' is invalid for input of size 9

In [3]:
x_reshaped = x.reshape(1, 9)
x_reshaped, x_reshaped.shape

(tensor([[1., 2., 3., 4., 5., 6., 7., 8., 9.]]), torch.Size([1, 9]))

In [4]:
x_reshaped = x.reshape(3, 3)
x_reshaped, x_reshaped.shape

(tensor([[1., 2., 3.],
         [4., 5., 6.],
         [7., 8., 9.]]),
 torch.Size([3, 3]))

In [5]:
x_reshaped = x.reshape(9, 1)
x_reshaped, x_reshaped.shape

(tensor([[1.],
         [2.],
         [3.],
         [4.],
         [5.],
         [6.],
         [7.],
         [8.],
         [9.]]),
 torch.Size([9, 1]))

In [None]:
# Change the view
# view and reshape are quite similar but view shares the memory with the original tensor. this means in the example below
# z shares the same memory as x. changing in the value of z will change it in x too
z = x.view(1, 9)
z, z.shape

(tensor([[1., 2., 3., 4., 5., 6., 7., 8., 9.]]), torch.Size([1, 9]))

In [None]:
# changing z changes x (because a view of tensor shares the same memory as the original tensor)
z[0][0] = 5
z , x


(tensor([[5., 2., 3., 4., 5., 6., 7., 8., 9.]]),
 tensor([5., 2., 3., 4., 5., 6., 7., 8., 9.]))

## Stack tensors on top of each other



In [None]:
# stack tensors on top of each other
x_stacked = torch.stack([x, x, x, x], dim = 0)
x_stacked

tensor([[5., 2., 3., 4., 5., 6., 7., 8., 9.],
        [5., 2., 3., 4., 5., 6., 7., 8., 9.],
        [5., 2., 3., 4., 5., 6., 7., 8., 9.],
        [5., 2., 3., 4., 5., 6., 7., 8., 9.]])

In [None]:
# stack tensors on top of each other
x_stacked = torch.stack([x, x, x, x], dim = 1)
x_stacked

tensor([[5., 5., 5., 5.],
        [2., 2., 2., 2.],
        [3., 3., 3., 3.],
        [4., 4., 4., 4.],
        [5., 5., 5., 5.],
        [6., 6., 6., 6.],
        [7., 7., 7., 7.],
        [8., 8., 8., 8.],
        [9., 9., 9., 9.]])

In [None]:
# torch.squeeze() - removes all single dimensions from a target tensor
x = torch.zeros(2, 1, 2, 1, 2)
x.squeeze()

tensor([[[0., 0.],
         [0., 0.]],

        [[0., 0.],
         [0., 0.]]])

In [None]:
x_reshaped, x_reshaped.shape

(tensor([[5.],
         [2.],
         [3.],
         [4.],
         [5.],
         [6.],
         [7.],
         [8.],
         [9.]]),
 torch.Size([9, 1]))

In [None]:
x_reshaped = x_reshaped.reshape(1,9)
x_reshaped, x_reshaped.shape

(tensor([[5., 2., 3., 4., 5., 6., 7., 8., 9.]]), torch.Size([1, 9]))

In [None]:
x_reshaped.squeeze(), x_reshaped.squeeze().shape

(tensor([5., 2., 3., 4., 5., 6., 7., 8., 9.]), torch.Size([9]))

In [None]:
print(f"Previous tensor: {x_reshaped}")
print(f"Previous shape: {x_reshaped.shape}")

# Remove extra dimensions from x_reshaped
x_squeezed =  x_reshaped.squeeze()
print(f"\nNew Tensor: {x_squeezed}")
print(f"New shape: {x_squeezed.shape}")


Previous tensor: tensor([[5., 2., 3., 4., 5., 6., 7., 8., 9.]])
Previous shape: torch.Size([1, 9])

New Tensor: tensor([5., 2., 3., 4., 5., 6., 7., 8., 9.])
New shape: torch.Size([9])


In [None]:
# torch.unsqueeze() - adds a single dimension to a target tensor at a specific dim (dimension)
print(f"Previous target: {x_squeezed}")
print(f"Previous shape: {x_squeezed.shape}")

# Add an extra dimension with unsqueeze
x_unsqueezed =  x_squeezed.unsqueeze(dim = 0)
print(f"\nNew Tensor: {x_unsqueezed}")
print(f"New shape: {x_unsqueezed.shape}")

Previous target: tensor([5., 2., 3., 4., 5., 6., 7., 8., 9.])
Previous shape: torch.Size([9])

New Tensor: tensor([[5., 2., 3., 4., 5., 6., 7., 8., 9.]])
New shape: torch.Size([1, 9])


In [None]:
# torch.unsqueeze() - adds a single dimension to a target tensor at a specific dim (dimension)
print(f"Previous target: {x_squeezed}")
print(f"Previous shape: {x_squeezed.shape}")

# Add an extra dimension with unsqueeze
x_unsqueezed =  x_squeezed.unsqueeze(dim = 1)
print(f"\nNew Tensor: {x_unsqueezed}")
print(f"New shape: {x_unsqueezed.shape}")

Previous target: tensor([5., 2., 3., 4., 5., 6., 7., 8., 9.])
Previous shape: torch.Size([9])

New Tensor: tensor([[5.],
        [2.],
        [3.],
        [4.],
        [5.],
        [6.],
        [7.],
        [8.],
        [9.]])
New shape: torch.Size([9, 1])


In [None]:
# torch.permute - rearranges the dimensions of the target tensor in a specific order
x_original = torch.rand(size = (224, 3, 3)) # [height, width, color channels]

# Permute the original tensor to rearrange the axis or (dim) order
x_permuted = x_original.permute(2, 0, 1) # shifts axis 0->1, 1->2, 2->0

print(f"Previous shape: {x_original.shape}")
print(f"New shape: {x_permuted.shape}") # [colour channel, height, width]



Previous shape: torch.Size([224, 3, 3])
New shape: torch.Size([3, 224, 3])


In [None]:
# permuted tensor shares the same memory as the original tensor
x_original [0, 0, 0] = 123
x_original[0, 0, 0], x_permuted[0, 0, 0]

(tensor(123.), tensor(123.))

## Indexing - Selecting data from tensors
Indexing with PyTorch is similar to indexing with numpy

In [None]:
# Create a tensor
import torch
x = torch.arange(1, 10).reshape(1, 3, 3)
x, x.shape


(tensor([[[1, 2, 3],
          [4, 5, 6],
          [7, 8, 9]]]),
 torch.Size([1, 3, 3]))

In [None]:
x[0]

tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])

In [None]:
x[0][0]

tensor([1, 2, 3])

In [None]:
x[0, 0]

tensor([1, 2, 3])

In [None]:
x[0][:]

tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])

In [None]:
%%time
for j in x[0][:]:
  print(j[0])

tensor(1)
tensor(4)
tensor(7)
CPU times: user 5.16 ms, sys: 13 µs, total: 5.18 ms
Wall time: 5.12 ms


In [None]:
# Get all values of 0th and 1st dimension but only index 0 values of 2nd dimension
%%time
x[ : , : , 0]

CPU times: user 313 µs, sys: 0 ns, total: 313 µs
Wall time: 353 µs


tensor([[1, 4, 7]])

In [None]:
# Get all values of 0th and 1st dimension but only index 1 values of 2nd dimension
x[ : , : , 1]

tensor([[2, 5, 8]])

In [None]:
# Get all values of 0th dimension but only index 1 values of 1st and 2nd dimension
x[: , 1 , 1]

tensor([5])

## Pytorch to NumPy tensors

NumPy is a popular scientific Python numerical computing library.

And because of this, Pytorch has functionality to interact with it

* Data in NumPy, want in PyTorch tensor -> torch.from_numpy(ndarray)
* PyTorch tensor to NumPy -> torch.tensor.numpy()

In [None]:
# NumPy array to tensor
import torch
import numpy as np
array = np.arange(1.0, 8.0) # default datatype is float64
tensor = torch.from_numpy(array).type(torch.float32)
array , tensor

(array([1., 2., 3., 4., 5., 6., 7.]), tensor([1., 2., 3., 4., 5., 6., 7.]))

In [None]:
array.dtype

dtype('float64')

In [None]:
# change the value of an array, waht will this to "tensor"
array = array + 1
array, tensor

(array([2., 3., 4., 5., 6., 7., 8.]), tensor([1., 2., 3., 4., 5., 6., 7.]))

In [None]:
# Tensor to numpy array
tensor = torch.ones (7)
numpy_tensor = tensor.numpy()
tensor , numpy_tensor

(tensor([1., 1., 1., 1., 1., 1., 1.]),
 array([1., 1., 1., 1., 1., 1., 1.], dtype=float32))

In [None]:
numpy_tensor.dtype

dtype('float32')

In [None]:
# Change the value of tensor, what will this do to numpy_tensor?
tensor = tensor + 1
tensor, numpy_tensor

(tensor([2., 2., 2., 2., 2., 2., 2.]),
 array([1., 1., 1., 1., 1., 1., 1.], dtype=float32))

## Reproducibility (trying to take random out of random)

In short how a neural network learns:

' start with random numbers ' -> tensor operations -> update random numbers to try and make them better representations of data -> again again and again repeat'

To reduce the randomness in neural networks and PyTorch comes the concept of **random seed**

Essentially what the random seed does is "flavour" the randomness

In [None]:
torch.rand(3, 3)



tensor([[0.0375, 0.0407, 0.6274],
        [0.8845, 0.8157, 0.2083],
        [0.8980, 0.5551, 0.6572]])

In [None]:
import torch

# Create two random tensors
random_tensor_A = torch.rand(3, 4)
random_tensor_B = torch.rand(3, 4)

print(f"Tensor A:\n{random_tensor_A}\n")
print(f"Tensor B:\n{random_tensor_B}\n")
print(f"Does Tensor A equal Tensor B? (anywhere)")
random_tensor_A == random_tensor_B

Tensor A:
tensor([[0.9106, 0.6300, 0.1561, 0.6380],
        [0.1819, 0.1244, 0.4423, 0.4323],
        [0.8363, 0.7692, 0.9492, 0.1253]])

Tensor B:
tensor([[0.4246, 0.3789, 0.5950, 0.5056],
        [0.0599, 0.9709, 0.5846, 0.2625],
        [0.9933, 0.4762, 0.2774, 0.9155]])

Does Tensor A equal Tensor B? (anywhere)


tensor([[False, False, False, False],
        [False, False, False, False],
        [False, False, False, False]])

In [None]:
# Let's make some random but reproducible tensors
import torch

# Set the random seed
RANDOM_SEED = 42

torch.manual_seed(RANDOM_SEED) # it only works for one block of code
random_tensor_C = torch.rand(3, 4)

torch.manual_seed(RANDOM_SEED)
random_tensor_D = torch.rand(3, 4)

print(f"Tensor A:\n{random_tensor_C}\n")
print(f"Tensor B:\n{random_tensor_D}\n")
print(f"Does Tensor A equal Tensor B? (anywhere)")
random_tensor_C == random_tensor_D

Tensor A:
tensor([[0.8823, 0.9150, 0.3829, 0.9593],
        [0.3904, 0.6009, 0.2566, 0.7936],
        [0.9408, 0.1332, 0.9346, 0.5936]])

Tensor B:
tensor([[0.8823, 0.9150, 0.3829, 0.9593],
        [0.3904, 0.6009, 0.2566, 0.7936],
        [0.9408, 0.1332, 0.9346, 0.5936]])

Does Tensor A equal Tensor B? (anywhere)


tensor([[True, True, True, True],
        [True, True, True, True],
        [True, True, True, True]])

## Running Tensors and PyTorch objects on the GPUs (and making faster computations)

GPUs = faster computations on numbers, thanks to CUDA + NVIDIA hardware

In [None]:
!nvidia-smi

Fri Jul 12 14:08:20 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.104.05             Driver Version: 535.104.05   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  Tesla T4                       Off | 00000000:00:04.0 Off |                    0 |
| N/A   58C    P8              12W /  70W |      0MiB / 15360MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

## Check for GPU access with PyTorch

In [None]:
import torch
torch.cuda.is_available()

True

For PyTorch since it's capable of running compute on the GPU or CPU, it's best practice to use device agnostic code:
[https://pytorch.org/docs/stable/notes/cuda.html#device-agnostic-code](https://pytorch.org/docs/stable/notes/cuda.html#device-agnostic-code
)
E.g: run on GPU if available, else default to CPU

In [None]:
# Setup device agnostic code
device = "cuda" if torch.cuda.is_available() else "cpu"
device

'cuda'

In [6]:
# Count number of devices
torch.cuda.device_count()

10

## Putting tensors and models on the GPU
because GPU results in faster computations

In [None]:
# Create a tensor (default on the CPU)
tensor = torch.tensor([1, 2, 3])

# Tensor not on GPU
print(tensor, tensor.device)

tensor([1, 2, 3]) cpu


In [None]:
device = "cuda" if torch.cuda.is_available() else "cpu"
# Move tensor to GPU if available
tensor_on_gpu = tensor.to(device)
tensor_on_gpu

tensor([1, 2, 3], device='cuda:0')

## Moving Tensors Back to CPU

In [None]:
# If a tensor is on GPU, can't transfer it to CPU without NumPy
tensor_on_gpu.numpy()

TypeError: can't convert cuda:0 device type tensor to numpy. Use Tensor.cpu() to copy the tensor to host memory first.

In [None]:
# To fix the GPU tensor with NumPy issue, we can first set it to the CPU
tensor_back_on_cpu = tensor_on_gpu.cpu().numpy()
tensor_back_on_cpu

array([1, 2, 3])

In [None]:
tensor_on_gpu

tensor([1, 2, 3], device='cuda:0')

# Exercises

In [None]:
# Create a random tensor with shape (7, 7)
import torch
x = torch.rand(7, 7)
x, x.shape

(tensor([[0.6750, 0.4991, 0.2844, 0.5888, 0.9028, 0.1687, 0.3738],
         [0.6166, 0.3934, 0.1947, 0.2282, 0.2488, 0.9440, 0.1752],
         [0.2170, 0.9384, 0.1198, 0.7383, 0.2227, 0.7069, 0.6521],
         [0.5005, 0.3765, 0.6202, 0.6439, 0.4559, 0.1039, 0.1660],
         [0.3511, 0.7405, 0.1484, 0.0427, 0.8977, 0.6276, 0.2471],
         [0.4474, 0.8526, 0.6462, 0.0248, 0.3123, 0.4646, 0.4838],
         [0.8734, 0.8396, 0.0459, 0.3432, 0.9449, 0.4231, 0.9375]]),
 torch.Size([7, 7]))

In [None]:
# Perform a matrix multiplication on the tensor from 2 with another random tensor with shape (1, 7) (hint: you may have to transpose the second tensor).
y = torch.rand(1, 7)
torch.matmul(x, y.T) # y.T to transpose the tensor y

tensor([[2.0436],
        [1.4822],
        [1.9532],
        [1.4896],
        [1.6964],
        [1.4283],
        [2.5324]])

In [None]:
# Set the random seed to 0 and do exercises 2 & 3 over again
RANDOM_SEED = 0
torch.manual_seed(RANDOM_SEED)
x = torch.rand(7, 7)

torch.manual_seed(RANDOM_SEED)
y = torch.rand(1, 7)
torch.matmul(x, y.T)

tensor([[1.5985],
        [1.1173],
        [1.2741],
        [1.6838],
        [0.8279],
        [1.0347],
        [1.2498]])


### Speaking of random seeds, we saw how to set it with torch.manual_seed() but is there a GPU equivalent? (hint: you'll need to look into the documentation for torch.cuda for this one). If there is, set the GPU random seed to 1234.

In [None]:
torch.cuda.manual_seed(1234)

### Create two random tensors of shape (2, 3) and send them both to the GPU (you'll need access to a GPU for this). Set torch.manual_seed(1234) when creating the tensors (this doesn't have to be the GPU random seed).



In [None]:
device = "cuda" if torch.cuda.is_available() else "cpu"

torch.manual_seed(1234)
x = torch.rand(2, 3, device = device)

torch.manual_seed(1234)
y = torch.rand(2, 3, device = device)

y = y.reshape(3, 2)


x, y

(tensor([[0.1272, 0.8167, 0.5440],
         [0.6601, 0.2721, 0.9737]], device='cuda:0'),
 tensor([[0.1272, 0.8167],
         [0.5440, 0.6601],
         [0.2721, 0.9737]], device='cuda:0'))

### Perform a matrix multiplication on the tensors you created in 6 (again, you may have to adjust the shapes of one of the tensors).

In [None]:
z = torch.matmul(x, y)
z, z.shape, z.device

(tensor([[0.6085, 1.1727],
         [0.4969, 1.6667]], device='cuda:0'),
 torch.Size([2, 2]),
 device(type='cuda', index=0))

### Find the maximum and minimum values of the output of 7.

In [None]:
z.max(), z.min()

(tensor(1.6667, device='cuda:0'), tensor(0.4969, device='cuda:0'))

### Find the maximum and minimum index values of the output of 7.

In [None]:
z.argmax(), z.argmin()

(tensor(3, device='cuda:0'), tensor(2, device='cuda:0'))

### Make a random tensor with shape (1, 1, 1, 10) and then create a new tensor with all the 1 dimensions removed to be left with a tensor of shape (10). Set the seed to 7 when you create it and print out the first tensor and it's shape as well as the second tensor and it's shape.

In [None]:
torch.manual_seed(7)
x = torch.rand(1, 1, 1, 10)


y = x.squeeze()

x , x.shape, y, y.shape


(tensor([[[[0.5349, 0.1988, 0.6592, 0.6569, 0.2328, 0.4251, 0.2071, 0.6297,
            0.3653, 0.8513]]]]),
 torch.Size([1, 1, 1, 10]),
 tensor([0.5349, 0.1988, 0.6592, 0.6569, 0.2328, 0.4251, 0.2071, 0.6297, 0.3653,
         0.8513]),
 torch.Size([10]))