In [None]:
## Pytorch Fundamentals

## Pytorch Fundamentals

In [None]:
import torch
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
print(torch.__version__)

2.6.0+cu124


In [None]:
!nvidia-smi

/bin/bash: line 1: nvidia-smi: command not found


## Introduction to Tensors

### Creating Tensor

In [None]:
# Scalar: a zero-dimensional tensor that holds a single number
scalar = torch.tensor(7)
scalar

tensor(7)

In [5]:
scalar.ndim


0

In [6]:
# Get tensor back as python int
scalar.item()

7

In [7]:
# Vector: a one-dimensional tensor (a single list of numbers)
vector = torch.tensor([7,7])
vector

tensor([7, 7])

In [8]:
vector.ndim

1

In [9]:
vector.shape

torch.Size([2])

In [10]:
# MATRIX: a two-dimensional tensor, written as two rows of two values each
MATRIX = torch.tensor([[7, 8], [9, 10]])
MATRIX

tensor([[ 7,  8],
        [ 9, 10]])

In [11]:
MATRIX.ndim

2

In [12]:
MATRIX.shape

torch.Size([2, 2])

In [13]:
MATRIX[1]

tensor([ 9, 10])

In [14]:
# TENSOR: a three-dimensional tensor - one outer entry wrapping a 3x3 grid
TENSOR = torch.tensor(
    [
        [
            [1, 2, 3],
            [3, 6, 9],
            [2, 4, 5],
        ]
    ]
)
TENSOR

tensor([[[1, 2, 3],
         [3, 6, 9],
         [2, 4, 5]]])

In [15]:
TENSOR.ndim

3

In [16]:
TENSOR.shape

torch.Size([1, 3, 3])

In [17]:
TENSOR[0]

tensor([[1, 2, 3],
        [3, 6, 9],
        [2, 4, 5]])

### Random Tensors

A random tensor is simply a tensor filled with random numbers.

Instead of manually giving numbers (like [1, 2, 3]), you let PyTorch generate random values for you.

In [18]:
# Random tensor with 3 rows and 4 columns (torch.rand draws values from [0, 1))
random_tensor = torch.rand(size=(3, 4))
random_tensor

tensor([[0.3796, 0.7441, 0.7129, 0.0169],
        [0.3593, 0.2577, 0.7231, 0.5151],
        [0.4571, 0.2744, 0.5353, 0.3862]])

In [19]:
# Random tensor with three dimensions: 2 blocks, each of 3 rows and 4 columns
random_tensor_2 = torch.rand(size=(2, 3, 4))
random_tensor_2

tensor([[[0.0283, 0.0297, 0.0841, 0.7967],
         [0.6212, 0.2814, 0.5666, 0.2893],
         [0.3649, 0.0179, 0.7108, 0.6658]],

        [[0.9952, 0.4315, 0.7590, 0.5319],
         [0.3926, 0.8060, 0.5273, 0.0759],
         [0.9260, 0.3668, 0.1360, 0.0734]]])

In [20]:
random_tensor.ndim,random_tensor_2.ndim

(2, 3)

In [21]:
# Random tensor laid out like an RGB image: (height, width, colour channels)
random_image_size_tensor = torch.rand(size=(224, 224, 3))
random_image_size_tensor.shape, random_image_size_tensor.ndim

(torch.Size([224, 224, 3]), 3)

### Zeros and ones

In [22]:
# Create a 3x4 tensor in which every element is 0
zeros = torch.zeros(size=(3,4))
zeros

tensor([[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]])

In [23]:
# Create a 3x4 tensor in which every element is 1
ones = torch.ones(size=(3,4))
ones

tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]])

In [24]:
ones.dtype

torch.float32

### Creating a range of tensors and tensors-like

In [25]:
# Use torch.arange(start, end): `end` is exclusive, so this yields 1..10
# (torch.range() is deprecated in favour of torch.arange())
one_to_ten = torch.arange(1,11)
one_to_ten

tensor([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [26]:
# The same 1..10 range written with explicit keyword arguments
one_to_ten_another = torch.arange(start=1, end=11, step=1)
one_to_ten_another

tensor([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [27]:
# step=2 keeps every second value, starting from `start`
another_arange = torch.arange(start=1, end=11, step=2)
another_arange

tensor([1, 3, 5, 7, 9])

In [28]:
# Multiples of 50 from 0 up to (but not including) 1000
steps_list = torch.arange(start=0, end=1000, step=50)
steps_list

tensor([  0,  50, 100, 150, 200, 250, 300, 350, 400, 450, 500, 550, 600, 650,
        700, 750, 800, 850, 900, 950])

In [29]:
# Creating tensors like: zeros_like() builds a tensor of zeros matching the shape of `input`
ten_zeros = torch.zeros_like(input=one_to_ten)
ten_zeros

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [30]:
# ones_like() does the same with ones: one 1 for every element of `input`
ten_ones = torch.ones_like(input=one_to_ten)
ten_ones

tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1])

### Tensor datatypes

In [31]:
# Float 32 tensor, with the three most commonly tuned creation arguments spelled out
float_32_tensor = torch.tensor(
    [3.0, 6.0, 9.0],
    dtype=torch.float32,  # what datatype the elements are stored as (e.g. float32 or float16)
    device=None,  # what device the tensor is on
    requires_grad=False,  # whether or not to track gradients with this tensor's operations
)
float_32_tensor

tensor([3., 6., 9.])

In [32]:
float_32_tensor.dtype

torch.float32

In [33]:
# Convert the float32 tensor to half precision (float16)
float_16_tensor =float_32_tensor.type(torch.float16)
print(float_16_tensor)


tensor([3., 6., 9.], dtype=torch.float16)


In [34]:
float_16_tensor * float_32_tensor

tensor([ 9., 36., 81.])

In [35]:
# 32-bit integer tensor (the dtype is requested explicitly)
int_32_tensor = torch.tensor([3,6,9], dtype=torch.int32)
int_32_tensor

tensor([3, 6, 9], dtype=torch.int32)

In [36]:
float_32_tensor * int_32_tensor

tensor([ 9., 36., 81.])

### Getting information from tensors

Once you've created tensors (or someone else or a PyTorch module has created them for you), you might want to get some information from them.

We've seen these before but three of the most common attributes you'll want to find out about tensors are:

* shape - what shape is the tensor? (some operations require specific shape rules)
* dtype - what datatype are the elements within the tensor stored in?
* device - what device is the tensor stored on? (usually GPU or CPU)

Let's create a random tensor and find out details about it.

In [37]:
# Create a tensor
some_tensor = torch.rand(size=(3, 4))

# Report the values plus the three attributes you will check most often
print(some_tensor)
print("Shape of tensor:", some_tensor.shape)
print("Datatype of tensor:", some_tensor.dtype)
print("Device tensor is on:", some_tensor.device)  # will be default to CPU

tensor([[0.0537, 0.6605, 0.5102, 0.3059],
        [0.0710, 0.0813, 0.3880, 0.8427],
        [0.6248, 0.7017, 0.5735, 0.6141]])
Shape of tensor: torch.Size([3, 4])
Datatype of tensor: torch.float32
Device tensor is on: cpu


### Manipulating tensors (tensor operations)
In deep learning, data (images, text, video, audio, protein structures, etc) gets represented as tensors.

A model learns by investigating those tensors and performing a series of operations (could be 1,000,000s+) on tensors to create a representation of the patterns in the input data.

These operations are often a wonderful dance between:

* Addition
* Subtraction
* Multiplication (element-wise)
* Division
* Matrix multiplication

And that's it. Sure there are a few more here and there but these are the basic building blocks of neural networks.

Stacking these building blocks in the right way, you can create the most sophisticated of neural networks (just like lego!).

#### Basic operations
Let's start with a few of the fundamental operations, addition (+), subtraction (-), multiplication (*).

They work just as you think they would.

In [38]:
# Create a tensor and add 10 to it (the sum is a new tensor; `tensor` itself is not modified)
tensor = torch.tensor([1,2,3])
tensor + 10

tensor([11, 12, 13])

In [39]:
# Multiply by 10
tensor * 10

tensor([10, 20, 30])

In [40]:
# Subtract 10
tensor - 10

tensor([-9, -8, -7])

In [41]:
# Try out Pytorch in-built functions
torch.mul(tensor, 10)

tensor([10, 20, 30])

In [42]:
torch.add(tensor, 10)

tensor([11, 12, 13])

### Matrix multiplication

There are two main ways of performing multiplication in neural networks and deep learning:
1. Element-wise multiplication
2. Matrix multiplication (dot product)

In [43]:
# Element-wise multiplication
print(tensor, "*", tensor)
print(f"Equals: {tensor * tensor}")

tensor([1, 2, 3]) * tensor([1, 2, 3])
Equals: tensor([1, 4, 9])


In [44]:
# Matrix multiplication: for two 1-D tensors torch.matmul() gives their dot product (a single value)
torch.matmul(tensor, tensor)

tensor(14)

In [45]:
# Matrix multiplication by hand
1*1 + 2*2 + 3*3

14

In [46]:
%%time
# Same dot product computed with an explicit Python loop, timed for comparison with torch.matmul()
value = 0
for i in range(len(tensor)):
  value += tensor[i] * tensor[i]
value

CPU times: user 419 µs, sys: 68 µs, total: 487 µs
Wall time: 606 µs


tensor(14)

In [47]:
%%time
torch.matmul(tensor, tensor)

CPU times: user 95 µs, sys: 15 µs, total: 110 µs
Wall time: 220 µs


tensor(14)

In [48]:
# Can also use the "@" symbol for matrix multiplication, though not recommended
tensor @ tensor

tensor(14)

The main two rules for matrix multiplication to remember are:

The inner dimensions must match:

(3, 2) @ (3, 2) won't work<br>
(2, 3) @ (3, 2) will work<br>
(3, 2) @ (2, 3) will work<br>
The resulting matrix has the shape of the outer dimensions:

(2, 3) @ (3, 2) -> (2, 2)<br>
(3, 2) @ (2, 3) -> (3, 3)<br>
Note: "@" in Python is the symbol for matrix multiplication.

Resource: You can see all of the rules for matrix multiplication using torch.matmul() in the PyTorch documentation.

In [52]:
# Left commented out on purpose: both operands are (3, 2), so the inner dimensions (2 and 3)
# do not match and the call raises a RuntimeError.
# torch.matmul(torch.rand(3,2), torch.rand(3,2))

In [50]:
torch.matmul(torch.rand(2,3), torch.rand(3,2))

tensor([[0.7975, 0.5907],
        [0.7635, 0.3403]])

In [51]:
torch.matmul(torch.rand(3,2), torch.rand(2,3))

tensor([[0.7226, 0.4791, 0.4137],
        [0.4591, 0.2164, 0.2222],
        [0.5836, 0.7994, 0.5247]])

### One of the most common errors in deep learning (shape errors)
Because much of deep learning is multiplying and performing operations on matrices and matrices have a strict rule about what shapes and sizes can be combined, one of the most common errors you'll run into in deep learning is shape mismatches.

In [53]:
# Two (3, 2) matrices: their inner dimensions (2 and 3) do not match.
tensor_A = torch.tensor([[1, 2],
                         [3, 4],
                         [5, 6]])

tensor_B = torch.tensor([[7, 10],
                         [8, 11],
                         [9, 12]])

# torch.mm(tensor_A, tensor_B) fails the same way: for 2-D inputs torch.mm gives the same
# result as torch.matmul (torch.mm does not broadcast, so it is not a full alias).
# The shape error is the point of this cell, so catch and display it instead of
# letting it abort a "Restart & Run All".
try:
    torch.matmul(tensor_A, tensor_B)
except RuntimeError as shape_error:
    print(f"RuntimeError: {shape_error}")

RuntimeError: mat1 and mat2 shapes cannot be multiplied (3x2 and 3x2)

In [54]:
tensor_A.shape, tensor_B.shape

(torch.Size([3, 2]), torch.Size([3, 2]))

We can make matrix multiplication work between tensor_A and tensor_B by making their inner dimensions match. We can manipulate the shape of one tensor using a transpose.

A **Transpose** switches the axes or dimensions of a given tensor.


One of the ways to do this is with a transpose (switch the dimensions of a given tensor).

You can perform transposes in PyTorch using either:

* torch.transpose(input, dim0, dim1) - where input is the desired tensor to transpose and dim0 and dim1 are the dimensions to be swapped.
* tensor.T - where tensor is the desired tensor to transpose.

Let's try the latter.

In [55]:
tensor_B, tensor_B.shape

(tensor([[ 7, 10],
         [ 8, 11],
         [ 9, 12]]),
 torch.Size([3, 2]))

In [56]:
tensor_B.T, tensor_B.T.shape

(tensor([[ 7,  8,  9],
         [10, 11, 12]]),
 torch.Size([2, 3]))

In [57]:
torch.matmul(tensor_A, tensor_B.T)

tensor([[ 27,  30,  33],
        [ 61,  68,  75],
        [ 95, 106, 117]])

In [58]:
# The operation works when tensor_B is transposed: (3, 2) @ (2, 3) has matching inner dimensions
print(f"Original shapes: tensor_A = {tensor_A.shape}, tensor_B = {tensor_B.shape}\n")
print(f"New shapes: tensor_A = {tensor_A.shape} (same as above), tensor_B.T = {tensor_B.T.shape}\n")
print(f"Multiplying: {tensor_A.shape} * {tensor_B.T.shape} <- inner dimensions match\n")
print("Output:\n")
# The result takes the outer dimensions of the two operands
output = torch.matmul(tensor_A, tensor_B.T)
print(output)
print(f"\nOutput shape: {output.shape}")

Original shapes: tensor_A = torch.Size([3, 2]), tensor_B = torch.Size([3, 2])

New shapes: tensor_A = torch.Size([3, 2]) (same as above), tensor_B.T = torch.Size([2, 3])

Multiplying: torch.Size([3, 2]) * torch.Size([2, 3]) <- inner dimensions match

Output:

tensor([[ 27,  30,  33],
        [ 61,  68,  75],
        [ 95, 106, 117]])

Output shape: torch.Size([3, 3])


### Finding min, max, sum, etc (tensor aggregation)

In [59]:
# Create a tensor holding the multiples of 10 from 0 to 90
x = torch.arange(start=0, end=100, step=10)
x, x.dtype

(tensor([ 0, 10, 20, 30, 40, 50, 60, 70, 80, 90]), torch.int64)

In [60]:
# Find the min
torch.min(x), x.min()

(tensor(0), tensor(0))

In [61]:
# Find the max
torch.max(x), x.max()

(tensor(90), tensor(90))

In [62]:
# torch.mean() needs a floating point (or complex) dtype, so calling it on the
# int64 tensor raises. The error is the demonstration: catch and display it so
# the rest of the notebook still runs.
try:
    torch.mean(x)
except RuntimeError as dtype_error:
    print(f"RuntimeError: {dtype_error}")

RuntimeError: mean(): could not infer output dtype. Input dtype must be either a floating point or complex dtype. Got: Long

In [63]:
# Find the average/mean - note: torch.mean() requires a floating point (or complex) dtype,
# so the int64 tensor is converted to float32 first
torch.mean(x.type(torch.float32)), x.type(torch.float32).mean()

(tensor(45.), tensor(45.))

In [64]:
# Find the sum
torch.sum(x), x.sum()

(tensor(450), tensor(450))

### Finding the positional min and max

In [65]:
x

tensor([ 0, 10, 20, 30, 40, 50, 60, 70, 80, 90])

In [66]:
# Find the position in the tensor that has the minimum value with argmin() --> returns the index at which the minimum value occurs
x.argmin()

tensor(0)

In [67]:
# Find the position in the tensor that has the maximum value with argmax() --> returns the index at which the maximum value occurs
x.argmax()

tensor(9)

## Reshaping, stacking, squeezing and unsqueezing tensors

* Reshaping - reshapes an input tensor to a defined shape
* View - Return a view of an input tensor of certain shape but keep the same memory as the original tensor
* stacking - combine multiple tensors on top of each other (vstack =vertical stack) or side by side (hstack = horizontal stack)
* squeeze - removes '1' dimensions from a tensor
* unsqueeze - add a '1' dimension to a target tensor
* permute - Return a view of the input with dimensions permuted (swapped) in a certain way

In [94]:
# Let's create a tensor of the float values 1.0 through 9.0 (the end value 10. is excluded)
# Note: this rebinds `x`, which held the 0..90 integer tensor in the aggregation section
import torch
x = torch.arange(1., 10.)
x, x.shape

(tensor([1., 2., 3., 4., 5., 6., 7., 8., 9.]), torch.Size([9]))

In [95]:
# Add an extra dimension: the 9 elements become 1 row of 9
# Note: x_reshaped shares memory with x here - the later in-place edit made through
# the view `z` also shows up when x_reshaped is printed in the squeeze cells
x_reshaped = x.reshape(1, 9)
x_reshaped, x_reshaped.shape

(tensor([[1., 2., 3., 4., 5., 6., 7., 8., 9.]]), torch.Size([1, 9]))

In [96]:
# reshape(m, n) works as long as m * n equals the number of elements in the original tensor
# (here 9 * 1 == 9); any other combination raises an error
x_reshaped_1 = x.reshape(9, 1) # 9 rows of 1 element each
x_reshaped_1, x_reshaped_1.shape

(tensor([[1.],
         [2.],
         [3.],
         [4.],
         [5.],
         [6.],
         [7.],
         [8.],
         [9.]]),
 torch.Size([9, 1]))

In [97]:
# 3 * 3 == 9, so the same nine elements also fit a 3x3 layout
x_reshaped_2 = x.reshape(3,3)
x_reshaped_2 , x_reshaped_2.shape

(tensor([[1., 2., 3.],
         [4., 5., 6.],
         [7., 8., 9.]]),
 torch.Size([3, 3]))

In [98]:
# Change view (keeps same data as original but changes view)
z = x.view(1,9)
z, z.shape

(tensor([[1., 2., 3., 4., 5., 6., 7., 8., 9.]]), torch.Size([1, 9]))

Remember though, changing the view of a tensor with `Tensor.view()` (e.g. `x.view(1, 9)`) really only creates a new view of the same tensor.

So changing the view changes the original tensor too.

In [99]:
# changing z changes x (because a view of a tensor shares the memory as the original input)
z[:, 0] = 5
z, x

(tensor([[5., 2., 3., 4., 5., 6., 7., 8., 9.]]),
 tensor([5., 2., 3., 4., 5., 6., 7., 8., 9.]))

In [100]:
# Stack tensors on top of each other (dim=0 adds a new leading dimension, one row per input)
x_stacked = torch.stack([x, x, x, x], dim=0)
x_stacked

tensor([[5., 2., 3., 4., 5., 6., 7., 8., 9.],
        [5., 2., 3., 4., 5., 6., 7., 8., 9.],
        [5., 2., 3., 4., 5., 6., 7., 8., 9.],
        [5., 2., 3., 4., 5., 6., 7., 8., 9.]])

In [101]:
# torch.squeeze() - removes all dimensions of size 1 from a target tensor
# (shown here: the tensor before squeezing)
x_reshaped, x_reshaped.shape

(tensor([[5., 2., 3., 4., 5., 6., 7., 8., 9.]]), torch.Size([1, 9]))

In [102]:
x_reshaped.squeeze()

tensor([5., 2., 3., 4., 5., 6., 7., 8., 9.])

In [103]:
x_reshaped.squeeze().shape

torch.Size([9])

In [104]:
print(f"Previous tensor: {x_reshaped}")
print(f"Previous shape: {x_reshaped.shape}")

# Remove extra dimension from x_reshaped
x_squeezed = x_reshaped.squeeze()
print(f"\nNew tensor: {x_squeezed}")
print(f"New shape: {x_squeezed.shape}")

Previous tensor: tensor([[5., 2., 3., 4., 5., 6., 7., 8., 9.]])
Previous shape: torch.Size([1, 9])

New tensor: tensor([5., 2., 3., 4., 5., 6., 7., 8., 9.])
New shape: torch.Size([9])


And to do the reverse of torch.squeeze() you can use torch.unsqueeze() to add a dim  (dimension) value of 1 at a specific index.

In [105]:
print(f"Previous tensor: {x_squeezed}")
print(f"Previous shape: {x_squeezed.shape}")

# Add an extra dimension with unsqueeze (dim=0 inserts the new size-1 dimension at the front)
x_unsqueezed = x_squeezed.unsqueeze(dim=0)
print(f"\nNew tensor: {x_unsqueezed}")
print(f"New shape: {x_unsqueezed.shape}")

Previous tensor: tensor([5., 2., 3., 4., 5., 6., 7., 8., 9.])
Previous shape: torch.Size([9])

New tensor: tensor([[5., 2., 3., 4., 5., 6., 7., 8., 9.]])
New shape: torch.Size([1, 9])


In [106]:
# torch.permute - rearranges the dimensions of a target tensor in a specified order
x_original = torch.rand(size=(224, 224, 3)) # [height, width, colour_channels]

# Permute the original tensor to rearrange the axis order
x_permuted = x_original.permute(2, 0, 1) # shifts axis 0->1, 1->2, 2->0

print(f"Previous shape: {x_original.shape}")
print(f"New shape: {x_permuted.shape}") # [colour_channels, height, width]

Previous shape: torch.Size([224, 224, 3])
New shape: torch.Size([3, 224, 224])


Note: Because permuting returns a view (shares the same data as the original), the values in the permuted tensor will be the same as the original tensor and if you change the values in the view, it will change the values of the original.

## Quick Summary (Cheat-Sheet)
| Operation | What it does | Example shape change |
| ----- | ----- | ----- |
| reshape() | Reshape to new shape (same data) | [6] → [2,3] |
| view() | Same as reshape but shares memory | [6] → [2,3] |
| stack() | Stack tensors along new axis | [3], [3] → [2,3] |
| squeeze() | Remove all dimensions of size 1 | [1,3,1] → [3] |
| unsqueeze() | Add new axis of size 1 | [3] → [1,3] |
| permute() | Reorder dimensions | [2,3,4] → [3,4,2] |

## Indexing (selecting data from tensors)

Indexing with PyTorch is similar to indexing with NumPy.

In [107]:
# Create tensor: the values 1..9 arranged as a single 3x3 block (shape [1, 3, 3])
import torch
x = torch.arange(1,10).reshape(1,3,3)
x, x.shape

(tensor([[[1, 2, 3],
          [4, 5, 6],
          [7, 8, 9]]]),
 torch.Size([1, 3, 3]))

In [108]:
# Let's index on our new tensor
x[0]

tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])

In [112]:
# Let's index on the middle bracket (dim=1)
# x[0][0] is an equivalent way to write this
x[0,0]

tensor([1, 2, 3])

In [115]:
# Let's index on the innermost bracket (last dimension)
# x[0][0][0] is an equivalent way to write this
x[0,0,0]

tensor(1)

In [117]:
x[0,2,2]

tensor(9)

In [118]:
# You can use ":" to select all of a target dimension
x[:,0]

tensor([[1, 2, 3]])

In [119]:
# Get all values of 0th and 1st dimensions but only index 1 of 2nd dimension
x[:,:,1]

tensor([[2, 5, 8]])

In [120]:
# Get all values of the 0 dimension but only the 1 index value of 1st and 2nd dimension
x[:,1,1]

tensor([5])

In [123]:
# Get index 0 of 0th and 1st dimension and all values of 2nd dimension
x[0,0,:]

tensor([1, 2, 3])

In [124]:
x

tensor([[[1, 2, 3],
         [4, 5, 6],
         [7, 8, 9]]])

In [130]:
# index to return 9
print(x[:,2,2])

# index to return 3,6,9
x[:,:,2]

tensor([9])


tensor([[3, 6, 9]])

In [131]:
x[:,1]

tensor([[4, 5, 6]])

## Pytorch & Numpy
Since NumPy is a popular Python numerical computing library, PyTorch has functionality to interact with it nicely.

The two main methods you'll want to use for NumPy to PyTorch (and back again) are:

* torch.from_numpy(ndarray) - NumPy array -> PyTorch tensor.
* torch.Tensor.numpy() - PyTorch tensor -> NumPy array.

In [132]:
# NumPy array to tensor (the resulting tensor keeps the array's float64 dtype)
import torch
import numpy as np
array = np.arange(1.0, 8.0)
tensor = torch.from_numpy(array)
array, tensor

(array([1., 2., 3., 4., 5., 6., 7.]),
 tensor([1., 2., 3., 4., 5., 6., 7.], dtype=torch.float64))

Note: By default, NumPy arrays are created with the datatype float64 and if you convert it to a PyTorch tensor, it'll keep the same datatype (as above).

However, many PyTorch calculations default to using float32.

So if you want to convert your NumPy array (float64) -> PyTorch tensor (float64) -> PyTorch tensor (float32), you can use tensor = torch.from_numpy(array).type(torch.float32).

In [134]:
tensor = torch.from_numpy(array).type(torch.float32)
tensor, tensor.dtype


(tensor([1., 2., 3., 4., 5., 6., 7.]), torch.float32)

In [135]:
# Change the array, keep the tensor: `array + 1` builds a new array and rebinds the name `array`,
# so the tensor created from the old array is unaffected
array = array + 1
array, tensor

(array([2., 3., 4., 5., 6., 7., 8.]), tensor([1., 2., 3., 4., 5., 6., 7.]))

In [136]:
# Tensor to NumPy array
tensor = torch.ones(7) # create a tensor of ones with dtype=float32
numpy_tensor = tensor.numpy() # will be dtype=float32 unless changed
tensor, numpy_tensor

(tensor([1., 1., 1., 1., 1., 1., 1.]),
 array([1., 1., 1., 1., 1., 1., 1.], dtype=float32))

In [137]:
# Change the tensor, keep the array the same: `tensor + 1` produces a new tensor and rebinds
# the name `tensor`, so numpy_tensor still shows the original ones
tensor = tensor + 1
tensor, numpy_tensor

(tensor([2., 2., 2., 2., 2., 2., 2.]),
 array([1., 1., 1., 1., 1., 1., 1.], dtype=float32))

## Reproducibility (trying to take random out of random)

As you learn more about neural networks and machine learning, you'll start to discover how much randomness plays a part.

Well, pseudorandomness that is. Because after all, as they're designed, computers are fundamentally deterministic (each step is predictable), so the randomness they create is simulated randomness.

How does this relate to neural networks and deep learning then?

We've discussed neural networks start with random numbers to describe patterns in data (these numbers are poor descriptions) and try to improve those random numbers using tensor operations (and a few other things we haven't discussed yet) to better describe patterns in data.

In short:

start with random numbers -> tensor operations -> try to make better (again and again and again)

In [139]:
torch.rand(3,3)

tensor([[0.6918, 0.2447, 0.6616],
        [0.4788, 0.0097, 0.7487],
        [0.1130, 0.7048, 0.1546]])

In [140]:
import torch

# Draw two random tensors of the same shape, one after the other
random_tensor_A = torch.rand(size=(3, 4))
random_tensor_B = torch.rand(size=(3, 4))

print(f"Tensor A:\n{random_tensor_A}\n")
print(f"Tensor B:\n{random_tensor_B}\n")
print("Does Tensor A equal Tensor B? (anywhere)")
# Element-wise comparison: True wherever the two draws happen to match
random_tensor_A == random_tensor_B

Tensor A:
tensor([[0.8003, 0.9500, 0.7479, 0.7179],
        [0.2894, 0.4911, 0.2446, 0.0100],
        [0.8168, 0.7579, 0.1616, 0.5161]])

Tensor B:
tensor([[0.2024, 0.7062, 0.4532, 0.6611],
        [0.1064, 0.2094, 0.0762, 0.8961],
        [0.1038, 0.3898, 0.1075, 0.8628]])

Does Tensor A equal Tensor B? (anywhere)


tensor([[False, False, False, False],
        [False, False, False, False],
        [False, False, False, False]])

To reduce the randomness in neural networks and PyTorch comes the concept of a **random seed**.

Essentially what the random seed does is 'flavour' the randomness.

In [141]:
import torch
import random

# Fix the seed so the "random" draw below is repeatable
RANDOM_SEED = 42  # try changing this to different values and see what happens to the numbers below
torch.manual_seed(RANDOM_SEED)
random_tensor_C = torch.rand(size=(3, 4))

# Re-seed before the second draw: each rand() call advances the generator,
# so without this reset random_tensor_D would differ from random_tensor_C
torch.manual_seed(RANDOM_SEED)  # try commenting this line out and seeing what happens
random_tensor_D = torch.rand(size=(3, 4))

print(f"Tensor C:\n{random_tensor_C}\n")
print(f"Tensor D:\n{random_tensor_D}\n")
print("Does Tensor C equal Tensor D? (anywhere)")
random_tensor_C == random_tensor_D

Tensor C:
tensor([[0.8823, 0.9150, 0.3829, 0.9593],
        [0.3904, 0.6009, 0.2566, 0.7936],
        [0.9408, 0.1332, 0.9346, 0.5936]])

Tensor D:
tensor([[0.8823, 0.9150, 0.3829, 0.9593],
        [0.3904, 0.6009, 0.2566, 0.7936],
        [0.9408, 0.1332, 0.9346, 0.5936]])

Does Tensor C equal Tensor D? (anywhere)


tensor([[True, True, True, True],
        [True, True, True, True],
        [True, True, True, True]])

Resource: What we've just covered only scratches the surface of reproducibility in PyTorch. For more, on reproducibility in general and random seeds, I'd checkout:


* The PyTorch reproducibility documentation (a good exercise would be to read through this for 10-minutes and even if you don't understand it now, being aware of it is important).

* The Wikipedia random seed page (this'll give a good overview of random seeds and pseudorandomness in general).

## Running tensors on GPUs (and making faster computations)

Deep learning algorithms require a lot of numerical operations.

And by default these operations are often done on a CPU (central processing unit).

However, there's another common piece of hardware called a GPU (graphics processing unit), which is often much faster at performing the specific types of operations neural networks need (matrix multiplications) than CPUs.

Note: When I reference "GPU" throughout this course, I'm referencing a Nvidia GPU with CUDA enabled (CUDA is a computing platform and API that helps allow GPUs be used for general purpose computing & not just graphics) unless otherwise specified.


### 1. Getting a GPU

You may already know what's going on when I say GPU. But if not, there are a few ways to get access to one.

| **Method** | **Difficulty to setup** | **Pros** | **Cons** | **How to setup** |
| ----- | ----- | ----- | ----- | ----- |
| Google Colab | Easy | Free to use, almost zero setup required, can share work with others as easy as a link | Doesn't save your data outputs, limited compute, subject to timeouts | [Follow the Google Colab Guide](https://colab.research.google.com/notebooks/gpu.ipynb) |
| Use your own | Medium | Run everything locally on your own machine | GPUs aren't free, require upfront cost | Follow the [PyTorch installation guidelines](https://pytorch.org/get-started/locally/) |
| Cloud computing (AWS, GCP, Azure) | Medium-Hard | Small upfront cost, access to almost infinite compute | Can get expensive if running continually, takes some time to setup right | Follow the [PyTorch installation guidelines](https://pytorch.org/get-started/cloud-partners/) |

There are more options for using GPUs but the above three will suffice for now.

Personally, I use a combination of Google Colab and my own personal computer for small scale experiments (and creating this course) and go to cloud resources when I need more compute power.

> **Resource:** If you're looking to purchase a GPU of your own but not sure what to get, [Tim Dettmers has an excellent guide](https://timdettmers.com/2020/09/07/which-gpu-for-deep-learning/).

To check if you've got access to a Nvidia GPU, you can run `!nvidia-smi` where the `!` (also called bang) means "run this on the command line".



In [1]:
!nvidia-smi

Mon Jun 23 21:34:02 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla T4                       Off |   00000000:00:04.0 Off |                    0 |
| N/A   51C    P8             10W /   70W |       0MiB /  15360MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                



### 2. Getting PyTorch to run on the GPU

Once you've got a GPU ready to access, the next step is getting PyTorch to use it for storing data (tensors) and computing on data (performing operations on tensors).

To do so, you can use the [`torch.cuda`](https://pytorch.org/docs/stable/cuda.html) package.

Rather than talk about it, let's try it out.

You can test if PyTorch has access to a GPU using [`torch.cuda.is_available()`](https://pytorch.org/docs/stable/generated/torch.cuda.is_available.html#torch.cuda.is_available).


In [1]:
# Check for GPU access with Pytorch
import torch
torch.cuda.is_available()

True

Note: In PyTorch, it's best practice to write device agnostic code. This means code that'll run on CPU (always available) or GPU (if available).

In [5]:
# Setup device agnostic code: use the GPU when CUDA is available, otherwise fall back to the CPU
device = "cuda" if torch.cuda.is_available() else "cpu"
device

'cuda'

In [4]:
# Count number of devices
torch.cuda.device_count()

1

### 3. Putting tensors (and models) on the GPU

The reason we want our tensors/models on the GPU is because using a GPU results in faster computations.

In [3]:
# Create a tensor (default on the cpu)
tensor = torch.tensor([1, 2, 3])

# The tensor has not been moved yet, so its device is reported as cpu
print(tensor, tensor.device)



tensor([1, 2, 3]) cpu


In [6]:
# Move tensor to gpu (if available)
tensor_on_gpu = tensor.to(device)
tensor_on_gpu

tensor([1, 2, 3], device='cuda:0')

### 4. Moving Tensors back to the CPU

In [7]:
# If tensor is on GPU, can't transform it to NumPy directly.
# The error is the demonstration: catch and display it so the rest of the notebook
# still runs (if the tensor is actually on the CPU, the conversion succeeds and
# nothing is printed).
try:
    tensor_on_gpu.numpy()
except TypeError as device_error:
    print(f"TypeError: {device_error}")

TypeError: can't convert cuda:0 device type tensor to numpy. Use Tensor.cpu() to copy the tensor to host memory first.

In [8]:
# To fix the GPU tensor with NumPy issue, first copy the tensor to the CPU with .cpu(), then convert it
tensor_back_on_cpu = tensor_on_gpu.cpu().numpy()
tensor_back_on_cpu

array([1, 2, 3])

In [9]:
tensor_on_gpu

tensor([1, 2, 3], device='cuda:0')