In [3]:
# Import PyTorch and show which build is installed (the value is the cell output).
import torch
torch.__version__

'2.7.1+cu128'

In [24]:
# Scalar: a single number stored in a tensor with zero dimensions.
scalar = torch.tensor(7)
# Number of dimensions, shape, and the value unwrapped with .item().
scalar.ndim, scalar.shape, scalar.item()

(0, torch.Size([]), 7)

In [28]:
# Vector: a one-dimensional tensor (one level of square brackets).
vector = torch.tensor([7, 7])
# Only the last expression of a cell is displayed: number of dimensions and shape.
vector.ndim, vector.shape

(1, torch.Size([2]))

In [31]:
# Matrix: a two-dimensional tensor (two nested levels of square brackets).
MATRIX = torch.tensor([[7, 8], [9, 10]])
# Only the last expression of a cell is displayed: number of dimensions and shape.
MATRIX.ndim, MATRIX.shape

(2, torch.Size([2, 2]))

In [34]:
# Random tensor with 3 rows and 4 columns; torch.rand samples uniformly from [0, 1).
random_tensor = torch.rand(3, 4)
# Display the values together with the dtype.
random_tensor, random_tensor.dtype

(tensor([[0.1098, 0.2215, 0.1845, 0.2647],
         [0.3617, 0.9194, 0.1798, 0.7608],
         [0.9724, 0.6577, 0.8902, 0.6948]]),
 torch.float32)

In [37]:
# Tensor filled with zeros, 3 rows by 4 columns.
zeros = torch.zeros(3, 4)
# Display the values together with the dtype.
zeros, zeros.dtype

(tensor([[0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.]]),
 torch.float32)

In [38]:
# Tensor filled with ones, 3 rows by 4 columns.
ones = torch.ones(3, 4)
# Display the values together with the dtype.
ones, ones.dtype

(tensor([[1., 1., 1., 1.],
         [1., 1., 1., 1.],
         [1., 1., 1., 1.]]),
 torch.float32)

In [42]:
# Evenly spaced values: torch.arange(start, end, step).
# `end` is exclusive, so this produces 0, 2, 4, 6, 8.
even_nums = torch.arange(0, 10, 2)
even_nums

tensor([0, 2, 4, 6, 8])

In [43]:
"""
Sometimes you might want one tensor of a certain type with the same shape as another tensor.

For example, a tensor of all zeros with the same shape as a previous tensor.

To do so you can use torch.zeros_like(input) or torch.ones_like(input) which return a tensor filled with zeros or ones in the same shape as the input respectively.
"""

five_zeros = torch.zeros_like(input=even_nums)
five_zeros

tensor([0, 0, 0, 0, 0])

In [44]:
# Tensor datatypes: the three most commonly tuned arguments when creating a tensor.

float_32_tensor = torch.tensor([3.0, 6.0, 9.0],
                              dtype=None, # defaults to None, which infers the datatype from the data (torch.float32 here)
                              device=None, # defaults to None, which uses the default device
                              requires_grad=False) # if True, operations performed on the tensor are recorded
# Inspect shape, datatype and the device the tensor lives on.
float_32_tensor.shape, float_32_tensor.dtype, float_32_tensor.device

(torch.Size([3]), torch.float32, device(type='cpu'))

<h1>Tensor Operations</h1>

In [46]:
# Adding a scalar applies the addition to every element.
tensor = torch.tensor([1, 2, 3])
tensor + 10 

tensor([11, 12, 13])

In [48]:
# Two spellings of multiplying every element by a scalar.
# Only the last expression's value is shown as the cell output.
tensor * 10 # operator form (more common)
# or the function form
torch.multiply(tensor, 10)

tensor([10, 20, 30])

In [49]:
# Subtracting a scalar is element-wise; the result is a new tensor and `tensor` is not reassigned.
tensor - 10

tensor([-9, -8, -7])

In [50]:
# Show the operands and the result of element-wise multiplication side by side.
print(tensor, "*", tensor)
print("Equals:", tensor * tensor)

tensor([1, 2, 3]) * tensor([1, 2, 3])
Equals: tensor([1, 4, 9])


In [57]:
# Element-wise multiplication: each element is multiplied by its counterpart
# (this is not a matrix multiplication).
tensor * tensor

tensor([1, 4, 9])

In [58]:
# Matrix multiplication; for two 1-D tensors this is the dot product:
# 1*1 + 2*2 + 3*3 = 14
torch.matmul(tensor, tensor)

tensor(14)

In [52]:
# The "@" operator performs the same matrix multiplication as torch.matmul;
# the author prefers the explicit function call.
tensor @ tensor

tensor(14)

* You can do matrix multiplication by hand but it's not recommended.
* The in-built torch.matmul() method is faster.

In [56]:
%%time
# Dot product written by hand with a Python loop, timed for comparison
# (avoid doing operations with for loops at all cost, they are computationally expensive)
value = 0 
for i in range(len(tensor)):
    # accumulate the square of each element
    value += tensor[i] * tensor[i]
value

CPU times: total: 0 ns
Wall time: 287 μs


tensor(14)

In [59]:
%%time 
# Same computation using the built-in torch.matmul, timed for comparison with the loop
torch.matmul(tensor, tensor)

CPU times: total: 0 ns
Wall time: 58.4 μs


tensor(14)

# One of the most common errors in deep learning (shape errors)
* Because much of deep learning is multiplying and performing operations on matrices and matrices have a strict rule about what shapes and sizes can be combined, one of the most common errors you'll run into in deep learning is shape mismatches.

In [63]:
# Two 3x2 matrices: the inner dimensions (2 and 3) do not match,
# so they cannot be matrix-multiplied as they are.
tensor_A = torch.tensor([[1, 2],
                         [3, 4],
                         [5, 6]], dtype=torch.float32)
tensor_B = torch.tensor([[7, 10],
                         [8, 11],
                         [9, 12]], dtype=torch.float32)

# The failure is the point of this cell, so catch it and display the message
# instead of letting an uncaught exception stop "Restart & Run All".
try:
    torch.matmul(tensor_A, tensor_B)
except RuntimeError as shape_error:
    print(f"{type(shape_error).__name__}: {shape_error}")

RuntimeError: mat1 and mat2 shapes cannot be multiplied (3x2 and 3x2)

In [70]:
# We can fix that with a transpose: tensor_B.T has shape (2, 3),
# so the inner dimensions (2 and 2) now match.
print(tensor_A.shape, tensor_B.shape)
print(tensor_A.shape, tensor_B.T.shape)
# (3, 2) @ (2, 3) -> (3, 3)
tensor_A @ tensor_B.T


torch.Size([3, 2]) torch.Size([3, 2])
torch.Size([3, 2]) torch.Size([2, 3])


tensor([[ 27.,  30.,  33.],
        [ 61.,  68.,  75.],
        [ 95., 106., 117.]])

In [72]:
# torch.mm gives the same result as matmul for these 2-D matrices
# (unlike matmul, torch.mm only accepts 2-D inputs and does not broadcast)
torch.mm(tensor_A, tensor_B.T)

tensor([[ 27.,  30.,  33.],
        [ 61.,  68.,  75.],
        [ 95., 106., 117.]])

In [75]:
# Seed before building the layer so the printed output is repeatable.
torch.manual_seed(42)
linear = torch.nn.Linear(in_features=2, # in_features = matches inner dim of input
                        out_features=6) # out_features = size of the last dimension of the output
x = tensor_A
# Apply the layer: the (3, 2) input becomes a (3, 6) output.
output = linear(x)
print(f"Input shape: {x.shape}\n")
print(f"Output:\n{output}\n\nOutput shape: {output.shape}")

Input shape: torch.Size([3, 2])

Output:
tensor([[2.2368, 1.2292, 0.4714, 0.3864, 0.1309, 0.9838],
        [4.4919, 2.1970, 0.4469, 0.5285, 0.3401, 2.4777],
        [6.7469, 3.1648, 0.4224, 0.6705, 0.5493, 3.9716]],
       grad_fn=<AddmmBackward0>)

Output shape: torch.Size([3, 6])


## Finding the min, max, mean, sum, etc. (aggregation)

In [76]:
# Tensor used by the aggregation examples: 0, 10, ..., 90 (the end value 100 is exclusive).
x = torch.arange(start=0, end=100, step=10)
x

tensor([ 0, 10, 20, 30, 40, 50, 60, 70, 80, 90])

In [79]:
# Aggregations via tensor methods.
print(f"Minimum: {x.min()}")
print(f"Maximum: {x.max()}")
# print(f"Mean: {x.mean()}") # this will error because x has an integer dtype
print(f"Mean: {x.type(torch.float32).mean()}") # convert to float32 first so mean() works
print(f"Sum: {x.sum()}")

Minimum: 0
Maximum: 90
Mean: 45.0
Sum: 450


In [80]:
# The same aggregations are available as torch.* functions;
# mean again needs the tensor converted to a float dtype first.
torch.max(x), torch.min(x), torch.mean(x.type(torch.float32)), torch.sum(x)

(tensor(90), tensor(0), tensor(45.), tensor(450))

## Positional min/max 

In [82]:
# Create a tensor: 10, 20, ..., 90
tensor = torch.arange(10, 100, 10)
print(f"Tensor: {tensor}")

# argmax()/argmin() return the index (position) of the max and min values, not the values themselves
print(f"Index where max value occurs: {tensor.argmax()}")
print(f"Index where min value occurs: {tensor.argmin()}")

Tensor: tensor([10, 20, 30, 40, 50, 60, 70, 80, 90])
Index where max value occurs: 8
Index where min value occurs: 0


## Change tensor datatype

In [86]:
# Create a tensor from float arguments and check its datatype
tensor = torch.arange(10., 100., 10.)
tensor.dtype

torch.float32

In [84]:
# Convert to half precision; .type() returns a new tensor and leaves `tensor` unchanged.
tensor_float16 = tensor.type(torch.float16)
tensor_float16

tensor([10., 20., 30., 40., 50., 60., 70., 80., 90.], dtype=torch.float16)

In [87]:
# Convert to 8-bit integers.
tensor_int8 = tensor.type(torch.int8)
tensor_int8

tensor([10, 20, 30, 40, 50, 60, 70, 80, 90], dtype=torch.int8)

In [95]:
# Prefer the first CUDA device, but fall back to the CPU so the cell still
# runs on machines without a GPU (the name `cuda0` is kept for later cells).
cuda0 = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
# Create the tensor directly on the chosen device with an explicit dtype.
tensor_ones = torch.ones([2, 4], dtype=torch.float64, device=cuda0)
tensor_ones.device

device(type='cuda', index=0)

In [101]:
# Tensor that autograd should track.
x = torch.tensor([[1., -1.], [1., 1.]], requires_grad=True)
# out is the sum of the squared elements; backward() fills x.grad with d(out)/dx = 2 * x.
out = torch.sum(torch.pow(x, 2))
out.backward()
x.grad

tensor([[ 2., -2.],
        [ 2.,  2.]])

## Reshaping, stacking, squeezing and unsqueezing

In [102]:
# Create a 1-D float tensor with the values 1..7 (the end value 8 is exclusive)
import torch 
x = torch.arange(1., 8.)
x, x.shape

(tensor([1., 2., 3., 4., 5., 6., 7.]), torch.Size([7]))

In [107]:
# Add a leading dimension: shape (7,) -> (1, 7). The element count must stay the same.
x_reshaped = x.reshape(1, 7)
x_reshaped, x_reshaped.shape

(tensor([[1., 2., 3., 4., 5., 6., 7.]]), torch.Size([1, 7]))

In [108]:
# Change view (keeps same data as original but changes view):
# `z` shares its data with `x`, it is not a copy.
z = x.view(1, 7)
z, z.shape

(tensor([[1., 2., 3., 4., 5., 6., 7.]]), torch.Size([1, 7]))

In [109]:
# Writing through the view also changes `x`, because both share the same data.
# Note: this mutates `x` in place, so later cells see 5 as its first element.
z[:, 0] = 5 
z, x

(tensor([[5., 2., 3., 4., 5., 6., 7.]]), tensor([5., 2., 3., 4., 5., 6., 7.]))

In [114]:
# Stack four copies of x on top of each other along a new first dimension (dim=0).
x_stacked = torch.stack([x] * 4, dim=0)
x_stacked

tensor([[5., 2., 3., 4., 5., 6., 7.],
        [5., 2., 3., 4., 5., 6., 7.],
        [5., 2., 3., 4., 5., 6., 7.],
        [5., 2., 3., 4., 5., 6., 7.]])

In [115]:
# Show the tensor before squeezing.
print(f"Previous tensor: {x_reshaped}")
print(f"Previous shape: {x_reshaped.shape}")

# Remove extra dim from x_reshaped: squeeze() drops dimensions of size 1, (1, 7) -> (7,)
x_squeezed = x_reshaped.squeeze()
print(f"\nNew tensor: {x_squeezed}")
print(f"New shape: {x_squeezed.shape}")

Previous tensor: tensor([[5., 2., 3., 4., 5., 6., 7.]])
Previous shape: torch.Size([1, 7])

New tensor: tensor([5., 2., 3., 4., 5., 6., 7.])
New shape: torch.Size([7])


In [116]:
# Show the tensor before unsqueezing.
print(f"Previous tensor: {x_squeezed}")
print(f"Previous shape: {x_squeezed.shape}")

# Add an extra dimension with unsqueeze: dim=0 inserts a size-1 dimension at the front, (7,) -> (1, 7)
x_unsqueezed = x_squeezed.unsqueeze(dim=0)
print(f"\nNew tensor: {x_unsqueezed}")
print(f"New shape: {x_unsqueezed.shape}")

Previous tensor: tensor([5., 2., 3., 4., 5., 6., 7.])
Previous shape: torch.Size([7])

New tensor: tensor([[5., 2., 3., 4., 5., 6., 7.]])
New shape: torch.Size([1, 7])


In [117]:
# Create tensor with specific shape, laid out like an image: (height, width, channels)
x_original = torch.rand(size=(224, 224, 3))
# Reorder the axes: (height, width, channels) -> (channels, height, width)
x_permuted = x_original.permute(2, 0, 1)
print(f"Previous shape: {x_original.shape}")
print(f"New shape: {x_permuted.shape}")
# Note: Because permuting returns a view (shares the same data as the original),
# the values in the permuted tensor will be the same as the original tensor
# and if you change the values in the view, it will change the values of the original.
# (Written as comments so the note is not evaluated as the cell's result value.)

Previous shape: torch.Size([224, 224, 3])
New shape: torch.Size([3, 224, 224])


## Indexing (selecting data from tensors)

In [118]:
import torch 
# Values 1..9 arranged as one 3x3 block: shape (1, 3, 3).
x = torch.reshape(torch.arange(1, 10), (1, 3, 3))
x, x.shape

(tensor([[[1, 2, 3],
          [4, 5, 6],
          [7, 8, 9]]]),
 torch.Size([1, 3, 3]))

In [119]:
# Let's index bracket by bracket: each extra [0] steps one dimension deeper
print(f"First square bracket:\n {x[0]}")
print(f"Second square bracket: {x[0][0]}")
print(f"Third square bracket: {x[0][0][0]}")

First square bracket:
 tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])
Second square bracket: tensor([1, 2, 3])
Third square bracket: 1


In [120]:
# Get all values of 0th dimension and the 0 index of 1st dimension
# (":" means "everything along this dimension")
x[:, 0]

tensor([[1, 2, 3]])

In [121]:
# Get all values of 0th & 1st dimensions but only index 1 of 2nd dimension
# (i.e. the middle column)
x[:, :, 1]

tensor([[2, 5, 8]])

In [122]:
# Get all values of the 0th dimension but only the 1 index value of the 1st and 2nd dimension
# (i.e. the centre element, returned inside a 1-element tensor)
x[:, 1, 1]

tensor([5])

In [123]:
# Get index 0 of 0th and 1st dimension and all values of 2nd dimension
# (i.e. the first row)
x[0, 0, :] # same as x[0][0]

tensor([1, 2, 3])

## PyTorch tensors & NumPy 
* torch.from_numpy(ndarray) - NumPy array -> PyTorch tensor
* torch.Tensor.numpy() - PyTorch tensor -> NumPy array

In [124]:
import torch 
import numpy as np 
# NumPy array -> PyTorch tensor; the tensor keeps NumPy's float64 datatype.
array = np.arange(1.0, 8.0)
tensor = torch.from_numpy(array)
array, tensor

(array([1., 2., 3., 4., 5., 6., 7.]),
 tensor([1., 2., 3., 4., 5., 6., 7.], dtype=torch.float64))

* Note: By default, NumPy arrays are created with the datatype float64 and if you convert it to a PyTorch tensor, it'll keep the same datatype (as above).

However, many PyTorch calculations default to using float32.

So if you want to convert your NumPy array (float64) -> PyTorch tensor (float64) -> PyTorch tensor (float32), you can use tensor = torch.from_numpy(array).type(torch.float32).

In [125]:
# `array + 1` builds a new array and rebinds the name `array` to it,
# so the tensor created earlier keeps its original values.
# Note: re-running this cell adds 1 again (it is not idempotent).
array = array + 1 
array, tensor

(array([2., 3., 4., 5., 6., 7., 8.]),
 tensor([1., 2., 3., 4., 5., 6., 7.], dtype=torch.float64))

In [126]:
# Tensor to NumPy array: the array keeps the tensor's datatype
tensor = torch.ones(7) # create a tensor of ones with dtype=float32
numpy_tensor = tensor.numpy()  # will be dtype=float32 unless changed
tensor, numpy_tensor

(tensor([1., 1., 1., 1., 1., 1., 1.]),
 array([1., 1., 1., 1., 1., 1., 1.], dtype=float32))

# Reproducibility (trying to take the random out of random)
Reproducibility lets you perform repeatable experiments.

For example, you create an algorithm capable of achieving X performance.

And then your friend tries it out to verify you're not crazy.

How could they do such a thing?

That's where reproducibility comes in.
In other words, can you get the same (or very similar) results on your computer running the same code as I get on mine?

In [127]:
import torch 

# Create 2 random tensors without setting a seed: each call draws fresh values
random_tensor_A = torch.rand(3, 4)
random_tensor_B = torch.rand(3, 4)
print(f"Tensor A:\n{random_tensor_A}\n")
print(f"Tensor B:\n{random_tensor_B}\n")
print(f"Does Tensor A equal Tensor B? (anywhere)")
# Element-wise comparison: one boolean per position
random_tensor_A == random_tensor_B

Tensor A:
tensor([[0.8016, 0.3649, 0.6286, 0.9663],
        [0.7687, 0.4566, 0.5745, 0.9200],
        [0.3230, 0.8613, 0.0919, 0.3102]])

Tensor B:
tensor([[0.9536, 0.6002, 0.0351, 0.6826],
        [0.3743, 0.5220, 0.1336, 0.9666],
        [0.9754, 0.8474, 0.8988, 0.1105]])

Does Tensor A equal Tensor B? (anywhere)


tensor([[False, False, False, False],
        [False, False, False, False],
        [False, False, False, False]])

In [129]:
import torch 
import random  # NOTE(review): not used anywhere in this cell

# Set the random seed so the next random call is repeatable
RANDOM_SEED = 42
torch.manual_seed(seed=RANDOM_SEED)
random_tensor_C = torch.rand(3, 4)

# Have to reset the seed every time a new rand() is called
# Without this, tensor_D would be different to tensor_C
# (same seeding call as above, reached through the torch.random namespace)
torch.random.manual_seed(seed=RANDOM_SEED)
random_tensor_D = torch.rand(3, 4)

print(f"Tensor C:\n{random_tensor_C}\n")
print(f"Tensor D:\n{random_tensor_D}\n")
print(f"Does Tensor C equal Tensor D? (anywhere)")
# Element-wise comparison: one boolean per position
random_tensor_C == random_tensor_D

Tensor C:
tensor([[0.8823, 0.9150, 0.3829, 0.9593],
        [0.3904, 0.6009, 0.2566, 0.7936],
        [0.9408, 0.1332, 0.9346, 0.5936]])

Tensor D:
tensor([[0.8823, 0.9150, 0.3829, 0.9593],
        [0.3904, 0.6009, 0.2566, 0.7936],
        [0.9408, 0.1332, 0.9346, 0.5936]])

Does Tensor C equal Tensor D? (anywhere)


tensor([[True, True, True, True],
        [True, True, True, True],
        [True, True, True, True]])

## Run PyTorch on the GPU

In [1]:
# Shell command: ask the NVIDIA driver which GPUs are visible and how busy they are
!nvidia-smi

Fri Jun 27 06:42:14 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 576.52                 Driver Version: 576.52         CUDA Version: 12.9     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                  Driver-Model | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA GeForce RTX 5070      WDDM  |   00000000:01:00.0 Off |                  N/A |
|  0%   26C    P8              2W /  250W |       0MiB /  12227MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [3]:
import torch 
# True when PyTorch can use a CUDA GPU on this machine
torch.cuda.is_available()

True

In [6]:
# Set device type: use the GPU when one is available, otherwise fall back to the CPU
# (device-agnostic code: later cells just refer to `device`)
device = "cuda" if torch.cuda.is_available() else "cpu"
device

'cuda'

In [7]:
# Count number of CUDA devices PyTorch can see
torch.cuda.device_count()

1

In [13]:
# Create a tensor; by default it lives on the CPU
tensor = torch.tensor([1, 2, 3])

print(tensor, tensor.device)

# .to(device) returns the tensor on the target device; `tensor` itself is not reassigned
tensor_on_gpu = tensor.to(device)
tensor_on_gpu

tensor([1, 2, 3]) cpu


tensor([1, 2, 3], device='cuda:0')

In [15]:
# A tensor that lives on the GPU cannot be converted to NumPy directly.
# The error is the demonstration, so catch it and display the message
# rather than leaving an uncaught exception that stops "Restart & Run All".
try:
    tensor_on_gpu.numpy()
except TypeError as conversion_error:
    print(f"{type(conversion_error).__name__}: {conversion_error}")

TypeError: can't convert cuda:0 device type tensor to numpy. Use Tensor.cpu() to copy the tensor to host memory first.

In [16]:
# Instead, copy the tensor back to the CPU first, then convert it.
# Note: despite its name, `tensor_back_on_cpu` holds a NumPy array.
tensor_back_on_cpu = tensor_on_gpu.cpu().numpy()
tensor_back_on_cpu

array([1, 2, 3])

# Exercises

In [22]:
# Create a random tensor with shape (7, 7)
import torch 
random_tensor = torch.rand(7, 7)
# Show the values, the shape and the number of dimensions
random_tensor, random_tensor.shape, random_tensor.ndim

(tensor([[0.2726, 0.3558, 0.0832, 0.7613, 0.5524, 0.6808, 0.3721],
         [0.3463, 0.3327, 0.2539, 0.0585, 0.0593, 0.4725, 0.1712],
         [0.1855, 0.3265, 0.1902, 0.9728, 0.7461, 0.1036, 0.5347],
         [0.8250, 0.0777, 0.8793, 0.7163, 0.5118, 0.7041, 0.9171],
         [0.8515, 0.8048, 0.1697, 0.2446, 0.8746, 0.0167, 0.0268],
         [0.4946, 0.4601, 0.8788, 0.9911, 0.3032, 0.3395, 0.5107],
         [0.0084, 0.5870, 0.6421, 0.6948, 0.1409, 0.3868, 0.0231]]),
 torch.Size([7, 7]),
 2)

In [24]:
#  Perform a matrix multiplication on the tensor from 2 with another random tensor with shape (1, 7) (hint: you may have to transpose the second tensor)
random_tensor2 = torch.rand(1, 7)
# Transpose (1, 7) -> (7, 1) so the inner dimensions match: (7, 7) @ (7, 1) -> (7, 1)
rs = torch.mm(random_tensor, random_tensor2.T)
rs, rs.shape

(tensor([[1.7230],
         [0.7636],
         [2.0546],
         [2.3482],
         [2.3512],
         [2.1224],
         [1.1645]]),
 torch.Size([7, 1]))

In [26]:
# Speaking of random seeds, we saw how to set it with torch.manual_seed() but is there a GPU equivalent? (hint: you'll need to look into the documentation for torch.cuda for this one)
# If there is, set the GPU random seed to 1234.
# Answer: yes, torch.cuda.manual_seed() seeds the current GPU (torch.cuda.manual_seed_all() seeds every GPU).
# The call is safe on machines without CUDA, where it is silently ignored.
torch.cuda.manual_seed(1234)
# Also seed the default generator, as this cell did before, so CPU draws are reproducible too.
torch.manual_seed(1234)

<torch._C.Generator at 0x248b5250870>

In [30]:
"""
Create two random tensors of shape (2, 3) and send them both to the GPU (you'll need access to a GPU for this). 
Set torch.manual_seed(1234) when creating the tensors (this doesn't have to be the GPU random seed). The output should be something like:
"""
torch.manual_seed(1234)
device = "cuda" if torch.cuda.is_available() else "cpu"

tensor_A = torch.rand(2, 3).to(device)
tensor_B = torch.rand(2, 3).to(device)
tensor_A, tensor_B

(tensor([[0.0290, 0.4019, 0.2598],
         [0.3666, 0.0583, 0.7006]], device='cuda:0'),
 tensor([[0.0518, 0.4681, 0.6738],
         [0.3315, 0.7837, 0.5631]], device='cuda:0'))

In [39]:
#  Perform a matrix multiplication on the tensors you created in 6 (again, you may have to adjust the shapes of one of the tensors)
# Transpose tensor_B: (2, 3) @ (3, 2) -> (2, 2)
x = torch.mm(tensor_A, tensor_B.T)
x

tensor([[0.3647, 0.4709],
        [0.5184, 0.5617]], device='cuda:0')

In [36]:
# Find the maximum and minimum values of the output of 7
# (.item() unwraps each one-element tensor into a plain Python number; min is listed first)
x.min().item(), x.max().item()

(0.3647301495075226, 0.5617256760597229)

In [40]:
# Find the maximum and minimum index values of the output of 7
# Without a `dim` argument the index refers to the flattened tensor,
# e.g. index 3 of a 2x2 tensor is position [1, 1].
x.argmax().item(), x.argmin().item()

(3, 0)

In [46]:
"""
Make a random tensor with shape (1, 1, 1, 10) and then create a new tensor with all the 1 dimensions removed to be left with a tensor of shape (10).
Set the seed to 7 when you create it and print out the first tensor and it's shape as well as the second tensor and it's shape
"""
tensor_C = torch.rand(1, 1, 1, 10)
tensor_D = tensor_C.squeeze()
tensor_D, tensor_D.shape

(tensor([0.6604, 0.1303, 0.3498, 0.3824, 0.8043, 0.3186, 0.2908, 0.4196, 0.3728,
         0.3769]),
 torch.Size([10]))