# 00. Pytorch fundementals

In [1]:
import torch 
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 

print(torch.__version__)

2.6.0+cu126


## Introduction to tensors

### Creating tensors

Pytorch tensors are created by `torch.tensor()`

In [3]:
# Scalar
scalar = torch.tensor(7)
print(scalar)

# Number of dimensions of a tensor
print(scalar.ndim)

# Get tensor back as python int 
print(scalar.item())

tensor(7)
0
7


In [4]:
# Vector
vector = torch.tensor([4,432,3])
print(vector)
# Dimensions
print(vector.ndim)
# Shape
print(vector.shape)

tensor([  4, 432,   3])
1
torch.Size([3])


In [5]:
# Matrix, often uppercase nomenclature
MATRIX = torch.tensor([[7,8], [23,3]])
print(MATRIX.ndim)
print(MATRIX.shape)

2
torch.Size([2, 2])


In [6]:
# Tensor, ofter uppercase nomenclature
TENSOR = torch.tensor([[[23,3,23], [2,3,4]],
                        [[23,32,3], [32,3,23]]])
print(TENSOR.shape)
print(TENSOR.ndim)
print(TENSOR[0])

torch.Size([2, 2, 3])
3
tensor([[23,  3, 23],
        [ 2,  3,  4]])


### Random tensors

Random tensors are important because the way many neural networks work is that they start with random numbers, and then adjust those random numbers to better represent the data

`Start with random numbers -> look at data -> update random numbers -> look at data -> update numbers...`

In [7]:
#Creating a random tensor of size (2,3,4)
random_tensor = torch.rand(2,3,4)
print(random_tensor)
print(random_tensor.ndim)
print(random_tensor.shape)

tensor([[[0.2052, 0.5802, 0.2260, 0.9425],
         [0.0592, 0.2300, 0.0947, 0.8146],
         [0.1838, 0.2454, 0.6977, 0.9551]],

        [[0.9094, 0.0112, 0.2670, 0.0648],
         [0.6393, 0.6988, 0.2443, 0.7508],
         [0.8663, 0.1499, 0.5250, 0.3760]]])
3
torch.Size([2, 3, 4])


In [8]:
# Create a random tensor with a similar shape to an image tensor
random_image_size_tensor = torch.rand(size=(3,224,224)) #height, width, color channels (R, G, B)
print(random_image_size_tensor.shape, random_image_size_tensor.ndim)

torch.Size([3, 224, 224]) 3


### Zeros and ones

In [9]:
# Create a tensor of all zeros 
zeros = torch.zeros(size = (3,4))
print(zeros)
print(zeros * random_tensor)

# Create a tensor of all ones
ones = torch.ones(size = (3,4), dtype=float)
print(ones)
print(ones.dtype)

tensor([[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]])
tensor([[[0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.]],

        [[0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.]]])
tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]], dtype=torch.float64)
torch.float64


### Create a range of tensors and tensor-like

In [10]:
# Use torch.arange
one_to_eleven =  torch.arange(start=39, end = 3234, step =44)
print(one_to_eleven)

# Creating tensors-like (the same shape as the inputted tensor)
ten_zeros = torch.zeros_like(input = one_to_eleven)
print(ten_zeros)


tensor([  39,   83,  127,  171,  215,  259,  303,  347,  391,  435,  479,  523,
         567,  611,  655,  699,  743,  787,  831,  875,  919,  963, 1007, 1051,
        1095, 1139, 1183, 1227, 1271, 1315, 1359, 1403, 1447, 1491, 1535, 1579,
        1623, 1667, 1711, 1755, 1799, 1843, 1887, 1931, 1975, 2019, 2063, 2107,
        2151, 2195, 2239, 2283, 2327, 2371, 2415, 2459, 2503, 2547, 2591, 2635,
        2679, 2723, 2767, 2811, 2855, 2899, 2943, 2987, 3031, 3075, 3119, 3163,
        3207])
tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0])


### Tensor datatypes

**Note:** it is one of the 3 big errors you'll run into with PyTorch & deep learning:
1. Tensors not right datatype
2. Tensors not the right shape
3. Tensors not on rigth device

In [11]:
# Float_32 tensor
# Look at datatypes torch.dtypes
float_32_tensor = torch.tensor([3.0,6.0,9.0], # 3 most important params
                               dtype=torch.float32, # data type of your sensor
                             device=None,# which device your tensor is o
                             requires_grad=False) # Wheather or not to track gradients
print(float_32_tensor.dtype)

# Converting 
float_16_tensor = float_32_tensor.type(torch.float16)
print(float_16_tensor.dtype)

torch.float32
torch.float16


In [12]:
int_32_tensor = torch.tensor([3,2,32], dtype=torch.int32)
print(int_32_tensor)
print(float_32_tensor * int_32_tensor)

long_tensor = torch.tensor([3,23,3], dtype=torch.long)
print(float_32_tensor * long_tensor)

tensor([ 3,  2, 32], dtype=torch.int32)
tensor([  9.,  12., 288.])
tensor([  9., 138.,  27.])


### Getting information from tensors (tensor attributes)

device - .device

data type - .dtype

shape - .shape

In [13]:
some_tensor = torch.rand(size=(3,2), device="cuda")
print(some_tensor)
print(f"Device: {some_tensor.device}\nData type: {some_tensor.dtype}\nShape: {some_tensor.shape}")


tensor([[0.2741, 0.2541],
        [0.2082, 0.6451],
        [0.4031, 0.8766]], device='cuda:0')
Device: cuda:0
Data type: torch.float32
Shape: torch.Size([3, 2])


### Manipulating tensors (tensor operations)

Tensor operations:
* Addition
* Subtraction
* Multiplication (element-wise)
* Multiplication (Matrix)
* Division

In [14]:
# Create a tensor, and add 10
tensor = torch.tensor([1,2,3])
tensor += 10
print(tensor)
print(tensor * 10) 

# Subtract
print(tensor -10)

# Try out PyTorch in-built functions
torch.mul(tensor, 10)

tensor([11, 12, 13])
tensor([110, 120, 130])
tensor([1, 2, 3])


tensor([110, 120, 130])

## Matrix multiplication

3 main ways of performing multiplication in deep learning:
* Element-wise multiplication
* Matrix multiplication

In [15]:
# Element wise 
print(tensor, "*", tensor , "=", tensor*tensor)

# Matrix multiplication
print(tensor @ tensor)
torch.matmul(tensor, tensor)

tensor([11, 12, 13]) * tensor([11, 12, 13]) = tensor([121, 144, 169])
tensor(434)


tensor(434)

In [16]:
%%time
value = 0
for i in range(len(tensor)):
    value += tensor[i]

CPU times: total: 0 ns
Wall time: 0 ns


In [17]:
%%time
torch.matmul(tensor,tensor)

CPU times: total: 0 ns
Wall time: 0 ns


tensor(434)

##### Matrix multiplication rules
1. The **inner dimensions** must match
2. The resulting matrix has the shape of the outer dimensions

In [18]:
# Example
print((torch.rand(3,2) @ torch.rand(2,3)).shape) # Works (inner dimensions are the same), dim is 3x3
torch.rand(3,2) @torch.rand(3,3) # Doesn't work

torch.Size([3, 3])


RuntimeError: mat1 and mat2 shapes cannot be multiplied (3x2 and 3x3)

In [19]:
# Shapes for matrix multiplications
# Transposition
tensor_A = torch.tensor([[1,2],
                         [3,4],
                         [5,6]])

tensor_B = torch.tensor([[7,10],
                         [8,11],
                         [9,12]])
# tensor.T for to get the transpose of the tensor
print(tensor_A.shape)
print(tensor_A.T.shape)
print(tensor_A.T)
print(tensor_A.T @ tensor_B)

torch.Size([3, 2])
torch.Size([2, 3])
tensor([[1, 3, 5],
        [2, 4, 6]])
tensor([[ 76, 103],
        [100, 136]])


### Agregation functions: min, max, mean, sum, etc.


In [20]:
tensor = torch.arange(0,100,10)

# Find the min
torch.min(tensor), tensor.min()

# Find teh max
torch.max(tensor), tensor.max()

# Find the mean, we have to convert to float, since the torch.mean() requires the float data type
torch.mean(tensor.type(dtype=torch.float32)), tensor.type(dtype=torch.float32).mean()

# Find the sum
torch.sum(tensor), tensor.sum()

(tensor(450), tensor(450))

In [21]:
# Postional min and max (find the index of the minimum value in the tensor)
tensor = torch.tensor([[3,23,52,2,0,32342], [3,23,32,23,32,-23]])
tensor.argmin()
tensor.argmax()

tensor(5)

## Reshaping, stacking, squeezing and unsqueezing tensors

* Reshaping - reshapes the input tensor to the defined shape
* View - return a view of and input tensor of certain shape but keep the same memory as the original tensor
* Stacking - combine multiple tensors on top of each other (vstack - vertical), (hstack - horizontal)
* Squeeze - removes all `1` dimensions from a tensor
* Unsqueeze - add a `1` dimension to a target tensor
* Permute - REturn a view of the input with dimensions permuted (swapped) in a certain way

In [22]:
x = torch.arange(1,10)
print(x, x.shape)

# Reshaping, add an extra dimension
x_reshaped = x.reshape(1,3,3) # The dimensions have to be appropriate for the amount of elements
print(x_reshaped)

# Change the view
z = x.view(1,9) # Z has the same reference to x, if we change z we also change x
print(z, z.shape)
z[:,0] = 500
print(z,x)

# Stack tensors on top of each other
x_stacked = torch.stack([x,x,x,x], dim = 1) # In which dimension do we stack together
print(x_stacked)

# Squeeze and Unsqueeze
x_squeezed = torch.squeeze(x)
print(x_squeezed)
x_unsqueezed = torch.unsqueeze(x, 1)
print(x_unsqueezed)

tensor([1, 2, 3, 4, 5, 6, 7, 8, 9]) torch.Size([9])
tensor([[[1, 2, 3],
         [4, 5, 6],
         [7, 8, 9]]])
tensor([[1, 2, 3, 4, 5, 6, 7, 8, 9]]) torch.Size([1, 9])
tensor([[500,   2,   3,   4,   5,   6,   7,   8,   9]]) tensor([500,   2,   3,   4,   5,   6,   7,   8,   9])
tensor([[500, 500, 500, 500],
        [  2,   2,   2,   2],
        [  3,   3,   3,   3],
        [  4,   4,   4,   4],
        [  5,   5,   5,   5],
        [  6,   6,   6,   6],
        [  7,   7,   7,   7],
        [  8,   8,   8,   8],
        [  9,   9,   9,   9]])
tensor([500,   2,   3,   4,   5,   6,   7,   8,   9])
tensor([[500],
        [  2],
        [  3],
        [  4],
        [  5],
        [  6],
        [  7],
        [  8],
        [  9]])


In [36]:
# torch.squeeze() - removes all single dimensions from a target tensor
print(f"previous tensor: {x_reshaped}")
print(f"previous shape: {x_reshaped.shape}")
print(f"new tensor: {x_squeezed.squeeze()}")
print(f"new tensor's shape: {x_squeezed.squeeze().shape}")

previous tensor: tensor([[[500,   2,   3],
         [  4,   5,   6],
         [  7,   8,   9]]])
previous shape: torch.Size([1, 3, 3])
new tensor: tensor([500,   2,   3,   4,   5,   6,   7,   8,   9])
new tensor's shape: torch.Size([9])


In [39]:
#torch.unsqueeze() - adds a single dimensions to a target tensor at a specific dim
x_squeezed = x_reshaped.squeeze()
print(f"previous target: {x_squeezed}")
print(f"previous shape: {x_squeezed.shape}")
print(f"new tensor: {x_reshaped.unsqueeze(dim=0)}")
print(f"new tensor's shape: {x_squeezed.unsqueeze(dim=1).shape}")

previous target: tensor([[500,   2,   3],
        [  4,   5,   6],
        [  7,   8,   9]])
previous shape: torch.Size([3, 3])
new tensor: tensor([[[[500,   2,   3],
          [  4,   5,   6],
          [  7,   8,   9]]]])
new tensor's shape: torch.Size([3, 1, 3])


In [47]:
# torch.permute - rearranges the dimensions of a target tensor in a specified order (returns a view!!!!)
x_original = torch.rand(size=(224,224,3)) # heigth, width, color_channels

# Permute the original tensor to rearragne the axis (or dim) order 
x_permuted = x_original.permute(2,0,1) # shifts axes 0 -> 1, 1 -> 2, 2 -> 0
print(f"original shape: {x_original.shape}")
print(f"permuted shape: {x_permuted.shape}")

#it changes both values since it is a view!!!
x_permuted[0,0,0] = 3000
print(x_original[0,0,0])

original shape: torch.Size([224, 224, 3])
permuted shape: torch.Size([3, 224, 224])
tensor(3000.)


## Indexing (selecting data from tensors)

Indexing with PyTorch is similar to indexing with NumPy

In [66]:
x = torch.arange(1,10).reshape(1,3,3)
print(x,x.shape)

#Lets index on our tensor
print(x[0])
print(x[0,0])
print(x[0,1,1])
print(x[0,2,2])

# Slicing
print(x[:,1,:2])

x[:,1,1]

tensor([[[1, 2, 3],
         [4, 5, 6],
         [7, 8, 9]]]) torch.Size([1, 3, 3])
tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])
tensor([1, 2, 3])
tensor(5)
tensor(9)
tensor([[4, 5]])


tensor([5])

## PyTorch tensors & NumPy

NumPy is a popular scientific Python numerical computing library

And because of this, PyTorch has functionality to interact with it

* Data in NumPy, want in PyTorch tensors -> `torch.from_numpy(ndarray)`
* PyTorch tensor -> NumPy -> torch.Tensor.numpy()


In [None]:
# NumPy array to tensor, careful since the default numpy type is float 64
import numpy as np

arr = np.arange(1.0,8.0)
tensor = torch.from_numpy(arr)
print(arr, tensor)
print(arr.dtype, tensor.dtype)

#change the datatype
tensor = tensor.type(dtype=torch.float32)
print(tensor.dtype)


# Change the value of the array, what will this do to the tensor -> we get a new tensor
arr = arr + 1
print(arr, tensor)

[1. 2. 3. 4. 5. 6. 7.] tensor([1., 2., 3., 4., 5., 6., 7.], dtype=torch.float64)
float64 torch.float64
torch.float32
[2. 3. 4. 5. 6. 7. 8.] tensor([1., 2., 3., 4., 5., 6., 7.])


In [82]:
# Tensor to numpy array
tensor = torch.ones(6)
arr  = tensor.numpy(force=True)
print(tensor, arr)
print(tensor.dtype, arr.dtype) # The dtype stays as the one in pytorch

# Change the tensor, what happens to array
tensor += 1
print(arr, tensor)

tensor([1., 1., 1., 1., 1., 1.]) [1. 1. 1. 1. 1. 1.]
torch.float32 float32
[2. 2. 2. 2. 2. 2.] tensor([2., 2., 2., 2., 2., 2.])


## Reproducibility (trying to take the random out of random)

In short how a neural network learns:

`start with random numbers -> tensor operations -> update numbers ....`

To reduce the randomness in neural networks and PyTorch comes the concept of **random seed** 


Extra resources: pytorch randomness

In [None]:
# 2 Random tensors
rand_a = torch.rand(size=(3,5))
rand_b = torch.rand(size=(3,5))

print(rand_a, rand_b)
print(rand_a == rand_b) # element wise comparison

tensor([[0.6945, 0.8916, 0.9035, 0.0687, 0.9752],
        [0.6062, 0.3187, 0.0471, 0.3234, 0.0310],
        [0.9752, 0.0089, 0.7499, 0.5467, 0.7973]]) tensor([[0.3786, 0.3189, 0.8153, 0.1934, 0.3732],
        [0.8284, 0.6467, 0.9792, 0.2876, 0.4436],
        [0.3637, 0.4556, 0.5367, 0.0697, 0.8862]])
tensor([[False, False, False, False, False],
        [False, False, False, False, False],
        [False, False, False, False, False]])


In [None]:
# Let's make some random but reproducible tensors, set the random seed
RANDOM_SEED = 42
torch.manual_seed(RANDOM_SEED)

random_c = torch.rand(size=(3,5))

# if you do it before every one, they are the same tensors
torch.manual_seed(RANDOM_SEED)
random_d = torch.rand(size=(3,5))
print(random_c == random_d)


tensor([[True, True, True, True, True],
        [True, True, True, True, True],
        [True, True, True, True, True]])
tensor([[0.8823, 0.9150, 0.3829, 0.9593, 0.3904],
        [0.6009, 0.2566, 0.7936, 0.9408, 0.1332],
        [0.9346, 0.5936, 0.8694, 0.5677, 0.7411]]) tensor([[0.8823, 0.9150, 0.3829, 0.9593, 0.3904],
        [0.6009, 0.2566, 0.7936, 0.9408, 0.1332],
        [0.9346, 0.5936, 0.8694, 0.5677, 0.7411]])


## Running tensors and PyTorch objects on GPUs (and making faster computations)

GPUs = faster computation on numbers, thanks to CUDA + NVIDIA hardware + PyTorch working begind the scenes to make everything good

In [126]:
!nvidia-smi

Sat Feb 15 12:00:22 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 561.03                 Driver Version: 561.03         CUDA Version: 12.6     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                  Driver-Model | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA GeForce RTX 4060 ...  WDDM  |   00000000:01:00.0  On |                  N/A |
| N/A   39C    P8              3W /   75W |    1993MiB /   8188MiB |     32%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

### 2. Check for GPU access with PyTorch

In [129]:
# Check for GPU access with PyTorch 
print(torch.cuda.is_available())

True


In [135]:
# Setup device agnostic code
device = "cuda" if torch.cuda.is_available() else "cpu" # often used nam

# Count the devices
print(torch.cuda.device_count())

1


## 3. Putting tensors (and models) on the GPU

In [None]:
# Create a sensor, default is on the cpu
tensor = torch.tensor([1,2,3], device="cpu") # tensor on cpu (default is cpu anyway)
print(tensor, tensor.device)

# Move tensor to GPU (if available) 
tensor_on_gpu = tensor.to(device)
print(tensor_on_gpu) # also tells which gpu it is on


tensor([1, 2, 3]) cpu
tensor([1, 2, 3], device='cuda:0')


### 4. Moving tensors back to the CPU

In [142]:
# If tensor is on GPU, you can't transfrom it to NumPy
#tensor_on_gpu.numpy() -> doesn't work

# So we must first set it to the CPU
tensor_on_cpu = tensor_on_gpu.to("cpu")
print(tensor_on_cpu, tensor_on_cpu.device)
tensor_on_cpu.numpy()


tensor([1, 2, 3]) cpu


array([1, 2, 3])

### Exercises & Extra curriculum
**learnpytorch.io**

In [161]:

torch.manual_seed(0)
torch.cuda.manual_seed(0)

tensor = torch.rand(size=(7,7), device=device)
print(tensor)

tensor_2 = torch.ones(size=(1,7), device=device)
t = tensor @ tensor_2.T
print(t)

print(min(t))
print(max(t))

t = torch.rand(size=(1,1,1,10))
print(t.squeeze())



tensor([[0.3990, 0.5167, 0.0249, 0.9401, 0.9459, 0.7967, 0.4150],
        [0.8203, 0.2290, 0.9096, 0.1183, 0.0752, 0.4092, 0.9601],
        [0.2093, 0.1940, 0.8909, 0.4387, 0.3570, 0.5454, 0.8299],
        [0.2099, 0.7684, 0.4290, 0.2117, 0.6606, 0.1654, 0.4250],
        [0.9927, 0.6964, 0.2472, 0.7028, 0.7494, 0.9303, 0.0494],
        [0.0750, 0.7223, 0.9478, 0.3647, 0.2215, 0.7784, 0.6391],
        [0.2077, 0.7045, 0.9609, 0.0594, 0.3358, 0.0616, 0.7030]],
       device='cuda:0')
tensor([[4.0383],
        [3.5217],
        [3.4651],
        [2.8699],
        [4.3682],
        [3.7487],
        [3.0329]], device='cuda:0')
tensor([2.8699], device='cuda:0')
tensor([4.3682], device='cuda:0')
tensor([0.4963, 0.7682, 0.0885, 0.1320, 0.3074, 0.6341, 0.4901, 0.8964, 0.4556,
        0.6323])
