# PyTorch Fundamentals

python -m venv packages

Set-ExecutionPolicy -Scope Process -ExecutionPolicy Bypass

.\packages\Scripts\Activate.ps1

pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118

In [30]:
import numpy as np
import torch
torch.__version__

'2.5.1+cu118'

In [14]:
# Scalar: a single value held in a zero-dimensional tensor
scalar = torch.tensor(7)
print(
    scalar,         # the tensor itself
    scalar.item(),  # the value as a plain Python number
    type(scalar),   # the tensor class
    scalar.ndim,    # number of dimensions
    sep="\n",
)


tensor(7)
7
<class 'torch.Tensor'>
0


In [19]:
# Vector: a one-dimensional tensor
vector = torch.tensor([7, 7])
print(
    vector,
    vector.ndim,   # 1 -> a vector has a single dimension
    vector.shape,  # torch.Size([2]) -> that dimension holds 2 elements
    sep="\n",
)

tensor([7, 7])
1
torch.Size([2])


In [32]:
# Matrix: a two-dimensional tensor (rows x columns)
MATRIX = torch.tensor([[7, 8], [9, 10]])

print(
    MATRIX,        # the whole matrix
    MATRIX[1],     # second row
    MATRIX[1, 0],  # element at row 1, column 0
    MATRIX[:, 1],  # second column
    MATRIX.ndim,   # 2 -> one dimension for rows, one for columns
    MATRIX.shape,  # torch.Size([2, 2]) -> 2 rows and 2 columns
    sep="\n",
)


tensor([[ 7,  8],
        [ 9, 10]])
tensor([ 9, 10])
tensor(9)
tensor([ 8, 10])
2
torch.Size([2, 2])


In [52]:
# Tensor: three dimensions here -- one outer block that contains a 3x3 matrix,
# so TENSOR.shape is torch.Size([1, 3, 3]).
import pandas as pd

TENSOR = torch.tensor([[[1, 2, 3],
                        [3, 6, 9],
                        [2, 4, 5]]])
print(TENSOR[0])  # index the outer dimension to get the 3x3 matrix

# Convert to a NumPy array explicitly before handing the data to pandas.
# Passing the torch tensor directly relies on implicit conversion, which may
# leave tensor objects in the cells depending on the library versions.
df = pd.DataFrame(TENSOR[0].numpy())
print(df)
print(df[1])         # the column labelled 1
print(df.loc[2, 0])  # row 2, column 0 (label-based lookup instead of chained indexing)

tensor([[1, 2, 3],
        [3, 6, 9],
        [2, 4, 5]])
   0  1  2
0  1  2  3
1  3  6  9
2  2  4  5
0    2
1    6
2    4
Name: 1, dtype: int64
2


In [37]:
# Random tensor with 3 rows and 4 columns
random_tensor = torch.rand(3, 4)
print(random_tensor, random_tensor.dtype)

# Random image-sized tensor with shape (224, 224, 3)
random_image_size_tensor = torch.rand(224, 224, 3)
print(random_image_size_tensor.shape, random_image_size_tensor.ndim)

tensor([[0.2855, 0.3419, 0.4774, 0.2469],
        [0.0560, 0.9875, 0.0659, 0.9844],
        [0.4627, 0.3199, 0.6314, 0.6246]]) torch.float32
torch.Size([224, 224, 3]) 3


In [40]:
# Values from 0 up to (but not including) 10, spaced 0.2 apart
X = torch.arange(0, 10, 0.2)
print(X)

tensor([0.0000, 0.2000, 0.4000, 0.6000, 0.8000, 1.0000, 1.2000, 1.4000, 1.6000,
        1.8000, 2.0000, 2.2000, 2.4000, 2.6000, 2.8000, 3.0000, 3.2000, 3.4000,
        3.6000, 3.8000, 4.0000, 4.2000, 4.4000, 4.6000, 4.8000, 5.0000, 5.2000,
        5.4000, 5.6000, 5.8000, 6.0000, 6.2000, 6.4000, 6.6000, 6.8000, 7.0000,
        7.2000, 7.4000, 7.6000, 7.8000, 8.0000, 8.2000, 8.4000, 8.6000, 8.8000,
        9.0000, 9.2000, 9.4000, 9.6000, 9.8000])


In [55]:
# Float data becomes torch.float32 when no dtype is requested
float_32_tensor = torch.tensor(
    [3.0, 6.0, 9.0],
    dtype=None,           # None -> dtype is inferred from the data (float32 for these Python floats)
    device=None,          # None -> the tensor is created on the default device
    requires_grad=False,  # True would make autograd record operations performed on the tensor
)

print(float_32_tensor.shape, float_32_tensor.dtype, float_32_tensor.device)

# Request half precision explicitly; torch.half is another name for torch.float16
float_16_tensor = torch.tensor([3.0, 6.0, 9.0], dtype=torch.half)

print(float_16_tensor.dtype)

torch.Size([3]) torch.float32 cpu
torch.float16


In [60]:
# Element-wise arithmetic on a random 3x3 tensor
tensor = torch.rand(size=(3, 3))
print(tensor)
print(torch.add(tensor, 10))      # add 10 to every element
print(torch.mul(tensor, 3))       # multiply every element by 3
print(torch.add(tensor, tensor))  # add the tensor to itself, element by element

tensor([[0.5614, 0.2599, 0.5349],
        [0.8911, 0.2451, 0.6342],
        [0.9663, 0.9709, 0.5817]])
tensor([[10.5614, 10.2599, 10.5349],
        [10.8911, 10.2451, 10.6342],
        [10.9663, 10.9709, 10.5817]])
tensor([[1.6842, 0.7798, 1.6046],
        [2.6733, 0.7354, 1.9027],
        [2.8990, 2.9126, 1.7451]])
tensor([[1.1228, 0.5199, 1.0697],
        [1.7822, 0.4903, 1.2685],
        [1.9327, 1.9417, 1.1634]])


### Matrix multiplication
One of the most common operations in machine learning and deep learning algorithms (like neural networks) is matrix multiplication.

PyTorch implements matrix multiplication functionality in the torch.matmul() method.

The main two rules for matrix multiplication to remember are:

1. The inner dimensions must match:

- (3, 2) @ (3, 2) won't work
- (2, 3) @ (3, 2) will work
- (3, 2) @ (2, 3) will work

2. The resulting matrix has the shape of the outer dimensions:

- (2, 3) @ (3, 2) -> (2, 2)
- (3, 2) @ (2, 3) -> (3, 3)

For a tensor variable with values [1, 2, 3]:

| Operation | Calculation | Code |
| --- | --- | --- |
| Element-wise multiplication | [1x1, 2x2, 3x3] = [1, 4, 9] | tensor * tensor |
| Matrix multiplication | 1x1 + 2x2 + 3x3 = 14 | tensor.matmul(tensor) |


In [8]:
%%time
tensor = torch.tensor([1, 2, 3])
print(
    tensor * tensor,        # element-wise product: [1*1, 2*2, 3*3]
    tensor.matmul(tensor),  # matrix (dot) product: 1*1 + 2*2 + 3*3
    sep="\n",
)

tensor([1, 4, 9])
tensor(14)
CPU times: total: 0 ns
Wall time: 2.19 ms


Example of tensor mismatch

In [9]:
# Both matrices are 3x2, so the inner dimensions (2 and 3) do not match
# and they cannot be matrix-multiplied as they are.
tensor_A = torch.tensor([[1, 2],
                         [3, 4],
                         [5, 6]], dtype=torch.float32)

tensor_B = torch.tensor([[7, 10],
                         [8, 11], 
                         [9, 12]], dtype=torch.float32)

torch.matmul(tensor_A, tensor_B) # (this will error) (3x2) @ (3x2) raises RuntimeError

RuntimeError: mat1 and mat2 shapes cannot be multiplied (3x2 and 3x2)

One way to fix the shape mismatch is to transpose one of the tensors (swap its dimensions) so that the inner dimensions match:
- tensor_B -> tensor_B.T

In [11]:

# Transposing tensor_B swaps its two dimensions so the inner dimensions line up.
print(f"Original shapes: tensor_A = {tensor_A.shape}, tensor_B = {tensor_B.shape}\n")
print(f"New shapes: tensor_A = {tensor_A.shape} (same as above), tensor_B.T = {tensor_B.T.shape}\n")
print(f"Multiplying: {tensor_A.shape} * {tensor_B.T.shape} <- inner dimensions match\n")
output = tensor_A @ tensor_B.T  # same operation as torch.matmul(tensor_A, tensor_B.T)
print("Output:\n")
print(output)
print(f"\nOutput shape: {output.shape}")

Original shapes: tensor_A = torch.Size([3, 2]), tensor_B = torch.Size([3, 2])

New shapes: tensor_A = torch.Size([3, 2]) (same as above), tensor_B.T = torch.Size([2, 3])

Multiplying: torch.Size([3, 2]) * torch.Size([2, 3]) <- inner dimensions match

Output:

tensor([[ 27.,  30.,  33.],
        [ 61.,  68.,  75.],
        [ 95., 106., 117.]])

Output shape: torch.Size([3, 3])


In [12]:
print(tensor_A @ tensor_B.T)          # matrix product of tensor_A and the transposed tensor_B
print(torch.mul(tensor_A, tensor_B))  # element-wise product of two same-shaped tensors

tensor([[ 27.,  30.,  33.],
        [ 61.,  68.,  75.],
        [ 95., 106., 117.]])
tensor([[ 7., 20.],
        [24., 44.],
        [45., 72.]])


### Machine Learning example

torch.nn.Linear creates a linear layer that computes output = x @ W^T + b, where:
- W is the weight matrix (randomly initialized),
- b is the bias vector (randomly initialized).

Parameters:
- in_features=2: the last dimension of the input tensor must have size 2. For example, the input shape could be (batch_size, 2).
- out_features=6: the last dimension of the output tensor will have size 6. For example, the output shape will be (batch_size, 6).

In [14]:
# Fix the RNG seed so the layer's randomly initialized weights and bias
# are the same every time this code is run.
torch.manual_seed(42)

# Linear layer computing x @ W.T + bias:
#   in_features  -> size of the last dimension of the input
#   out_features -> size of the last dimension of the output
linear = torch.nn.Linear(in_features=2, out_features=6)

x = tensor_A        # input to the layer
output = linear(x)  # a bias vector of shape (6,) is added to each row of the result

print(f"Input shape: {x.shape}\n")
print(f"Output:\n{output}\n\nOutput shape: {output.shape}")

Input shape: torch.Size([3, 2])

Output:
tensor([[2.2368, 1.2292, 0.4714, 0.3864, 0.1309, 0.9838],
        [4.4919, 2.1970, 0.4469, 0.5285, 0.3401, 2.4777],
        [6.7469, 3.1648, 0.4224, 0.6705, 0.5493, 3.9716]],
       grad_fn=<AddmmBackward0>)

Output shape: torch.Size([3, 6])


### Finding the min, max, mean, sum, etc. (aggregation)

In [None]:
x = torch.arange(0, 100, 10)
print(x)
print(torch.min(x))
print(torch.max(x))
# mean() needs a floating-point tensor, so the integer tensor is cast first
print(torch.mean(x.type(torch.float16)))
print(torch.sum(x))

print(torch.argmax(x))  # index of the max value
print(torch.argmin(x))  # index of the min value

tensor([ 0, 10, 20, 30, 40, 50, 60, 70, 80, 90])
tensor(0)
tensor(90)
tensor(45., dtype=torch.float16)
tensor(450)
tensor(9)
tensor(0)


### Change the type of data

In [27]:
x = torch.tensor([10, 20, 30], dtype=torch.float16)
print(x.dtype)
y = x.to(torch.uint8)  # converted copy; x itself keeps its float16 dtype
print(y.dtype)

torch.float16
torch.uint8


### Reshaping, stacking, squeezing and unsqueezing
Often you'll want to reshape or change the dimensions of your tensors without actually changing the values inside them.

To do so, some popular methods are:

- torch.reshape(input, shape): reshapes input to shape (if compatible); also available as torch.Tensor.reshape().
- torch.Tensor.view(shape): returns a view of the original tensor in a different shape that shares the same data as the original tensor.
- torch.stack(tensors, dim=0): joins a sequence of tensors along a new dimension (dim); all tensors must be the same size.
- torch.squeeze(input): removes all dimensions of size 1 from input.
- torch.unsqueeze(input, dim): returns input with a dimension of size 1 inserted at position dim.
- torch.permute(input, dims): returns a view of the original input with its dimensions permuted (rearranged) according to dims.

### PyTorch tensors & NumPy

In [32]:
# NumPy array -> PyTorch tensor
array = np.arange(1.0, 8.0)       # floats 1.0 through 7.0
tensor = torch.from_numpy(array)  # keeps the array's float64 dtype
(array, tensor)

(array([1., 2., 3., 4., 5., 6., 7.]),
 tensor([1., 2., 3., 4., 5., 6., 7.], dtype=torch.float64))

In [33]:
# PyTorch tensor -> NumPy array
tensor = torch.ones(7)         # seven ones, dtype float32
numpy_tensor = tensor.numpy()  # the array keeps the tensor's float32 dtype
(tensor, numpy_tensor)

(tensor([1., 1., 1., 1., 1., 1., 1.]),
 array([1., 1., 1., 1., 1., 1., 1.], dtype=float32))

### Getting PyTorch to run on the GPU

In [38]:
print(torch.cuda.is_available())  # True -> a CUDA GPU is available

# Pick the device: the GPU when there is one, the CPU otherwise
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(device)

# Number of GPUs available to PyTorch
print(torch.cuda.device_count())

# A new tensor is created on the CPU by default
tensor = torch.tensor([1, 2, 3])
print(tensor, tensor.device)

# Move the tensor to the selected device (the GPU, if one is available)
tensor_on_gpu = tensor.to(device)
print(tensor_on_gpu)

True
cuda
1
tensor([1, 2, 3]) cpu
tensor([1, 2, 3], device='cuda:0')


In [40]:
# If the tensor is on the GPU, it can't be converted to NumPy directly
# (this will error with a TypeError, so the lines after it are not reached
# when the cell is run as a whole).
tensor_on_gpu.numpy()

# Instead, copy the tensor back to the CPU first, then convert it to NumPy
tensor_back_on_cpu = tensor_on_gpu.cpu().numpy()
tensor_back_on_cpu

TypeError: can't convert cuda:0 device type tensor to numpy. Use Tensor.cpu() to copy the tensor to host memory first.