<a href="https://colab.research.google.com/github/lcai000/pytorch_fundamentals/blob/main/00_pytorch_fundamentals.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import torch
import pandas as pd
import numpy as np
import matplotlib as plt
print(torch.__version__)

2.6.0+cu124


## Introduction to Tensors

### Creating Tensors

PyTorch tensors are created using `torch.Tensor()` -= https://pytorch.org/docs/stable/tensors.html

In [3]:
# scalar
scalar = torch.tensor(7)
scalar

tensor(7)

In [4]:
scalar.ndim

0

In [5]:
# Get tensor back as Python int
scalar.item()

7

In [6]:
# Vector
vector = torch.tensor([7,7])
vector

tensor([7, 7])

In [7]:
vector.ndim

1

In [8]:
vector.shape

torch.Size([2])

In [9]:
# MATRIX
MATRIX = torch.tensor([[7,8],
                       [9,10]])
MATRIX

tensor([[ 7,  8],
        [ 9, 10]])

In [10]:
MATRIX.ndim

2

In [11]:
MATRIX[0]

tensor([7, 8])

In [12]:
MATRIX.shape

torch.Size([2, 2])

In [13]:
# TENSOR
TENSOR = torch.tensor([[[1,2,3],
                        [3,6,9],
                        [2,5,4]],
                       [[5,2,3],
                        [3,9,9],
                        [2,3,6]]])
TENSOR

tensor([[[1, 2, 3],
         [3, 6, 9],
         [2, 5, 4]],

        [[5, 2, 3],
         [3, 9, 9],
         [2, 3, 6]]])

In [14]:
TENSOR.ndim

3

In [15]:
TENSOR.shape

torch.Size([2, 3, 3])

In [16]:
TENSOR[0]

tensor([[1, 2, 3],
        [3, 6, 9],
        [2, 5, 4]])

In [17]:
TENSOR[1]

tensor([[5, 2, 3],
        [3, 9, 9],
        [2, 3, 6]])

### Random tensors

Why random tensors?
Random tensors are important because the way many neural networks learn is through starting with tensors full of random numbers and then adjusting those random numbers to better represent the data.

`Start with random numbers -> look at data -> update random numbers`

In [18]:
# Create random tensor of size (3,4)

random_tensor = torch.rand(size=(3,4))
random_tensor

tensor([[0.2356, 0.8441, 0.9399, 0.3866],
        [0.4574, 0.3547, 0.3301, 0.0595],
        [0.0061, 0.7291, 0.9868, 0.8801]])

In [19]:
random_tensor.ndim

2

In [20]:
# Create a random tensor with similar shape to an image tensor
random_image_size_tensor = torch.rand(size=(224,224,3)) #height, width, color channels
random_image_size_tensor.shape, random_image_size_tensor.ndim

(torch.Size([224, 224, 3]), 3)

In [21]:
torch.rand(size=(3,3))

tensor([[0.0402, 0.8029, 0.2383],
        [0.3647, 0.9901, 0.7602],
        [0.4605, 0.5776, 0.7795]])

In [22]:
torch.rand(size=(4,2))

tensor([[0.7605, 0.8921],
        [0.6760, 0.9451],
        [0.3524, 0.9502],
        [0.7762, 0.9510]])

### Zeros and ones

In [23]:
# Create a tensor of all zeros
zeros = torch.zeros(size=(3,4))
zeros

tensor([[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]])

In [24]:
# Create a tensor of all ones
ones = torch.ones(size=(3,4))
ones

tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]])

In [25]:
ones.dtype

torch.float32

### Creating a range of tensors and tensor-like

In [26]:
# Use torch.arange()
one_to_ten = torch.arange(start=0,end=11,step=1)
one_to_ten

tensor([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [27]:
# Creating tensors like
ten_zeros = torch.zeros_like(input=one_to_ten)
ten_zeros

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

### Tensor datatypes
**note:** Tensor datatypes is one of 3 common errors in PyTorch and deep learning:
1. Tensors not right datatype
2. Tensors not right shape
3. Tensors not on right device


In [28]:
# Float 32 tensor
float_32_tensor = torch.tensor([3.0,6.0,9.0],
                               dtype=None, # auto sets to float 32, but able to specify(ie. float 16,32,64)
                               device=None, # what device the tensor is on
                               requires_grad=False) # whether or not to track gradients
float_32_tensor

tensor([3., 6., 9.])

In [29]:
float_32_tensor.dtype

torch.float32

In [30]:
# Convert float 32 tensor to float 16 tensor
float_16_tensor = float_32_tensor.type(torch.float16)
float_16_tensor

tensor([3., 6., 9.], dtype=torch.float16)

In [31]:
# add tensors of different datatype
float_32_tensor = torch.tensor([4,2,5],dtype=torch.float32)
int_32_tensor = torch.tensor([3,6,9],dtype=torch.int32)

sum_tensors = float_32_tensor+int_32_tensor
(sum_tensors,sum_tensors.dtype)

(tensor([ 7.,  8., 14.]), torch.float32)

### Getting information from tensors
1. get datatype: `tensor.dtype`
2. get shape: `tensor.shape`
3. get device: `tensor.device`

In [32]:
# Get tensor attributes(type,shape,device)
some_tensor = torch.rand(3,4)
print(str(some_tensor) +'\n'+f"Datatype: {some_tensor.dtype}"+'\n'+f"Shape: {some_tensor.shape}"+'\n'+f"Device: {some_tensor.device}")

tensor([[0.9415, 0.6770, 0.7441, 0.7517],
        [0.1335, 0.2461, 0.1926, 0.0401],
        [0.9514, 0.6184, 0.0113, 0.6157]])
Datatype: torch.float32
Shape: torch.Size([3, 4])
Device: cpu


### Manipulating Tensors(tensor operations)

Tensor operations:
* Addition
* Subtraction
* Multiplication(element wise)
* Division
* Matrix multiplication

In [33]:
# create tensor
tensor = torch.rand([1,3])

In [34]:
# tensor scalar operations
print(f" add by 10: {tensor + 10}")
print(f" subtract by 10: {tensor - 10}")
print(f" multiply by 10: {tensor * 10}")
print(f" divide by 10: {tensor / 10}")

 add by 10: tensor([[10.3395, 10.4638, 10.6470]])
 subtract by 10: tensor([[-9.6605, -9.5362, -9.3530]])
 multiply by 10: tensor([[3.3950, 4.6383, 6.4699]])
 divide by 10: tensor([[0.0339, 0.0464, 0.0647]])


In [35]:
torch.mul(tensor,10) #torch also has inbuilt functions add,sub,mul,div but its recommended to use Python operators

tensor([[3.3950, 4.6383, 6.4699]])

### Matrix multiplication

compute matrix multiplication by using `torch.matmul(tensorA,tensorB)`, `torch.mm(tensorA,tensorB)` or `tensorA @ tensorB`

Two main ways of multiplication on tensors:
1. element-wise multiplication
2. matrix multiplication

Rules of matrix multiplication:
1. the **inner** dimensions must match:
* `(3,2) @ (3,2)` will not work
* `(2,3) @ (3,2)` will work
2. resulting matrix has the shape of the **outer dimentions**:
* `(2,3) @ (3,2) -> (2,2) `
* `(3,2) @ (2,3) -> (3,3) `


In [36]:
tensor = torch.tensor([1,3,5])

In [37]:
# Element wise multipication
print(tensor, "*", tensor)
print(f"Equals: {tensor*tensor}")

tensor([1, 3, 5]) * tensor([1, 3, 5])
Equals: tensor([ 1,  9, 25])


In [38]:
# Matrix multiplication
torch.matmul(tensor,tensor)

tensor(35)

In [39]:
# '@' symbol also works for matrix multiplication
tensor @ tensor

tensor(35)

In [40]:
# Matrix multiplication by hand
1*1+3*3+5*5

35

In [41]:
# matrix multiplication shape error
tensor1 = torch.tensor([3,2])
tensor2 = torch.tensor([3,2,2])
try:
  torch.matmul(tensor1,tensor2)
except Exception as e:
  print(e)

inconsistent tensor size, expected tensor [2] and src [3] to have the same number of elements, but got 2 and 3 elements respectively


#### Time comparision for matrix multiplication: manually vs pytorch

In [42]:
tensor = torch.rand([5,5])

In [43]:
%%time
print(sum(x*x for x in tensor))

tensor([2.3437, 2.0230, 1.4860, 1.4678, 2.7243])
CPU times: user 1.29 ms, sys: 42 µs, total: 1.33 ms
Wall time: 1.37 ms


In [44]:
%%time
print(torch.matmul(tensor,tensor))

tensor([[1.7581, 1.2790, 0.8835, 1.0476, 1.7704],
        [1.3519, 1.5124, 1.4494, 0.7406, 1.9469],
        [1.8489, 1.3016, 1.1762, 1.1955, 1.9184],
        [2.5729, 1.8504, 1.2989, 2.0138, 2.3872],
        [1.9098, 1.5160, 1.2365, 1.5537, 2.1062]])
CPU times: user 1.67 ms, sys: 0 ns, total: 1.67 ms
Wall time: 12.5 ms


### Shape errors in matrix multiplication

In [45]:
tensorA = torch.tensor(([3,5,1],
                       [2,2,5],
                       [9,2,3]))
tensorB = torch.tensor(([3,5,4],
                       [1,0,5],
                       [7,2,4],
                       [2,2,4],
                        ))
print(str(tensorA)+'\n'+str(tensorB))

tensor([[3, 5, 1],
        [2, 2, 5],
        [9, 2, 3]])
tensor([[3, 5, 4],
        [1, 0, 5],
        [7, 2, 4],
        [2, 2, 4]])


In [46]:
print(str(tensorA.shape)+'\n'+str(tensorB.shape))

torch.Size([3, 3])
torch.Size([4, 3])


In [47]:
# inner dimensions do not match, shape error
try:
  torch.matmul(tensorA,tensorB)
except Exception as e:
  print(e)

mat1 and mat2 shapes cannot be multiplied (3x3 and 4x3)


To fix tensor shape issues, we can manipulate the shape of one of our tensors using a **transpose**.

A **transpose** switches the axes or dimensions of a given tensor.
`tensorA.T` or `torch.transpose(tensorA,0,1)`

In [48]:
tensorB # original tensorB

tensor([[3, 5, 4],
        [1, 0, 5],
        [7, 2, 4],
        [2, 2, 4]])

In [49]:
tensorB.T # transposed tensorB from shape (3,4) to (4,3)

tensor([[3, 1, 7, 2],
        [5, 0, 2, 2],
        [4, 5, 4, 4]])

In [50]:
torch.matmul(tensorA,tensorB.T) # matrix multiplication works when tensorB is transposed

tensor([[38,  8, 35, 20],
        [36, 27, 38, 28],
        [49, 24, 79, 34]])

In [51]:
print(f"Original shapes: tensorA shape = {tensorA.shape}, tensorB shape = {tensorB.shape}")
print(f"New shapes: tensorA shape = {tensorA.shape}, tensorB shape={tensorB.T.shape}")
print(f"Matrix multiplication of tensors shape {tensorA.shape} and {tensorB.T.shape}")
result = torch.matmul(tensorA,tensorB.T)
print(f'\n {result}')
print(f'\n output shape: {result.shape}')

Original shapes: tensorA shape = torch.Size([3, 3]), tensorB shape = torch.Size([4, 3])
New shapes: tensorA shape = torch.Size([3, 3]), tensorB shape=torch.Size([3, 4])
Matrix multiplication of tensors shape torch.Size([3, 3]) and torch.Size([3, 4])

 tensor([[38,  8, 35, 20],
        [36, 27, 38, 28],
        [49, 24, 79, 34]])

 output shape: torch.Size([3, 4])


## Finding the min, max, mean, sum, etc (tensor aggregation)

In [52]:
# Create a tensor
import torch

x = torch.arange(0,100,10)
x

tensor([ 0, 10, 20, 30, 40, 50, 60, 70, 80, 90])

In [53]:
# Find the min
torch.min(x) # or x.min()

tensor(0)

In [54]:
# Find the max
torch.max(x), # or x.max()

(tensor(90),)

In [55]:
# Datatype error when using mean on tensor x
try:
  torch.mean(x)
except Exception as e:
  print(e)

mean(): could not infer output dtype. Input dtype must be either a floating point or complex dtype. Got: Long


In [56]:
# Find the mean
torch.mean(x.type(torch.float32)) # convert to float32 to avoid error

tensor(45.)

In [57]:
x.type(torch.float32).mean() # alternative way to compute mean

tensor(45.)

In [58]:
# Find the sum
torch.sum(x), x.sum()

(tensor(450), tensor(450))

In [59]:
## Finding the positional min and max

In [60]:
x = torch.arange(0,100,10)
x

tensor([ 0, 10, 20, 30, 40, 50, 60, 70, 80, 90])

In [61]:
# Find the position in tensor that has the minimum value with argmin() -> returns index position of target tensor where the minimum value occurs
x.argmin()

tensor(0)

In [62]:
x[0]

tensor(0)

In [63]:
# Find the position in tensor that has the maximum value with argmax()
x.argmax()

tensor(9)

In [64]:
x[9]

tensor(90)

## Reshaping, stacking, squeezing and unsqueezing tensors

* Reshaping - reshapes an input tensor to a efined shape
* View - Return a view of an input tensor of certain shape but keep the same memory as the original tensor
* Stacking - combine multiple tensors on top of each other (vstack) or side by stack (hstack)
* Squeeze - remove all `1` dimensions from a tensor
* Unsqueeze - add a `1` dimension to a target tensor
* Permute - Return a view of the input with dimensions permuted (swapped) in  certain way

In [80]:
# Create tensor
import torch

x = torch.arange(1.,10.)
x, x.shape

(tensor([1., 2., 3., 4., 5., 6., 7., 8., 9.]), torch.Size([9]))

In [81]:
# Reshape
x_reshaped = x.reshape(1,9) # arguments in torch.reshape() must have the same product as that of the input tensor
x_reshaped, x_reshaped.shape

(tensor([[1., 2., 3., 4., 5., 6., 7., 8., 9.]]), torch.Size([1, 9]))

In [67]:
# Change the view
z = x.view(2,5)
z, z.shape

(tensor([[ 1.,  2.,  3.,  4.,  5.],
         [ 6.,  7.,  8.,  9., 10.]]),
 torch.Size([2, 5]))

`torch.view()` creates a new tensor sharing the same memory as the original(changing pytorch's interpretation of the tensor).

`torch.reshape()` creates a new tensor in memory: iff the tensor is non-contiguous(has been modified from initialization).

If tensor is contiguous, `torch.view()` and `torch.reshape()` behave the same.

In [68]:
# Stack tensors on top of each other
x_stacked = torch.stack([x,x,x,x],dim=0)
x_stacked

tensor([[ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10.],
        [ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10.],
        [ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10.],
        [ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10.]])

In [69]:
x_stacked = torch.stack([x,x,x,x],dim=1) #change orientation idk what this do
x_stacked

tensor([[ 1.,  1.,  1.,  1.],
        [ 2.,  2.,  2.,  2.],
        [ 3.,  3.,  3.,  3.],
        [ 4.,  4.,  4.,  4.],
        [ 5.,  5.,  5.,  5.],
        [ 6.,  6.,  6.,  6.],
        [ 7.,  7.,  7.,  7.],
        [ 8.,  8.,  8.,  8.],
        [ 9.,  9.,  9.,  9.],
        [10., 10., 10., 10.]])

In [87]:
# torch.squeeze() - removes all single dimensions from a target tensor
print(f"Previous tensor: {x_reshaped}")
print(f"Previous shape: {x_reshaped.shape}\n")

# Remove extra dimensions from x_reshaped
x_squeezed = x_reshaped.squeeze()
print(f"New tensor: {x_squeezed}")
print(f"Nerw tensor shape: {x_squeezed.shape}")

Previous tensor: tensor([[1., 2., 3., 4., 5., 6., 7., 8., 9.]])
Previous shape: torch.Size([1, 9])

New tensor: tensor([1., 2., 3., 4., 5., 6., 7., 8., 9.])
Nerw tensor shape: torch.Size([9])


In [105]:
# torch.unsqueeze() - adds a single dimension to a target tensor at a specific dimension
print(f"Previous target: {x_squeezed}")
print(f"Previous shape: {x_squeezed.shape}")

# add an extra dimension with unsqueeze
x_unsqueezed = x_squeezed.unsqueeze(dim=0) # dim is the index of the addition
print(f"\n New tensor: {x_unsqueezed}")
print(f"\n New tensor shape: {x_unsqueezed.shape}")

Previous target: tensor([1., 2., 3., 4., 5., 6., 7., 8., 9.])
Previous shape: torch.Size([9])

 New tensor: tensor([[1., 2., 3., 4., 5., 6., 7., 8., 9.]])

 New tensor shape: torch.Size([1, 9])


In [106]:
# torch.permute - rearranges the dimensions of a target tensor
x_original = torch.rand(size=(224,224,3)) # [height,width,color_channels]
print(f"Previous shape: {x_original.shape}\n")

# permute the original tensor to rearrrange the axis (or dim) order
x_permuted = x_original.permute(2,0,1)
print("changes:\n 2nd dim -> 1st dim \n 0th dim -> 1st dim \n 1st dim -> 2nd dim \n")
print(f"New shape: {x_permuted.shape}")

Previous shape: torch.Size([224, 224, 3])

changes:
 2nd dim -> 1st dim 
 0th dim -> 1st dim 
 1st dim -> 2nd dim 

New shape: torch.Size([3, 224, 224])


`torch.permute()` returns a view of the previous tensor - shares same memory as previous tensor

## Indexing (selecting data from tensors)
Indexing with Pytorch is similar to indexing with NumPy

In [107]:
# Create a tensor
import torch
x = torch.arange(1,10).reshape(1,3,3)
x,x.shape

(tensor([[[1, 2, 3],
          [4, 5, 6],
          [7, 8, 9]]]),
 torch.Size([1, 3, 3]))

In [112]:
# first tensor within the tensor
x[0]

tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])

In [113]:
# first row of the first matrix
x[0][0]

tensor([1, 2, 3])

In [114]:
# first element of the first row of the first matrix
x[0][0][0]

tensor(1)

In [115]:
# center most element
x[0][1][1]

tensor(5)

In [116]:
# ":" to select all of a target dimension
x[:,0]

tensor([[1, 2, 3]])

In [120]:
# get columns
print(f"First column: {x[:,:,0]}")
print(f"Second column: {x[:,:,1]}")
print(f"Second column: {x[:,:,2]}")

First column: tensor([[1, 4, 7]])
Second column: tensor([[2, 5, 8]])
Second column: tensor([[3, 6, 9]])


In [122]:
# get rows
print(f"First row: {x[:,0,:]}")
print(f"Second row: {x[:,1,:]}")
print(f"Third row: {x[:,2,:]}")

First row: tensor([[1, 2, 3]])
Second row: tensor([[4, 5, 6]])
Third row: tensor([[7, 8, 9]])
