<a href="https://colab.research.google.com/github/linhoangce/pytorch_crash_course/blob/main/00_pytorch_funfamentals.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
print(torch.__version__)

2.6.0+cu124


## Introduction to Tensors

### Creating tensors



In [2]:
# scalar
scalar = torch.tensor(7)
scalar

tensor(7)

In [3]:
scalar.ndim

0

In [4]:
# Get tensor back as Python int
scalar.item()

7

In [5]:
# Vector
vector = torch.tensor([7, 7])
vector

tensor([7, 7])

In [6]:
vector.ndim

1

In [7]:
vector.shape

torch.Size([2])

In [8]:
# MATRIX
MATRIX = torch.tensor([[7, 8],
                      [9, 10]])
MATRIX

tensor([[ 7,  8],
        [ 9, 10]])

In [9]:
MATRIX.ndim

2

In [10]:
MATRIX.shape

torch.Size([2, 2])

In [11]:
MATRIX[0]

tensor([7, 8])

In [12]:
# TENSOR
TENSOR = torch.tensor([[[1, 2, 3],
                        [4, 5, 6],
                        [1, 9, 10]]])
TENSOR

tensor([[[ 1,  2,  3],
         [ 4,  5,  6],
         [ 1,  9, 10]]])

In [13]:
TENSOR.ndim

3

In [14]:
TENSOR.shape

torch.Size([1, 3, 3])

In [15]:
TENSOR[0, 1]

tensor([4, 5, 6])

In [16]:
TENSOR1 = torch.tensor([[[1, 2, 4],
                         [2, 5, 6]],
                        [[1, 1, 5],
                         [5, 5, 5]]])

In [17]:
TENSOR.ndim

3

In [18]:
TENSOR1.shape

torch.Size([2, 2, 3])

### Random tensors

Why random tensors?

Random tensors are important because the way many neural networks learn is that they start with tensors full of random numbers and then adjust those random numbers to better represent the data.

`Start with random numbers -> look at data -> update random numbers -? look at data -> update random numbers

In [19]:
# Create a random tensor of size (3, 4)
random_tensor = torch.rand(3, 4)
random_tensor

tensor([[0.5191, 0.0412, 0.0329, 0.4614],
        [0.4096, 0.2985, 0.5390, 0.8311],
        [0.3381, 0.3323, 0.9026, 0.1750]])

In [20]:
random_tensor.ndim

2

In [21]:
# Create a random tensor with similar shape to an image tensor
random_image_size_tensor = torch.rand(size=(224, 224, 3)) # height, width, color chanels (R, G, B)
random_image_size_tensor.shape, random_image_size_tensor.ndim

(torch.Size([224, 224, 3]), 3)

# Zeros and ones

In [22]:
# Create a tensor all zeros
zeros = torch.zeros(size=(3, 5))
zeros

tensor([[0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.]])

In [23]:
ones = torch.ones(size=(3, 4))
ones

tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]])

In [24]:
ones.dtype

torch.float32

### Create a range of tensor and tensor-like

In [25]:
# Use torch.range()
torch.range(0, 10)

  torch.range(0, 10)


tensor([ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10.])

In [26]:
torch.arange(1, 11)

tensor([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [27]:
range_w_step = torch.arange(start=10, end=100, step= 12)
range_w_step

tensor([10, 22, 34, 46, 58, 70, 82, 94])

In [28]:
# Creating tensors like
ten_zeros = torch.zeros_like(input=range_w_step)
ten_zeros

tensor([0, 0, 0, 0, 0, 0, 0, 0])

### Tensor datatypes

**Note:** Tensor datatypes is one of the 3 big errors we'll runinto with PyTorch and deeo learning:

1. Tensors not having right datatype
2. Tensors not having right shape
3. Tensors not on the right devoice

In [29]:
# Float 32 tensor
float_32_tensor = torch.tensor([3.0, 6.0, 9.0],
                               dtype=None, # specify the data type for the tensor elements
                               device=None, # specify the hardware
                               requires_grad=False) # specify whether to track gradients for backpropagation
float_32_tensor

tensor([3., 6., 9.])

In [30]:
float_32_tensor.dtype

torch.float32

In [31]:
float_16_tensor = float_32_tensor.type(torch.half) # === float_32_tensor.type(float16)
float_16_tensor

tensor([3., 6., 9.], dtype=torch.float16)

In [32]:
float_16_tensor * float_32_tensor

tensor([ 9., 36., 81.])

In [33]:
int_16_tensor = torch.tensor([3, 6, 9], dtype=torch.int16)
int_16_tensor

tensor([3, 6, 9], dtype=torch.int16)

In [34]:
float_16_tensor * int_16_tensor # this is element-wise multiplication (Hadarmard) which involing casting of datatypes making it work

tensor([ 9., 36., 81.], dtype=torch.float16)

In [35]:
float_16_tensor.dot(int_16_tensor) # however, this does not work because dot product requires strict dtype

RuntimeError: dot : expected both vectors to have same dtype, but found Half and Short

from __future__ import division
### Manipulate Tensors (tensor operations)

Tensor operations include:
* Addition
* Subtraction
* Multiplication (element-wise)
* division
* Matrix multiplication

In [36]:
# Create a tensor and add 10 to it
tensor = torch.tensor([1, 2, 3])
tensor + 10

tensor([11, 12, 13])

In [37]:
# Multiply tensor by 10
tensor * 10

tensor([10, 20, 30])

In [38]:
tensor - 10

tensor([-9, -8, -7])

In [39]:
# built-in functions from PyTorch
torch.mul(tensor, 10)

tensor([10, 20, 30])

### Matric Multiplication

Two main ways of performing multiplication in neural networks and deep learning:

1. Element-wise multiplication
2. Matrix multiplication(dot product)


In [40]:
# Element-wise multiplication
tensor * tensor

tensor([1, 4, 9])

In [41]:
# Matrix multiplication
torch.matmul(tensor, tensor)

tensor(14)

In [42]:
# which is equal to
1*1 + 2*2 + 3*3

14

In [43]:
%%time
value = 0
for i in range(len(tensor)):
  value += tensor[i] * tensor[i]
print(value)

tensor(14)
CPU times: user 1.41 ms, sys: 0 ns, total: 1.41 ms
Wall time: 1.54 ms


In [44]:
%%time
torch.matmul(tensor, tensor)


CPU times: user 373 µs, sys: 62 µs, total: 435 µs
Wall time: 341 µs


tensor(14)

In [45]:
a = torch.rand(3, 2)
b = torch.rand(3, 2)

### element-wise multiplication
a * b

tensor([[0.6893, 0.4638],
        [0.0668, 0.5239],
        [0.0814, 0.1630]])

In [46]:
### matrix multiplication
torch.matmul(a, b.transpose(0, 1))

tensor([[1.1531, 0.7478, 0.5249],
        [1.0224, 0.5907, 0.4771],
        [0.6516, 0.7491, 0.2443]])

In [47]:
# same as
torch.matmul(a, b.T)

tensor([[1.1531, 0.7478, 0.5249],
        [1.0224, 0.5907, 0.4771],
        [0.6516, 0.7491, 0.2443]])

In [48]:
b.shape

torch.Size([3, 2])

### Finding the min, max, mean, sum, etc (tensor aggregation)

In [49]:
x = torch.arange(0, 100, step=10)
x, x.dtype

(tensor([ 0, 10, 20, 30, 40, 50, 60, 70, 80, 90]), torch.int64)

In [50]:
# find min
x.min(), torch.min(x)

(tensor(0), tensor(0))

In [51]:
# find max
torch.max(x)

tensor(90)

In [52]:
# find mean - torch.mean() requires a tensor of float32 datatype
torch.mean(x.type(torch.float32))

tensor(45.)

In [53]:
x.type(torch.float32).mean()

tensor(45.)

In [54]:
# find sum
torch.sum(x)

tensor(450)

### Find the positional min and max

In [55]:
x

tensor([ 0, 10, 20, 30, 40, 50, 60, 70, 80, 90])

In [56]:
# find the position in tensor that has the minimum value with argmin() -> returns the index of the value
x.argmin()

tensor(0)

In [57]:
# find the position in tensor that has maximum value with argmax()
torch.argmax(x)

tensor(9)

## Reshaping, stacking, squeezing and unsqueezing tensors

 * Reshaping - reshapes an input tensor to a defined shape
 * View - returns a view of an unput tensor of certaun shape but keep the same memory as the original tensor
 * Stacking - combines multiple tensors on top of each other (vstack) or side by side (hstack)
 * Squeeze - removes all `1` dimensions from a tensor
 * Unsqueeze - adds a ``1`` dimension to a target tensor
 * Permute - returns a view of the input with dimensions permuted (swapped) in a certain way

In [58]:
x = torch.arange(1., 10.)
x, x.shape

(tensor([1., 2., 3., 4., 5., 6., 7., 8., 9.]), torch.Size([9]))

In [59]:
# Add an extra dimension
x_reshaped = x.reshape(1, 7)
x_reshaped, x_reshaped.shape

RuntimeError: shape '[1, 7]' is invalid for input of size 9

In [60]:
x_reshaped = x.reshape(1, 9)
x_reshaped, x_reshaped.shape


(tensor([[1., 2., 3., 4., 5., 6., 7., 8., 9.]]), torch.Size([1, 9]))

In [61]:
x_r = x.reshape(9, 1)
x_r, x_r.shape

(tensor([[1.],
         [2.],
         [3.],
         [4.],
         [5.],
         [6.],
         [7.],
         [8.],
         [9.]]),
 torch.Size([9, 1]))

In [62]:
# Change the view
z = x.view(1, 9)
z

tensor([[1., 2., 3., 4., 5., 6., 7., 8., 9.]])

In [63]:
# Changinz  changes x (beccause a view of a tensor shares the same memory as the original)
z[:, 0] = 5
z, x

(tensor([[5., 2., 3., 4., 5., 6., 7., 8., 9.]]),
 tensor([5., 2., 3., 4., 5., 6., 7., 8., 9.]))

In [64]:
# Stack tensors on top of each other
x_stacked = torch.stack([x, x, x, x], dim=0)
x_stacked

tensor([[5., 2., 3., 4., 5., 6., 7., 8., 9.],
        [5., 2., 3., 4., 5., 6., 7., 8., 9.],
        [5., 2., 3., 4., 5., 6., 7., 8., 9.],
        [5., 2., 3., 4., 5., 6., 7., 8., 9.]])

In [65]:
x_stacked = torch.stack([x, x, x, x], dim=1)
x_stacked

tensor([[5., 5., 5., 5.],
        [2., 2., 2., 2.],
        [3., 3., 3., 3.],
        [4., 4., 4., 4.],
        [5., 5., 5., 5.],
        [6., 6., 6., 6.],
        [7., 7., 7., 7.],
        [8., 8., 8., 8.],
        [9., 9., 9., 9.]])

In [73]:
# torch.squeeze() - removes all single dimensions from a target tensor
print(f"Previous tensor: {x_reshaped}")
print(f"Previpus shape: {x_reshaped.shape}")

# Remove extra dimensions from x_reshaped
x_squeezed = x_reshaped.squeeze()
print(f"\nNew tensor: {x_squeezed}")
print(f"New shape: {x_squeezed.shape}")

Previous tensor: tensor([[5., 2., 3., 4., 5., 6., 7., 8., 9.]])
Previpus shape: torch.Size([1, 9])

New tensor: tensor([5., 2., 3., 4., 5., 6., 7., 8., 9.])
New shape: torch.Size([9])


In [69]:
x_reshaped.shape

torch.Size([1, 9])

In [70]:
x_reshaped.squeeze()

tensor([5., 2., 3., 4., 5., 6., 7., 8., 9.])

In [71]:
x_reshaped.squeeze().shape

torch.Size([9])

In [74]:
# torch.unsqueeze() - adds a single dimension to a target tensor at a specific dim
x_unsqueezed = torch.unsqueeze(x_squeezed, dim=0)
x_unsqueezed

tensor([[5., 2., 3., 4., 5., 6., 7., 8., 9.]])

In [75]:
x_un = torch.unsqueeze(x_squeezed, dim=1)
x_un

tensor([[5.],
        [2.],
        [3.],
        [4.],
        [5.],
        [6.],
        [7.],
        [8.],
        [9.]])

In [79]:
# torch.permute() - rearrange the dimensions of a target tensor in a specified order
x_original = torch.rand(size=(224, 224, 3)) # [height, width, color_chanels]

# Permute the original tensor to rearrange the axis (or dim) order
x_permuted = x_original.permute(2, 0, 1) # shifts axis 0->1, 1->2, 2->9
print(f"Previous shape: {x_original.shape}")
print(f"Permuted shape: {x_permuted.shape}")
x_permuted

Previous shape: torch.Size([224, 224, 3])
Permuted shape: torch.Size([3, 224, 224])


tensor([[[0.2876, 0.8355, 0.8809,  ..., 0.4199, 0.7012, 0.2914],
         [0.4071, 0.6017, 0.9672,  ..., 0.2425, 0.6946, 0.1285],
         [0.4105, 0.0782, 0.9067,  ..., 0.0060, 0.2303, 0.3306],
         ...,
         [0.7081, 0.1923, 0.3759,  ..., 0.3597, 0.3887, 0.2399],
         [0.3915, 0.3114, 0.7050,  ..., 0.7425, 0.4704, 0.0354],
         [0.8330, 0.2090, 0.8337,  ..., 0.3139, 0.9699, 0.4855]],

        [[0.8943, 0.5178, 0.8517,  ..., 0.4576, 0.4643, 0.6245],
         [0.7447, 0.0787, 0.6128,  ..., 0.6449, 0.3939, 0.1556],
         [0.5613, 0.4711, 0.8578,  ..., 0.2313, 0.5860, 0.1669],
         ...,
         [0.9987, 0.1361, 0.3477,  ..., 0.9350, 0.6766, 0.1188],
         [0.4738, 0.9702, 0.6884,  ..., 0.9159, 0.2125, 0.3610],
         [0.9497, 0.9544, 0.6932,  ..., 0.6091, 0.6182, 0.2242]],

        [[0.5841, 0.2820, 0.8678,  ..., 0.2297, 0.1272, 0.5781],
         [0.6804, 0.5882, 0.9080,  ..., 0.8190, 0.6654, 0.0333],
         [0.7240, 0.4926, 0.6687,  ..., 0.4688, 0.2786, 0.

In [91]:
x_permuted[0, 0, 0] = 111
x_permuted[0, 0, :5] # this is equal to x_permuted[0][0][:5]

tensor([111.0000,   0.8355,   0.8809,   0.9090,   0.6911])

# Indexing (selecting data from tensors)

Indexing with PyTorch is similar to indexing with NumPy

In [87]:
# Create a tensor
x = torch.arange(1, 10).reshape(1, 3, 3)
x, x.shape

(tensor([[[1, 2, 3],
          [4, 5, 6],
          [7, 8, 9]]]),
 torch.Size([1, 3, 3]))

In [88]:
# Index on the new tensor
x[0]

tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])

In [92]:
# Index on the first dimension
x[0][1]

tensor([4, 5, 6])

In [93]:
# Index on the second dimension
x[0][1][2]

tensor(6)

In [94]:
x[:, 0]

tensor([[1, 2, 3]])

In [95]:
# Get all values of the 0th and 1st dimensions but only index 1 of 2nd dimension
x[:, :, 1]

tensor([[2, 5, 8]])

In [96]:
# Get all values of the 0th dimension but only the 1 index value of 1st and 2nd dimension
x[:, 1, 1]

tensor([5])

## PyTorch tensors & NumPy

PyTorch has functionalities to interact with NumPy

* Data in NumPy, want in PyTorch tensor -> `torch.from_numpy(ndarray)`
* PyTorch tensor -> NumPy -> `torch.Tensor.numpy()`

In [97]:
# NumPy array to tensor
array = np.arange(1.0, 8.0)
tensor = torch.from_numpy(array) # when converting from numpy -> pytorch, pytorch reflects numpy's default datatype of float64 from numpy
array, tensor

(array([1., 2., 3., 4., 5., 6., 7.]),
 tensor([1., 2., 3., 4., 5., 6., 7.], dtype=torch.float64))

In [100]:
# Change the value of array, what will this do to `tensor`?
array = array + 1
array

array([ 4.,  5.,  6.,  7.,  8.,  9., 10.])

In [104]:
# Tensor to NumPy array
tensor = torch.ones(7)
numpy_tensor = tensor.numpy()
tensor, numpy_tensor

(tensor([1., 1., 1., 1., 1., 1., 1.]),
 array([1., 1., 1., 1., 1., 1., 1.], dtype=float32))

In [106]:
# Change the tensor, what happens to `numpy_tensor`?
tensor = tensor + 1
tensor, numpy_tensor

(tensor([3., 3., 3., 3., 3., 3., 3.]),
 array([1., 1., 1., 1., 1., 1., 1.], dtype=float32))

## Reproducibility



In [108]:
import torch

# Create two random tensors
random_tensor_A = torch.rand(3, 4)
random_tensor_B = torch.rand(3, 4)

print(f"Tensor A:\n{random_tensor_A}\n")
print(f"Tensor B:\n{random_tensor_B}\n")
print(f"Does Tensor A equal Tensor B? (anywhere)")
random_tensor_A == random_tensor_B

Tensor A:
tensor([[0.7250, 0.6079, 0.8801, 0.4625],
        [0.8739, 0.6769, 0.2235, 0.1934],
        [0.7722, 0.1712, 0.6523, 0.7242]])

Tensor B:
tensor([[0.9898, 0.9916, 0.7587, 0.6865],
        [0.8016, 0.6220, 0.2515, 0.7642],
        [0.7684, 0.7305, 0.8381, 0.9264]])

Does Tensor A equal Tensor B? (anywhere)


tensor([[False, False, False, False],
        [False, False, False, False],
        [False, False, False, False]])

In [112]:
# Random tensors that are reproducible

# Set the random seed
RANDOM_SEED = 42
torch.manual_seed(RANDOM_SEED)

random_tensor_C = torch.rand(3, 4)

torch.manual_seed(RANDOM_SEED)
random_tensor_D = torch.rand(3, 4)

print(random_tensor_C)
print(random_tensor_D)
print(random_tensor_C == random_tensor_D)

tensor([[0.8823, 0.9150, 0.3829, 0.9593],
        [0.3904, 0.6009, 0.2566, 0.7936],
        [0.9408, 0.1332, 0.9346, 0.5936]])
tensor([[0.8823, 0.9150, 0.3829, 0.9593],
        [0.3904, 0.6009, 0.2566, 0.7936],
        [0.9408, 0.1332, 0.9346, 0.5936]])
tensor([[True, True, True, True],
        [True, True, True, True],
        [True, True, True, True]])


## Running tensors and PyTorch objects on the GPUs

In [1]:
!nvidia-smi

Sun Mar 16 20:42:51 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla T4                       Off |   00000000:00:04.0 Off |                    0 |
| N/A   47C    P8             10W /   70W |       0MiB /  15360MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [2]:
import torch
torch.cuda.is_available()

True

In [3]:
# Setup device agnostic code
device = "cuda" if torch.cuda.is_available() else "cpu"
device

'cuda'

In [4]:
# Count number of device
torch.cuda.device_count()

1

## Putting tensors and models on the GPUs



In [6]:
# Create a tensor (default on the CPU)
tensor = torch.tensor([1, 2, 3])

tensor, tensor.device

(tensor([1, 2, 3]), device(type='cpu'))

In [8]:
#Move tensor to GPU (if available)
tensor_on_gpu = tensor.to(device)
tensor_on_gpu

tensor([1, 2, 3], device='cuda:0')

## Moving tensors back to the CPU (for NumPy computations)


In [9]:
# If tensor is ont GPU, can't transform it to NumPy
tensor_on_gpu.numpy()

TypeError: can't convert cuda:0 device type tensor to numpy. Use Tensor.cpu() to copy the tensor to host memory first.

In [12]:
# To fix the GPU tensor with NumPy issue, we can first set it to the CPU
tensor_back_on_cpu = tensor_on_gpu.cpu()
tensor_back_on_cpu.numpy()

array([1, 2, 3])