In [1]:
import torch
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
print(torch.__version__)

2.5.1+cu121


In [2]:
!nvidia-smi # Runs when connected to a GPU

Thu Jan 23 02:09:22 2025       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.104.05             Driver Version: 535.104.05   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  Tesla T4                       Off | 00000000:00:04.0 Off |                    0 |
| N/A   39C    P8               9W /  70W |      0MiB / 15360MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

## Introduction to Tensors

### Creating tensors

In [3]:
# Scalar
scalar = torch.tensor(7)
scalar

tensor(7)

In [13]:
scalar.ndim # Scalar

0

In [5]:
# Get tensor back as Python int
scalar.item()

7

In [8]:
# Vector
vector = torch.tensor([7,7]) # 1D array
vector

tensor([7, 7])

In [7]:
vector.ndim # Contains 1 row. So, 1(row) + 0(columns) = 1

1

In [9]:
vector.shape

torch.Size([2])

In [10]:
# MATRIX
MATRIX = torch.tensor([[7,8],[9,10]]) # 2D Array
MATRIX

tensor([[ 7,  8],
        [ 9, 10]])

In [12]:
MATRIX.ndim # Contains 2 rows and 2 columns. So, 1(rows) + 1(columns) = 2

2

In [14]:
MATRIX[1]

tensor([ 9, 10])

In [15]:
MATRIX.shape

torch.Size([2, 2])

In [16]:
# TENSOR
TENSOR = torch.tensor([[[1,2,3],[4,5,6],[7,8,9]]])
TENSOR

tensor([[[1, 2, 3],
         [4, 5, 6],
         [7, 8, 9]]])

In [17]:
TENSOR.ndim  # 1 Batch contains 3 rows and 3 columns. So, 1(batch) + 1(rows) + 1(columns) = 3

3

In [18]:
TENSOR.shape

torch.Size([1, 3, 3])

In [19]:
TENSOR[0]

tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])

In [20]:
print(TENSOR[0][1])
print(TENSOR[0][1][0])

tensor([4, 5, 6])
tensor(4)


### Random Tensors

#### Why random tensors?

Random tensors are important because the way many neural networks learn is that they start with tensors full of random numbers and then adjust those random numbers (Backpropagation Gradient Descent) to better represent the data.

`Start with random numbers -> look at data -> update random numbers -> look at data -> update random numbers`

In [24]:
# Create a random tensor of size/shape (3,4)
random_tensor = torch.rand(3,4)
random_tensor

tensor([[0.7185, 0.4311, 0.1542, 0.5706],
        [0.5650, 0.9128, 0.0765, 0.5057],
        [0.0106, 0.9054, 0.4646, 0.7303]])

In [25]:
random_tensor.ndim # Contains rows and columns. So, 1(rows) + 1(columns) = 2

2

In [26]:
random_tensor.shape # Alternatively, we can use random_tensor.size()

torch.Size([3, 4])

In [27]:
random_tensor.size()

torch.Size([3, 4])

In [29]:
# Create a random tensor with similar shape to an image tensor
random_image_size_tensor = torch.rand(size=(224,224,3)) # height (nh), width (nw), color channels (nc) (RGB)
random_image_size_tensor.shape, random_image_size_tensor.ndim

(torch.Size([224, 224, 3]), 3)

### Zeros and Ones

This is helpful when we are creating some form of mask (mask of an image)

In [30]:
# Create a tensor of all zeros
zeros = torch.zeros(size=(3,4))
zeros

tensor([[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]])

In [31]:
zeros*random_tensor

tensor([[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]])

In [32]:
# Create a tensor of all ones
ones = torch.ones(size=(3,4))
ones

tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]])

In [33]:
ones.dtype

torch.float32

In [34]:
random_tensor.dtype

torch.float32

### Creating a range of tensors and tensors-like

In [35]:
# Use torch.arange()
torch.arange(1,11)

tensor([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [36]:
torch.arange(start=0, end=100)

tensor([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
        18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
        36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53,
        54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71,
        72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
        90, 91, 92, 93, 94, 95, 96, 97, 98, 99])

In [37]:
torch.arange(start=0, end=100, step=10)

tensor([ 0, 10, 20, 30, 40, 50, 60, 70, 80, 90])

In [38]:
# Creating tensors-like. To get new tensor with same shape/size as any existing tensor's shape/size

one_to_ten = torch.arange(start=1, end=11, step=1)
print(one_to_ten)
ten_zeros = torch.zeros_like(input=one_to_ten) # Same shape as one_to_ten tensor
print(ten_zeros)

tensor([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10])
tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])


### Tensor datatypes

**Note:** Tensor datatypes is one of the big errors we might run into with PyTorch and Deep Learning:
1. Tensors not right datatype
2. Tensors not right shape
3. Tensors not on the right device

In [39]:
# Float 32 tensor (default datatype)
float_32_tensor = torch.tensor([3.0, 6.0, 9.0],
                               dtype=None, # Even though dtype is None, it will by default adjust to "float32"
                               device=None, # Even though device is None, by default, it is "cpu". We can have device as "cuda" as well
                               requires_grad=False) # To track gradients
float_32_tensor

tensor([3., 6., 9.])

In [40]:
float_32_tensor.dtype

torch.float32

In [41]:
# Float 16 tensor
float_16_tensor = torch.tensor([3.0, 6.0, 9.0], dtype=torch.float16)
float_16_tensor

tensor([3., 6., 9.], dtype=torch.float16)

In [42]:
float_16_tensor.dtype

torch.float16

In [43]:
# We can also get float16 tensor from float32 tensor
float_16_tensor = float_32_tensor.type(torch.float16)
float_16_tensor

tensor([3., 6., 9.], dtype=torch.float16)

In [44]:
float_32_tensor * float_16_tensor
# Right now, since elements are very small (3.0, 6.0, 9.0) in both float32 and float16, it won't give an error.
# But if there was very big number, beyond range of float16 in float32 tensor, then it would have given an error.

tensor([ 9., 36., 81.])

In [45]:
# Change runtime to GPU

int_32_tensor = torch.tensor([3, 6, 9], dtype=torch.int32, device="cuda")
int_32_tensor

tensor([3, 6, 9], device='cuda:0', dtype=torch.int32)

In [46]:
# Change runtime to GPU

# Now, by default, float_32_tensor is on "cpu" and we stated device for int_32_tensor as "cuda"
# So, the below code will give an error since they are not on the same device
float_32_tensor * int_32_tensor # Error

RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu!

In [47]:
print(float_32_tensor.device) # cpu
print(float_16_tensor.device) # cpu
print(int_32_tensor.device) # cuda

cpu
cpu
cuda:0


### Manipulating Tensors (tensor operations)

Tensor operations include:
* Addition
* Subtraction
* Multiplication (Element-wise)
* Division
* Matrix multiplication

In [48]:
# Addition
tensor = torch.tensor([1,2,3])
tensor + 10

tensor([11, 12, 13])

In [49]:
# Multiplication (Scalar)
tensor * 10

tensor([10, 20, 30])

In [50]:
# Division
tensor / 10

tensor([0.1000, 0.2000, 0.3000])

In [51]:
# Subtraction
tensor - 10

tensor([-9, -8, -7])

In [52]:
# PyTorch in-built functions
print(torch.mul(tensor, 10))
print(torch.add(tensor, 10))

tensor([10, 20, 30])
tensor([11, 12, 13])


In [53]:
# Multiplication (Element-wise)
tensor * tensor

tensor([1, 4, 9])

In [54]:
tensor.shape

torch.Size([3])

In [55]:
# Matrix Multiplication
tensor.matmul(tensor)

tensor(14)

In [56]:
# Matrix Multiplication
tensor @ tensor # Alternate Method

tensor(14)

In [57]:
# Matrix Multiplication
torch.matmul(tensor, tensor) # Alternate Method

tensor(14)

In [58]:
# Matrix Multiplication
torch.mm(tensor, tensor) # Alternate Method, but can only be used for matrix, not for scalars and vectors

RuntimeError: self must be a matrix

In [65]:
# Calculating "time" it takes in Matrix Multiplication Operation
%%time
torch.matmul(tensor, tensor) # Can be used as an alternative to tensor.matmul(tensor)

CPU times: user 66 µs, sys: 12 µs, total: 78 µs
Wall time: 82 µs


tensor(14)

In [66]:
torch.matmul(torch.rand(7,2), torch.rand(2,8)) # We get 7 by 8 matrix

tensor([[0.9136, 0.7277, 0.5253, 1.0991, 0.6743, 0.5330, 1.2385, 0.6824],
        [0.4190, 0.3168, 0.2475, 0.4855, 0.3122, 0.2505, 0.5626, 0.3017],
        [0.6015, 0.6454, 0.2813, 0.9059, 0.4147, 0.2924, 0.8688, 0.5599],
        [0.3907, 0.7012, 0.0733, 0.8977, 0.2200, 0.0907, 0.6551, 0.5514],
        [0.9599, 0.5585, 0.6318, 0.9288, 0.7445, 0.6325, 1.2349, 0.5798],
        [0.3910, 0.2430, 0.2513, 0.3953, 0.3005, 0.2522, 0.5080, 0.2465],
        [0.6415, 0.4750, 0.3828, 0.7323, 0.4797, 0.3869, 0.8580, 0.4552]])

In [67]:
tensor_A = torch.tensor([[1,2],
                         [3,4],
                         [5,6]])
tensor_B = torch.tensor([[7,10],
                         [8,11],
                         [9,12]])

torch.mm(tensor_A, tensor_B) # Error

RuntimeError: mat1 and mat2 shapes cannot be multiplied (3x2 and 3x2)

In [68]:
tensor_A.shape, tensor_B.shape

(torch.Size([3, 2]), torch.Size([3, 2]))

In [69]:
tensor_B, tensor_B.shape

(tensor([[ 7, 10],
         [ 8, 11],
         [ 9, 12]]),
 torch.Size([3, 2]))

In [70]:
# Transpose
tensor_B.T, tensor_B.T.shape

(tensor([[ 7,  8,  9],
         [10, 11, 12]]),
 torch.Size([2, 3]))

In [71]:
torch.mm(tensor_A, tensor_B.T) # Now, it won't give any error and will work fine.

tensor([[ 27,  30,  33],
        [ 61,  68,  75],
        [ 95, 106, 117]])

### Tensor Aggregation Functions

In [73]:
x = torch.arange(0,100,10)
x

tensor([ 0, 10, 20, 30, 40, 50, 60, 70, 80, 90])

In [74]:
x.dtype

torch.int64

In [75]:
# Min
torch.min(x), x.min()

(tensor(0), tensor(0))

In [76]:
# Max
torch.max(x), x.max()

(tensor(90), tensor(90))

In [77]:
# Mean

torch.mean(x) # Error -> (Input dtype must be either a floating point or complex dtype.)

RuntimeError: mean(): could not infer output dtype. Input dtype must be either a floating point or complex dtype. Got: Long

In [78]:
# Mean
torch.mean(x.type(torch.float32)) , x.type(torch.float32).mean()

(tensor(45.), tensor(45.))

In [79]:
# Sum
torch.sum(x), x.sum()

(tensor(450), tensor(450))

### Finding the positional min and max

In [80]:
x

tensor([ 0, 10, 20, 30, 40, 50, 60, 70, 80, 90])

In [81]:
# Find the position in tensor that has the minimum value with argmin() -> returns index position of target tensor where the minimum value occurs
x.argmin()

tensor(0)

In [82]:
x[0]

tensor(0)

In [83]:
# Find the position in tensor that has the maximum value with argmax() -> returns index position of target tensor where the maximum value occurs
x.argmax()

tensor(9)

In [84]:
x[9]

tensor(90)

### Reshaping, View, Stacking, Squeezing and Unsqueezing tensors

* Reshaping -  reshapes an input tensor to a definded shape
* View - Return a view of an input tensor of certain shape but keep the same memory as the original tensor.
* Stacking - combine multiple tensors on top of each other (vstack) or side by side (hstack)
* Squeeze - removes all `1` dimensions from a tensor
* Unsqueeze - add a `1` dimension to a target tensor
* Permute - return a view of the input with dimensions permuted (swapped) in a certain way

In [85]:
x = torch.arange(1.,10.)

In [86]:
x, x.shape

(tensor([1., 2., 3., 4., 5., 6., 7., 8., 9.]), torch.Size([9]))

In [87]:
# Add an extra dimension
x_reshaped = x.reshape(1,7) # Error

RuntimeError: shape '[1, 7]' is invalid for input of size 9

In [88]:
# Add an extra dimension
x_reshaped = x.reshape(1,9)

In [89]:
x_reshaped, x_reshaped.shape

(tensor([[1., 2., 3., 4., 5., 6., 7., 8., 9.]]), torch.Size([1, 9]))

In [90]:
x_reshaped = x.reshape(9,1)
x_reshaped, x_reshaped.shape

(tensor([[1.],
         [2.],
         [3.],
         [4.],
         [5.],
         [6.],
         [7.],
         [8.],
         [9.]]),
 torch.Size([9, 1]))

In [91]:
# Change the view
z = x.view(1,9)
z, z.shape

(tensor([[1., 2., 3., 4., 5., 6., 7., 8., 9.]]), torch.Size([1, 9]))

Changing 'z' above will change 'x' as well because they share same memory (view of a tensor shares the same memory as the original tensor)

In [93]:
z[:,0] = 5
z, x

(tensor([[5., 2., 3., 4., 5., 6., 7., 8., 9.]]),
 tensor([5., 2., 3., 4., 5., 6., 7., 8., 9.]))

In [94]:
# Stack tensors on top of each other
x_stack = torch.stack([x,x,x,x], dim=0)
x_stack

tensor([[5., 2., 3., 4., 5., 6., 7., 8., 9.],
        [5., 2., 3., 4., 5., 6., 7., 8., 9.],
        [5., 2., 3., 4., 5., 6., 7., 8., 9.],
        [5., 2., 3., 4., 5., 6., 7., 8., 9.]])

In [95]:
# Stack tensors side by side
x_stack = torch.stack([x,x,x,x], dim=1)
x_stack

tensor([[5., 5., 5., 5.],
        [2., 2., 2., 2.],
        [3., 3., 3., 3.],
        [4., 4., 4., 4.],
        [5., 5., 5., 5.],
        [6., 6., 6., 6.],
        [7., 7., 7., 7.],
        [8., 8., 8., 8.],
        [9., 9., 9., 9.]])

In [96]:
# torch.squeeze() - removes all single dimensions from a target tensor
x_reshaped = x_reshaped.reshape(1,9)
x_reshaped

tensor([[5., 2., 3., 4., 5., 6., 7., 8., 9.]])

In [97]:
x_reshaped.shape

torch.Size([1, 9])

In [98]:
x_reshaped.squeeze()

tensor([5., 2., 3., 4., 5., 6., 7., 8., 9.])

In [99]:
x_reshaped.squeeze().shape

torch.Size([9])

In [100]:
 # torch.unsqueeze() - adds a single dimension to a target tensor at a specific dim (dimension)
x_squeeze = x_reshaped.squeeze()
x_unsqueeze_dim0 = x_squeeze.unsqueeze(dim=0)
x_unsqueeze_dim1 = x_squeeze.unsqueeze(dim=1)

In [101]:
x_unsqueeze_dim0, x_unsqueeze_dim0.shape

(tensor([[5., 2., 3., 4., 5., 6., 7., 8., 9.]]), torch.Size([1, 9]))

In [102]:
x_unsqueeze_dim1, x_unsqueeze_dim1.shape

(tensor([[5.],
         [2.],
         [3.],
         [4.],
         [5.],
         [6.],
         [7.],
         [8.],
         [9.]]),
 torch.Size([9, 1]))

In [103]:
# torch.permute - rearrange the dimensions of a target tensor in a specified order
x_original = torch.rand(size=(224,224,3)) # height, width, color channels
x_permute = x_original.permute(2,0,1) # color channels, height, width.

In [104]:
x_original.shape, x_permute.shape

(torch.Size([224, 224, 3]), torch.Size([3, 224, 224]))

permute is a view so x_original and x_permute share same memory and change in any one of them will affect both

In [105]:
x_original[0,0,0] # Alternate to x_original[0][0][0]

tensor(0.8551)

In [106]:
x_permute[0,0,0]

tensor(0.8551)

In [107]:
x_original[0,0,0] = 0.9273

In [108]:
x_permute[0,0,0] # Since permute is a view, it will also get updated

tensor(0.9273)

### Indeixng (seleting data from tensors)

Indeixng with PyTorch is similar to indexing with NumPy

In [125]:
x = torch.arange(1,10).reshape(1,3,3)

In [126]:
x, x.shape

(tensor([[[1, 2, 3],
          [4, 5, 6],
          [7, 8, 9]]]),
 torch.Size([1, 3, 3]))

In [127]:
x[0]

tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])

In [128]:
x[0][1]

tensor([4, 5, 6])

In [129]:
x[0][1][2]

tensor(6)

In [130]:
# A Tensor with more than 1 elements cannot be converted to Scalar
x[0][1].item() # Error

RuntimeError: a Tensor with 3 elements cannot be converted to Scalar

In [131]:
x[0][1][2].item() # Works fine

6

In [132]:
x[:,1] # Alternate to x[0][1]

tensor([[4, 5, 6]])

In [133]:
x[:,1,2] # Alternate to x[0][1][2]

tensor([6])

In [134]:
x[0,1] # Alternate to x[0][1]

tensor([4, 5, 6])

In [135]:
x[0,1,2] # Alternate to x[0][1][2]

tensor(6)

In [136]:
x[0,1,:] # Alternate to x[0][1]

tensor([4, 5, 6])

In [137]:
x[0,1,2:] # Alternate to x[0][1][2]

tensor([6])

In [138]:
x[:,:,2] # All the elements in last column

tensor([[3, 6, 9]])

### PyTorch tensors and NumPy

In [139]:
import numpy as np

In [140]:
# Convert data in NumPy to PyTorch tensor
array = np.arange(1.0,8.0)
tensor = torch.from_numpy(array)
array, tensor

(array([1., 2., 3., 4., 5., 6., 7.]),
 tensor([1., 2., 3., 4., 5., 6., 7.], dtype=torch.float64))

NumPy array and PyTorch tensor does not share same memory and hence doesn't affect each other

In [141]:
array.dtype # default is float64 for NumPy

dtype('float64')

In [142]:
tensor.dtype # since NumPy array has default float64, our tensor is of dtype float64

torch.float64

In [143]:
# To change the dtype of tensor to default float32 from float64
tensor = tensor.type(torch.float32)
tensor.dtype

torch.float32

In [144]:
# PyTorch tensor to NumPy
tensor = torch.ones(7)
numpy_tensor = tensor.numpy()
tensor, numpy_tensor

(tensor([1., 1., 1., 1., 1., 1., 1.]),
 array([1., 1., 1., 1., 1., 1., 1.], dtype=float32))

In [145]:
array.dtype # since PyTorch tensor has default float32, our NumPy array is of dtype float32

dtype('float64')

### Reproducibility (trying to take random out of random)

To reduce the randomness in PyTorch, we will use the concept of **random seed**



In [146]:
random_tensor_A = torch.rand(3,4)
random_tensor_B =  torch.rand(3,4)

In [147]:
random_tensor_A == random_tensor_B

tensor([[False, False, False, False],
        [False, False, False, False],
        [False, False, False, False]])

In [148]:
# Let's make some random but reproducible tensors

# set the random seed (For each tensor)
RANDOM_SEED = 42

torch.manual_seed(RANDOM_SEED) # for random_tensor_C
random_tensor_C = torch.rand(3,4)

torch.manual_seed(RANDOM_SEED) # for random_tensor_D
random_tensor_D = torch.rand(3,4)

In [149]:
random_tensor_C == random_tensor_D

tensor([[True, True, True, True],
        [True, True, True, True],
        [True, True, True, True]])

### Running tensors and PyTorch objects on the GPUs (and making faster computations)

https://pytorch.org/docs/stable/notes/cuda.html#best-practices

In [150]:
# Check for GPU access with PyTorch
torch.cuda.is_available()

True

In [151]:
# Setup device agnostic code
device = "cuda" if torch.cuda.is_available() else "cpu"
device

'cuda'

In [152]:
# Count number of devices
torch.cuda.device_count()

1

### Putting tensors (and models) on the GPU

In [153]:
tensor = torch.tensor([1,2,3])
tensor, tensor.device

(tensor([1, 2, 3]), device(type='cpu'))

In [154]:
# Move tensor to GPU (if available)
# Change runtime to GPU
tensor_on_gpu = tensor.to(device)
tensor_on_gpu, tensor_on_gpu.device

(tensor([1, 2, 3], device='cuda:0'), device(type='cuda', index=0))

### Moving tensors back to the CPU

In [155]:
# if tensor is on GPU, can't transform it to NumPy
tensor_on_gpu.numpy() #Error

TypeError: can't convert cuda:0 device type tensor to numpy. Use Tensor.cpu() to copy the tensor to host memory first.

In [156]:
# To fix the above issue
tensor_back_on_cpu = tensor_on_gpu.cpu()

In [157]:
tensor_back_on_cpu.device, tensor_on_gpu.device

(device(type='cpu'), device(type='cuda', index=0))

In [158]:
tensor_back_on_cpu.numpy() # Works fine

array([1, 2, 3])