In [None]:
import torch
torch.__version__

'2.6.0+cu124'

In [None]:
scalar = torch.tensor(7)
scalar

tensor(7)

In [None]:
scalar.ndim

0

In [None]:
# to retrieve the number (only works with one-element tensors)
scalar.item()

7

Vector - single dimension tensor but can contain many numbers

In [None]:
# Vector
vector = torch.tensor([7,7])
vector

tensor([7, 7])

In [None]:
# dim of vector
vector.ndim

1

The number of dimensions (rank) of a tensor is how many indices are required to address a single element in that tensor.

For example, a vector needs 1 index and a matrix needs 2 indices.

In [None]:
vector.shape

torch.Size([2])

In [None]:
matrix = torch.tensor([[1, 2],
                       [3, 4]])
print(f"shape: {matrix.shape}\ndim: {matrix.ndim}")

shape: torch.Size([2, 2])
dim: 2


In [None]:
tensor = torch.tensor([[[1, 2, 3],
                        [3, 6, 9],
                        [2, 4, 5]]])
print(f"shape: {tensor.shape}\ndim: {tensor.ndim}")

shape: torch.Size([1, 3, 3])
dim: 3


In [None]:
T2 = torch.ones([2,2,3,4,5])
print(f"shape: {T2.shape}\ndim: {T2.ndim}")
print(T2)

shape: torch.Size([2, 2, 3, 4, 5])
dim: 5
tensor([[[[[1., 1., 1., 1., 1.],
           [1., 1., 1., 1., 1.],
           [1., 1., 1., 1., 1.],
           [1., 1., 1., 1., 1.]],

          [[1., 1., 1., 1., 1.],
           [1., 1., 1., 1., 1.],
           [1., 1., 1., 1., 1.],
           [1., 1., 1., 1., 1.]],

          [[1., 1., 1., 1., 1.],
           [1., 1., 1., 1., 1.],
           [1., 1., 1., 1., 1.],
           [1., 1., 1., 1., 1.]]],


         [[[1., 1., 1., 1., 1.],
           [1., 1., 1., 1., 1.],
           [1., 1., 1., 1., 1.],
           [1., 1., 1., 1., 1.]],

          [[1., 1., 1., 1., 1.],
           [1., 1., 1., 1., 1.],
           [1., 1., 1., 1., 1.],
           [1., 1., 1., 1., 1.]],

          [[1., 1., 1., 1., 1.],
           [1., 1., 1., 1., 1.],
           [1., 1., 1., 1., 1.],
           [1., 1., 1., 1., 1.]]]],



        [[[[1., 1., 1., 1., 1.],
           [1., 1., 1., 1., 1.],
           [1., 1., 1., 1., 1.],
           [1., 1., 1., 1., 1.]],

          [[1.

Instead, a machine learning model often starts out with large random tensors of numbers and adjusts these random numbers as it works through data to better represent it.

In essence:

Start with random numbers -> look at data -> update random numbers -> look at data -> update random numbers...

As a data scientist, we can define how the machine learning model starts (initialization), looks at data (representation) and updates (optimization) its random numbers.

We can do so using torch.rand() and passing in the size parameter.

In [None]:
random_tensor = torch.rand((3,4))
# random_tensor = torch.rand(size = (3,4)) # Same as above
random_tensor

tensor([[0.2255, 0.7078, 0.0169, 0.5432],
        [0.2049, 0.1635, 0.0883, 0.5343],
        [0.7533, 0.1039, 0.5510, 0.5057]])

Creating a tensor with `torch.arange()`, which works like `range()` in Python

In [None]:
one_to_ten_tensor = torch.arange(start=1, end=11, step=1)
one_to_ten_tensor

tensor([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [None]:
# Create a tensor of zeros with the same shape and dtype as an existing tensor
ten_zeroes = torch.zeros_like(one_to_ten_tensor)
ten_zeroes

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

## Tensor Datatypes

Tensor datatypes are one of the 3 big issues we run into:
1. Tensors not right dtype
2. Tensors not right shape
3. Tensors not on the right device

In [None]:
float_32_tensor = torch.tensor([3.0, 6.0, 9.0],
                               dtype=None, # What datatype is the tensor , can define explicitly
                               device=None, # basically can select cuda, cpu, tpu
                               requires_grad=False) # whether or not to track gradient with tensors operations
float_32_tensor.dtype

torch.float32

In [None]:
float_16_tensor = float_32_tensor.type(torch.float16)
float_16_tensor

tensor([3., 6., 9.], dtype=torch.float16)

In [None]:
float_16_tensor * float_32_tensor # No error. But some operations will result in error because not in right dtype

tensor([ 9., 36., 81.])

In [None]:
int_32_tensor = torch.tensor([3, 6, 9], dtype=torch.int32)
int_32_tensor

tensor([3, 6, 9], dtype=torch.int32)

In [None]:
float_32_tensor * int_32_tensor

tensor([ 9., 36., 81.])

### Getting information from tensors (attributes)

1. To get datatype from a tensor, can use tensor.dtype
2. to get shape, tensor.shape
3. to get device from a tensor, tensor.device

In [None]:
# Create a tensor
some_tensor = torch.rand(3,4)
some_tensor

tensor([[0.6058, 0.6315, 0.8657, 0.6027],
        [0.1208, 0.8500, 0.3479, 0.6399],
        [0.2699, 0.5262, 0.5136, 0.9910]])

In [None]:
print(some_tensor)
print(f"Datatype: {some_tensor.dtype}")
print(f"Shape: {some_tensor.shape}")
print(f"Shape: {some_tensor.size()}") # same thing as tensor.shape
print(f"Device: {some_tensor.device}")

tensor([[0.6058, 0.6315, 0.8657, 0.6027],
        [0.1208, 0.8500, 0.3479, 0.6399],
        [0.2699, 0.5262, 0.5136, 0.9910]])
Datatype: torch.float32
Shape: torch.Size([3, 4])
Shape: torch.Size([3, 4])
Device: cpu


### Manipulating Tensors (tensor operations)

tensor operations include:
* Add
* Sub
* Mult (element wise)
* Div
* Matrix Mult

In [None]:
# Create a sample tensors
tensor = torch.tensor([1, 2, 3])
tensor + 10

tensor([11, 12, 13])

In [None]:
# Mult
tensor * 10

tensor([10, 20, 30])

In [None]:
# Sub
tensor - 10

tensor([-9, -8, -7])

In [None]:
torch.mul(tensor, 10)

tensor([10, 20, 30])

### Matrix mult

2 main ways to perform multiplication in NN and DL.
1. Element-wise
2. Matrix Mult (dot product)

In [None]:
# Element wise
%%time
print(tensor * tensor)

tensor([1, 4, 9])
CPU times: user 1.5 ms, sys: 90 µs, total: 1.59 ms
Wall time: 2.23 ms


In [None]:
# Matrix mult
%%time
torch.matmul(tensor, tensor) # can also do `tensor @ tensor`

CPU times: user 0 ns, sys: 803 µs, total: 803 µs
Wall time: 9.36 ms


tensor(14)

In [None]:
torch.matmul(torch.rand(3, 2), torch.rand(2, 3))

tensor([[0.3974, 0.2813, 0.2206],
        [0.5106, 0.5658, 0.2291],
        [0.4469, 0.5243, 0.1928]])

In [None]:
# Shapes for matrix multiplication
# The inner dimensions must match: (n, k) @ (k, m) -> (n, m)
tensor_A = torch.tensor([[1, 2],
                         [3, 4],
                         [5, 6]], dtype=torch.float32)

tensor_B = torch.tensor([[7, 10],
                         [8, 11],
                         [9, 12]], dtype=torch.float32)

# Both operands are (3, 2), so the inner dimensions (2 and 3) do not match and
# matmul raises. The error is the point of this cell, so catch and print it
# instead of letting it stop a "Restart & Run All".
try:
    torch.matmul(tensor_A, tensor_B)
except RuntimeError as err:
    print(f"RuntimeError: {err}")

RuntimeError: mat1 and mat2 shapes cannot be multiplied (3x2 and 3x2)

To fix this issue. We take transpose of B.

In [None]:
# View tensor_A and tensor_B
print(tensor_A)
print(tensor_B)

# View tensor_A and tensor_B.T
print(tensor_A)
print(tensor_B.T)

torch.mm(tensor_A, tensor_B.T)

tensor([[1., 2.],
        [3., 4.],
        [5., 6.]])
tensor([[ 7., 10.],
        [ 8., 11.],
        [ 9., 12.]])
tensor([[1., 2.],
        [3., 4.],
        [5., 6.]])
tensor([[ 7.,  8.,  9.],
        [10., 11., 12.]])


tensor([[ 27.,  30.,  33.],
        [ 61.,  68.,  75.],
        [ 95., 106., 117.]])

## Finding the min, max, mean etc. (tensor aggregation)

In [None]:
x = torch.arange(0, 100, 10)

In [None]:
# Min
torch.min(x), x.min()

(tensor(0), tensor(0))

In [None]:
# Max
torch.max(x), x.max()

(tensor(90), tensor(90))

In [None]:
# Find the mean
# x holds integers (Long), and torch.mean() only accepts floating point or
# complex input, so this call raises. The error is the demonstration, so catch
# and print it to keep the notebook runnable from top to bottom.
try:
    torch.mean(x)
except RuntimeError as err:
    print(f"RuntimeError: {err}")

RuntimeError: mean(): could not infer output dtype. Input dtype must be either a floating point or complex dtype. Got: Long

In [None]:
# torch.mean() requires a floating point (or complex) dtype, so cast x to float32 first
torch.mean(x.type(torch.float32)), x.type(torch.float32).mean()

(tensor(45.), tensor(45.))

In [None]:
# Sum
# Compute the sums before x is modified, and keep them for display.
x_sums = (torch.sum(x), x.sum())

# Shift every element by 1 so the argmin/argmax cell that follows works on 1..91.
# NOTE: this rebinds x, so re-running the cell shifts the values again.
x = x + 1

# A cell only displays its last expression; ending on the assignment above
# would show nothing, so finish with the tuple of sums.
x_sums

(tensor(450), tensor(450))

In [None]:
# Find the position (index) of the min/max value
# Min value position
print(x.argmin()) # index of the minimum; output is 0

print(x[x.argmin()]) # value stored at that index; output is 1

# Max value position
print(x.argmax()) # index of the maximum
print(x[x.argmax()]) # value stored at that index

tensor(0)
tensor(1)
tensor(9)
tensor(91)


## Reshaping, squeezing and unsqueezing tensors

* Reshaping - Reshaping an i/p tensor to a defined shape.
* View - Return a view of an i/p tensor of certain shape but keep same memory as original tensor
* Stacking - Combine multiple tensors on top or side by side.
* Squeeze - Removes all dimensions of size `1` from a tensor
* Unsqueeze - Adds a dimension of size `1` to a target tensor at a given position
* Permute - Return a view of the i/p with dimensions permuted (swapped) in a certain way


In [None]:
x = torch.arange(1., 10.)

In [None]:
x_reshaped = x.reshape(3, 3) # other shapes which can work is (1, 9) or (9, 1)
print(x_reshaped)


tensor([[1., 2., 3.],
        [4., 5., 6.],
        [7., 8., 9.]])

In [None]:
# Change the view
z = x.view(3, 3)
z # The view shares memory with x, so changing z changes x and vice versa.

tensor([[1., 2., 3.],
        [4., 5., 6.],
        [7., 8., 9.]])

In [None]:
z[:, 0] = 5
z, x

(tensor([[5., 2., 3.],
         [5., 5., 6.],
         [5., 8., 9.]]),
 tensor([5., 2., 3., 5., 5., 6., 5., 8., 9.]))

In [None]:
# Stacking tensors on top
x_stacked = torch.stack([x, x], dim=0)
x_stacked

tensor([[5., 2., 3., 5., 5., 6., 5., 8., 9.],
        [5., 2., 3., 5., 5., 6., 5., 8., 9.]])

In [None]:
# Squeeze and unsqueeze
x_new = torch.zeros(2, 1, 2, 1, 2)
x_new

tensor([[[[[0., 0.]],

          [[0., 0.]]]],



        [[[[0., 0.]],

          [[0., 0.]]]]])

In [None]:
x_squeezed = x_new.squeeze()
x_squeezed, x_squeezed.shape, x_new

(tensor([[[0., 0.],
          [0., 0.]],
 
         [[0., 0.],
          [0., 0.]]]),
 torch.Size([2, 2, 2]),
 tensor([[[[[0., 0.]],
 
           [[0., 0.]]]],
 
 
 
         [[[[0., 0.]],
 
           [[0., 0.]]]]]))

In [None]:
# Unsqueeze - adds a single dimension to a target tensor at a specific dim
print(f"Previous tensor: {x_squeezed}\n")
print(f"Previous Shape: {x_squeezed.shape}\n")
x_unsqueezed = x_squeezed.unsqueeze(dim=1)

print(f"New Tensor: {x_unsqueezed}\n")
print(f"New shape: {x_unsqueezed.shape}")

Previous tensor: tensor([[[0., 0.],
         [0., 0.]],

        [[0., 0.],
         [0., 0.]]])

Previous Shape: torch.Size([2, 2, 2])

New Tensor: tensor([[[[0., 0.],
          [0., 0.]]],


        [[[0., 0.],
          [0., 0.]]]])

New shape: torch.Size([2, 1, 2, 2])


In [None]:
x_unsqueezed = x_squeezed.unsqueeze(dim=2)

print(f"New Tensor: {x_unsqueezed}\n")
print(f"New shape: {x_unsqueezed.shape}")

New Tensor: tensor([[[[0., 0.]],

         [[0., 0.]]],


        [[[0., 0.]],

         [[0., 0.]]]])

New shape: torch.Size([2, 2, 1, 2])


In [None]:
# Permute - rearranges the dimensions of a target tensor in a specified order
x_original = torch.rand((224, 224, 3))
print(x_original.shape)
# Permute the original tensor to rearrange the axes
x_permuted = x_original.permute(2, 0, 1) # old axes move 0->1, 1->2, 2->0
print(x_permuted.shape) # permute returns a view, so any change to x_original is reflected in x_permuted

torch.Size([224, 224, 3])
torch.Size([3, 224, 224])


In [None]:
x_original[0, 0, 0] = 184981
x_original[0, 0, 0], x_permuted[0, 0, 0]

(tensor(184981.), tensor(184981.))

## Indexing
indexing with pytorch is very similar to numpy/pandas

In [None]:
x = torch.arange(1, 10).reshape(1, 3, 3)
x

tensor([[[1, 2, 3],
         [4, 5, 6],
         [7, 8, 9]]])

In [None]:
x[0]

tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])

In [None]:
x[0][1], x[0, 1] # Same

(tensor([4, 5, 6]), tensor([4, 5, 6]))

In [None]:
x[0, 1, 2] # should return 6

tensor(6)

In [None]:
# We can use ":" like in pandas/numpy to select "all" of a target dimension
x[:, 2]

tensor([[7, 8, 9]])

In [None]:
x[:, :, 1] # returns the column at index 1 (i.e. the second column, since indexing starts at 0)

tensor([[2, 5, 8]])

## Pytorch and numpy

Pytorch has functionality to interact with numpy

* NumPy array -> PyTorch tensor -> `torch.from_numpy(ndarray)`
* PyTorch tensor -> NumPy array -> `torch.Tensor.numpy()`

In [1]:
# Numpy array to tensor
import torch
import numpy as np

array = np.arange(1.0, 8.0)
tensor = torch.from_numpy(array)
array, tensor # the reason tensor is float64 even though its default is float32 is because numpy's default is float64

(array([1., 2., 3., 4., 5., 6., 7.]),
 tensor([1., 2., 3., 4., 5., 6., 7.], dtype=torch.float64))

In [2]:
array.dtype # When converting numpy to pytorch, pytorch reflects numpy's default dtype unless specified otherwise

dtype('float64')

In [3]:
# Changing the value of array.
# `array + 1` builds a NEW array and rebinds the name, so `tensor` keeps the old values.
# Note: torch.from_numpy() shares memory with the source array, so an in-place
# update (e.g. `array += 1`) would be visible through `tensor` as well.
array = array + 1
array, tensor # tensor is unchanged here only because `array` was rebound, not modified in place

(array([2., 3., 4., 5., 6., 7., 8.]),
 tensor([1., 2., 3., 4., 5., 6., 7.], dtype=torch.float64))

In [4]:
# Tensor to numpy
tensor = torch.ones(7)
numpy_tensor = tensor.numpy()
tensor, numpy_tensor

(tensor([1., 1., 1., 1., 1., 1., 1.]),
 array([1., 1., 1., 1., 1., 1., 1.], dtype=float32))

In [6]:
# Change the tensor.
# `tensor + 1` creates a new tensor and rebinds the name, so numpy_tensor (made from
# the previous tensor) keeps its values. Each re-run of this cell adds 1 again.
tensor = tensor + 1
tensor, numpy_tensor

(tensor([3., 3., 3., 3., 3., 3., 3.]),
 array([1., 1., 1., 1., 1., 1., 1.], dtype=float32))

## Reproducibility (trying to take random out of random)

In short how a NN learns:

`start with random numbers -> tensor operations -> update random numbers to try and make them better representations of the data -> again -> again -> again...`

To reduce the randomness we use **random seed**

In [9]:
tensor_b = torch.rand(3, 4)
tensor_a = torch.rand(3, 4)
print(tensor_a, "\n", tensor_b)
print(tensor_a == tensor_b)

tensor([[0.4204, 0.1041, 0.9439, 0.8118],
        [0.2265, 0.5741, 0.7046, 0.2884],
        [0.5061, 0.1956, 0.7592, 0.7585]]) 
 tensor([[0.5166, 0.2316, 0.9502, 0.9922],
        [0.6637, 0.1199, 0.0091, 0.2766],
        [0.3154, 0.1582, 0.6675, 0.4072]])
tensor([[False, False, False, False],
        [False, False, False, False],
        [False, False, False, False]])


In [13]:
def _seeded_rand(seed, *size):
    """Reset torch's global RNG to `seed`, then draw a uniform random tensor of `size`."""
    torch.manual_seed(seed)
    return torch.rand(*size)


# Re-seeding immediately before each draw restarts the generator from the same
# state, so both tensors receive exactly the same "random" values.
tensor_c = _seeded_rand(42, 3, 4)
tensor_d = _seeded_rand(42, 3, 4)

print(tensor_c, "\n", tensor_d)
print(tensor_c == tensor_d)

tensor([[0.8823, 0.9150, 0.3829, 0.9593],
        [0.3904, 0.6009, 0.2566, 0.7936],
        [0.9408, 0.1332, 0.9346, 0.5936]]) 
 tensor([[0.8823, 0.9150, 0.3829, 0.9593],
        [0.3904, 0.6009, 0.2566, 0.7936],
        [0.9408, 0.1332, 0.9346, 0.5936]])
tensor([[True, True, True, True],
        [True, True, True, True],
        [True, True, True, True]])


## Running tensors and PyTorch objects on GPUs - for faster computations

GPUs - faster computation, thanks to CUDA + Nvidia hardware + PyTorch working behind the scenes

### 1. Getting a GPU

1. Use google colab
2. Use your own GPU
3. Use cloud computing - GCP, AWS, Azure

Options 2 and 3 require a little bit of setup for CUDA; see the PyTorch documentation.

In [1]:
!nvidia-smi

Mon Jun 16 23:27:11 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla T4                       Off |   00000000:00:04.0 Off |                    0 |
| N/A   37C    P8             11W /   70W |       0MiB /  15360MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

### 2. Check GPU access

In [2]:
# Check for GPU access
import torch
torch.cuda.is_available()

True

In [3]:
# Setup device agnostic code
device = "cuda" if torch.cuda.is_available() else "cpu"
device

'cuda'

In [4]:
# Count number of devices
torch.cuda.device_count()

1

### 3. Putting tensors and models on GPU

The reason we want our tensors/models on the GPU is because GPUs result in faster computations

In [5]:
tensor = torch.tensor([1, 2, 3])

print(tensor, tensor.device)

tensor([1, 2, 3]) cpu


In [6]:
# moving tensor to GPU
tensor_on_gpu = tensor.to(device)
tensor_on_gpu

tensor([1, 2, 3], device='cuda:0')

### 4. Moving tensors back to CPU

In [7]:
# If a tensor is on the GPU it can't be converted to NumPy directly.
# The error is the demonstration, so catch and print it rather than letting it
# stop a "Restart & Run All".
try:
    tensor_on_gpu.numpy()
except TypeError as err:
    print(f"TypeError: {err}")

TypeError: can't convert cuda:0 device type tensor to numpy. Use Tensor.cpu() to copy the tensor to host memory first.

In [9]:
# To fix the GPU issue with NumPy, first copy the tensor to the CPU
tensor_back_on_cpu = tensor_on_gpu.cpu().numpy()
tensor_back_on_cpu

array([1, 2, 3])