In [152]:
import torch
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
print(f"torch: v{torch.__version__}\npandas: v{pd.__version__}\nnumpy: v{np.__version__}")

torch: v2.2.2
pandas: v2.2.1
numpy: v1.26.4


## Inroduction to Tensors

### Creating Tensors

https://pytorch.org/docs/stable/tensors.html

![Scalar|Vector|Matrix|Tensor](images/svmt.png)

In [108]:
# Scalar - a single number
scalar = torch.tensor(7)
scalar

tensor(7)

In [109]:
scalar.ndim

0

In [110]:
scalar.item()

7

In [111]:
# Vector - a number with direction (e.g. wind speed with direction) but can also have many other numbers

vector = torch.tensor([7,7])
vector

tensor([7, 7])

In [112]:
vector.ndim

1

In [113]:
vector.shape

torch.Size([2])

In [114]:
# MATRIX - a 2-dimensional array of numbers
MATRIX = torch.tensor([
    [7,8],
    [9,10]
])
MATRIX

tensor([[ 7,  8],
        [ 9, 10]])

In [115]:
MATRIX.ndim

2

In [116]:
MATRIX.shape

torch.Size([2, 2])

In [117]:
# TENSOR - an n-dimensional array of numbers
TENSOR = torch.tensor([[
    [1,2,3],
    [4,5,6],
    [7,8,9],
    [11,12,13],
]])
TENSOR

tensor([[[ 1,  2,  3],
         [ 4,  5,  6],
         [ 7,  8,  9],
         [11, 12, 13]]])

In [118]:
TENSOR.ndim

3

In [119]:
TENSOR.shape

torch.Size([1, 4, 3])

# Scalar | Vector | Matrix | Tensor
![Scalar|Vector|Matrix|Tensor](images/00-scalar-vector-matrix-tensor.png)

In [120]:
random_tensor = torch.rand(2, 3, 4)
random_tensor.ndim, random_tensor.shape 

(3, torch.Size([2, 3, 4]))

In [121]:
random_tensor[0]

tensor([[0.3813, 0.4937, 0.7975, 0.6947],
        [0.6742, 0.6710, 0.7174, 0.6410],
        [0.0750, 0.3849, 0.6500, 0.3122]])

In [122]:
range_of_number = torch.arange(0, 10)
range_of_number, range_of_number[1]

(tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), tensor(1))

In [123]:
# Float 32 tensor
float_32_tensor = torch.tensor(
    [3.0, 6.0, 9.0],
    dtype=torch.float32,
    device='cpu',
    requires_grad=False)

(
    float_32_tensor,
    float_32_tensor.dtype,
    float_32_tensor.device,
    float_32_tensor.requires_grad,
    float_32_tensor.shape,
    float_32_tensor.size(),
)

(tensor([3., 6., 9.]),
 torch.float32,
 device(type='cpu'),
 False,
 torch.Size([3]),
 torch.Size([3]))

In [124]:
float_16_tensor = float_32_tensor.type(torch.float16)

(
    float_16_tensor,
    float_16_tensor.dtype,
    float_16_tensor.device,
    float_16_tensor.requires_grad,
    float_16_tensor.shape,
    float_16_tensor.size(),
)

(tensor([3., 6., 9.], dtype=torch.float16),
 torch.float16,
 device(type='cpu'),
 False,
 torch.Size([3]),
 torch.Size([3]))

In [125]:
float_16_tensor * float_32_tensor

tensor([ 9., 36., 81.])

### Manipulation Tensors (tensor operation)

* Addition
* Subtraction
* Division
* Multiplacation (element-wise)
* Matrix Multiplication

#### The main two rules for matrix multiplication to remember are:

1. The inner dimensions must match:
(3, 2) @ (3, 2) won't work
(2, 3) @ (3, 2) will work
(3, 2) @ (2, 3) will work

2. The resulting matrix has the shape of the outer dimensions:
(2, 3) @ (3, 2) -> (2, 2)
(3, 2) @ (2, 3) -> (3, 3)


![Matrix Multiplacation](images/matrix-multiplication.png)

In [126]:
matrix_a = torch.tensor([
    [1, 2, 3],
    [4, 5, 6],
])

matrix_b = torch.tensor([
    [10, 11],
    [20, 21],
    [30, 31],
])

matrix_ab = torch.matmul(matrix_a, matrix_b)
matrix_ab

tensor([[140, 146],
        [320, 335]])

In [127]:
# Shapes need to be in the right way  
tensor_A = torch.tensor([[1, 2],
                         [3, 4],
                         [5, 6]], dtype=torch.float32)

tensor_B = torch.tensor([[7, 10],
                         [8, 11], 
                         [9, 12]], dtype=torch.float32)
# The operation works when tensor_B is transposed
print(f"Original shapes: tensor_A = {tensor_A.shape}, tensor_B = {tensor_B.shape}\n")
print(f"New shapes: tensor_A = {tensor_A.shape} (same as above), tensor_B.T = {tensor_B.T.shape}\n")
print(f"Multiplying: {tensor_A.shape} * {tensor_B.T.shape} <- inner dimensions match\n")
print("Output:\n")
output = torch.matmul(tensor_A, tensor_B.T)
print(output) 
print(f"\nOutput shape: {output.shape}")

Original shapes: tensor_A = torch.Size([3, 2]), tensor_B = torch.Size([3, 2])

New shapes: tensor_A = torch.Size([3, 2]) (same as above), tensor_B.T = torch.Size([2, 3])

Multiplying: torch.Size([3, 2]) * torch.Size([2, 3]) <- inner dimensions match

Output:

tensor([[ 27.,  30.,  33.],
        [ 61.,  68.,  75.],
        [ 95., 106., 117.]])

Output shape: torch.Size([3, 3])


### Reshaping, stacking, squeezing and unsqueezing
Often times you'll want to reshape or change the dimensions of your tensors without actually changing the values inside them.

To do so, some popular methods are:

| Method                        | One-line description                                                                                        |
|-------------------------------|-------------------------------------------------------------------------------------------------------------|
| `torch.reshape(input, shape)` | Reshapes input to shape (if compatible), can also use torch.Tensor.reshape().                               |
| `Tensor.view(shape)`          | Returns a view of the original tensor in a different shape but shares the same data as the original tensor. |
| `torch.stack(tensors, dim=0)` | Concatenates a sequence of tensors along a new dimension (dim), all tensors must be same size.              |
| `torch.squeeze(input)`        | Squeezes input to remove all the dimenions with value 1.                                                    |
| `torch.unsqueeze(input, dim)` | Returns input with a dimension value of 1 added at dim.                                                     |
| `torch.permute(input, dims)`  | Returns a view of the original input with its dimensions permuted (rearranged) to dims.                     |

Why do any of these?

Because deep learning models (neural networks) are all about manipulating tensors in some way. And because of the rules of matrix multiplication, if you've got shape mismatches, you'll run into errors. These methods help you make sure the right elements of your tensors are mixing with the right elements of other tensors.

In [128]:
# Torch Reshape - https://pytorch.org/docs/stable/generated/torch.reshape.html
x = torch.arange(120.)
x, x.shape

(tensor([  0.,   1.,   2.,   3.,   4.,   5.,   6.,   7.,   8.,   9.,  10.,  11.,
          12.,  13.,  14.,  15.,  16.,  17.,  18.,  19.,  20.,  21.,  22.,  23.,
          24.,  25.,  26.,  27.,  28.,  29.,  30.,  31.,  32.,  33.,  34.,  35.,
          36.,  37.,  38.,  39.,  40.,  41.,  42.,  43.,  44.,  45.,  46.,  47.,
          48.,  49.,  50.,  51.,  52.,  53.,  54.,  55.,  56.,  57.,  58.,  59.,
          60.,  61.,  62.,  63.,  64.,  65.,  66.,  67.,  68.,  69.,  70.,  71.,
          72.,  73.,  74.,  75.,  76.,  77.,  78.,  79.,  80.,  81.,  82.,  83.,
          84.,  85.,  86.,  87.,  88.,  89.,  90.,  91.,  92.,  93.,  94.,  95.,
          96.,  97.,  98.,  99., 100., 101., 102., 103., 104., 105., 106., 107.,
         108., 109., 110., 111., 112., 113., 114., 115., 116., 117., 118., 119.]),
 torch.Size([120]))

In [129]:
# Torch Reshape - https://pytorch.org/docs/stable/generated/torch.reshape.html
x_reshaped = torch.reshape(x, (2, 3, 4, 5))
x_reshaped, x_reshaped.shape

(tensor([[[[  0.,   1.,   2.,   3.,   4.],
           [  5.,   6.,   7.,   8.,   9.],
           [ 10.,  11.,  12.,  13.,  14.],
           [ 15.,  16.,  17.,  18.,  19.]],
 
          [[ 20.,  21.,  22.,  23.,  24.],
           [ 25.,  26.,  27.,  28.,  29.],
           [ 30.,  31.,  32.,  33.,  34.],
           [ 35.,  36.,  37.,  38.,  39.]],
 
          [[ 40.,  41.,  42.,  43.,  44.],
           [ 45.,  46.,  47.,  48.,  49.],
           [ 50.,  51.,  52.,  53.,  54.],
           [ 55.,  56.,  57.,  58.,  59.]]],
 
 
         [[[ 60.,  61.,  62.,  63.,  64.],
           [ 65.,  66.,  67.,  68.,  69.],
           [ 70.,  71.,  72.,  73.,  74.],
           [ 75.,  76.,  77.,  78.,  79.]],
 
          [[ 80.,  81.,  82.,  83.,  84.],
           [ 85.,  86.,  87.,  88.,  89.],
           [ 90.,  91.,  92.,  93.,  94.],
           [ 95.,  96.,  97.,  98.,  99.]],
 
          [[100., 101., 102., 103., 104.],
           [105., 106., 107., 108., 109.],
           [110., 111., 112., 113., 

In [130]:
# https://pytorch.org/docs/stable/generated/torch.Tensor.view.html
x_view = x_reshaped.view(8, 15)
x_view, x_view.shape

(tensor([[  0.,   1.,   2.,   3.,   4.,   5.,   6.,   7.,   8.,   9.,  10.,  11.,
           12.,  13.,  14.],
         [ 15.,  16.,  17.,  18.,  19.,  20.,  21.,  22.,  23.,  24.,  25.,  26.,
           27.,  28.,  29.],
         [ 30.,  31.,  32.,  33.,  34.,  35.,  36.,  37.,  38.,  39.,  40.,  41.,
           42.,  43.,  44.],
         [ 45.,  46.,  47.,  48.,  49.,  50.,  51.,  52.,  53.,  54.,  55.,  56.,
           57.,  58.,  59.],
         [ 60.,  61.,  62.,  63.,  64.,  65.,  66.,  67.,  68.,  69.,  70.,  71.,
           72.,  73.,  74.],
         [ 75.,  76.,  77.,  78.,  79.,  80.,  81.,  82.,  83.,  84.,  85.,  86.,
           87.,  88.,  89.],
         [ 90.,  91.,  92.,  93.,  94.,  95.,  96.,  97.,  98.,  99., 100., 101.,
          102., 103., 104.],
         [105., 106., 107., 108., 109., 110., 111., 112., 113., 114., 115., 116.,
          117., 118., 119.]]),
 torch.Size([8, 15]))

In [131]:
# View of a tensor shares the same memory as the original tensor
x_view[7][14] = 420
x_view, x_reshaped

(tensor([[  0.,   1.,   2.,   3.,   4.,   5.,   6.,   7.,   8.,   9.,  10.,  11.,
           12.,  13.,  14.],
         [ 15.,  16.,  17.,  18.,  19.,  20.,  21.,  22.,  23.,  24.,  25.,  26.,
           27.,  28.,  29.],
         [ 30.,  31.,  32.,  33.,  34.,  35.,  36.,  37.,  38.,  39.,  40.,  41.,
           42.,  43.,  44.],
         [ 45.,  46.,  47.,  48.,  49.,  50.,  51.,  52.,  53.,  54.,  55.,  56.,
           57.,  58.,  59.],
         [ 60.,  61.,  62.,  63.,  64.,  65.,  66.,  67.,  68.,  69.,  70.,  71.,
           72.,  73.,  74.],
         [ 75.,  76.,  77.,  78.,  79.,  80.,  81.,  82.,  83.,  84.,  85.,  86.,
           87.,  88.,  89.],
         [ 90.,  91.,  92.,  93.,  94.,  95.,  96.,  97.,  98.,  99., 100., 101.,
          102., 103., 104.],
         [105., 106., 107., 108., 109., 110., 111., 112., 113., 114., 115., 116.,
          117., 118., 420.]]),
 tensor([[[[  0.,   1.,   2.,   3.,   4.],
           [  5.,   6.,   7.,   8.,   9.],
           [ 10.,  11., 

In [132]:
# Torch Stack - Concatenates a sequence of tensors along a new dimension
s = torch.randn(3, 4)
s, s.shape

(tensor([[-1.1884, -0.4684,  0.1036,  0.0903],
         [ 0.4505,  0.4147,  0.2110,  0.7234],
         [-0.7373, -0.4509, -1.3649,  0.0669]]),
 torch.Size([3, 4]))

In [133]:
s1 = torch.stack((s,), dim=2)
s1, s1.shape

(tensor([[[-1.1884],
          [-0.4684],
          [ 0.1036],
          [ 0.0903]],
 
         [[ 0.4505],
          [ 0.4147],
          [ 0.2110],
          [ 0.7234]],
 
         [[-0.7373],
          [-0.4509],
          [-1.3649],
          [ 0.0669]]]),
 torch.Size([3, 4, 1]))

In [134]:
# torch.squeeze - removes all single dimensions from a target tensor
s2 = s1.squeeze()
s2, s2.shape

(tensor([[-1.1884, -0.4684,  0.1036,  0.0903],
         [ 0.4505,  0.4147,  0.2110,  0.7234],
         [-0.7373, -0.4509, -1.3649,  0.0669]]),
 torch.Size([3, 4]))

In [135]:
# Valid dim is 0 <= dim < row
s3 = torch.randn(2, 3, 4)
s3 = s3.unsqueeze(dim=3)
s3, s3.shape

(tensor([[[[ 0.6464],
           [ 1.0264],
           [ 0.4277],
           [ 2.5017]],
 
          [[-1.6657],
           [ 1.5276],
           [ 0.3070],
           [-1.2935]],
 
          [[ 0.5641],
           [-0.6883],
           [ 1.5459],
           [ 1.3052]]],
 
 
         [[[-1.9791],
           [-0.3833],
           [-0.4383],
           [-1.2587]],
 
          [[ 1.2984],
           [-0.3189],
           [-0.6529],
           [ 0.1451]],
 
          [[ 0.0842],
           [ 1.0583],
           [ 0.3804],
           [-0.1970]]]]),
 torch.Size([2, 3, 4, 1]))

In [136]:
# torch.permute - rearrange the diamendions of a target tensor in a specified order
x_original = torch.rand(size=(224, 224, 3)) # height px, width px, color - RGB

# Permute the original tensor to rearrange the axis (or dim) order
# i.e shift 2nd index at first
x_permuted = x_original.permute(2, 0, 1) # color - RGB, height px, width px
print(f"Previous Shape: {x_original.shape}")
print(f"New Shape: {x_permuted.shape}")

Previous Shape: torch.Size([224, 224, 3])
New Shape: torch.Size([3, 224, 224])


In [137]:
# As permute is another form of view
x_original[0, 0, 0] = 123
x_permuted

tensor([[[1.2300e+02, 8.9718e-01, 8.8294e-01,  ..., 4.6177e-01,
          5.6759e-02, 6.1637e-01],
         [3.1799e-01, 5.8563e-01, 1.5749e-01,  ..., 3.1214e-02,
          4.3181e-01, 3.1763e-01],
         [9.8469e-02, 2.6017e-01, 2.1782e-01,  ..., 2.9206e-01,
          5.7326e-01, 3.1044e-01],
         ...,
         [1.8289e-01, 5.6318e-01, 4.3357e-01,  ..., 6.1456e-01,
          5.5831e-01, 2.8226e-01],
         [4.6256e-01, 2.0003e-01, 8.9312e-01,  ..., 4.0309e-01,
          7.9244e-01, 6.5863e-01],
         [2.0825e-01, 8.6543e-01, 6.1006e-01,  ..., 4.4326e-01,
          7.2815e-01, 4.5644e-01]],

        [[3.5764e-01, 7.4956e-01, 9.7185e-01,  ..., 3.9514e-01,
          7.5380e-02, 5.1866e-02],
         [7.9816e-02, 2.8220e-01, 3.6376e-01,  ..., 8.8540e-01,
          6.9155e-01, 4.4812e-02],
         [1.5864e-01, 9.9441e-01, 6.6870e-03,  ..., 3.4789e-01,
          7.1704e-01, 1.6931e-01],
         ...,
         [4.1672e-01, 8.5981e-01, 2.2194e-01,  ..., 7.2428e-01,
          9.368

# PyTorch tensors & NumPy
Since NumPy is a popular Python numerical computing library, PyTorch has functionality to interact with it nicely.

The two main methods you'll want to use for NumPy to PyTorch (and back again) are:

* torch.from_numpy(ndarray) - NumPy array -> PyTorch tensor.
* torch.Tensor.numpy() - PyTorch tensor -> NumPy array.

In [143]:
# Convert Numpy Array to Tensor
np_array = np.arange(1, 13)
tensor_array = torch.from_numpy(np_array).type(torch.float32)
tensor_array, tensor_array.dtype

(tensor([ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10., 11., 12.]),
 torch.float32)

Note: By default, NumPy arrays are created with the datatype float64 and if you convert it to a PyTorch tensor, it'll keep the same datatype (as above).

However, many PyTorch calculations default to using float32.

So if you want to convert your NumPy array (float64) -> PyTorch tensor (float64) -> PyTorch tensor (float32), you can use tensor = torch.from_numpy(array).type(torch.float32).

In [148]:
# Convert Tensor to NumPy
tensor_array_2 = torch.randn(5, 3).type(torch.float16)
np_array_2 = tensor_array_2.numpy()
np_array_2

array([[ 1.208 , -0.853 ,  2.361 ],
       [ 0.4146,  1.675 , -1.122 ],
       [ 0.2866, -0.4272,  1.474 ],
       [-0.4614, -1.48  ,  1.289 ],
       [-0.1599,  0.555 , -0.731 ]], dtype=float16)

# Reproducibility (trying to take the random out of random)

Reference Link - [https://pytorch.org/docs/stable/notes/randomness.html](https://pytorch.org/docs/stable/notes/randomness.html)

Random Seed [https://en.wikipedia.org/wiki/Random_seed](https://en.wikipedia.org/wiki/Random_seed)

In [149]:
# Create two random tensors
random_tensor_A = torch.rand(3, 4)
random_tensor_B = torch.rand(3, 4)

print(f"Tensor A:\n{random_tensor_A}\n")
print(f"Tensor B:\n{random_tensor_B}\n")
print(f"Does Tensor A equal Tensor B? (anywhere)")
random_tensor_A == random_tensor_B

Tensor A:
tensor([[0.5980, 0.2668, 0.2344, 0.8088],
        [0.5446, 0.2481, 0.3540, 0.0569],
        [0.3095, 0.4791, 0.6095, 0.1208]])

Tensor B:
tensor([[0.8841, 0.0487, 0.4038, 0.3893],
        [0.5358, 0.7485, 0.3455, 0.9184],
        [0.2425, 0.9557, 0.4760, 0.6909]])

Does Tensor A equal Tensor B? (anywhere)


tensor([[False, False, False, False],
        [False, False, False, False],
        [False, False, False, False]])

In [151]:
# Set the random seed
RANDOM_SEED=42 # try changing this to different values and see what happens to the numbers below
torch.manual_seed(seed=RANDOM_SEED) 
random_tensor_C = torch.rand(3, 4)

# Have to reset the seed every time a new rand() is called 
# Without this, tensor_D would be different to tensor_C 
torch.manual_seed(seed=RANDOM_SEED) # try commenting this line out and seeing what happens
random_tensor_D = torch.rand(3, 4)

print(f"Tensor C:\n{random_tensor_C}\n")
print(f"Tensor D:\n{random_tensor_D}\n")
print(f"Does Tensor C equal Tensor D? (anywhere)")
random_tensor_C == random_tensor_D

Tensor C:
tensor([[0.8823, 0.9150, 0.3829, 0.9593],
        [0.3904, 0.6009, 0.2566, 0.7936],
        [0.9408, 0.1332, 0.9346, 0.5936]])

Tensor D:
tensor([[0.8823, 0.9150, 0.3829, 0.9593],
        [0.3904, 0.6009, 0.2566, 0.7936],
        [0.9408, 0.1332, 0.9346, 0.5936]])

Does Tensor C equal Tensor D? (anywhere)


tensor([[True, True, True, True],
        [True, True, True, True],
        [True, True, True, True]])

# Accessing a GPU

In [153]:
# verify mps support - https://developer.apple.com/metal/pytorch/
if torch.backends.mps.is_available():
    mps_device = torch.device("mps")
    x = torch.ones(1, device=mps_device)
    print (x)
else:
    print ("MPS device not found.")

tensor([1.], device='mps:0')


In [154]:
torch.cuda.is_available()

False

In [164]:
# toy example mps
import time

device = "mps"

torch.manual_seed(1234)
TENSOR_A_CPU = torch.rand(5000, 5000)
TENSOR_B_CPU = torch.rand(5000, 5000)

torch.manual_seed(1234)
TENSOR_A_MPS = torch.rand(5000, 5000).to(device)
TENSOR_B_MPS = torch.rand(5000, 5000).to(device)

# Warm-up
for _ in range(100):
    torch.matmul(torch.rand(500,500).to(device), torch.rand(500,500).to(device))
    
start_time = time.time()
torch.matmul(TENSOR_A_CPU, TENSOR_B_CPU)
print("CPU : --- %s seconds ---" % (time.time() - start_time))

start_time = time.time()
torch.matmul(TENSOR_A_MPS, TENSOR_B_MPS)
print("MPS : --- %s seconds ---" % (time.time() - start_time))

CPU : --- 0.16501903533935547 seconds ---
MPS : --- 0.00024199485778808594 seconds ---
