In [2]:
import torch
# Show which PyTorch build this notebook was run with.
print(torch.__version__)

2.6.0.dev20241126


In [3]:
# A 3-dimensional tensor: a single 3x3 matrix wrapped in an outer dimension of size 1.
TENSOR = torch.tensor(
    [
        [
            [1, 2, 3],
            [3, 6, 9],
            [2, 4, 5],
        ]
    ]
)
TENSOR

tensor([[[1, 2, 3],
         [3, 6, 9],
         [2, 4, 5]]])

In [4]:
# Number of dimensions, then the size of each dimension, one per line.
print(TENSOR.ndim, TENSOR.shape, sep="\n")

3
torch.Size([1, 3, 3])


In [5]:
# A 4-dimensional tensor: one 4x3 matrix wrapped in two outer dimensions of size 1.
TENSOR2 = torch.tensor(
    [
        [
            [
                [1, 2, 3],
                [4, 3, 4],
                [5, 6, 8],
                [34, 32, 41],
            ]
        ]
    ]
)

TENSOR2

tensor([[[[ 1,  2,  3],
          [ 4,  3,  4],
          [ 5,  6,  8],
          [34, 32, 41]]]])

In [6]:
# Four levels of nested brackets in the definition -> four dimensions.
TENSOR2.ndim

4

## Random Tensors

`Start with random numbers -> look at the data -> update random numbers -> look at the data -> update the random numbers`

Random tensors are important because many neural networks learn by starting with tensors full of random numbers and then adjusting those numbers to better represent the data.

In [10]:
# Random tensor of shape (2, 3, 4): two blocks of 3 rows x 4 columns.
# Other shapes worth trying here: torch.rand(3, 4) or torch.rand(10, 10, 10).
random_tensor = torch.rand(size=(2, 3, 4))
print(random_tensor)

tensor([[[0.0307, 0.7272, 0.5098, 0.9755],
         [0.4789, 0.2242, 0.1725, 0.5319],
         [0.5684, 0.9726, 0.5372, 0.7388]],

        [[0.3935, 0.2984, 0.0880, 0.9692],
         [0.4902, 0.1048, 0.2991, 0.0354],
         [0.1259, 0.7187, 0.9515, 0.2078]]])


In [12]:
# Random tensor with a shape similar to an image tensor:
# (height, width, colour channels), the channels being (R, G, B).
random_image_size_tensor = torch.rand(224, 224, 3)
print(random_image_size_tensor.ndim, random_image_size_tensor.shape)
print(random_image_size_tensor)


3 torch.Size([224, 224, 3])
tensor([[[0.9164, 0.6061, 0.8044],
         [0.7019, 0.4983, 0.0997],
         [0.0102, 0.3503, 0.3070],
         ...,
         [0.9919, 0.3353, 0.7219],
         [0.5263, 0.2229, 0.1095],
         [0.8955, 0.7359, 0.6346]],

        [[0.9739, 0.2160, 0.4131],
         [0.4625, 0.2808, 0.8474],
         [0.2228, 0.5357, 0.0609],
         ...,
         [0.4825, 0.0922, 0.5089],
         [0.9462, 0.5617, 0.1893],
         [0.7114, 0.8838, 0.7267]],

        [[0.3903, 0.1281, 0.8544],
         [0.5915, 0.5107, 0.5544],
         [0.2310, 0.6301, 0.9406],
         ...,
         [0.3744, 0.4586, 0.9145],
         [0.7025, 0.1647, 0.4733],
         [0.7352, 0.6894, 0.2312]],

        ...,

        [[0.9406, 0.1016, 0.5728],
         [0.7158, 0.1552, 0.7612],
         [0.1611, 0.1290, 0.3042],
         ...,
         [0.8770, 0.8570, 0.9029],
         [0.4407, 0.0389, 0.5584],
         [0.5627, 0.9752, 0.4230]],

        [[0.4264, 0.8269, 0.3673],
         [0.1533, 0

## Zeros and ones

In [19]:
# A 3x4 tensor filled with zeros; with no dtype given it is float32 (see output).
zeros = torch.zeros(3, 4)
zeros, zeros.dtype

(tensor([[0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.]]),
 torch.float32)

In [20]:
# A 3x4 tensor filled with integer ones (64-bit integers, as the output shows).
ones = torch.ones(3, 4, dtype=torch.int64)
ones, ones.dtype

(tensor([[1, 1, 1, 1],
         [1, 1, 1, 1],
         [1, 1, 1, 1]]),
 torch.int64)

## Range

`torch.arange(start, end, step)` can help in creating a range of numbers

In [21]:
# `end` is exclusive, so this produces 0..9 (ten values), not 0..10.
# NOTE(review): the "_deprecated" suffix presumably dates from an earlier torch.range
# version of this cell; torch.arange is the current API - consider renaming.
zero_to_ten_deprecated = torch.arange(start=0, end=10, step=1)
zero_to_ten_deprecated


tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [22]:
# Zeros laid out like the range tensor above: same length and, as the output shows,
# the same integer element type.
ten_zeros = torch.zeros_like(zero_to_ten_deprecated)
ten_zeros

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

## Tensor datatypes

**Note:** A wrong tensor datatype is one of the 3 big sources of errors you'll run into with PyTorch and deep learning:
1. Tensors not right datatype
2. Tensors not right shape
3. Tensors not on the right device

In [23]:
# Float 32 tensor, spelling out the three most important torch.tensor() keyword arguments.
float_32_tensor = torch.tensor(
    [3.0, 6.0, 9.0],
    # dtype: what datatype the tensor holds (e.g. float32, or float16 "half precision").
    # Why use anything other than the default float32? To trade detail for cost:
    # float16 saves memory and computes faster but stores less detail per number,
    # while float64 stores more detail but slows computation down.
    dtype=torch.float32,
    # device: what device the tensor is on; None means the current default device.
    device=None,
    # requires_grad: whether or not to track gradients for operations on this tensor.
    requires_grad=False,
)
float_32_tensor

tensor([3., 6., 9.])

In [24]:
# Half-precision tensor; integer inputs are stored as float16 values.
float_16_tensor = torch.tensor([3, 6, 33], dtype=torch.float16)
float_16_tensor

tensor([ 3.,  6., 33.], dtype=torch.float16)

In [25]:
# Multiplying float16 by float32 works: the result prints without a dtype tag,
# the same way the float32 tensor above does.
float_16_tensor * float_32_tensor

tensor([  9.,  36., 297.])

In [26]:
# 32-bit integer tensor (the default for Python ints would be int64).
int32_tensor = torch.tensor(
    [4, 5, 6],
    dtype=torch.int32,
)
int32_tensor

tensor([4, 5, 6], dtype=torch.int32)

In [27]:
# Multiplying float32 by int32 also works; the result holds floating-point values.
float_32_tensor * int32_tensor

tensor([12., 30., 54.])

### Getting information from tensors

1. Tensors not right datatype - to get datatype from a tensor, use `tensor.dtype`
2. Tensors not right shape - to get shape from a tensor, use `tensor.shape`
3. Tensors not on the right device - to get device from a tensor, use `tensor.device`

In [28]:
# Random half-precision 4x4 tensor. Apple's "mps" backend is used when it is available;
# otherwise the tensor is created on the CPU so the cell still runs on other machines.
someTensor = torch.rand(
    4, 4,
    dtype=torch.half,
    device="mps" if torch.backends.mps.is_available() else "cpu",
)
someTensor

tensor([[0.2422, 0.4258, 0.5635, 0.6846],
        [0.2881, 0.8203, 0.1514, 0.9375],
        [0.0820, 0.7773, 0.3877, 0.7432],
        [0.1904, 0.5410, 0.4639, 0.6152]], device='mps:0', dtype=torch.float16)

In [29]:
# The three attributes to check first when debugging: dtype, shape and device.
print(
    f"datatype:  {someTensor.dtype}",
    f"Shape: {someTensor.shape}",
    f"Device tensor on: {someTensor.device}",
    sep="\n",
)

datatype:  torch.float16
Shape: torch.Size([4, 4])
Device tensor on: mps:0


### Manipulating Tensors (tensor operations)

Tensor operations include:
* Addition
* Subtraction
* Division
* Multiplication (element-wise)
* Matrix Multiplication

In [30]:
# Base tensor for the arithmetic examples; adding a scalar applies it to every element
# and leaves tensor_1 itself unchanged.
tensor_1 = torch.tensor([2, 3, 4])
tensor_1, torch.add(tensor_1, 10)

(tensor([2, 3, 4]), tensor([12, 13, 14]))

In [31]:
# Multiply every element by 10.
tensor_1 * 10

tensor([20, 30, 40])

In [32]:
# Subtract 10 from every element.
tensor_1 - 10

tensor([-8, -7, -6])

In [33]:
# Function form of `tensor_1 * 10`; the output matches the operator version above.
torch.mul(tensor_1, 10)

tensor([20, 30, 40])

### Matrix Multiplication (dot product)

In [40]:
# `*` between two tensors is element-wise (2*2, 3*3, 4*4); tensor_1 itself is unchanged.
tensor_1, tensor_1 * tensor_1, tensor_1

(tensor([2, 3, 4]), tensor([ 4,  9, 16]), tensor([2, 3, 4]))

In [54]:
# Matrix multiplication: for two 1-D tensors this is the dot product
# (2*2 + 3*3 + 4*4 = 29).
tensor_1.matmul(tensor_1)

tensor(29)

In [55]:
%%time
# Dot product computed by hand: accumulate the element-wise products one at a time.
values = 0
for element in tensor_1:
    values = values + element * element
print(values)

tensor(29)
CPU times: user 571 µs, sys: 610 µs, total: 1.18 ms
Wall time: 745 µs


In [56]:
%%time
# Same dot product via torch.matmul, timed for comparison with the manual loop.
torch.matmul(tensor_1, tensor_1)

CPU times: user 82 µs, sys: 24 µs, total: 106 µs
Wall time: 107 µs


tensor(29)

In [57]:
%%time
# `@` is the operator spelling of matrix multiplication; it gives the same result.
tensor_1 @ tensor_1

CPU times: user 117 µs, sys: 24 µs, total: 141 µs
Wall time: 141 µs


tensor(29)

### Shape errors are one of the most common errors

1. The **inner dimensions** must match:
* `[3, 2] @ [3, 2]` won't work
* `[2, 3] @ [3 ,2]` will work
* `[3, 2] @ [2, 3]` will work
2. The **outer dimensions** will be the resulting matrix:
* `[2, 3] @ [3, 2] -> [2, 2]`

In [58]:
%%time
# (3, 2) @ (2, 3): inner dimensions match; this result is computed but not displayed.
torch.rand(3, 2) @ torch.rand(2, 3)
# (10, 3) @ (3, 10): the outer dimensions give the result shape (10, 10).
(torch.rand(10, 3) @ torch.rand(3, 10)).shape

CPU times: user 674 µs, sys: 763 µs, total: 1.44 ms
Wall time: 1.01 ms


torch.Size([10, 10])

In [59]:
# Two 3x2 matrices used to demonstrate shape rules for matrix multiplication.
tensor_a = torch.tensor(
    [
        [1, 2],
        [3, 4],
        [5, 6],
    ]
)

tensor_b = torch.tensor(
    [
        [7, 10],
        [8, 11],
        [9, 12],
    ]
)

In [60]:
# Intentional failure: both operands are 3x2, so the inner dimensions (2 and 3) differ.
# The error is caught and printed so the demonstration does not abort "Run All".
try:
    torch.matmul(tensor_a, tensor_b)
except RuntimeError as shape_error:
    print(f"RuntimeError: {shape_error}")

RuntimeError: mat1 and mat2 shapes cannot be multiplied (3x2 and 3x2)

In [61]:
# Both tensors were built from Python ints, so both are int64: the failure above is a
# shape problem, not a dtype problem.
tensor_a.dtype, tensor_b.dtype

(torch.int64, torch.int64)

To fix tensor shape issues, we can manipulate the shape of one of our tensors using a **transpose**.

A **transpose** switches the axes or dimensions of a given tensor.

In [62]:
# Transposing tensor_b swaps its two axes: (3, 2) becomes (2, 3).
tensor_b.T, tensor_b.T.shape

(tensor([[ 7,  8,  9],
         [10, 11, 12]]),
 torch.Size([2, 3]))

In [63]:
# (3, 2) @ (2, 3): the inner dimensions now match, and the result is (3, 3).
torch.matmul(tensor_a, tensor_b.T)

tensor([[ 27,  30,  33],
        [ 61,  68,  75],
        [ 95, 106, 117]])

### Finding the min, max, mean, sum etc (tensor aggregation)

In [64]:
# Multiples of 10 from 0 up to (but not including) 150 - fifteen integer values.
X = torch.arange(start=0, end=150, step=10)
X

tensor([  0,  10,  20,  30,  40,  50,  60,  70,  80,  90, 100, 110, 120, 130,
        140])

In [65]:
# finding the min - method form and function form give the same result
X.min(), torch.min(X)

(tensor(0), tensor(0))

In [66]:
# finding the max - method form and function form give the same result
X.max(), torch.max(X)

(tensor(140), tensor(140))

In [67]:
# finding the mean

# NOTE: torch.mean() requires a floating-point or complex dtype (float32 is one option,
# not the only one). X holds integers, so it is converted before averaging.

# torch.mean(X, dtype=torch.float32)
X.type(torch.float32).mean(), torch.mean(X.type(torch.float32))

# both expressions in the tuple above give the same result

(tensor(70.), tensor(70.))

In [68]:
# find the sum - unlike the mean, this works directly on the integer tensor

torch.sum(X), X.sum()

(tensor(1050), tensor(1050))

### Argmin, argmax

In [69]:
# finding the positional min and max - argmin and argmax return the INDEX of the
# smallest / largest element, not the element itself

X.argmin()

tensor(0)

In [70]:
# Index of the largest element, and the element stored at that index. Indexing with
# the argmax result (rather than a hard-coded 14) stays correct if X is changed.
X.argmax(), X[X.argmax()]

(tensor(14), tensor(140))

### Reshaping, viewing and stacking, squeezing and unsqueezing tensors

* Reshaping - reshapes an input tensor to a defined shape
* View - Return a view of an input tensor of certain shape but keep the same memory as the original tensor
* stacking - combine multiple tensors on top of each other (vstack) or side by side (hstack)
* Squeeze - removes all '1' dimensions from a tensor
* Unsqueeze - adds a '1' dimension to a target tensor
* Permute - Return a view of the input with dimensions permuted (swapped) in a certain way

In [41]:
import torch
# Float range 1.0 .. 9.0 (end is exclusive); float arguments produce a float32 tensor.
x = torch.arange(start=1.0, end=10.0)
x, x.shape, x.dtype

(tensor([1., 2., 3., 4., 5., 6., 7., 8., 9.]), torch.Size([9]), torch.float32)

In [42]:
# Add an extra leading dimension: (9,) -> (1, 9).
# The later cells show x_reshaped picking up edits made to x, i.e. in this case the
# reshaped tensor shares memory with x instead of holding a copy.
x_reshaped = torch.reshape(x, (1, 9))
x_reshaped, x_reshaped.size()

(tensor([[1., 2., 3., 4., 5., 6., 7., 8., 9.]]), torch.Size([1, 9]))

In [43]:
# Change the view: z presents x's data with shape (1, 9)
z = x.view(1, 9)
z, z.shape

(tensor([[1., 2., 3., 4., 5., 6., 7., 8., 9.]]), torch.Size([1, 9]))

In [44]:
# NOTE: changing z changes x, because a view of a tensor shares the same memory as the original tensor

z[:, 0] = 12
z, x

(tensor([[12.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9.]]),
 tensor([12.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9.]))

In [45]:
# ...and the sharing works in the other direction too: writing to x shows up in z.
x[0] = 123
z, x

(tensor([[123.,   2.,   3.,   4.,   5.,   6.,   7.,   8.,   9.]]),
 tensor([123.,   2.,   3.,   4.,   5.,   6.,   7.,   8.,   9.]))

In [46]:
# stack four copies of x along a new dimension; with dim=1 each copy becomes a
# column, giving shape (9, 4) (dim=0 would put the copies on top of each other as rows)
x_stack = torch.stack([x, x, x, x], dim=1)
x_stack

tensor([[123., 123., 123., 123.],
        [  2.,   2.,   2.,   2.],
        [  3.,   3.,   3.,   3.],
        [  4.,   4.,   4.,   4.],
        [  5.,   5.,   5.,   5.],
        [  6.,   6.,   6.,   6.],
        [  7.,   7.,   7.,   7.],
        [  8.,   8.,   8.,   8.],
        [  9.,   9.,   9.,   9.]])

In [47]:
# torch.squeeze() - removes all size-1 dimensions from a target tensor
# (x_reshaped shows 123 in its first slot because it shares memory with x, edited earlier)
x_reshaped

tensor([[123.,   2.,   3.,   4.,   5.,   6.,   7.,   8.,   9.]])

In [48]:
# Shape before squeezing: a leading dimension of size 1.
x_reshaped.shape

torch.Size([1, 9])

In [49]:
# Squeezing drops the size-1 dimension: (1, 9) -> (9,).
x_reshaped.squeeze(), x_reshaped.squeeze().shape

(tensor([123.,   2.,   3.,   4.,   5.,   6.,   7.,   8.,   9.]),
 torch.Size([9]))

In [54]:
# torch.unsqueeze() - inserts a size-1 dimension into a target tensor at the given dim
x_squeezed = torch.squeeze(x_reshaped)
print(x_squeezed.shape)

# dim=1 puts the new axis after the existing one: (9,) -> (9, 1)
x_unsqueezed = torch.unsqueeze(x_squeezed, dim=1)
print(x_unsqueezed.shape, x_unsqueezed)

torch.Size([9])
torch.Size([9, 1]) tensor([[123.],
        [  2.],
        [  3.],
        [  4.],
        [  5.],
        [  6.],
        [  7.],
        [  8.],
        [  9.]])


In [81]:
# permute - returns the target tensor with its dimensions rearranged in a given order
x_original = torch.rand(128, 128, 3)

# New order (2, 0, 1): old axis 2 moves to the front and old axes 0 and 1 follow,
# i.e. axis 0->1, 1->2, 2->0.
x_permuted = torch.permute(x_original, dims=(2, 0, 1))
x_permuted.shape


torch.Size([3, 128, 128])

In [82]:
# Writing an arbitrary sentinel value into x_original also shows up in x_permuted,
# so the permuted tensor shares memory with the original.
x_original[0, 0, 0] = 696969
x_original[0, 0, 0], x_permuted[0, 0,0]

(tensor(696969.), tensor(696969.))

In [2]:
import torch
# Integers 1..9 arranged as one 3x3 matrix inside an outer dimension of size 1.
x = torch.reshape(torch.arange(1, 10), (1, 3, 3))
x

tensor([[[1, 2, 3],
         [4, 5, 6],
         [7, 8, 9]]])

In [3]:
# Index the outer dimension: the whole 3x3 matrix.
x[0]

tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])

In [4]:
# Then index the middle dimension: the second row.
x[0][1]

tensor([4, 5, 6])

In [7]:
# Chained integer indexes give a single element (a scalar tensor); using `:` on the
# outer dimension keeps that dimension, so the second result is a 1-element tensor.
x[0][0][2], x[:, 2, 2]

(tensor(3), tensor([9]))

In [87]:
# Everything from the first two dimensions, index 1 of the last: the middle column.
x[:, :, 1]

tensor([[2, 5, 8]])

## PyTorch and Numpy

* Data in Numpy, want in PyTorch tensor -> `torch.from_numpy(ndarray)`
* PyTorch tensor -> Numpy : `torch.Tensor.numpy()`

In [None]:
import torch
import numpy as np

# NumPy's default dtype is float64 (or int64 for integer input), and
# torch.from_numpy keeps it - note dtype=torch.float64 in the output.
# Chain .type(torch.float32) onto the result to convert.
array = np.arange(1.0, 10.0, 1)
tensor = torch.from_numpy(array)
tensor, array

(tensor([1., 2., 3., 4., 5., 6., 7., 8., 9.], dtype=torch.float64),
 array([1., 2., 3., 4., 5., 6., 7., 8., 9.]))

In [96]:
# The NumPy array built from float arguments is float64.
array.dtype

dtype('float64')

In [97]:
# PyTorch's default floating-point dtype is float32 (NumPy's, shown above, is float64)
torch.arange(1.0, 10.0).dtype

torch.float32

In [99]:
# change the value of the array, will the changes reflect to 'tensor'?
# NOTE: `array + 10` allocates a NEW array and rebinds the name `array`; the buffer
# that `tensor` was created from is never written to.
array = array + 10
array, tensor

# `tensor` is unchanged, but only because of the rebinding above. Per the
# torch.from_numpy documentation the tensor and its source ndarray share memory, so an
# in-place update such as `array += 10` would also be visible in `tensor`.
# NOTE(review): the displayed values (21..29) imply this cell was executed twice;
# it is not idempotent on re-run.

(array([21., 22., 23., 24., 25., 26., 27., 28., 29.]),
 tensor([1., 2., 3., 4., 5., 6., 7., 8., 9.], dtype=torch.float64))

In [101]:
# Tensor -> NumPy: the array keeps the tensor's dtype (float32 here, see output).
tensor = torch.ones(size=(7,))
numpy_tensor = tensor.numpy()
tensor, numpy_tensor

(tensor([1., 1., 1., 1., 1., 1., 1.]),
 array([1., 1., 1., 1., 1., 1., 1.], dtype=float32))

In [103]:
numpy_tensor.dtype

# the tensor used PyTorch's default float32, and .numpy() kept that dtype
# (an array created directly by NumPy would have defaulted to float64)

dtype('float32')

In [106]:
# Change the tensor, will it change the numpy_tensor?
# NOTE: `tensor + 10` creates a NEW tensor and rebinds the name `tensor`; the memory
# behind `numpy_tensor` is never written to.
tensor = tensor + 10
tensor, numpy_tensor

# numpy_tensor is unchanged, but only because of the rebinding above. Per the
# torch.Tensor.numpy documentation the array and the original tensor share memory, so an
# in-place update such as `tensor += 10` would show up in numpy_tensor as well.
# NOTE(review): the displayed 31s imply this cell was executed three times;
# it is not idempotent on re-run.

(tensor([31., 31., 31., 31., 31., 31., 31.]),
 array([1., 1., 1., 1., 1., 1., 1.], dtype=float32))

## Reproducibility

trying to take the random out of random

To reduce the randomness in neural networks and PyTorch comes the concept of a **random seed**

A random seed "flavours" the randomness: seeding the generator with the same value makes it produce the same sequence of "random" numbers.

In [120]:
import torch
# Two random tensors drawn back to back without seeding: their values differ.
randomTensorA = torch.rand(size=(3, 4))
randomTensorB = torch.rand(size=(3, 4))

# Show both tensors, then their element-wise comparison.
print(randomTensorA, randomTensorB, randomTensorA == randomTensorB, sep="\n")

tensor([[0.9569, 0.3555, 0.2324, 0.6946],
        [0.0766, 0.3048, 0.2691, 0.7810],
        [0.2890, 0.3962, 0.4383, 0.6167]])
tensor([[0.4306, 0.0514, 0.5880, 0.5372],
        [0.7402, 0.0498, 0.9696, 0.3312],
        [0.9164, 0.1140, 0.5826, 0.4889]])
tensor([[False, False, False, False],
        [False, False, False, False],
        [False, False, False, False]])


In [5]:
# Reproducible tensors: re-seed the generator before EACH draw so that both draws
# start from the same generator state.
import torch

RANDOM_SEED = 42

torch.manual_seed(RANDOM_SEED)
randomTensorC = torch.rand(size=(3, 4))

torch.manual_seed(RANDOM_SEED)
randomTensorD = torch.rand(size=(3, 4))

# Show both tensors, then their element-wise comparison (all True).
print(randomTensorC, randomTensorD, randomTensorC == randomTensorD, sep="\n")

tensor([[0.8823, 0.9150, 0.3829, 0.9593],
        [0.3904, 0.6009, 0.2566, 0.7936],
        [0.9408, 0.1332, 0.9346, 0.5936]])
tensor([[0.8823, 0.9150, 0.3829, 0.9593],
        [0.3904, 0.6009, 0.2566, 0.7936],
        [0.9408, 0.1332, 0.9346, 0.5936]])
tensor([[True, True, True, True],
        [True, True, True, True],
        [True, True, True, True]])


## Running tensors and PyTorch objects on the GPUs

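GPUs = faster computation on numbers, thanks to CUDA + NVIDIA hardware + PyTorch (or, on Apple silicon, the Metal Performance Shaders backend `mps`, which is the device used in the cells below)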

In [21]:
# Device-agnostic setup: use Apple's "mps" backend when present, otherwise CUDA,
# otherwise fall back to the CPU so the rest of the notebook runs anywhere.
if torch.backends.mps.is_available():
    device = "mps"
elif torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Number of CUDA GPUs visible to PyTorch (0 on a machine without NVIDIA hardware).
torch.cuda.device_count()

0

In [22]:
# A tensor created without a device argument is on the CPU (see the printed device).
tensor = torch.tensor(data=[1, 2, 3])

print(tensor, tensor.device)

tensor([1, 2, 3]) cpu


In [25]:
# Move the tensor to the selected device; the result is bound to a new name, so
# `tensor` itself still refers to the CPU tensor.
tensor_on_gpu = tensor.to(device)
tensor_on_gpu

tensor([1, 2, 3], device='mps:0')

### If a tensor is on the GPU, it can't be converted to NumPy directly - move it to the CPU first

In [28]:
# Bring the tensor back to the CPU first, then convert it to a NumPy array.
tensor_on_gpu.cpu().numpy()

array([1, 2, 3])

In [29]:
# .cpu() returned a separate result: tensor_on_gpu itself is still on the original device.
tensor_on_gpu

tensor([1, 2, 3], device='mps:0')