In [108]:
import os
import sys

# Make the project's `src` directory (one level up from the working directory)
# importable so that the local `utils` module can be found.
sys.path.append(os.path.join(os.getcwd(), '..', 'src'))

# Import explicitly rather than with `import *`, so it is clear where the name
# comes from and the notebook namespace is not polluted.
# NOTE(review): `get_pytorch_device` is the only name used below that is not
# defined in this notebook, so it is presumably provided by `utils` - confirm.
from utils import get_pytorch_device

In [2]:
import torch
# Record the installed PyTorch version next to the results below.
print(torch.__version__)

2.0.0


## Torch basics

In [9]:
# A 1-D tensor (vector) built from a Python list of two elements.
vector = torch.tensor([7, 7])
# Display its shape together with its number of dimensions.
vector.shape, vector.ndim

(torch.Size([2]), 1)

In [10]:
# A 2-D tensor (matrix) with two rows and two columns.
MATRIX = torch.tensor([
    [7, 8],
    [9, 10],
])
MATRIX.shape, MATRIX.ndim

(torch.Size([2, 2]), 2)

In [12]:
# A 3-D tensor: one 3x3 matrix wrapped in an outer dimension of size 1.
TENSOR = torch.tensor([
    [
        [1, 2, 3],
        [3, 6, 9],
        [2, 4, 5],
    ],
])
TENSOR.shape, TENSOR.ndim

(torch.Size([1, 3, 3]), 3)

In [13]:
# Indexing the first dimension returns the single 3x3 matrix it contains.
TENSOR[0]

tensor([[1, 2, 3],
        [3, 6, 9],
        [2, 4, 5]])

In [15]:
# A second index selects the first row of that matrix.
TENSOR[0][0]

tensor([1, 2, 3])

In [16]:
# A third index reaches a single element, returned as a 0-D tensor.
TENSOR[0][0][0]

tensor(1)

In [19]:
# Two identical 3x3 matrices stacked along the first dimension -> shape (2, 3, 3).
var = torch.tensor([
    [
        [1, 2, 3],
        [4, 5, 6],
        [7, 8, 9],
    ],
    [
        [1, 2, 3],
        [4, 5, 6],
        [7, 8, 9],
    ],
])
var.shape, var.ndim

(torch.Size([2, 3, 3]), 3)

### Random Tensors
Random tensors are important because neural networks learn by starting with tensors full of random numbers that are slowly adjusted to better represent the data.

In [22]:
# Uniform random values with shape (3, 3, 4).
random_tensor = torch.rand(size=(3, 3, 4))
random_tensor, random_tensor.shape, random_tensor.ndim

(tensor([[[0.9008, 0.3126, 0.5461, 0.2034],
          [0.4481, 0.3491, 0.6650, 0.0180],
          [0.7232, 0.7312, 0.6140, 0.8364]],
 
         [[0.7594, 0.8230, 0.0320, 0.2803],
          [0.6889, 0.2488, 0.6385, 0.3372],
          [0.4214, 0.0788, 0.9248, 0.5670]],
 
         [[0.3019, 0.6215, 0.2993, 0.4001],
          [0.4399, 0.0860, 0.5321, 0.0252],
          [0.9851, 0.1950, 0.8599, 0.5815]]]),
 torch.Size([3, 3, 4]),
 3)

In [23]:
# Random tensor laid out like an image.
image_tensor = torch.rand(size=(224, 224, 3)) # (height, width, colour channels)
image_tensor.shape, image_tensor.ndim

(torch.Size([224, 224, 3]), 3)

In [24]:
# A 3x4 tensor filled with zeros.
zeros = torch.zeros(3, 4)
zeros.shape, zeros.ndim, zeros

(torch.Size([3, 4]),
 2,
 tensor([[0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.]]))

In [25]:
# A 3x4 tensor filled with ones.
ones = torch.ones(3, 4)
ones.shape, ones.ndim, ones

(torch.Size([3, 4]),
 2,
 tensor([[1., 1., 1., 1.],
         [1., 1., 1., 1.],
         [1., 1., 1., 1.]]))

Creating a range of values, and "tensors-like" (tensors with the same shape as another tensor)

In [28]:
# Arguments are start=0, end=12 (exclusive) and step=2.
torch.arange(0,12, 2)

tensor([ 0,  2,  4,  6,  8, 10])

In [29]:
# Keep the source range and the zeros derived from it under separate names.
# Previously `one_to_ten` was bound to the all-zero result, which was misleading.
one_to_ten = torch.arange(1, 11)
# zeros_like copies the shape and dtype of its input but fills it with zeros.
ten_zeros = torch.zeros_like(input=one_to_ten)
ten_zeros

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [32]:
# Spell out the most important keyword arguments of torch.tensor.
float_32_tensor = torch.tensor([3.0, 6.0, 9.0],
                               dtype=None, # None -> dtype is inferred from the data (float32 here)
                               device = None, # None -> use the default device
                               requires_grad = False # whether to track gradients for this tensor
                               )
float_32_tensor.dtype

torch.float32

Although `dtype` is passed as `None`, PyTorch infers the dtype from the data, and Python floats default to `torch.float32`.

The 3 main types of errors
1. Tensors not right data type -  use `tensor.dtype`
2. Tensors not the right shape - use `tensor.shape`
3. Tensors not on the right device -  use `tensor.device`

In [33]:
# Cast to half precision; float_32_tensor keeps its own name and is not rebound.
float_16_tensor=float_32_tensor.type(torch.float16)
float_16_tensor

tensor([3., 6., 9.], dtype=torch.float16)

Get important tensor attributes

In [34]:
# The three attributes to check first when debugging: dtype, shape, device.
float_16_tensor.dtype, float_16_tensor.shape, float_16_tensor.device

(torch.float16, torch.Size([3]), device(type='cpu'))

## Tensor operations

In [39]:
# Scalar operands are applied to every element; `tensor` itself is not rebound.
tensor = torch.tensor([1, 2, 3])
tensor + 10, tensor * 10, tensor % 10

(tensor([11, 12, 13]), tensor([10, 20, 30]), tensor([1, 2, 3]))

Element-wise multiplication

In [40]:
# `*` multiplies element by element: [1*1, 2*2, 3*3].
tensor, tensor * tensor

(tensor([1, 2, 3]), tensor([1, 4, 9]))

Matrix multiplication (dot product)
* Note that the inner dimensions must match, i.e. a 3x2 matrix can be multiplied with a 2x3 matrix, but not with another 3x2 matrix.

In [41]:
# For two 1-D tensors matmul gives the dot product: 1*1 + 2*2 + 3*3 = 14.
torch.matmul(tensor, tensor)

tensor(14)

In [6]:
# Two 3x2 matrices: as they stand, their inner dimensions (2 and 3) do not match.
tensor_A = torch.tensor([
    [1, 2],
    [3, 4],
    [5, 6],
])
tensor_B = torch.tensor([
    [7, 10],
    [8, 11],
    [9, 12],
])

print(tensor_A.shape, tensor_B.shape,'\n')
# Transposing tensor_B gives (3, 2) @ (2, 3), which yields a 3x3 result.
torch.mm(tensor_A, tensor_B.T)

torch.Size([3, 2]) torch.Size([3, 2]) 



tensor([[ 27,  30,  33],
        [ 61,  68,  75],
        [ 95, 106, 117]])

## Tensor aggregation (min, max, mean, sum, etc.)

In [7]:
# Integer tensor 0, 10, ..., 90 used for the aggregation examples below.
X = torch.arange(start=0, end=100, step=10)
X

tensor([ 0, 10, 20, 30, 40, 50, 60, 70, 80, 90])

In [8]:
# Smallest and largest value, each returned as a 0-D tensor.
X.min(), X.max()

(tensor(0), tensor(90))

In [9]:
# torch.mean only accepts floating-point or complex tensors, so calling it on
# the integer tensor X raises. The error is the point of this cell, so catch it
# and print it; the notebook then still completes under "Restart & Run All".
try:
    torch.mean(X)
except RuntimeError as err:
    print(f"{type(err).__name__}: {err}")

RuntimeError: mean(): could not infer output dtype. Input dtype must be either a floating point or complex dtype. Got: Long

In this case the tensor X has the data type `torch.int64` (a `LongTensor`). The `torch.mean` function only accepts floating-point or complex tensors, so X must be converted first.

In [11]:
# Cast to float32 first so that mean() has a floating-point input.
torch.mean(X.type(torch.float32))

tensor(45.)

In [14]:
# Unlike mean(), sum() works directly on the integer tensor.
torch.sum(X)

tensor(450)

`torch.argmax` returns the index of the maximum value along a given axis.

In [16]:
# Reminder of the shape (3 rows, 2 columns) before reducing along each axis.
tensor_A.shape

torch.Size([3, 2])

In [15]:
# dim=0 reduces over rows (one index per column);
# dim=1 reduces over columns (one index per row).
torch.argmax(tensor_A, dim=0), torch.argmax(tensor_A, dim=1)

(tensor([2, 2]), tensor([1, 1, 1]))

Reshaping, stacking, squeezing and unsqueezing tensors
* Reshape - reshapes an input tensor to a defined shape
* View - returns a view of an input tensor with a certain shape that shares the same memory as the original tensor
* Stacking - joins tensors along a new dimension (`torch.stack`) or concatenates them vertically/horizontally (`torch.vstack` / `torch.hstack`)
* Squeeze - removes all dimensions of size `1` from a target tensor
* Unsqueeze - adds a dimension of size `1` to a target tensor at a given position
* Permute - returns a view of the input with the dimensions permuted (swapped) in a certain way

In [25]:
# Float range 1.0 ... 9.0; rebinds X for the reshaping examples below.
X = torch.arange(1.,10.)
X, X.shape

(tensor([1., 2., 3., 4., 5., 6., 7., 8., 9.]), torch.Size([9]))

In [29]:
# Add a leading dimension of size 1: (9,) -> (1, 9).
X_reshaped = X.reshape(1,9)
X_reshaped, X_reshaped.shape

(tensor([[1., 2., 3., 4., 5., 6., 7., 8., 9.]]), torch.Size([1, 9]))

In [30]:
# view() gives the same (1, 9) shape but shares memory with X.
z = X.view(1,9)
z, z.shape

(tensor([[1., 2., 3., 4., 5., 6., 7., 8., 9.]]), torch.Size([1, 9]))

Stack tensors on top of each other

In [32]:
# dim=0: each copy of X becomes a row -> shape (3, 9).
X_stacked = torch.stack([X, X, X], dim=0)
X_stacked

tensor([[1., 2., 3., 4., 5., 6., 7., 8., 9.],
        [1., 2., 3., 4., 5., 6., 7., 8., 9.],
        [1., 2., 3., 4., 5., 6., 7., 8., 9.]])

In [33]:
# dim=1: each copy of X becomes a column -> shape (9, 3).
# Note that this rebinds X_stacked from the previous cell.
X_stacked = torch.stack([X, X, X], dim=1)
X_stacked

tensor([[1., 1., 1.],
        [2., 2., 2.],
        [3., 3., 3.],
        [4., 4., 4.],
        [5., 5., 5.],
        [6., 6., 6.],
        [7., 7., 7.],
        [8., 8., 8.],
        [9., 9., 9.]])

In [34]:
# vstack on 1-D inputs gives the same (3, 9) layout as stack(..., dim=0).
X_vstacked = torch.vstack([X, X, X])
X_vstacked

tensor([[1., 2., 3., 4., 5., 6., 7., 8., 9.],
        [1., 2., 3., 4., 5., 6., 7., 8., 9.],
        [1., 2., 3., 4., 5., 6., 7., 8., 9.]])

In [35]:
# hstack on 1-D inputs concatenates them end to end into one tensor of 27 elements.
X_hstacked = torch.hstack([X, X, X])
X_hstacked

tensor([1., 2., 3., 4., 5., 6., 7., 8., 9., 1., 2., 3., 4., 5., 6., 7., 8., 9.,
        1., 2., 3., 4., 5., 6., 7., 8., 9.])

In [36]:
# squeeze() drops the size-1 dimension: (1, 9) -> (9,).
X_reshaped.squeeze().shape

torch.Size([9])

In [42]:
X_reshaped.unsqueeze(dim=0 # new size-1 axis goes first: (1, 9) -> (1, 1, 9); valid dims here are -3..2
                     ).shape

torch.Size([1, 1, 9])

In [43]:
# dim must lie in [-3, 2] for this 2-D tensor, so dim=99 raises. The error is
# the point of this cell, so catch it and print it rather than letting it stop
# a full re-run of the notebook.
try:
    X_reshaped.unsqueeze(dim=99).shape
except IndexError as err:
    print(f"{type(err).__name__}: {err}")

IndexError: Dimension out of range (expected to be in range of [-3, 2], but got 99)

In [44]:
X_reshaped.unsqueeze(dim=-1 # negative dims count from the end: (1, 9) -> (1, 9, 1)
                     ).shape

torch.Size([1, 9, 1])

In [46]:
# Random tensor of shape (224, 224, 3) used to demonstrate permute below.
x = torch.rand(size=(224,224,3))
x.shape

torch.Size([224, 224, 3])

In [47]:
# Reorder the dimensions: (224, 224, 3) -> (3, 224, 224).
x_permuted = torch.permute(x, 
                           (2,0,1) # shifts axis 0->1, 1->2, 2->0
                           )
x_permuted.shape


torch.Size([3, 224, 224])

## Selecting data from tensors
It's very similar to indexing with numpy

In [49]:
# Values 1..9 arranged as a 4-D tensor of shape (1, 1, 3, 3).
x = torch.arange(1,10).reshape(1,1,3,3)
x, x.shape

(tensor([[[[1, 2, 3],
           [4, 5, 6],
           [7, 8, 9]]]]),
 torch.Size([1, 1, 3, 3]))

In [53]:
# Each extra [0] strips one leading dimension until a single element remains.
x[0], x[0][0], x[0][0][0], x[0][0][0][0]

(tensor([[[1, 2, 3],
          [4, 5, 6],
          [7, 8, 9]]]),
 tensor([[1, 2, 3],
         [4, 5, 6],
         [7, 8, 9]]),
 tensor([1, 2, 3]),
 tensor(1))

In [54]:
# Keep the first two dimensions whole and take index 1 of the third (the second row).
x[:, :, 1]

tensor([[[4, 5, 6]]])

In [60]:
# Same values as a 3-D tensor of shape (1, 3, 3); rebinds x.
x = torch.arange(1,10).reshape(1,3,3)
x, x.shape

(tensor([[[1, 2, 3],
          [4, 5, 6],
          [7, 8, 9]]]),
 torch.Size([1, 3, 3]))

In [61]:
# Middle column, centre element (outer dimension kept), and first row.
x[:, :, 1],x[:, 1, 1], x[0, 0, :]

(tensor([[2, 5, 8]]), tensor([5]), tensor([1, 2, 3]))

In [77]:
# Bottom-right element (9) and the last column.
x[0][2][-1], x[:, :, 2]

(tensor(9), tensor([[3, 6, 9]]))

PyTorch tensors & numpy

In [82]:
import numpy as np
# NumPy creates float64 values by default.
array = np.arange(1.0, 8.0)
# from_numpy keeps the array's dtype (float64) and, per the PyTorch docs,
# shares memory with the source array.
tensor = torch.from_numpy(array)
array, tensor

(array([1., 2., 3., 4., 5., 6., 7.]),
 tensor([1., 2., 3., 4., 5., 6., 7.], dtype=torch.float64))

In [86]:
# Convert to float32 so the tensor matches PyTorch's default floating-point dtype.
tensor2 = torch.from_numpy(array).type(torch.float32)
tensor2

tensor([1., 2., 3., 4., 5., 6., 7.])

> Note: The default floating-point data type of NumPy is `float64`, but the default floating-point data type of PyTorch is `float32`.

### Seeding for reproducibility

In [91]:
# Seed passed to torch.manual_seed in the cells below.
SEED = 42

In [90]:
# No seed is set here, so the two tensors differ from each other.
random_tensor_A = torch.rand(3,4)
random_tensor_B = torch.rand(3,4)
random_tensor_A, random_tensor_B, random_tensor_A == random_tensor_B

(tensor([[0.1053, 0.2695, 0.3588, 0.1994],
         [0.5472, 0.0062, 0.9516, 0.0753],
         [0.8860, 0.5832, 0.3376, 0.8090]]),
 tensor([[0.5779, 0.9040, 0.5547, 0.3423],
         [0.6343, 0.3644, 0.7104, 0.9464],
         [0.7890, 0.2814, 0.7886, 0.5895]]),
 tensor([[False, False, False, False],
         [False, False, False, False],
         [False, False, False, False]]))

In [99]:
# Seeding once makes the pair reproducible across runs, but A and B still differ
# from each other because the second rand() call continues the same sequence.
torch.manual_seed(SEED)

random_tensor_A = torch.rand(3,4)
random_tensor_B = torch.rand(3,4)

random_tensor_A, random_tensor_B, random_tensor_A == random_tensor_B

(tensor([[0.8823, 0.9150, 0.3829, 0.9593],
         [0.3904, 0.6009, 0.2566, 0.7936],
         [0.9408, 0.1332, 0.9346, 0.5936]]),
 tensor([[0.8694, 0.5677, 0.7411, 0.4294],
         [0.8854, 0.5739, 0.2666, 0.6274],
         [0.2696, 0.4414, 0.2969, 0.8317]]),
 tensor([[False, False, False, False],
         [False, False, False, False],
         [False, False, False, False]]))

In [98]:
# Re-seeding immediately before each rand() call restarts the sequence,
# so A and B come out identical.
torch.manual_seed(SEED)
random_tensor_A = torch.rand(3,4)

torch.manual_seed(SEED)
random_tensor_B = torch.rand(3,4)

random_tensor_A, random_tensor_B, random_tensor_A == random_tensor_B

(tensor([[0.8823, 0.9150, 0.3829, 0.9593],
         [0.3904, 0.6009, 0.2566, 0.7936],
         [0.9408, 0.1332, 0.9346, 0.5936]]),
 tensor([[0.8823, 0.9150, 0.3829, 0.9593],
         [0.3904, 0.6009, 0.2566, 0.7936],
         [0.9408, 0.1332, 0.9346, 0.5936]]),
 tensor([[True, True, True, True],
         [True, True, True, True],
         [True, True, True, True]]))

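`torch.manual_seed` resets the random number generator once; every later `torch.rand` call then advances the generator state. To get identical tensors from separate calls, call `torch.manual_seed(SEED)` again immediately before each one.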

### Running PyTorch on GPUs

In [117]:
# NOTE(review): get_pytorch_device is not defined in this notebook - presumably
# it comes from the local `utils` module. It returned 'mps' on this machine.
device = get_pytorch_device()
device

'mps'

In [118]:
# Tensors are created on the CPU unless a device is specified.
tensor = torch.tensor([1,2,3])
print(tensor, tensor.device)

tensor([1, 2, 3]) cpu


In [116]:
# .to(device) returns a tensor on the target device, bound here to a new name.
tensor_on_device = tensor.to(device)
print(tensor_on_device, tensor_on_device.device)

tensor([1, 2, 3], device='mps:0') mps:0


### Note that numpy operations only work on CPU

In [120]:
# Move the tensor to the CPU first, then convert it; the result is a NumPy array.
tensor_back_on_cpu = tensor_on_device.to('cpu').numpy()
tensor_back_on_cpu

array([1, 2, 3])

## PyTorch exercises
https://github.com/mrdbourke/pytorch-deep-learning/blob/main/extras/exercises/00_pytorch_fundamentals_exercises.ipynb

In [125]:
# Create a random tensor with shape (7,7)
# (torch is already imported near the top of the notebook; re-importing is harmless.)
import torch

tensor = torch.rand(7,7)
tensor, tensor.shape

(tensor([[0.1374, 0.2331, 0.9578, 0.3313, 0.3227, 0.0162, 0.2137],
         [0.6249, 0.4340, 0.1371, 0.5117, 0.1585, 0.0758, 0.2247],
         [0.0624, 0.1816, 0.9998, 0.5944, 0.6541, 0.0337, 0.1716],
         [0.3336, 0.5782, 0.0600, 0.2846, 0.2007, 0.5014, 0.3139],
         [0.4654, 0.1612, 0.1568, 0.2083, 0.3289, 0.1054, 0.9192],
         [0.4008, 0.9302, 0.6558, 0.0766, 0.8460, 0.3624, 0.3083],
         [0.0850, 0.0029, 0.6431, 0.3908, 0.6947, 0.0897, 0.8712]]),
 torch.Size([7, 7]))

In [123]:
# Second operand for the matrix multiplication exercise, shape (1, 7).
tensor2 = torch.rand(1,7)
tensor2, tensor2.shape
# Note: torch.matmul(tensor, tensor2) would fail for (7, 7) @ (1, 7) because the
# inner dimensions do not match; the next cell transposes tensor2 first.

(tensor([[0.1115, 0.2477, 0.6524, 0.6057, 0.3725, 0.7980, 0.8399]]),
 torch.Size([1, 7]))

In [128]:
# (7, 7) @ (7, 1) -> (7, 1): transposing tensor2 makes the inner dimensions match.
torch.matmul(tensor, tensor2.T)

tensor([[1.2113],
        [0.8848],
        [1.4790],
        [1.1305],
        [1.2989],
        [1.6127],
        [1.7285]])

In [131]:
# Same exercise with a fixed seed so the result is reproducible.
SEED = 0

# Re-seed before each rand() call so both tensors are generated from a known state.
torch.manual_seed(SEED)
tensor = torch.rand(7,7)

torch.manual_seed(SEED)
tensor2 = torch.rand(1,7)

torch.matmul(tensor, tensor2.T)

tensor([[1.5985],
        [1.1173],
        [1.2741],
        [1.6838],
        [0.8279],
        [1.0347],
        [1.2498]])

Setting the seed on the accelerator device

In [134]:
# Seed the generator of the MPS (Apple-silicon) backend.
# NOTE(review): this call is MPS-specific; on another accelerator use that
# backend's own seeding API - confirm against what get_pytorch_device returns.
torch.mps.manual_seed(SEED)

In [140]:
SEED = 1234
device = get_pytorch_device()

# Seed once, draw both tensors with no device argument, then move them to `device`.
torch.manual_seed(SEED)
t1 = torch.rand(2,3).to(device)
t2 = torch.rand(2,3).to(device)

device, t1, t2

('mps',
 tensor([[0.0290, 0.4019, 0.2598],
         [0.3666, 0.0583, 0.7006]], device='mps:0'),
 tensor([[0.0518, 0.4681, 0.6738],
         [0.3315, 0.7837, 0.5631]], device='mps:0'))

In [142]:
# (2, 3) @ (3, 2) -> (2, 2); the result stays on the same device as its inputs.
mul = torch.matmul(t1, t2.T)
mul, mul.shape

(tensor([[0.3647, 0.4709],
         [0.5184, 0.5617]], device='mps:0'),
 torch.Size([2, 2]))

In [147]:
# Largest and smallest value over the whole tensor.
torch.max(mul), torch.min(mul)

(tensor(0.5617, device='mps:0'), tensor(0.3647, device='mps:0'))

In [153]:
# With no dim given, the indices refer to the flattened tensor:
# 3 is mul[1, 1] (the maximum) and 0 is mul[0, 0] (the minimum).
torch.argmax(mul), torch.argmin(mul)

(tensor(3, device='mps:0'), tensor(0, device='mps:0'))

In [154]:
SEED = 7
torch.manual_seed(SEED)

# Shape (1, 1, 1, 10); squeeze removes every size-1 dimension, leaving (10,).
tensor = torch.rand(1,1,1,10)
tensor_squeezed = torch.squeeze(tensor)

tensor.shape, tensor_squeezed.shape

(torch.Size([1, 1, 1, 10]), torch.Size([10]))

In [155]:
# Same ten values in both tensors; only the number of dimensions differs.
tensor, tensor_squeezed

(tensor([[[[0.5349, 0.1988, 0.6592, 0.6569, 0.2328, 0.4251, 0.2071, 0.6297,
            0.3653, 0.8513]]]]),
 tensor([0.5349, 0.1988, 0.6592, 0.6569, 0.2328, 0.4251, 0.2071, 0.6297, 0.3653,
         0.8513]))