In [2]:
!pip3 install -q torch==2.0.1 torchvision torchaudio

# 00. PyTorch Fundamentals

In [3]:
import torch
torch.__version__

'2.0.1+cu118'

## Introduction to tensors

### Creating tensors

In [None]:
# Scalar
scalar = torch.tensor(7)
scalar

tensor(7)

In [None]:
# Check the dimensions of a tensor using the ndim attribute
scalar.ndim

0

In [None]:
# Get the Python number within a tensor (only works with one-element tensors)
scalar.item()

7

In [None]:
# Vector
vector = torch.tensor([7, 7])
vector

tensor([7, 7])

In [None]:
# Check the number of dimensions of vector
vector.ndim

1

In [None]:
# Check shape of vector
vector.shape

torch.Size([2])

In [None]:
# Matrix
MATRIX = torch.tensor([[7, 8],
                       [9, 10]])
MATRIX

tensor([[ 7,  8],
        [ 9, 10]])

In [None]:
# Check number of dimensions
MATRIX.ndim

2

In [None]:
# Check shape of matrix
MATRIX.shape

torch.Size([2, 2])

In [None]:
# Tensor
TENSOR = torch.tensor([[[1, 2, 3],
                        [3, 6, 9],
                        [2, 4, 5]]])
TENSOR

tensor([[[1, 2, 3],
         [3, 6, 9],
         [2, 4, 5]]])

In [None]:
# Check number of dimensions for TENSOR
TENSOR.ndim

3

In [None]:
# Check shape of TENSOR
TENSOR.shape

torch.Size([1, 3, 3])

Let's summarise.

| Name | What is it? | Number of dimensions | Lower or upper (usually/example) |
| ---- | ----------- | -------------------- | -------------------------------- |
| scalar | a single number | 0 | Lower(a) |
| vector | a number with direction (e.g. wind speed with direction) but can also have many other numbers | 1 | Lower(y) |
| matrix | a 2-dimensional array of numbers | 2 | Upper(Q) |
| tensor | an n-dimensional array of numbers | can be any number, a 0-dimension tensor is a scalar, a 1-dimension tensor is a vector | Upper(X) |

![](https://raw.githubusercontent.com/mrdbourke/pytorch-deep-learning/main/images/00-scalar-vector-matrix-tensor.png)

### Random tensors

In [None]:
# Create a random tensor of size (3, 4)
random_tensor = torch.rand(size=(3, 4))
random_tensor, random_tensor.dtype

(tensor([[0.5158, 0.8323, 0.7521, 0.2091],
         [0.1330, 0.9256, 0.5851, 0.6520],
         [0.9564, 0.3597, 0.5048, 0.5823]]),
 torch.float32)

In [None]:
# Create a random tensor of size (224, 224, 3)
random_image_size_tensor = torch.rand(size=(224, 224, 3))
random_image_size_tensor.shape, random_image_size_tensor.ndim

(torch.Size([224, 224, 3]), 3)

### Zeros and ones

In [None]:
# Create a tensor of all zeros
zeros = torch.zeros(size=(3, 4))
zeros, zeros.dtype

(tensor([[0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.]]),
 torch.float32)

In [None]:
# Create a tensor of all ones
ones = torch.ones(size=(3, 4))
ones, ones.dtype

(tensor([[1., 1., 1., 1.],
         [1., 1., 1., 1.],
         [1., 1., 1., 1.]]),
 torch.float32)

### Creating a range and tensor like

In [None]:
# Use torch.arange(), torch.range() is deprecated
# zero_to_ten_deprecated = torch.range(0, 10)  # Note: this may return an error in the future

# Create a range of values starting at 0 and stopping before 10
# (`end` is exclusive, so the result holds 0 through 9)
zero_to_ten = torch.arange(start=0, end=10, step=1)
zero_to_ten

tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [None]:
# Can also create a tensor of zeros similar to another tensor
ten_zeros = torch.zeros_like(input=zero_to_ten)  # will have same shape
ten_zeros

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

### Tensor datatypes

In [None]:
# Default datatype for tensors is float32
float_32_tensor = torch.tensor([3.0, 6.0, 9.0],
                               dtype=None,  # defaults to None, which is torch.float32 or whatever datatype is passed
                               device=None,  # defaults to None, which uses the default tensor type
                               requires_grad=False)  # if True, operations performed on the tensor are recorded

float_32_tensor.shape, float_32_tensor.dtype, float_32_tensor.device

(torch.Size([3]), torch.float32, device(type='cpu'))

In [None]:
# Create tensor with dtype=torch.float16
float_16_tensor = torch.tensor([3.0, 6.0, 9.0],
                               dtype=torch.float16)  # torch.half would also work

float_16_tensor

tensor([3., 6., 9.], dtype=torch.float16)

## Getting information from tensors

Three of the most common attributes you'll want to find out about a tensor are:
- `shape` - what shape is the tensor? (some operations require specific shape rules)
- `dtype` - what datatype are the elements within the tensor stored in?
- `device` - what device is the tensor stored on? (usually GPU or CPU)

In [None]:
# Create a tensor
some_tensor = torch.rand(3, 4)

# Find out details about it
print(some_tensor)
print()
print(f"Shape of tensor: {some_tensor.shape}")
print(f"Datatype of tensor: {some_tensor.dtype}")
print(f"Device tensor is stored on: {some_tensor.device}")  # will default to CPU

tensor([[0.5107, 0.0596, 0.8988, 0.9590],
        [0.7894, 0.3786, 0.8959, 0.1158],
        [0.0849, 0.3988, 0.5198, 0.5692]])

Shape of tensor: torch.Size([3, 4])
Datatype of tensor: torch.float32
Device tensor is stored on: cpu


## Manipulating tensors (tensor operations)

The most common tensor operations are:
- Addition
- Subtraction
- Multiplication (element-wise)
- Division
- Matrix multiplication

### Basic operations

In [None]:
# Create a tensor of values and add a number to it
tensor = torch.tensor([1, 2, 3])
tensor + 10

tensor([11, 12, 13])

In [None]:
# Multiply it by 10
tensor * 10

tensor([10, 20, 30])

In [None]:
# Tensors don't change unless reassigned
tensor

tensor([1, 2, 3])

In [None]:
# Subtract and reassign
tensor = tensor - 10
tensor

tensor([-9, -8, -7])

In [None]:
# Add and reassign
tensor = tensor + 10
tensor

tensor([1, 2, 3])

In [None]:
# Can also use torch functions
torch.multiply(tensor, 10), torch.mul(tensor, 10)

(tensor([10, 20, 30]), tensor([10, 20, 30]))

In [None]:
# Original tensor is still unchanged
tensor

tensor([1, 2, 3])

In [None]:
# Element-wise multiplication (each element multiplies its equivalent, index 0 -> 0, 1 -> 1, 2 -> 2)
print(tensor, '*', tensor)
print("Equals:", tensor * tensor)

tensor([1, 2, 3]) * tensor([1, 2, 3])
Equals: tensor([1, 4, 9])


### Matrix multiplication (is all you need)

PyTorch implements matrix multiplication functionality in the `torch.matmul()` method.

The main two rules for matrix multiplication to remember are:
1. The inner dimensions must match:
- `(3, 2) @ (3, 2)` won't work
- `(2, 3) @ (3, 2)` will work
- `(3, 2) @ (2, 3)` will work
2. The resulting matrix has the shape of the outer dimensions:
- `(2, 3) @ (3, 2)` -> `(2, 2)`
- `(3, 2) @ (2, 3)` -> `(3, 3)`

In [None]:
tensor = torch.tensor([1, 2, 3])
tensor.shape

torch.Size([3])

The difference between element-wise multiplication and matrix multiplication is the addition of values.

For our `tensor` variable with values `[1, 2, 3]`:

| Operation | Calculation | Code |
| --------- | ----------- | ---- |
| Element-wise multiplication | `[1*1, 2*2, 3*3]` = `[1, 4, 9]` | `tensor * tensor` |
| Matrix multiplication | `[1*1 + 2*2 + 3*3]` = `[14]` | `tensor.matmul(tensor)` |

In [None]:
# Element-wise matrix multiplication
tensor * tensor

tensor([1, 4, 9])

In [None]:
# Matrix multiplication
torch.matmul(tensor, tensor)

tensor(14)

In [None]:
# Can also use the "@" symbol for matrix multiplication, though not recommended
tensor @ tensor

tensor(14)

Can do matrix multiplication by hand but it's not recommended.

The in-built `torch.matmul()` method is faster.

In [None]:
%%time
# Matrix multiplication by hand
# (avoid doing operations with for loops at all cost, they are computationally expensive)
value = 0
for i in range(len(tensor)):
    value += tensor[i] * tensor[i]
value

CPU times: user 867 µs, sys: 0 ns, total: 867 µs
Wall time: 822 µs


tensor(14)

In [None]:
%%time
torch.matmul(tensor, tensor)

CPU times: user 642 µs, sys: 0 ns, total: 642 µs
Wall time: 590 µs


tensor(14)

## One of the most common errors in deep learning (shape errors)

In [None]:
# Shapes need to be in the right way
tensor_A = torch.tensor([[1, 2],
                         [3, 4],
                         [5, 6]], dtype=torch.float32)

tensor_B = torch.tensor([[7, 10],
                         [8, 11],
                         [9, 12]], dtype=torch.float32)

# (3, 2) @ (3, 2) has mismatched inner dimensions, so matmul raises a RuntimeError.
# Catch and print it so the demonstration is visible without halting "Restart & Run All".
try:
    torch.matmul(tensor_A, tensor_B)
except RuntimeError as shape_error:
    print(f"RuntimeError: {shape_error}")

RuntimeError: ignored

One of the ways to fix this shape mismatch is with a **transpose** (switching the dimensions of a given tensor).

Perform transpose in PyTorch:
- `torch.transpose(input, dim0, dim1)` - where `input` is the desired tensor to transpose and `dim0` and `dim1` are the dimensions to be swapped.
- `tensor.T` - where `tensor` is the desired tensor to transpose.

In [None]:
# View tensor_A and tensor_B
print(tensor_A)
print(tensor_B)

tensor([[1., 2.],
        [3., 4.],
        [5., 6.]])
tensor([[ 7., 10.],
        [ 8., 11.],
        [ 9., 12.]])


In [None]:
# View tensor_A and tensor_B.T
print(tensor_A)
print(tensor_B.T)

tensor([[1., 2.],
        [3., 4.],
        [5., 6.]])
tensor([[ 7.,  8.,  9.],
        [10., 11., 12.]])


In [None]:
# The operation works when tensor_B is transposed
print(f"Original shapes: tensor_A = {tensor_A.shape}, tensor_B = {tensor_B.shape}\n")
print(f"New shapes: tensor_A = {tensor_A.shape} (same as above), tensor_B.T = {tensor_B.T.shape}\n")
print(f"Multiplying: {tensor_A.shape} * {tensor_B.T.shape} <- inner dimmensions match")
print("Output:\n")
output = torch.matmul(tensor_A, tensor_B.T)
print(output)
print(f"\nOutput shape: {output.shape}")

Original shapes: tensor_A = torch.Size([3, 2]), tensor_B = torch.Size([3, 2])

New shapes: tensor_A = torch.Size([3, 2]) (same as above), tensor_B.T = torch.Size([2, 3])

Multiplying: torch.Size([3, 2]) * torch.Size([2, 3]) <- inner dimmensions match
Output:

tensor([[ 27.,  30.,  33.],
        [ 61.,  68.,  75.],
        [ 95., 106., 117.]])

Output shape: torch.Size([3, 3])


In [None]:
# torch.mm is a shortcut for matmul
torch.mm(tensor_A, tensor_B.T)

tensor([[ 27.,  30.,  33.],
        [ 61.,  68.,  75.],
        [ 95., 106., 117.]])

The `torch.nn.Linear()` module, also known as a feed-forward layer or fully connected layer, implements a matrix multiplication between an input `x` and a weights matrix `A`.
$$y = x \cdot A^T + b$$
Where

- `x` is the input to the layer (deep learning is a stack of layers like `torch.nn.Linear()` and others on top of each other).
- `A` is the weights matrix created by the layer, this starts out as random numbers that get adjusted as a neural network learns to better represent patterns in the data (notice the "`T`", that's because the weights matrix gets transposed).
- `b` is the bias term used to slightly offset the weights and inputs.
- `y` is the output (a manipulation of the input in the hopes to discover patterns in it).

In [None]:
# Since the linear layer starts with a random weights matrix, let's make it reproducible (more on this later)
torch.manual_seed(42)
# This uses matrix multiplication
linear = torch.nn.Linear(in_features=2,  # in_features = matches inner dimension of input
                         out_features=6)  # out_features = describes outer value

X = tensor_A
output = linear(X)
print(f"Input shape: {X.shape}\n")
print(f"Output:\n{output}\n\nOutput shape: {output.shape}")

Input shape: torch.Size([3, 2])

Output:
tensor([[0.9332, 0.8805, 3.0149, 1.5545, 1.8186, 2.0634],
        [1.7186, 1.4009, 3.5818, 1.7408, 2.6017, 2.5123]],
       grad_fn=<AddmmBackward0>)

Output shape: torch.Size([2, 6])


## Finding the min, max, mean, sum, etc (aggregation)

In [None]:
# Create a tensor
X = torch.arange(0, 100, 10)
X

tensor([ 0, 10, 20, 30, 40, 50, 60, 70, 80, 90])

In [None]:
# Perform some aggregation
print(f"Maximum: {X.max()}")
print(f"Minimum: {X.min()}")
# print(f"Mean: {X.mean()}")  # this will error
print(f"Mean: {X.type(torch.float32).mean()}")  # won't work without float datatype
print(f"Sum: {X.sum()}")

Maximum: 90
Minimum: 0
Mean: 45.0
Sum: 450


In [None]:
# Can also do the same as above with `torch` methods
torch.max(X), torch.min(X), torch.mean(X.type(torch.float32)), torch.sum(X)

(tensor(90), tensor(0), tensor(45.), tensor(450))

## Positional min/max

In [None]:
# Build a tensor holding the multiples of ten from 10 up to 90
tensor = torch.arange(start=10, end=100, step=10)
print(f"Tensor: {tensor}")

# Report the positions (not the values) of the largest and smallest elements
print(f"Index where max value occurs: {torch.argmax(tensor)}")
print(f"Index where min value occurs: {torch.argmin(tensor)}")

Tensor: tensor([10, 20, 30, 40, 50, 60, 70, 80, 90])
Index where max value occurs: 8
Index where min value occurs: 0


## Change tensor datatype

In [None]:
# Create a tensor and check its datatype
tensor = torch.arange(10., 100., 10.)
tensor.dtype

torch.float32

In [None]:
# Create a float16 tensor
tensor_float16 = tensor.type(torch.float16)
tensor_float16

tensor([10., 20., 30., 40., 50., 60., 70., 80., 90.], dtype=torch.float16)

In [None]:
# Create a int8 tensor
tensor_int8 = tensor.type(torch.int8)
tensor_int8

tensor([10, 20, 30, 40, 50, 60, 70, 80, 90], dtype=torch.int8)

## Reshaping, stacking, squeezing and unsqueezing

There are some popular methods for reshaping or changing the dimensions of tensors without actually changing the values inside them:

| Method | One-line description |
| ------ | -------------------- |
| `torch.reshape(input, shape)` | Reshapes `input` to `shape` (if compatible), can also use `torch.Tensor.reshape()`. |
| `torch.Tensor.view(shape)` | Returns a view of the original tensor in a different `shape` but shares the same data as the original tensor. |
| `torch.stack(tensors, dim=0)` | Concatenates a sequence of `tensors` along a new dimension (`dim`), all `tensors` must be same size. |
| `torch.squeeze(input)` | Squeezes `input` to remove all the dimensions with value `1`. |
| `torch.unsqueeze(input, dim)` | Returns `input` with a dimension value of `1` added at `dim`. |
| `torch.permute(input, dims)` | Returns a view of the original `input` with its dimensions permuted (rearranged) to `dims`. |

In [None]:
# Create a tensor
X = torch.arange(1., 8.)
X, X.shape

(tensor([1., 2., 3., 4., 5., 6., 7.]), torch.Size([7]))

In [None]:
# Add an extra dimension
X_reshaped = X.reshape(1, 7)
X_reshaped, X_reshaped.shape

(tensor([[1., 2., 3., 4., 5., 6., 7.]]), torch.Size([1, 7]))

In [None]:
# Change view (keeps same data as original but changes view)
# See more: https://stackoverflow.com/a/54507446/7900723
Z = X.view(1, 7)
Z, Z.shape

(tensor([[1., 2., 3., 4., 5., 6., 7.]]), torch.Size([1, 7]))

In [None]:
# Changing Z changes X too
Z[:, 0] = 5
Z, X

(tensor([[5., 2., 3., 4., 5., 6., 7.]]), tensor([5., 2., 3., 4., 5., 6., 7.]))

In [None]:
# Stack tensors along a new dimension
X_stacked = torch.stack([X, X, X, X], dim=1)  # Try changing dim to dim=0 and see what happens
X_stacked

tensor([[5., 5., 5., 5.],
        [2., 2., 2., 2.],
        [3., 3., 3., 3.],
        [4., 4., 4., 4.],
        [5., 5., 5., 5.],
        [6., 6., 6., 6.],
        [7., 7., 7., 7.]])

In [None]:
# Squeeze tensor with dimensions with value 1
print(f"Previous tensor: {X_reshaped}")
print(f"Previous shape: {X_reshaped.shape}")

# Remove extra dimension from X_reshape
X_squeezed = X_reshaped.squeeze()

print(f"\nNew tensor: {X_squeezed}")
print(f"New shape: {X_squeezed.shape}")

Previous tensor: tensor([[5., 2., 3., 4., 5., 6., 7.]])
Previous shape: torch.Size([1, 7])

New tensor: tensor([5., 2., 3., 4., 5., 6., 7.])
New shape: torch.Size([7])


In [None]:
# Add extra dimension with value 1 at a specific index
print(f"Previous tensor: {X_squeezed}")
print(f"Previous shape: {X_squeezed.shape}")

# Add an extra dimension with unsqueeze
X_unsqueezed = X_squeezed.unsqueeze(dim=0)

print(f"\nNew tensor: {X_unsqueezed}")
print(f"New shape: {X_unsqueezed.shape}")

Previous tensor: tensor([5., 2., 3., 4., 5., 6., 7.])
Previous shape: torch.Size([7])

New tensor: tensor([[5., 2., 3., 4., 5., 6., 7.]])
New shape: torch.Size([1, 7])


In [None]:
# Create tensor with specific shape
X_original = torch.rand(224, 224, 3)

# Rearrange the axis order: the old last axis moves to the front
# (axis 0 -> 1, axis 1 -> 2, axis 2 -> 0)
X_permuted = torch.permute(X_original, dims=(2, 0, 1))

print(f"Previous shape: {X_original.shape}")
print(f"New shape: {X_permuted.shape}")

Previous shape: torch.Size([224, 224, 3])
New shape: torch.Size([3, 224, 224])


## Indexing (selecting data from tensors)

In [4]:
# Create a tensor
X = torch.arange(1, 10).reshape(1, 3, 3)
X, X.shape

(tensor([[[1, 2, 3],
          [4, 5, 6],
          [7, 8, 9]]]),
 torch.Size([1, 3, 3]))

In [5]:
# Let's index bracket by bracket
print(f"First square bracket:\n{X[0]}")
print(f"Second square bracket:\n{X[0][0]}")
print(f"Third square bracket:\n{X[0][0][0]}")

First square bracket:
tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])
Second square bracket:
tensor([1, 2, 3])
Third square bracket:
1


In [6]:
# Get all values of 0th dimension and the 0 index of 1st dimension
X[:, 0]

tensor([[1, 2, 3]])

In [7]:
# Get all values of 0th & 1st dimensions but only index 1 of 2nd dimension
X[:, :, 1]

tensor([[2, 5, 8]])

In [8]:
# Get index 0 of 0th and 1st dimension and all values of 2nd dimension
X[0, 0, :]  # same as X[0][0]

tensor([1, 2, 3])

## PyTorch tensors & NumPy

The two main methods you'll want to use for NumPy to PyTorch (and back again) are:
- `torch.from_numpy(ndarray)` - NumPy array -> PyTorch tensor.
- `torch.Tensor.numpy()` - PyTorch tensor -> NumPy array.

In [9]:
# NumPy array to tensor
import numpy as np
array = np.arange(1.0, 8.0)
tensor = torch.from_numpy(array)
array, tensor

(array([1., 2., 3., 4., 5., 6., 7.]),
 tensor([1., 2., 3., 4., 5., 6., 7.], dtype=torch.float64))

By default, NumPy arrays are created with the datatype `float64` and if you convert it to a PyTorch tensor, it'll keep the same datatype (as above).

However, many PyTorch calculations default to using `float32`.

So if you want to convert your `NumPy array (float64) -> PyTorch tensor (float64) -> PyTorch tensor (float32)`, you can use `tensor = torch.from_numpy(array).type(torch.float32)`.

In [10]:
# Change the array, keep the tensor
# (`array + 1` builds a new array and rebinds the name `array` to it,
# so the tensor created earlier keeps its original values)
array = array + 1
array, tensor

(array([2., 3., 4., 5., 6., 7., 8.]),
 tensor([1., 2., 3., 4., 5., 6., 7.], dtype=torch.float64))

In [11]:
# Tensor to NumPy array
tensor = torch.ones(7)  # create a tensor of ones with dtype=float32
numpy_tensor = tensor.numpy()  # will be dtype=float32 unless changed
tensor, numpy_tensor

(tensor([1., 1., 1., 1., 1., 1., 1.]),
 array([1., 1., 1., 1., 1., 1., 1.], dtype=float32))

In [12]:
# Change the tensor, keep the array the same
# (`tensor + 1` builds a new tensor and rebinds the name `tensor` to it,
# so `numpy_tensor` keeps its original values)
tensor = tensor + 1
tensor, numpy_tensor

(tensor([2., 2., 2., 2., 2., 2., 2.]),
 array([1., 1., 1., 1., 1., 1., 1.], dtype=float32))

## Reproducibility (trying to take the random out of random)

In [13]:
# Create two random tensors
random_tensor_A = torch.rand(3, 4)
random_tensor_B = torch.rand(3, 4)

print(f"Tensor A:\n{random_tensor_A}\n")
print(f"Tensor B:\n{random_tensor_B}\n")
print(f"Does Tensor A equal Tensor B? (anywhere)")
random_tensor_A == random_tensor_B

Tensor A:
tensor([[0.8136, 0.1251, 0.5895, 0.2576],
        [0.7585, 0.3206, 0.7998, 0.1770],
        [0.4227, 0.5266, 0.0901, 0.1163]])

Tensor B:
tensor([[0.0389, 0.9004, 0.4821, 0.2906],
        [0.8693, 0.8430, 0.2840, 0.5330],
        [0.9835, 0.2557, 0.4402, 0.9089]])

Does Tensor A equal Tensor B? (anywhere)


tensor([[False, False, False, False],
        [False, False, False, False],
        [False, False, False, False]])

In [18]:
import random  # NOTE(review): `random` is not used in this cell — confirm whether it is needed later

# Set the random seed
RANDOM_SEED = 17  # try changing this to different values and see what happens to the numbers below
torch.manual_seed(seed=RANDOM_SEED)
random_tensor_C = torch.rand(3, 4)

# Have to reset the seed every time a new rand() is called
# Without this, tensor_D would be different to tensor_C
# NOTE: the reset below is currently commented out, so tensor_C and tensor_D will NOT match;
# uncomment it to make the two tensors identical
# torch.random.manual_seed(seed=RANDOM_SEED)  # try commenting this line out and seeing what happens
random_tensor_D = torch.rand(3, 4)

print(f"Tensor C:\n{random_tensor_C}\n")
print(f"Tensor D:\n{random_tensor_D}\n")
print(f"Does Tensor C equal Tensor D? (anywhere)")
random_tensor_C == random_tensor_D

Tensor C:
tensor([[0.4342, 0.5351, 0.8302, 0.1239],
        [0.0293, 0.5494, 0.3825, 0.5463],
        [0.4683, 0.0172, 0.0214, 0.3664]])

Tensor D:
tensor([[2.0535e-01, 1.9226e-01, 3.5434e-01, 2.1795e-01],
        [1.0574e-04, 1.4056e-01, 6.0028e-01, 5.6578e-01],
        [9.4895e-02, 9.6953e-02, 3.7144e-01, 2.6844e-02]])

Does Tensor C equal Tensor D? (anywhere)


tensor([[False, False, False, False],
        [False, False, False, False],
        [False, False, False, False]])

## Running tensor on GPUs (and making faster computations)

### 1. Getting a GPU

In [3]:
!nvidia-smi

Sat Jun 24 14:42:14 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.85.12    Driver Version: 525.85.12    CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   54C    P8    10W /  70W |      0MiB / 15360MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

### 2. Getting PyTorch to run on the GPU

In [4]:
# Check for GPU
torch.cuda.is_available()

True

In [5]:
# Set device type
device = "cuda" if torch.cuda.is_available() else "cpu"
device

'cuda'

In [6]:
# Count number of devices
torch.cuda.device_count()

1

### 3. Putting tensors (and models) on the GPU

In [7]:
# Create tensor (default on CPU)
tensor = torch.tensor([1, 2, 3])

# Tensor not on GPU
print(tensor, tensor.device)

# Move tensor to GPU (if available)
tensor_on_gpu = tensor.to(device)
tensor_on_gpu

tensor([1, 2, 3]) cpu


tensor([1, 2, 3], device='cuda:0')

### 4. Moving tensors back to the CPU

In [8]:
# If tensor is on GPU, can't transform it to NumPy directly: .numpy() raises a TypeError.
# Catch and print the error so the demonstration doesn't halt "Restart & Run All".
# (When no GPU is available, `device` falls back to "cpu" and the conversion just succeeds.)
try:
    tensor_on_gpu.numpy()
except TypeError as conversion_error:
    print(f"TypeError: {conversion_error}")

TypeError: ignored

In [9]:
# Instead, copy the tensor back to cpu
tensor_back_on_cpu = tensor_on_gpu.cpu().numpy()
tensor_back_on_cpu

array([1, 2, 3])

In [10]:
tensor_on_gpu

tensor([1, 2, 3], device='cuda:0')

## Exercises

1. Documentation reading - A big part of deep learning (and learning to code in general) is getting familiar with the documentation of a certain framework you're using. We'll be using the PyTorch documentation a lot throughout the rest of this course. So I'd recommend spending 10-minutes reading the following (it's okay if you don't get some things for now, the focus is not yet full understanding, it's awareness). See the documentation on torch.Tensor and for torch.cuda.
2. Create a random tensor with shape (7, 7).
3. Perform a matrix multiplication on the tensor from 2 with another random tensor with shape (1, 7) (hint: you may have to transpose the second tensor).
4. Set the random seed to 0 and do exercises 2 & 3 over again.
5. Speaking of random seeds, we saw how to set it with torch.manual_seed() but is there a GPU equivalent? (hint: you'll need to look into the documentation for torch.cuda for this one). If there is, set the GPU random seed to 1234.
6. Create two random tensors of shape (2, 3) and send them both to the GPU (you'll need access to a GPU for this). Set torch.manual_seed(1234) when creating the tensors (this doesn't have to be the GPU random seed).
7. Perform a matrix multiplication on the tensors you created in 6 (again, you may have to adjust the shapes of one of the tensors).
8. Find the maximum and minimum values of the output of 7.
9. Find the maximum and minimum index values of the output of 7.
10. Make a random tensor with shape (1, 1, 1, 10) and then create a new tensor with all the 1 dimensions removed to be left with a tensor of shape (10). Set the seed to 7 when you create it and print out the first tensor and its shape as well as the second tensor and its shape.


### Exercise - 1 (Documentation reading)

No code implementation (reading)

### Exercise - 2

In [7]:
# Create random tensor
tensor = torch.rand(size=(7, 7))
tensor, tensor.shape

(tensor([[0.9530, 0.6465, 0.5446, 0.1525, 0.5065, 0.8164, 0.8393],
         [0.7679, 0.5591, 0.8890, 0.0353, 0.5813, 0.1732, 0.9772],
         [0.7006, 0.3454, 0.2829, 0.3437, 0.4770, 0.1971, 0.9200],
         [0.4159, 0.7476, 0.7664, 0.9567, 0.2570, 0.4298, 0.1303],
         [0.1878, 0.8794, 0.5405, 0.5560, 0.4914, 0.4729, 0.3716],
         [0.5362, 0.5159, 0.8568, 0.3463, 0.5504, 0.4342, 0.5602],
         [0.4801, 0.9633, 0.9717, 0.2253, 0.1701, 0.1445, 0.3133]]),
 torch.Size([7, 7]))

### Exercise - 3

In [10]:
# Create another random tensor
tensor_2 = torch.rand(size=(1, 7))
# Perform matrix multiplication with two different tensors
# Y = torch.matmul(tensor, tensor_2)  # will error because of shape issues
Y = torch.matmul(tensor, tensor_2.T)
Y, Y.shape

(tensor([[1.7495],
         [1.6619],
         [1.3456],
         [1.2273],
         [1.3040],
         [1.5307],
         [1.0805]]),
 torch.Size([7, 1]))

### Exercise - 4

In [11]:
# Seed the RNG so the random tensors below are reproducible
torch.manual_seed(0)

# Draw the two random operands
X = torch.rand(7, 7)
Y = torch.rand(1, 7)

# Transpose Y so the inner dimensions line up: (7, 7) @ (7, 1) -> (7, 1)
Z = X.matmul(Y.T)
Z, Z.shape

(tensor([[1.8542],
         [1.9611],
         [2.2884],
         [3.0481],
         [1.7067],
         [2.5290],
         [1.7989]]),
 torch.Size([7, 1]))

### Exercise - 5

In [13]:
# Set random seed on the default (current) GPU
torch.cuda.manual_seed(1234)

### Exercise - 6

In [12]:
# Set random seed on CPU
torch.manual_seed(1234)

# Check for access to GPU
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Device: {device}")

# Create two random tensors on GPU
tensor_X = torch.rand(size=(2, 3)).to(device)
tensor_Y = torch.rand(size=(2, 3)).to(device)

tensor_X, tensor_Y

Device: cuda


(tensor([[0.0290, 0.4019, 0.2598],
         [0.3666, 0.0583, 0.7006]], device='cuda:0'),
 tensor([[0.0518, 0.4681, 0.6738],
         [0.3315, 0.7837, 0.5631]], device='cuda:0'))

### Exercise - 7

In [16]:
# Perform matrix multiplication on GPU tensors from exercise-6
tensor_Z = torch.matmul(tensor_X, tensor_Y.T)
tensor_Z, tensor_Z.shape

(tensor([[0.3647, 0.4709],
         [0.5184, 0.5617]], device='cuda:0'),
 torch.Size([2, 2]))

### Exercise - 8

In [19]:
# Find maximum value from tensor of exercise-7
print(f"Maximum of tensor_Z: {tensor_Z.max():.4f}")
# Find minimum value from tensor of exercise-7
print(f"Minimum of tensor_Z: {tensor_Z.min():.4f}")

Maximum of tensor_Z: 0.5617
Minimum of tensor_Z: 0.3647


### Exercise - 9

In [20]:
# Find index of maximum value from tensor of exercise-7
# (no `dim` is given, so the index refers to the flattened tensor)
print(f"Index of maximum of tensor_Z: {torch.argmax(tensor_Z)}")
# Find index of minimum value from tensor of exercise-7
print(f"Index of minimum of tensor_Z: {torch.argmin(tensor_Z)}")

Index of maximum of tensor_Z: 3
Index of minimum of tensor_Z: 0


### Exercise - 10

In [21]:
# Seed the RNG so the random values are reproducible
torch.manual_seed(7)

# Random tensor with three leading dimensions of size 1
tensor_D = torch.rand(1, 1, 1, 10)

# Drop every dimension of size 1
tensor_E = torch.squeeze(tensor_D)

# Show each tensor alongside its shape
for each_tensor in (tensor_D, tensor_E):
    print(each_tensor, each_tensor.shape)

tensor([[[[0.5349, 0.1988, 0.6592, 0.6569, 0.2328, 0.4251, 0.2071, 0.6297,
           0.3653, 0.8513]]]]) torch.Size([1, 1, 1, 10])
tensor([0.5349, 0.1988, 0.6592, 0.6569, 0.2328, 0.4251, 0.2071, 0.6297, 0.3653,
        0.8513]) torch.Size([10])
