# 1. Imports

In [2]:
# importing other dependencies
import numpy as np

In [3]:
# importing PyTorch
import torch

# checks whether the MPS backend is available on this machine
print(torch.backends.mps.is_available())

# this ensures that the current PyTorch installation was built with MPS activated.
print(torch.backends.mps.is_built())

# setting the device to "mps" instead of default "cpu" when MPS is usable.
# NOTE: is_available must be *called* here -- the bare function object is always
# truthy, which would select "mps" even on machines where it is not available.
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")

True
True


# 2. Backpropagation

**Backpropagation** consists of 3 steps:
1. **Forward Pass:** Compute Loss
2. **Compute Local Gradients**
3. **Backward Pass:** Compute $\frac{\partial \text{ loss}}{\partial \text{ weight}}$ using the Chain Rule

# 2.1. Example: Manual Calculation

- ![](https://i.postimg.cc/rmG15524/image.png)
- ![](https://i.postimg.cc/9FsDFzvH/image.png)
- ![](https://i.postimg.cc/HLzW5LPx/image.png)

# 2.2. Example: PyTorch Calculation

In [8]:
# a single training example: input x = 1.0 with target y = 2.0
x, y = torch.tensor(1.0), torch.tensor(2.0)

# starting value of the weight; requires_grad=True asks autograd to track it
w = torch.tensor(1.0, requires_grad=True)

In [9]:
# forward pass: predict from x with the current weight, then take the
# squared error against the target y
y_predicted = x * w
loss = (y_predicted - y).pow(2)
print(loss)

# backward pass: autograd stores dLoss/dw in w.grad
loss.backward()
print(w.grad)

# matches the manual computation shown earlier, i.e. the gradient is -2

tensor(1., grad_fn=<PowBackward0>)
tensor(-2.)


In [10]:
# During training these steps are repeated:
#   forward pass -> backward pass -> weight update -> reset gradients

# gradient step with step size 0.01; it runs under no_grad because the
# update itself should not become part of the computational graph
with torch.no_grad():
    w.sub_(0.01 * w.grad)

# reset the stored gradient to zero before the next backward pass
w.grad.zero_()

# the next forward and backward pass would start from this weight
print(w)

# the weight is slightly higher after the first update

tensor(1.0200, requires_grad=True)
