# Linear regression in PyTorch with manual gradient descent

Markus Enzweiler, markus.enzweiler@hs-esslingen.de

This is a demo used in a Computer Vision & Machine Learning lecture. Feel free to use and contribute.

## Setup

Adapt `package_path` to point to the directory containing this notebook.

In [None]:
# Notebook id
nb_id = "linear_regression/torch"

# Imports
import sys
import os

In [None]:
# Package Path (folder of this notebook)

#####################
# Local environment #
#####################

package_path = "./"


#########
# Colab #
#########


def check_for_colab():
    """Tell whether this notebook is executing inside Google Colab.

    The check simply attempts to import the ``google.colab`` package.

    Returns:
        bool: ``True`` if the import succeeds, ``False`` if it raises
        ``ImportError``.
    """
    try:
        # Imported only as an availability probe; the module itself is unused here
        import google.colab  # noqa: F401
    except ImportError:
        return False
    else:
        return True


# Running on Colab? Detect this once; the flag selects the package path below
# and is also passed to the requirements installation later in this notebook.
on_colab = check_for_colab()

if on_colab:
    # Assume this notebook is run from Google Drive and the whole
    # cv-ml-lecture-notebooks repo has been set up via setupOnColab.ipynb

    # Google Drive mount point
    gdrive_mnt = "/content/drive"

    ##########################################################################
    # Ensure that this is the same as gdrive_repo_root in setupOnColab.ipynb #
    ##########################################################################
    # Path on Google Drive to the cv-ml-lecture-notebooks repo
    gdrive_repo_root = f"{gdrive_mnt}/MyDrive/cv-ml-lecture-notebooks"

    # Mount Google Drive at the mount point (a fresh mount is requested
    # via force_remount=True)
    from google.colab import drive

    drive.mount(gdrive_mnt, force_remount=True)

    # Set package path: replaces the local default with
    # <repo root on Drive>/<notebook id>
    package_path = f"{gdrive_repo_root}/{nb_id}"

# Check whether the package path exists and fail early with a clear error
# if it does not, since all later cells build paths relative to it
if not os.path.isdir(package_path):
    raise FileNotFoundError(f"Package path does not exist: {package_path}")

print(f"Package path: {package_path}")

In [None]:
# Additional imports

# Repository root: two directory levels above the notebook folder
repo_root = os.path.abspath(os.path.join(package_path, os.pardir, os.pardir))
# Register the repository root as an import location, unless it is already
# listed, so that the `nbutils` package below can be imported
if repo_root not in sys.path:
    sys.path.append(repo_root)

# Package Imports
from nbutils import requirements as nb_reqs
from nbutils import colab as nb_colab
from nbutils import git as nb_git
from nbutils import exec as nb_exec
from nbutils import data as nb_data

In [None]:
# Install requirements in the current Jupyter kernel.
# The requirements file is looked up inside package_path (the notebook folder).
# on_colab is forwarded to the helper -- presumably so it can adapt the
# installation to Colab; see nbutils.requirements to confirm.
req_file = os.path.join(package_path, "requirements.txt")
nb_reqs.pip_install_reqs(req_file, on_colab)

In [None]:
# Now we should be able to import the additional packages
import torch
import numpy as np
import matplotlib.pyplot as plt

# Set the random seed for reproducibility
torch.manual_seed(0);

## Linear regression

### Create some data based on adding noise to a known linear function

In [None]:
# Create noisy samples of a known linear function f(x) = w_true * x + b_true,
# i.e. f(x) = 2x + 1 for the values below, with additive Gaussian noise

# True parameters
w_true = 2
b_true = 1

# Standard deviation of the Gaussian noise added to the targets
noise_std = 2

# Inputs on a regular grid from -5 up to (but excluding) 5 in steps of 0.1
X = torch.arange(-5, 5, 0.1)
# torch.randn draws standard-normal samples; scaling by noise_std sets the spread
Y = w_true * X + b_true + noise_std * torch.randn(X.shape)

# Visualize
plt.scatter(X, Y, alpha=0.5)
# Build the title from the true parameters so it cannot drift out of sync
# when w_true or b_true are changed
plt.title(f"Scatter plot of f(x) = {w_true}x + {b_true} + Gaussian noise")
plt.show()

### Linear model and loss

Our linear regression model is $y = f(x) = w \cdot x + b$. We solve for $w$ and $b$ using gradient descent.

In [None]:
def lin_model(w, b, x):
    """Evaluate the linear model ``w * x + b``.

    Args:
        w: Slope (weight) of the line.
        b: Intercept (bias) of the line.
        x: Input value(s) at which the model is evaluated.

    Returns:
        The prediction ``w * x + b``.
    """
    scaled = w * x
    return scaled + b

We use the mean squared error (MSE) loss between the predictions $\hat{y_i}$ of our model and the true values $y_i$:

$$L(w,b) = \frac{1}{N} \sum_{i=1}^{N} (y_i - \hat{y_i})^2= \frac{1}{N} \sum_{i=1}^{N} (y_i - (w \cdot x_i + b))^2$$


In [None]:
def loss_fn(y_pred, y):
    """Mean squared error between predictions and targets.

    Args:
        y_pred: Model predictions.
        y: True target values.

    Returns:
        The mean of the squared differences ``(y - y_pred)``, as computed by
        ``torch.mean``.
    """
    residuals = y - y_pred
    squared_errors = torch.square(residuals)
    return torch.mean(squared_errors)

### Optimization via gradient descent

The gradients of the loss $L(w,b)$ with respect to both $w$ and $b$ are (application of chain rule):



$$\frac{\partial L}{\partial w} = \frac{1}{N} \sum_{i=1}^{N} 2(y_i - (w \cdot x_i + b)) \cdot \frac{\partial}{\partial w} (y_i - (w  \cdot x_i + b))$$

$$ = \frac{1}{N} \sum_{i=1}^{N} 2(y_i - (w \cdot  x_i + b)) \cdot (-x_i)$$ 

$$ = \frac{1}{N} \sum_{i=1}^{N} -2(y_i - w \cdot x_i - b) \cdot x_i$$

---

$$\frac{\partial L}{\partial b} = \frac{1}{N} \sum_{i=1}^{N} 2(y_i - (w \cdot x_i + b)) \cdot \frac{\partial}{\partial b} (y_i - (w \cdot x_i + b))$$

$$ = \frac{1}{N} \sum_{i=1}^{N} 2(y_i - (w \cdot x_i + b)) \cdot (-1)$$

$$ = \frac{1}{N} \sum_{i=1}^{N} -2(y_i - w \cdot x_i - b)$$


 






In [None]:
def grad_loss_w(x, y, w, b):
    """Gradient of the mean squared error loss with respect to the weight ``w``.

    Args:
        x: Input values.
        y: True target values.
        w: Current weight.
        b: Current bias.

    Returns:
        The mean over all samples of ``-2 * (y - w * x - b) * x``.
    """
    residuals = y - w * x - b
    return torch.mean(-2 * residuals * x)

def grad_loss_b(x, y, w, b):
    """Gradient of the mean squared error loss with respect to the bias ``b``.

    Args:
        x: Input values.
        y: True target values.
        w: Current weight.
        b: Current bias.

    Returns:
        The mean over all samples of ``-2 * (y - w * x - b)``.
    """
    residuals = y - w * x - b
    return torch.mean(-2 * residuals)

Initialize parameters $w$ and $b$ randomly.

In [None]:
# Start both parameters at small random values: one standard-normal draw
# each (first for w, then for b), scaled down by a factor of 0.01
w = torch.randn(1, dtype=torch.float32) * 1e-2
b = torch.randn(1, dtype=torch.float32) * 1e-2

# Show the starting point of the optimization
print(f"Initial weight : {w}")
print(f"Initial bias   : {b}")

Optimize via gradient descent

In [None]:
# Hyperparameters
num_iters = 10000
learning_rate = 3e-4
# Print a status line every `log_every` iterations (and on the last one)
log_every = 500

# Loop over the number of iterations
for it in range(num_iters):

    # Analytic gradients of the loss function w.r.t. the parameters
    grad_w = grad_loss_w(X, Y, w, b)
    grad_b = grad_loss_b(X, Y, w, b)

    # Gradient descent update: step against the gradient, scaled by the
    # learning rate. The in-place updates keep `w` and `b` as the same tensors.
    with torch.no_grad():
        w -= learning_rate * grad_w
        b -= learning_rate * grad_b

    # Give some status output once in a while
    if it % log_every == 0 or it == num_iters - 1:
        # The loss is needed only for reporting, so it is evaluated here
        # (not on every iteration) and with the freshly updated parameters,
        # which keeps the printed loss consistent with the printed w and b.
        Y_pred = lin_model(w, b, X)
        loss = loss_fn(Y_pred, Y)
        print(
            f"Iteration {it:5d} | Loss {loss.item():>10.5f} | "
            f"w {w.item():> 8.5f} | b {b.item():> 8.5f}"
        )

print(f"Final weight after optimization : {w.item():.5f} (true: {w_true})")
print(f"Final bias after optimization   : {b.item():.5f} (true: {b_true})")

Visualize linear fit

In [None]:
# Visualize the noisy samples together with the fitted line
plt.scatter(X, Y, alpha=0.5, label="data")
# Build the title from the true parameters so it stays correct if they change
plt.title(f"Scatter plot of f(x) = {w_true}x + {b_true} + Gaussian noise")

# Plot the recovered line
Y_model = lin_model(w, b, X)
plt.plot(
    X.tolist(),
    Y_model.tolist(),
    color="red",
    label=f"f(x) = {w.item():.3f}x + {b.item():.3f}",
)

# Each artist carries its own label, so the legend entries cannot be swapped
# by the order in which matplotlib enumerates scatter and line artists
plt.legend()
plt.show()