# Least-Squares Regression

In this programming exercise, you will implement the least-squares regression algorithm using only NumPy. DO NOT use libraries such as scikit-learn or SciPy.

Use the template provided in this notebook to implement least-squares regression and visualize the results.

When you are done, paste the code into the Moodle quiz.

In [1]:
# importing packages
import numpy as np
from numpy.typing import ArrayLike
import matplotlib.pyplot as plt

In [2]:
# Importing data


def load_data(name: str) -> tuple[ArrayLike, ArrayLike]:
    """Read a two-column .npy file and split it into its x and y columns.

    Args:
        name (str): Path of the .npy file to read.

    Returns:
        tuple[ArrayLike, ArrayLike]: The x values followed by the y values,
        obtained by unpacking the transposed array loaded from the file.
    """
    samples = np.load(name)
    # Transposing turns the per-sample rows into one row per variable, so the
    # result can be unpacked directly into the two coordinate arrays.
    x_values, y_values = samples.transpose()
    return x_values, y_values

In [3]:
# Plotting the data and the regression model.


def plot(x: ArrayLike, y: ArrayLike, w: ArrayLike = None):
    """Plot the data and, optionally, the linear regression model.

    Only for plotting 2D data.

    Args:
        x (ArrayLike): The input data of shape (n,).
        y (ArrayLike): The output data of shape (n,).
        w (ArrayLike, optional): The regression coefficients, indexed as
            (slope, intercept). When None, only the data points are drawn.
            Defaults to None.
    """
    # Convert once so that plain Python sequences support the element-wise
    # arithmetic used for the prediction line below.
    x = np.asarray(x)

    # Plot the data
    plt.scatter(x, y, color='blue', alpha=0.5, label='Data')

    # The prediction is optional: the default w=None must not be indexed.
    if w is not None:
        plt.plot(x, w[0] * x + w[1], color='red', label='Prediction')

    plt.xlabel('x')
    plt.ylabel('y_pred')
    plt.title('Linear Regression')
    # Labels are attached to each artist above, so every legend entry is tied
    # to the element it describes and no entry appears for a missing line.
    plt.legend()
    plt.show()

In [9]:
# Least-Squares Regression


def regression(x: ArrayLike, y: ArrayLike) -> ArrayLike:
    """Calculates the linear least-squares regression coefficients.

    Fits the model y = w[0] * x + w[1] by solving the normal equations
    (X^T X) w = X^T y, where X is the design matrix whose columns are x
    and a column of ones.

    Args:
        x (ArrayLike): The input data of shape (n,).
        y (ArrayLike): The output data of shape (n,). A 2-D array of shape
            (n, k) is also accepted and fits k targets at once.

    Returns:
        ArrayLike: The linear regression coefficients, slope first and
        intercept second. Shape (2,) for 1-D y, shape (2, k) for 2-D y.

    Raises:
        numpy.linalg.LinAlgError: May be raised when X^T X is singular, so
            that no unique solution exists.
    """
    x = np.asarray(x)
    y = np.asarray(y)

    # Design matrix: one row per sample, a trailing column of ones for the bias.
    X = np.column_stack((x, np.ones(len(x))))

    # Solve the linear system directly instead of forming an explicit inverse;
    # keeping the operands in (X^T X) w = X^T y order also makes the result
    # well-defined for column-vector / multi-target y.
    return np.linalg.solve(X.T @ X, X.T @ y)

In [5]:
# Files to process, one regression fit and one figure per file.
datasets = [
    "dataset0.npy",
    "dataset1.npy",
    "dataset2.npy",
    "dataset3.npy",
]

for dataset in datasets:
    # Load the samples, fit the line, then show data and fit together.
    x, y = load_data(dataset)
    w = regression(x, y)
    plot(x, y, w=w)