In [1]:
import numpy as np
from numpy import ndarray
from typing import Callable, Dict, Tuple, List

In [2]:
# Linear Regression
def forward_linear_regression(X_batch: ndarray, y_batch: ndarray, weights: Dict[str, ndarray]) -> Tuple[float, Dict[str, ndarray]]:
    assert X_batch.shape[0] == y_batch.shape[0], "X and y batches number of rows do not match"
    assert X_batch.shape[1] == weights["W"].shape[0], "X batch number of columns and weights number of rows do not match"
    assert weights["B"].shape[0] == weights["W"].shape[1] == 1, "B number of rows and W number of columns different from 1"

    N = np.dot(X_batch, weights["W"])
    P = N + weights["B"]
    loss = np.mean(np.power(y_batch - P, 2))

    # save the information computed on the forward pass
    forward_info: Dict[str, ndarray] = {}
    forward_info['X'] = X_batch
    forward_info['N'] = N
    forward_info['P'] = P
    forward_info['y'] = y_batch

    return loss, forward_info

X_batch = np.array([
    [0, 1, 2], # X1
    [3, 4, 5], # X2
    [5, 6, 7], # X3
])

weights = {
    "B": np.array([2]), # will be broadcasted (?) as if np.array([[2], [2], [2]])
    "W": np.array([
        # W1
        [1], 
        [2], 
        [3],
    ]),
}

y_batch = np.array([
    [11],
    [27],
    [41],
])

loss, forward_info = forward_linear_regression(X_batch, y_batch, weights)
loss, forward_info

(1.0,
 {'X': array([[0, 1, 2],
         [3, 4, 5],
         [5, 6, 7]]),
  'N': array([[ 8],
         [26],
         [38]]),
  'P': array([[10],
         [28],
         [40]]),
  'y': array([[11],
         [27],
         [41]])})

In [3]:
# Calculating the Gradients
def loss_gradients(forward_info: Dict[str, ndarray], weights: Dict[str, ndarray]) -> Dict[str, ndarray]:
    dLdP = -2 * (forward_info["y"] - forward_info["P"])
    dPdN = np.ones_like(forward_info["N"])
    dLdN = dLdP * dPdN
    dNdW = np.transpose(forward_info["X"])
    dLdW = np.dot(dNdW, dLdN)

    dPdB = np.ones_like(weights["B"])
    dLdB = dLdP * dPdB
    B = dLdB.sum(axis=0) # axis sum due to 1-dim B (?)

    # intermdiate results added to B and W for debugging purposes
    loss_gradients = {
        "dLdP": dLdP,
        "dPdN": dPdN,
        "dLdN": dLdN,
        "dNdW": dNdW,
        "W": dLdW,
        "dPdB": dPdB,
        "dLdB": dLdB,
        "B": B, 
    }
    return loss_gradients

loss_gradients(forward_info, weights)

{'dLdP': array([[-2],
        [ 2],
        [-2]]),
 'dPdN': array([[1],
        [1],
        [1]]),
 'dLdN': array([[-2],
        [ 2],
        [-2]]),
 'dNdW': array([[0, 3, 5],
        [1, 4, 6],
        [2, 5, 7]]),
 'W': array([[-4],
        [-6],
        [-8]]),
 'dPdB': array([1]),
 'dLdB': array([[-2],
        [ 2],
        [-2]]),
 'B': array([-2])}