In [45]:
import numpy as np
from numpy import ndarray
from typing import Dict, Tuple

In [46]:
def linear_regression_forward(X_batch: ndarray,
                              y_batch: ndarray,
                              weights: Dict[str, ndarray]) -> Tuple[float, Dict[str, ndarray]]:
    # input dimensions
    assert X_batch.shape[0] == y_batch.shape[0]
    assert X_batch.shape[1] == weights['W'].shape[0]
    assert weights['B'].shape[0] == weights['B'].shape[1]
    
    # forward pass
    N = np.dot(X_batch, weights['W'])
    P = N + weights['B']
    
    # calculate loss
    # method 1
    loss = np.mean(np.power(y_batch - P, 2))
    # method 2
    # loss = np.sum(np.power(y_batch - P, 2))/(2*len(X_batch))
    
    # save forward pass info
    forward_info: Dict[str, ndarray] = {}
    forward_info['X'] = X_batch
    forward_info['P'] = P
    forward_info['N'] = N 
    forward_info['y'] = y_batch
    
    return loss, forward_info

In [47]:
X = np.array([
    [0,2],
    [2,5],
    [3,4]
     ])

y = np.array([
    [0],
    [1],
    [1]
     ])

w = {
    'W': np.array([
        [0.2],
        [0.1]
        ]),
    'B': np.array([[0.1]])
}

loss, forward_info = linear_regression_forward(X, y, w)
print('Loss: ', loss)
print('Forward Pass: ');display(forward_info)

Loss:  0.03333333333333335
Forward Pass: 


{'X': array([[0, 2],
        [2, 5],
        [3, 4]]),
 'P': array([[0.3],
        [1. ],
        [1.1]]),
 'N': array([[0.2],
        [0.9],
        [1. ]]),
 'y': array([[0],
        [1],
        [1]])}

In [48]:
def loss_gradients(forward_info: Dict[str, ndarray], weights: Dict[str, ndarray]) -> Dict[str, ndarray]:
    batch_size = forward_info['X'].shape[0] # number of samples
    
    # find grads: dLdB, dLdW
    
    # grad of loss wrt p -> using squared error: (y-P) ^ 2
    dLdP = -2 * (forward_info['y'] - forward_info['P'])
    # for mean squared error, grad of loss wrt p -> using mean squared error: 1/2(y-P) ^ 2
    # dLdP = forward_info['P'] - forward_info['y']
    
    # intermediate derivatives
    dPdN = np.ones_like(forward_info['N'])
    dPdB = np.ones_like(weights['B'])
    dLdN = dLdP * dPdN 
    dNdW = np.transpose(forward_info['X'], (1, 0))
    
    # final derivatives of w, b
    dLdW = np.dot(dNdW, dLdN)
    dLdB = (dLdP * dPdB).sum(axis=0, keepdims=True)
    
    # save the loss
    loss_gradients: Dict[str, ndarray] = {}
    loss_gradients['W'] = dLdW
    loss_gradients['B'] = dLdB
    
    return loss_gradients

loss_g = loss_gradients(forward_info, w)
display(loss_g)

{'W': array([[0.6],
        [2. ]]),
 'B': array([[0.8]])}