In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split

In [47]:
# Using SkLearn to get a sample regression dataset
X, y1 = make_regression(n_samples=200, n_features=8, n_informative=5, n_targets=5, noise=10)


In [48]:
X_train, X_test, y_train, y_test = train_test_split(X, y1, test_size=0.2, random_state=42)

input_size = X_train.shape[1]
output_size = y_train.shape[1]
samples = X_train.shape[0]

X_train = X_train.reshape(input_size, samples) # input_size x batch_size = 8 * 160 
X_test = X_test.reshape(input_size, -1) # input_size x batch_size = 8 * 40 
y_train = y_train.reshape(output_size,samples) # output_size x batch_size = 1 x 160
y_test = y_test.reshape(output_size,-1) # output_size x batch_size = 1 x 40

In [49]:
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((8, 160), (8, 40), (5, 160), (5, 40))

In [50]:
class MultiLinearRegression():
    def __init__(self, input_size, output_size, batch_size, lr):
        self.weights = np.random.randn(output_size, input_size) # W: 1x8
        self.bias = np.random.randn(output_size, batch_size) # 1x160
        self.lr = lr
        self.batch_size = batch_size
        return
    
    def forward(self, X):
        '''
        X = 8 x 160 (input_size x batch_size)
        weights = 1 x 8 (output_size x input_size)
        bias = 1 x 160 (output_size x batch_size)
        '''
        return np.dot(self.weights, X) + self.bias # output_size x 1 = 1x1

    def backward(self, X, Y, Y_hat):
        '''
        All inputs are matrices.
        X = 8 x 1 (input_size x 1)
        Y = Y_hat = 1 x 160 (output_size x batch)

        calculate the following gradients.
        params update: dL/DW, dL/dB
        dL/dY needed for chain rule calc of params

        dL/dY = (2/n) * (Y - Y_hat)
        dL/dW = dL/dY . X.T
        dL/dB = dL/dY
        '''
        dL_dY = (2/self.batch_size) * np.subtract(Y, Y_hat) # (1 x 160)
        dL_dW = np.dot(dL_dY, X.T) # (1 x 8)
        dL_dB = dL_dY # (1 x 160)

        self.weights-= self.lr*dL_dW
        self.bias-= self.lr*dL_dB
        return

def mse_loss(Y, Y_hat):
    '''
    Y and Y_hat are vectors. mse = 1/n (summation((y-y_hat)**2))
    Y = Y_hat = output_size x batch = 1 x 160
    '''
    e = np.subtract(Y, Y_hat)
    n = Y.shape[1]
    return (1/n) * np.sum(np.square(e)) # to return element-wise square of the array input, use the numpy.square() method
    # return np.sum((e)**2) # works the same as above
    # return np.matmul(e.T, e)  # CF = e_T * e perform summation over all squared errors


In [51]:
# Hyperparameters for training
epochs = 100
lr = 0.001


model = MultiLinearRegression(input_size=input_size, output_size=output_size, batch_size=samples, lr=lr)

In [52]:
train_loss = []
for epoch in range(epochs):
    # forward pass - get prediction
    Y_hat = model.forward(X_train) # X_train: (8 x 160), Y_hat :(1 x 160)
    
    # loss
    loss = mse_loss(y_train, Y_hat) # loss : scalar
    train_loss.append(loss)

    # backward pass - update gradients
    '''
    model.weights.shape : (1, 8), model.bias.shape : (1, 160)
    '''
    model.backward(X_train, y_train, Y_hat)
    if epoch%20==0:
        print(f'Loss = {round(loss,4)} at epoch {epoch}/{epochs}')

print(f"Average Train MSE loss over {epochs} epochs = {round(np.average(train_loss),4)}")


Loss = 106925.9871 at epoch 0/100
Loss = 107398.8235 at epoch 20/100
Loss = 107909.2811 at epoch 40/100
Loss = 108460.8641 at epoch 60/100
Loss = 109057.4167 at epoch 80/100
Average Train MSE loss over 100 epochs = 108210.703
