In [673]:
import numpy as np

In [674]:
def forward_linear_regression(X_batch: np.ndarray, y_batch: np.ndarray, weights: dict[str, np.ndarray]) -> tuple[float, dict[str, np.ndarray]]:
    """
    Implementation of forward pass of the perceptron.
    Here, We are calculating the matrices to get the prediction label.
    """
    # Checking if the number of labels is equal to number of inputs
    assert X_batch.shape[0] == y_batch.shape[0]
    # Checking the dimensions for matrix multiplication
    assert X_batch.shape[1] == weights['W'].shape[0]
    # Checking that B is 1x1 ndarray
    assert weights['B'].shape[0] == weights['B'].shape[1] == 1

    N = np.dot(X_batch, weights['W'])
    P = N + weights['B']

    loss = np.mean(np.square(y_batch - P))
    forward_info: dict[str, np.ndarray] = {'X': X_batch, 'N': N, 'P': P, 'y': y_batch}

    return loss, forward_info

In [675]:
def loss_gradients(forward_info: dict[str, np.ndarray], weights: dict[str, np.ndarray]) -> dict[str, np.ndarray]:
    batch_size  = forward_info['X'].shape[0]
    """
    dLdW = dLdP * dPdN * dNdW
    dLdP = dLdP * dPdB * dBdB
    """
    dLdP = -2 * (forward_info['y'] - forward_info['P'])
    dPdN = np.ones_like(forward_info['N'])
    dPdB = np.ones_like(weights['B'])

    dLdN = dLdP * dPdN
    dNdW = np.transpose(forward_info['X'], (1, 0)) # Check this

    dLdW = np.dot(dNdW, dLdN)
    dLdB = (dLdP * dPdB).sum(axis=0)

    loss_gradients: dict[str, np.ndarray] = {'W': dLdW, 'B': dLdB}

    return loss_gradients

In [676]:
Batch = tuple[np.ndarray, np.ndarray]

def generate_batch(X: np.ndarray, y: np.ndarray, start: int = 0, batch_size: int = 32) -> Batch:
    # Checking for perceptron requirements and same units
    assert X.ndim == y.ndim == 2
    
    # Controlling overflow
    if start + batch_size > X.shape[0]:
        batch_size = X.shape[0] - start
    
    X_batch, y_batch = X[start:start + batch_size], y[start:start + batch_size]
    
    return X_batch, y_batch

In [677]:
def forward_loss(X: np.ndarray, y:np.ndarray, weights: dict[str, np.ndarray]) -> tuple[dict[str, np.ndarray], float]:
    N = np.dot(X, weights['W'])
    P = N + weights['B']
    loss = np.mean(np.square(y - P))

    forward_info: dict[str, np.ndarray] = {'X': X, 'N': N, 'P': P, 'y': y}

    return forward_info, loss

In [678]:
def init_weights(n_in: int) -> dict[str, np.ndarray]:
    W = np.random.randn(n_in, 1)
    B = np.random.randn(1, 1)
    
    weights = {'W': W, 'B': B}
    
    return weights

In [679]:
def train(X: np.ndarray, y: np.ndarray, epochs: int = 100, batch_size: int = 32, learning_rate: float = 0.01,
          return_losses:bool = False, return_weights:bool = False, seed: int = 0
          ):

    if seed != 0:
        np.random.seed(seed)
    # Shuffling Data
    perm = np.random.permutation(X.shape[0])
    X, y = X[perm], y[perm]

    start = 0
    weightst = init_weights(X.shape[1])

    if return_losses:
        losses = []

    for _ in range(epochs):
        if start >= X.shape[0]:
            perm = np.random.permutation(X.shape[0])
            X, y = X[perm], y[perm]
            start = 0

        X_batch, y_batch = generate_batch(X, y, start, batch_size)
        start += batch_size

        forward_info, loss = forward_loss(X_batch, y_batch, weightst)

        if return_losses:
            losses.append(loss)

        loss_grads = loss_gradients(forward_info, weightst)
        for key in weightst.keys():
            weightst[key] -= (learning_rate * loss_grads[key])

        # print(weightst['W'], (learning_rate * loss_grads['W']), weightst['W'] - (learning_rate * loss_grads['W']))

    if return_weights:
        return losses, weightst

    return None

In [680]:
from sklearn.datasets import load_diabetes
diabetes = load_diabetes()
X, y = diabetes.data, diabetes.target

In [681]:
from sklearn.preprocessing import StandardScaler
s = StandardScaler()
X = s.fit_transform(X)

In [682]:
divd = int(0.7*len(X))
X_train, X_test, y_train, y_test = X[:divd], X[divd:], y[:divd], y[divd:]

y_train, y_test = y_train.reshape(-1, 1), y_test.reshape(-1, 1)

In [683]:
train_info = train(X_train, y_train, epochs=1000, batch_size=32, learning_rate=0.001, return_losses=True, return_weights=True)
losses = train_info[0]
weights = train_info[1]

In [684]:
losses

[np.float64(29117.2601353112),
 np.float64(20871.597429577938),
 np.float64(22990.00349596698),
 np.float64(21577.844177997555),
 np.float64(14213.410916054374),
 np.float64(20492.835100645585),
 np.float64(12700.022428153967),
 np.float64(13527.9172714378),
 np.float64(10081.197424408481),
 np.float64(12039.725142427375),
 np.float64(9648.817140969239),
 np.float64(8310.281497436032),
 np.float64(9285.464947467211),
 np.float64(7822.992798423985),
 np.float64(5972.054594295667),
 np.float64(5800.778989776171),
 np.float64(4867.730028242388),
 np.float64(6918.091646578207),
 np.float64(6786.146595080458),
 np.float64(5311.315596487744),
 np.float64(4993.615671731728),
 np.float64(4101.865639740742),
 np.float64(5110.5804522006365),
 np.float64(4173.570607271394),
 np.float64(4152.523217340489),
 np.float64(4459.285173799011),
 np.float64(4115.626505076714),
 np.float64(3861.7046415896993),
 np.float64(3493.652202265216),
 np.float64(3344.3371145858628),
 np.float64(3532.7448115846382),

In [685]:
weights

{'W': array([[ -0.43408289],
        [-12.65724898],
        [ 26.19698856],
        [ 13.11903949],
        [ -8.00756554],
        [ -2.51139975],
        [ -5.62079708],
        [  8.10297603],
        [ 26.33291994],
        [  5.28669503]]),
 'B': array([[153.2199529]])}