In [698]:
import numpy as np

In [699]:
def forward_linear_regression(X_batch: np.ndarray, y_batch: np.ndarray, weights: dict[str, np.ndarray]) -> tuple[float, dict[str, np.ndarray]]:
    """
    Implementation of forward pass of the perceptron.
    Here, We are calculating the matrices to get the prediction label.
    """
    # Checking if the number of labels is equal to number of inputs
    assert X_batch.shape[0] == y_batch.shape[0]
    # Checking the dimensions for matrix multiplication
    assert X_batch.shape[1] == weights['W'].shape[0]
    # Checking that B is 1x1 ndarray
    assert weights['B'].shape[0] == weights['B'].shape[1] == 1

    N = np.dot(X_batch, weights['W'])
    P = N + weights['B']

    loss = np.mean(np.square(y_batch - P))
    forward_info: dict[str, np.ndarray] = {'X': X_batch, 'N': N, 'P': P, 'y': y_batch}

    return loss, forward_info

In [700]:
def loss_gradients(forward_info: dict[str, np.ndarray], weights: dict[str, np.ndarray]) -> dict[str, np.ndarray]:
    batch_size  = forward_info['X'].shape[0]
    """
    dLdW = dLdP * dPdN * dNdW
    dLdP = dLdP * dPdB * dBdB
    """
    dLdP = -2 * (forward_info['y'] - forward_info['P'])
    dPdN = np.ones_like(forward_info['N'])
    dPdB = np.ones_like(weights['B'])

    dLdN = dLdP * dPdN
    dNdW = np.transpose(forward_info['X'], (1, 0)) # Check this

    dLdW = np.dot(dNdW, dLdN)
    dLdB = (dLdP * dPdB).sum(axis=0)

    loss_gradients: dict[str, np.ndarray] = {'W': dLdW, 'B': dLdB}

    return loss_gradients

In [701]:
Batch = tuple[np.ndarray, np.ndarray]

def generate_batch(X: np.ndarray, y: np.ndarray, start: int = 0, batch_size: int = 32) -> Batch:
    # Checking for perceptron requirements and same units
    assert X.ndim == y.ndim == 2
    
    # Controlling overflow
    if start + batch_size > X.shape[0]:
        batch_size = X.shape[0] - start
    
    X_batch, y_batch = X[start:start + batch_size], y[start:start + batch_size]
    
    return X_batch, y_batch

In [702]:
def forward_loss(X: np.ndarray, y:np.ndarray, weights: dict[str, np.ndarray]) -> tuple[dict[str, np.ndarray], float]:
    N = np.dot(X, weights['W'])
    P = N + weights['B']
    loss = np.mean(np.square(y - P))

    forward_info: dict[str, np.ndarray] = {'X': X, 'N': N, 'P': P, 'y': y}

    return forward_info, loss

In [703]:
def init_weights(n_in: int) -> dict[str, np.ndarray]:
    W = np.random.randn(n_in, 1)
    B = np.random.randn(1, 1)
    
    weights = {'W': W, 'B': B}
    
    return weights

In [704]:
def train(X: np.ndarray, y: np.ndarray, epochs: int = 100, batch_size: int = 32, learning_rate: float = 0.01,
          return_losses:bool = False, return_weights:bool = False, seed: int = 0
          ):

    if seed != 0:
        np.random.seed(seed)
    # Shuffling Data
    perm = np.random.permutation(X.shape[0])
    X, y = X[perm], y[perm]

    start = 0
    weightst = init_weights(X.shape[1])

    if return_losses:
        losses = []

    for _ in range(epochs):
        if start >= X.shape[0]:
            perm = np.random.permutation(X.shape[0])
            X, y = X[perm], y[perm]
            start = 0

        X_batch, y_batch = generate_batch(X, y, start, batch_size)
        start += batch_size

        forward_info, loss = forward_loss(X_batch, y_batch, weightst)

        if return_losses:
            losses.append(loss)

        loss_grads = loss_gradients(forward_info, weightst)
        for key in weightst.keys():
            weightst[key] -= (learning_rate * loss_grads[key])

        # print(weightst['W'], (learning_rate * loss_grads['W']), weightst['W'] - (learning_rate * loss_grads['W']))

    if return_weights:
        return losses, weightst

    return None

In [705]:
from sklearn.datasets import load_diabetes
diabetes = load_diabetes()
X, y = diabetes.data, diabetes.target

In [706]:
from sklearn.preprocessing import StandardScaler
s = StandardScaler()
X = s.fit_transform(X)

In [707]:
divd = int(0.7*len(X))
X_train, X_test, y_train, y_test = X[:divd], X[divd:], y[:divd], y[divd:]

y_train, y_test = y_train.reshape(-1, 1), y_test.reshape(-1, 1)

In [708]:
train_info = train(X_train, y_train, epochs=100, batch_size=32, learning_rate=0.001, return_losses=True, return_weights=True)
losses = train_info[0]
weights = train_info[1]

In [709]:
losses

[np.float64(25477.434886902054),
 np.float64(29290.797073118585),
 np.float64(18326.806280997233),
 np.float64(26942.86951368122),
 np.float64(15057.123416614857),
 np.float64(13977.44981543089),
 np.float64(14054.0092655529),
 np.float64(15803.084195664913),
 np.float64(12415.167571721078),
 np.float64(8325.344841622487),
 np.float64(10193.871089143166),
 np.float64(7890.100157083074),
 np.float64(7382.223115985411),
 np.float64(6984.491191272788),
 np.float64(7673.452682342233),
 np.float64(4847.3573996588275),
 np.float64(6804.3236558200715),
 np.float64(7551.957018186113),
 np.float64(6224.476904145541),
 np.float64(6456.28784463516),
 np.float64(5621.583808555383),
 np.float64(5424.442583909335),
 np.float64(4153.74932347992),
 np.float64(2995.086668344571),
 np.float64(3659.081691488398),
 np.float64(2617.8943270151235),
 np.float64(3815.253075216295),
 np.float64(3583.992581811881),
 np.float64(6009.271556910835),
 np.float64(4082.597727929987),
 np.float64(4042.958227320493),
 

In [710]:
weights

{'W': array([[ -0.20596269],
        [-12.96181516],
        [ 26.03833422],
        [ 13.68219862],
        [ -2.93616292],
        [ -5.99441123],
        [ -9.08201409],
        [  6.47473474],
        [ 24.40483544],
        [  5.36870243]]),
 'B': array([[152.8396778]])}