In [11]:
import numpy as np

In [12]:
# https://web.eecs.umich.edu/~justincj/teaching/eecs442/notes/linear-backprop.html

In [13]:
class Linear:
    """Fully connected layer ``out = xs @ W (+ b)`` with a hand-written backward pass.

    When ``use_bias`` is true the bias is stored as the last row of ``matr`` and
    ``forward`` appends a column of ones to the input, so a single matrix
    product applies both the weights and the bias.
    """

    def __init__(self, num_in, num_out, use_bias = True) -> None:
        """Create the layer with weights drawn from a standard normal distribution.

        Args:
            num_in: number of input features per sample.
            num_out: number of output features per sample.
            use_bias: when true, one extra weight row is allocated for the bias.
        """
        self.use_bias = use_bias
        self.num_in = num_in
        self.num_out = num_out
        # Shape (num_in + 1, num_out) with bias, (num_in, num_out) without.
        self.matr = np.random.normal(0, 1., (num_in + (1 if use_bias else 0), num_out))
        # Gradient of the loss w.r.t. ``matr``; replaced on every get_grad call.
        self.grads = np.zeros_like(self.matr)
        # Input of the latest recording forward call (ones column included).
        self.last_xs = None

    def get_grad(self, upstream_resp_loss: np.ndarray) -> np.ndarray:
        """Backpropagate through the layer using the last recorded inputs.

        Stores the gradient w.r.t. the weights in ``self.grads`` and returns the
        gradient w.r.t. the layer inputs.

        Args:
            upstream_resp_loss: gradient of the loss w.r.t. this layer's
                output, one row per sample that was passed to ``forward``.

        Returns:
            Gradient of the loss w.r.t. the inputs given to ``forward``
            (the appended bias column is not included).

        Raises:
            RuntimeError: if no forward pass has recorded its inputs yet.
        """
        if self.last_xs is None:
            # Without recorded inputs the weight gradient cannot be formed;
            # fail with an explicit message instead of an AttributeError on None.
            raise RuntimeError(
                f"{type(self).__name__}.get_grad called before a recording "
                "forward pass; call forward(xs) with no_record=False first"
            )

        # The loss attributed to the inputs is proportional to the weights.
        # The bias row is dropped: the ones column is not a real input, so
        # nothing needs to be passed back for it.
        weights = self.matr[:-1, :] if self.use_bias else self.matr
        dloss_din = upstream_resp_loss @ weights.T

        # The loss attributed to the weights is proportional to the inputs
        # we received (summed over the samples by the matrix product).
        dloss_dweights = self.last_xs.T @ upstream_resp_loss

        assert self.grads.shape == dloss_dweights.shape
        self.grads = dloss_dweights

        return dloss_din

    def forward(self, xs: np.ndarray, no_record: bool = False) -> np.ndarray:
        """Apply the layer to a batch of inputs.

        Args:
            xs: 2-D array with one sample per row.
            no_record: when true, the inputs are not stored, so a later
                ``get_grad`` still refers to the previously recorded batch.

        Returns:
            ``xs @ W`` plus the bias row when ``use_bias`` is set.
        """
        # Append a ones column so the last row of ``matr`` acts as the bias.
        fixed_xs = xs
        if self.use_bias:
            fixed_xs = np.hstack((fixed_xs, np.ones((fixed_xs.shape[0], 1))))

        if not no_record:
            self.last_xs = fixed_xs

        return fixed_xs @ self.matr

In [14]:
class LinearPercepFused(Linear):
    """Linear layer whose forward output is passed through a hard threshold.

    Only ``forward`` is overridden; ``get_grad`` is inherited from ``Linear``
    unchanged, so the backward pass is the plain linear one and does not
    account for the threshold.
    """

    def forward(self, xs: np.ndarray, no_record: bool = False) -> np.ndarray:
        """Return 1.0 where the linear pre-activation is strictly positive, else 0.0."""
        pre_activation = super().forward(xs, no_record)
        fires = pre_activation > 0.
        return fires.astype(float)

In [15]:
class MSELoss:
    """Mean squared error loss with a hand-written gradient.

    ``forward`` stores the residuals so that ``get_grad`` can return the
    gradient of that same loss value with respect to the predictions.
    """

    def __init__(self) -> None:
        # Element-wise residuals ``y - y_exp`` from the latest forward call.
        self.last_diffs = None

    def get_grad(self) -> np.ndarray:
        """Gradient of the latest loss with respect to the predictions ``y``.

        ``forward`` averages over every element of the residual array, so the
        derivative is ``2 * diff / diff.size``. Dividing by ``len(diff)`` (the
        first axis only) would overstate the gradient for multi-column targets.

        Returns:
            Array with the same shape as the residuals.

        Raises:
            RuntimeError: if ``forward`` has not been called yet.
        """
        if self.last_diffs is None:
            raise RuntimeError("MSELoss.get_grad called before forward")
        return 2.*self.last_diffs/self.last_diffs.size

    def forward(self, y: np.ndarray, y_exp) -> np.ndarray:
        """Compute the mean of the squared residuals and remember the residuals.

        Args:
            y: predictions.
            y_exp: expected values, broadcastable against ``y``.

        Returns:
            The mean over all elements of ``(y - y_exp) ** 2``.
        """
        self.last_diffs = np.asarray(y) - np.asarray(y_exp)
        return np.mean(self.last_diffs**2)

In [16]:
# Disabled alternative dataset: points on the line y = 10x + 2, with noise
# added to the x values.
# xs = np.linspace(-10, 10, 1000)
# ys = xs*10 + 2
# xs += np.random.normal(0.,.1, len(xs))
#
# xs = xs.reshape(-1, 1)
# ys = ys.reshape(-1, 1)

# XOR truth table: one row per input pair; the target is 1 when exactly one
# of the two inputs is 1.
xs = np.array(
    [
        [0, 0],
        [0, 1],
        [1, 0],
        [1, 1],
    ],
    dtype=float,
)
ys = np.array(
    [
        [0],
        [1],
        [1],
        [0],
    ],
    dtype=float,
)

In [17]:
# Build the training batch: every sample appears ten times in a row, and the
# targets get a small Gaussian perturbation (standard deviation 0.01).
xs_repeat = np.repeat(xs, 10, axis=0)
ys_repeat = np.repeat(ys, 10, axis=0)
ys_repeat += np.random.normal(loc=0, scale=.01, size=ys_repeat.shape)

In [18]:
import matplotlib.pyplot as plt

In [19]:
# Step size used for the gradient-descent weight updates.
LR = 1e-3

In [20]:
# Two thresholded layers: 2 inputs -> 20 hidden units -> 1 output.
lin1 = LinearPercepFused(2, 20, use_bias=True)
lin2 = LinearPercepFused(20, 1, use_bias=True)
lossf = MSELoss()

# Plain gradient descent for 3000 steps on the full batch.
for _ in range(3000):
    # Forward pass through both layers, then the loss.
    out_l1 = lin1.forward(xs_repeat)
    out = lin2.forward(out_l1)
    loss = lossf.forward(out, ys_repeat)
    print(f"{loss=}")

    # Backward pass: the loss gradient flows through lin2, then lin1; each
    # get_grad call also refreshes that layer's ``grads``.
    pass_back_grad = lin1.get_grad(lin2.get_grad(lossf.get_grad()))

    # Move each weight matrix against its gradient.
    lin1.matr -= LR * lin1.grads
    lin2.matr -= LR * lin2.grads

loss=0.4980464714817792
loss=0.4980464714817792
loss=0.4980464714817792
loss=0.4980464714817792
loss=0.4980464714817792
loss=0.4980464714817792
loss=0.4980464714817792
loss=0.4980464714817792
loss=0.4980464714817792
loss=0.4980464714817792
loss=0.4980464714817792
loss=0.4980464714817792
loss=0.4980464714817792
loss=0.4980464714817792
loss=0.4980464714817792
loss=0.4980464714817792
loss=0.4980464714817792
loss=0.4980464714817792
loss=0.4980464714817792
loss=0.4980464714817792
loss=0.4980464714817792
loss=0.4980464714817792
loss=0.4980464714817792
loss=0.4980464714817792
loss=0.4980464714817792
loss=0.4980464714817792
loss=0.4980464714817792
loss=0.4980464714817792
loss=0.4980464714817792
loss=0.4980464714817792
loss=0.4980464714817792
loss=0.4980464714817792
loss=0.4980464714817792
loss=0.4980464714817792
loss=0.4980464714817792
loss=0.4980464714817792
loss=0.4980464714817792
loss=0.4980464714817792
loss=0.4980464714817792
loss=0.4980464714817792
loss=0.4980464714817792
loss=0.498046471