<a href="https://colab.research.google.com/github/maxmatical/fast.ai/blob/master/Numpy_ML_implementation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Neural Networks (and logistic regression)
https://quantdare.com/create-your-own-deep-learning-framework-using-numpy/

In [201]:
from abc import ABC, abstractmethod
from typing import List, Optional, Tuple
from enum import Enum, auto
import numpy as np


In [215]:
# A
a =[0, 0, 1, 1, 0, 0,
   0, 1, 0, 0, 1, 0,
   1, 1, 1, 1, 1, 1,
   1, 0, 0, 0, 0, 1,
   1, 0, 0, 0, 0, 1]
# B
b =[0, 1, 1, 1, 1, 0,
   0, 1, 0, 0, 1, 0,
   0, 1, 1, 1, 1, 0,
   0, 1, 0, 0, 1, 0,
   0, 1, 1, 1, 1, 0]
# C
c =[0, 1, 1, 1, 1, 0,
   0, 1, 0, 0, 0, 0,
   0, 1, 0, 0, 0, 0,
   0, 1, 0, 0, 0, 0,
   0, 1, 1, 1, 1, 0]

# Creating labels
# y =[[1, 0, 0],
#    [0, 1, 0],
#    [0, 0, 1]]

y = [1,2,3]

In [216]:
x =np.array([a, b, c])
  
  
# Labels are also converted into NumPy array
y = np.array(y)
  
  
print(x, "\n\n", y)

[[0 0 1 1 0 0 0 1 0 0 1 0 1 1 1 1 1 1 1 0 0 0 0 1 1 0 0 0 0 1]
 [0 1 1 1 1 0 0 1 0 0 1 0 0 1 1 1 1 0 0 1 0 0 1 0 0 1 1 1 1 0]
 [0 1 1 1 1 0 0 1 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 1 1 1 1 0]] 

 [1 0 1]


In [217]:
x.shape, y.shape

((3, 30), (3,))

In [218]:
class Layer(ABC):

    @abstractmethod
    def forward(self, input):
        pass

    @abstractmethod
    def backward(self, input):
        pass


class LayerType(Enum):
    # to keep track of layer types in NN
    # layer type determines what is returned in the backward pass
    linear = auto()
    relu = auto()
    sigmoid = auto()
    identity = auto()


# layers
class Linear(Layer):
    def __init__(self, in_dim: int, out_dim: int):
        self.weights = np.random.normal(size=(out_dim, in_dim)) # rows x cols
        # so when 
        self.biases = np.random.rand(out_dim, 1)
        # keep track of layer type for computing gradients
        self.type = "linear"
        

    def forward(self, input: np.ndarray) -> np.ndarray:
        # input into the layer is activations from previous layer
        # keep track of previous activations for back prop
        self.prev_activations = input 
        return np.matmul(self.weights, input) + self.biases


    def backward(self, dA: np.ndarray) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
        """
        by chain rule, dW is just equals to dA dot product with previous activations
        """
        dW = np.dot(dA, self.prev_activations.T) # self.previous_activations.T for shape to match
        dB = dA.mean(axis=1, keepdims=True) # average across columns (where dim = 1) to get dB
        error = np.dot(self.weights.T, dA) # propagate error backwords by multiplying dA by weights
        return error, dW, dB


    def update(self, dW: np.ndarray, dB: np.ndarray, lr: float):
        # update the weights and biases given gradients and lr
        self.weights -= lr * dW
        self.biases -= lr * dB




In [219]:
l = Linear(30, 4)
l.forward(x.T)

array([[ 1.39297034,  0.83342806,  3.55624561],
       [ 5.34235077,  6.24693926,  4.09729335],
       [ 8.05260647,  5.36259176,  1.92813514],
       [-1.52783894,  0.85110861,  0.13708511]])

In [220]:
x.mean(axis=1, keepdims=True)

array([[0.46666667],
       [0.53333333],
       [0.36666667]])

In [221]:
# activations
class ReLU(Layer):
    def __init__(self):
        # keep track of layer type for computing gradients
        self.type = "relu"
    

    def forward(self, input: np.ndarray) -> np.ndarray:
        # keep track of activations for backprop
        self.activations = np.maximum(0, input) 
        return self.activations


    def backward(self, error: np.ndarray) -> np.ndarray:
        """
        gradient of relu is 1 if self.activation > 0, 0 otherwise
        """
        # gradients = np.copy(self.activations)
        # gradients[self.activations > 0] = 1
        # gradients[self.activations <= 0] = 0
        # return error * gradients

        """
        alternatively, use heaviside function
        """
        return error * np.heaviside(self.activations, 0)
        

class Sigmoid(Layer):
    def __init__(self):
        # keep track of layer type for computing gradients
        self.type = "sigmoid"


    def forward(self, input: np.ndarray) -> np.ndarray:
        # keep track of activations for gradient calc in backward pass
        self.activations = 1/(1 + np.exp(-input))
        return self.activations


    def backward(self, error: np.ndarray) -> np.ndarray:
        """
        derivative of sigmoid function is sig*(1-sig)
        """
        sig = self.activations
        return error * sig * (1 - sig)

    
class Identity(Layer):
    def __init__(self):
        # keep track of layer type for computing gradients
        self.type = "identity"

    # identity function, just return itself
    def forward(self, input: np.ndarray) -> np.ndarray:
        # keep track of activations for gradient calc in backward pass
        self.activations = input
        return self.activations

    def backward(self, error: np.ndarray) -> np.ndarray:
        """
        derivative of identity function is identity matrix
        so error * I = error
        """
        return error


In [222]:
# loss functions
class LossType(Enum):
    mse = auto()
    bce = auto()


class MSE(Layer):
    def __init__(self, pred, y):
        self.pred = pred
        self.y = y


    def forward(self) -> float:
        return np.power(self.pred - self.y, 2).mean()


    def backward(self) -> np.ndarray:
        # need to keep same dimensions as pred and y
        # to get loss wrt to each output
        # since the 2 is a constant factor, we can leave out
        # (equivalent to multiplying lr by 2)
        return self.pred - self.y


class BCE(Layer):
    def __init__(self, pred, y):
        self.pred = pred
        self.y = y
        self.n = len(self.y)


    def forward(self) -> float:
        loss = np.nansum(-self.y * np.log(self.pred) - (1 - self.y) * np.log(1 - self.pred)) / self.n
        return np.squeeze(loss)


    def backward(self) -> np.ndarray:
        return (-(self.y / self.pred) + ((1 - self.y) / (1 - self.pred))) / self.n

In [223]:
mse = MSE(y+5, y)
mse.backward()

array([5, 5, 5])

In [224]:
bce = BCE(y+5, y)
# bce.forward()
bce.backward()

array([-0.05555556, -0.08333333, -0.05555556])

In [225]:
class Model:
    def __init__(self, in_dim: int, layers: Optional[List[int]], out_dim: int, loss_fn: str):
        self.in_dim = in_dim # get input dimension for matmul, = X.shape[1]
        self.layers=[]
        self.loss_fn = loss_fn
        assert self.loss_fn in [l.name for l in LossType], f"error, loss {self.loss_fn} not recognized"

        if layers:
            """if layers not given, is equivalent to linear/logistic regression
            """
            for l in layers:
                # each layer is a feedforward + relu
                self.layers.append(Linear(self.in_dim, l))
                self.layers.append(ReLU())
                self.in_dim = l
        # add final classifier layer + activation
        self.layers.append(Linear(self.in_dim, out_dim))
        final_act_fn = Identity() if self.loss_fn == LossType.mse.name else Sigmoid()
        self.layers.append(final_act_fn)


    def forward(self, input: np.ndarray) -> np.ndarray:
        for l in self.layers:
            input = l.forward(input)
        return input
        

    def backward(self, pred: np.ndarray, y: np.ndarray, lr: float):
        loss_func = MSE(pred, y) if self.loss_fn == LossType.mse.name else BCE(pred, y)
        loss = loss_func.forward()
        # compute gradients
        gradient = loss_func.backward()

        # backprop through the layers:
        for i, l in reversed(list(enumerate(self.layers))):
            if l.type != LayerType.linear.name:
                # if not linear layer, just calculate the gradient and 
                # pass to the previous layer
                gradient = self.layers[i].backward(gradient)
            else:
                gradient, dW, dB = self.layers[i].backward(gradient)
                self.layers[i].update(dW, dB, lr)
        return loss # keep track of loss


    def fit(self, input: np.ndarray, y: np.ndarray, n_epochs: int, lr: float):
        for _ in range(n_epochs):
            out = self.forward(input)
            loss = self.backward(out, y, lr)
            print(f"loss {self.loss_fn} = {loss}")


In [226]:
nn = Model(in_dim=30, layers = [4,5,6], out_dim = 1, loss_fn="mse")
# use x.T to make shapes match
out = nn.forward(x.T)
out

array([[ 0.81832437, 11.13986373,  6.87608558]])

In [227]:
nn.backward(out, y, lr=0.001)

52.88598392077802

In [228]:
nn.fit(x.T, y, n_epochs=10, lr=0.001)

loss mse = 6.590479597024867
loss mse = 3.5052972509792166
loss mse = 2.2071097880932498
loss mse = 1.505267191643277
loss mse = 1.1115366041724635
loss mse = 0.8224457792898495
loss mse = 0.6212247369714167
loss mse = 0.5148845118415777
loss mse = 0.4116553871564776
loss mse = 0.3422654683061006


In [229]:
"""
linear/logistic regression is equivalent to MLP
just without any hidden layers
"""
log_reg = Model(in_dim=30, layers = None, out_dim = 1, loss_fn="bce")
log_reg.fit(x.T, y, n_epochs=10, lr=0.001)

loss bce = 1.2452257546964574
loss bce = 1.24474912884293
loss bce = 1.24427293488365
loss bce = 1.243797171968099
loss bce = 1.2433218392470675
loss bce = 1.2428469358726602
loss bce = 1.2423724609982967
loss bce = 1.2418984137787177
loss bce = 1.2414247933699865
loss bce = 1.2409515989294908


# Decision Tree/ Random Forests

https://forums.fast.ai/t/unofficial-lesson-7-classnotes/7955