# imports

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# data

# pointers
(set pointers for x_train, x_test, y_train, y_test)

In [1031]:
# overwrite when ready.....
x_train = np.ones((10,3))
x_test  = np.ones((5,3))
y_train = np.ones((10,1))
y_test  = np.ones((5,1))

In [1921]:
# x = np.ones((3,10))
# y = np.ones((1,10))

In [2061]:
x = np.array([
     1, 2, 3, 4, 5, 6, 7, 8, 9, 0,
    12, 1,-3,22, 3,-1, 2,31,31, 0,
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
]).reshape(3,-1)
x

array([[ 1,  2,  3,  4,  5,  6,  7,  8,  9,  0],
       [12,  1, -3, 22,  3, -1,  2, 31, 31,  0],
       [ 1,  1,  1,  1,  1,  1,  1,  1,  1,  1]])

In [2063]:
y = 2*x[0,:] + np.random.normal(0,0.25,10).reshape(1,-1)
y

array([[ 2.01590629,  4.0551028 ,  6.22806779,  7.48536021, 10.13680832,
        12.49035653, 13.91913383, 15.81527023, 17.58333761,  0.4210097 ]])

# define MLP

In [1929]:
class AbstractAF:
    """Abstract Activation Function.
    This is an abstract class used to represent
    activation functions in a MultiLayer Perceptitron.

    Each Activation function must have a name
    and implement the following three functions:
    - fw(w,x)         represents a forward pass through the MLP
    - bp_w(w,x)       represents a dL/dw backprop through the MLP
    - bp_x(w,x)       represents a dL/dh backprop through the MLP <- TODO look into this.....

    This class does not implement any of the three functions.
    Child-classes MUST implement all three functions for 
    backprop to work properly.

    In the current implementation, the following classes are the only valid subclasses:
    - LinearAF
    - ReluAF
    """
    def __init__(self):
        self.name = "Abstract"

    def __repr__(self):
        """Overwrites the representation with class name.
        This function makes the print look cleaner :) 
        """
        return f"<ActivationFunction:{self.name}>"
    
    def fw(self,w,x):
        raise NotImplementedError("Abstract Class cannot run functions.  Please use a subclass.")

    def bp_w(self,w,x):
        raise NotImplementedError("Abstract Class cannot run functions.  Please use a subclass.")

    def bp_x(self,w,x):
        raise NotImplementedError("Abstract Class cannot run functions.  Please use a subclass.")

class MeanSquaredErrorAF(AbstractAF):
    """Mean Squared Error function"""
    def __init__(self):
        super().__init__()
        self.name = "MSE"
        self.axis = 0

    def fw(self,f,y):
        return   np.mean((f-y)**2,axis=self.axis)

    def bp(self,f,y):
        return 2*np.mean((f-y),   axis=self.axis)

class LinearAF(AbstractAF):
    """Linear Activation Function"""
    def __init__(self):
        super().__init__()
        self.name = "Linear"
    
    def fw(self,w,x):
        return w.T.dot(x)

    def bp_w(self,w,x):
        return x

    def bp_x(self,w,x):
        return w

class ReluAF(AbstractAF):
    """Relu Activation Function"""
    def __init__(self):
        super().__init__()
        self.name = "Relu"
        
    def fw(self,w,x):
        return np.maximum(0,w.T.dot(x))

    def bp_w(self,w,x):
        return (w.dot(x) > 0) * x

    def bp_x(self,w,x):
        return (w.dot(x) > 0) * w

In [1975]:
class MLP:
    """MultiLayer Perceptron
    Implementation Notes:
    - input and output layers must be defined explicitly.
    """
    def __init__(self):
        self.layers  = []
        self.weights = []
        self.loss = MeanSquaredErrorAF()

    def add_layer(self,nodes:int,afunc:AbstractAF) -> None:
        """Adds a layer with a given number of nodes
        and a given Abstract Function"""
        self.layers.append(MLPLayer(nodes,afunc))

    def _init_weights(self) -> None:
        """Initialize weights based on added layers"""
        assert len(self.layers) > 1, "layers must be added"

        # reset weights matrix
        self.weights = []

        # get the shape based on existing layers
        for i in range(1,len(self.layers)):
            w_shape = (self.layers[i-1].get_nodes(),
                       self.layers[i  ].get_nodes())
            self.weights.append(np.ones(w_shape)*0.01)

    def fw(self,x:np.array):
        """Performs a forward pass from
        x through n hidden layers to f_w(x)
        by applying an activation function 
        for each layer in the MLP.

        The function also initializes weight
        dimensions, if not done so already.

        Given the input example:
        x_ample = np.ones((3,n))
        
        each column would represent a sample
        ie: 
        > x_ample[:,0]   would be the 1st sample
        > x_ample[:,1]   would be the 2nd sample
        > x_ample[:,n-1] would be the nth sample
        etc.
        
        each row would represent a variable
        ie:
        > x_ample[0,:] would be the 1st parameter
        > x_ample[1,:] would be the 2nd parameter
        > x_ample[2,:] would be the 3rd parameter
        etc.

        The output of this function will generally take the shape:
        (m,n) where n is the number of columns in the input array
        and m is the number of node is the final layer in this MLP.
        In this case, we are predicting one value, how late the
        MBTA will be, and therefore m will always be 1.
        """

        # init weights if not yet done
        if len(self.weights) == 0:
            self._init_weights()

        # initialize x as the hidden value
        # of layer 0 (the input layer)
        self.layers[0].h = x

        # loop through and update x iteratively:
        for i in range(1,len(self.layers)):
            x = self.layers[i].fw(self.weights[i-1],x)

        # return x
        return x
    
    def _bp_list_factors(self,ridx):
        """Gets a list of factors to
        generate the corresponding
        weight matrix.
        
        ridx is the reversed index:
        - 0 refers to the last element
        - 1 refers to the 2nd to last element
        etc.
        """
        reversed_weights = list(reversed(self.weights))
        reversed_layers  = list(reversed(self.layers))

        # store factors to prod later 
        prod_factors = []

        # loop through the layers add dh
        for i in range(ridx):
            # print(f"{reversed_layers[i+1]}.bp_x(...); shape:{reversed_weights[i].shape}")
            prod_factors.append(reversed_layers[i+1].bp_x(reversed_weights[i]))

        # add dw
        # print(f"{reversed_layers[ridx+1]}.bp_w(...); shape:{reversed_weights[ridx].shape}")
        prod_factors.append(reversed_layers[ridx+1].bp_w(reversed_weights[ridx]))

        # return factors
        return prod_factors

    def _bp_dot(self,bp_list,loss):
        """bp_list is the list generated from _bp_list_factors()
        loss is the VALUES of loss as a matrix
        """
        prod_dh = loss.copy()
    
        # ignore the last value b/c it's dw not dh
        for i in range(len(bp_list) - 1):
            # perform a cumulative dot product
            # starting from back:
            prod_dh = bp_list[i].dot(prod_dh)
            # print(prod_dh.shape)
    
        # dot dw with the prod_dh transpose
        dldw = bp_list[-1].dot(prod_dh.T)
        return dldw
    
    def gd(self,x,y,eta:float=0.1,iters:int=10):
        # list of errors?
        ls_mse = []
        
        for i in range(iters):
            # compute the fwd pass
            fwp = self.fw(x)
            # compute the loss
            fwl = self.loss.fw(f=fwp,y=y).reshape(1,-1)
            bpl = self.loss.bp(f=fwp,y=y).reshape(1,-1)
            for fidx in range(len(self.weights)):
                ridx = len(self.weights) - fidx - 1
                # print(f"self.weights[{ridx}]",self.weights[ridx].shape)
                bpd = self._bp_dot(self._bp_list_factors(fidx),bpl)
                # print(f"self._bp_dot[{ridx}]",bpd.shape)

                # overwrite the weights
                self.weights[ridx] = (self.weights[ridx] - eta * bpd)
            
            ls_mse.append(fwl)
        return ls_mse

In [1977]:
class MLPLayer:
    """Represents a single layer in the MLP.
    
    """
    def __init__(self,nodes,afunc):
        self.nodes = int(nodes)
        self.afunc = afunc
        self.h = None

    def __repr__(self):
        """overwrite representation for pretty print"""
        return "<MLPLayer: {nodes:"+f"{self.nodes},afunc:{self.afunc}"+"}>"

    def get_nodes(self):
        return self.nodes+0

    def fw(self,w:np.array,x:np.array):
        """store and return the
        post-activation values 
        of a forward pass."""
        self.h = self.afunc.fw(w=w,x=x)
        return self.h.copy()

    def bp_w(self,w:np.array):
        return self.afunc.bp_w(w=w,x=self.h)

    def bp_x(self,w:np.array):
        return self.afunc.bp_x(w=w,x=self.h)

In [1982]:
mlp = MLP()
mlp.add_layer(3,LinearAF())  # input x
mlp.add_layer(4,LinearAF())   # hidden layer #0
mlp.add_layer(5,LinearAF())   # hidden layer #0

# mlp.add_layer(40,LinearAF()) # hidden layer #1
# mlp.add_layer(80,ReluAF())   # hidden layer #2
# mlp.add_layer(20,ReluAF())   # hidden layer #3

mlp.add_layer(1,LinearAF())    # prediction f_w(x)
mlp.layers

[<MLPLayer: {nodes:3,afunc:<ActivationFunction:Linear>}>,
 <MLPLayer: {nodes:4,afunc:<ActivationFunction:Linear>}>,
 <MLPLayer: {nodes:5,afunc:<ActivationFunction:Linear>}>,
 <MLPLayer: {nodes:1,afunc:<ActivationFunction:Linear>}>]

In [1984]:
mlp._init_weights()
for weight_matrix in mlp.weights:
    print(weight_matrix.shape)

(3, 4)
(4, 5)
(5, 1)


In [1986]:
mlp.fw(np.ones((3,10)))

array([[6.e-05, 6.e-05, 6.e-05, 6.e-05, 6.e-05, 6.e-05, 6.e-05, 6.e-05,
        6.e-05, 6.e-05]])

In [1988]:
mlp.layers[0].h

array([[1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]])

In [1990]:
mlp.layers[1].h

array([[0.03, 0.03, 0.03, 0.03, 0.03, 0.03, 0.03, 0.03, 0.03, 0.03],
       [0.03, 0.03, 0.03, 0.03, 0.03, 0.03, 0.03, 0.03, 0.03, 0.03],
       [0.03, 0.03, 0.03, 0.03, 0.03, 0.03, 0.03, 0.03, 0.03, 0.03],
       [0.03, 0.03, 0.03, 0.03, 0.03, 0.03, 0.03, 0.03, 0.03, 0.03]])

In [1992]:
mlp.layers[2].h

array([[0.0012, 0.0012, 0.0012, 0.0012, 0.0012, 0.0012, 0.0012, 0.0012,
        0.0012, 0.0012],
       [0.0012, 0.0012, 0.0012, 0.0012, 0.0012, 0.0012, 0.0012, 0.0012,
        0.0012, 0.0012],
       [0.0012, 0.0012, 0.0012, 0.0012, 0.0012, 0.0012, 0.0012, 0.0012,
        0.0012, 0.0012],
       [0.0012, 0.0012, 0.0012, 0.0012, 0.0012, 0.0012, 0.0012, 0.0012,
        0.0012, 0.0012],
       [0.0012, 0.0012, 0.0012, 0.0012, 0.0012, 0.0012, 0.0012, 0.0012,
        0.0012, 0.0012]])

In [1994]:
dl_dw2 = mlp._bp_list_factors(0)
dl_dw2[0].shape

(5, 10)

In [1996]:
f = mlp.loss.fw(mlp.fw(x),y).reshape(1,-1)
f.shape

(1, 10)

In [1999]:
dldw3 = mlp._bp_list_factors(0)[0].dot(f.T)
dldw3.shape

(5, 1)

In [2001]:
bp1 = mlp._bp_list_factors(1)
dldw2 = bp1[1].dot(bp1[0].dot(f).T)
dldw2.shape

(4, 5)

In [2003]:
bp2 = mlp._bp_list_factors(2)
dldw1 = bp2[2].dot(bp2[1].dot(bp2[0].dot(f)).T)
dldw1.shape

(3, 4)

In [2005]:
def bp_dot(bp_list,loss):
    """bp_list is the list generated from bp
    loss is the VALUES of loss as a matrix
    """
    prod_dh = loss.copy()

    # ignore the last value b/c it's dw not dh
    for i in range(len(bp_list) - 1):
        # perform a cumulative dot product
        # starting from back:
        prod_dh = bp_list[i].dot(prod_dh)
        print(prod_dh.shape)

    # dot dw with the prod_dh transpose
    dldw = bp_list[-1].dot(prod_dh.T)
    return dldw

In [2007]:
dotbp(bp2,f)

(5, 10)
(4, 10)


array([[0.97466053, 0.97466053, 0.97466053, 0.97466053],
       [2.32256897, 2.32256897, 2.32256897, 2.32256897],
       [0.13847313, 0.13847313, 0.13847313, 0.13847313]])

In [2009]:
dotbp(bp1,f)

(5, 10)


array([[0.68714053, 0.68714053, 0.68714053, 0.68714053, 0.68714053],
       [0.68714053, 0.68714053, 0.68714053, 0.68714053, 0.68714053],
       [0.68714053, 0.68714053, 0.68714053, 0.68714053, 0.68714053],
       [0.68714053, 0.68714053, 0.68714053, 0.68714053, 0.68714053]])

In [2011]:
dotbp(mlp._bp_list_factors(0),f)

array([[2.7485621],
       [2.7485621],
       [2.7485621],
       [2.7485621],
       [2.7485621]])

In [2023]:
mlp.gd(x,y,eta=0.005)

[array([[nan, nan, nan, nan, nan, nan, nan, nan, nan, nan]]),
 array([[nan, nan, nan, nan, nan, nan, nan, nan, nan, nan]]),
 array([[nan, nan, nan, nan, nan, nan, nan, nan, nan, nan]]),
 array([[nan, nan, nan, nan, nan, nan, nan, nan, nan, nan]]),
 array([[nan, nan, nan, nan, nan, nan, nan, nan, nan, nan]]),
 array([[nan, nan, nan, nan, nan, nan, nan, nan, nan, nan]]),
 array([[nan, nan, nan, nan, nan, nan, nan, nan, nan, nan]]),
 array([[nan, nan, nan, nan, nan, nan, nan, nan, nan, nan]]),
 array([[nan, nan, nan, nan, nan, nan, nan, nan, nan, nan]]),
 array([[nan, nan, nan, nan, nan, nan, nan, nan, nan, nan]])]

In [2025]:
mlp.weights

[array([[nan, nan, nan, nan],
        [nan, nan, nan, nan],
        [nan, nan, nan, nan]]),
 array([[nan, nan, nan, nan, nan],
        [nan, nan, nan, nan, nan],
        [nan, nan, nan, nan, nan],
        [nan, nan, nan, nan, nan]]),
 array([[nan],
        [nan],
        [nan],
        [nan],
        [nan]])]

In [2031]:
x

array([[ 1,  2,  3,  4,  5,  6,  7,  8,  9,  0],
       [12,  1, -3, 22,  3, -1,  2, 31, 31,  0],
       [ 1,  1,  1,  1,  1,  1,  1,  1,  1,  1]])

In [2033]:
y

array([[1.43961593, 1.9421026 , 3.09529678, 3.76097843, 5.14335224,
        6.04520241, 7.00779205, 7.63107449, 8.77811589, 0.13372618]])

In [2203]:
mlp = MLP()
mlp.add_layer(3,LinearAF())  # input x
mlp.add_layer(40,LinearAF())   # hidden layer #0
mlp.add_layer(80,LinearAF())   # hidden layer #1
mlp.add_layer(20,LinearAF())   # hidden layer #2
mlp.add_layer(1,LinearAF())    # prediction f_w(x)

In [2205]:
mlp.gd(x,y,eta=0.00002,iters=1000)

[array([[4.02783339e+00, 1.64231031e+01, 3.87808569e+01, 5.57722220e+01,
         1.02638140e+02, 1.55913095e+02, 1.93564163e+02, 2.49313686e+02,
         3.08251676e+02, 1.76710688e-01]]),
 array([[4.02666621e+00, 1.64224998e+01, 3.87807639e+01, 5.57638676e+01,
         1.02634649e+02, 1.55910465e+02, 1.93558940e+02, 2.49287635e+02,
         3.08222064e+02, 1.76696954e-01]]),
 array([[4.02547163e+00, 1.64218857e+01, 3.87806771e+01, 5.57553176e+01,
         1.02631091e+02, 1.55907798e+02, 1.93553622e+02, 2.49260979e+02,
         3.08191767e+02, 1.76683084e-01]]),
 array([[4.02424873e+00, 1.64212606e+01, 3.87805967e+01, 5.57465653e+01,
         1.02627464e+02, 1.55905092e+02, 1.93548205e+02, 2.49233698e+02,
         3.08160765e+02, 1.76669075e-01]]),
 array([[4.02299655e+00, 1.64206240e+01, 3.87805230e+01, 5.57376041e+01,
         1.02623764e+02, 1.55902346e+02, 1.93542687e+02, 2.49205772e+02,
         3.08129032e+02, 1.76654922e-01]]),
 array([[4.02171413e+00, 1.64199756e+01, 3.8780456

In [2207]:
x_test = np.array([10,15,-1,
                   22,-1, 3,
                    1, 1, 1,]).reshape(3,-1)
# i expect y_test to be ~20, ~30, -2


In [2209]:
mlp.fw(x_test)

array([[19.6701867 , 29.96561267, -1.65251768]])

In [569]:
# example input:
x_ample = np.ones((3,5))

"""
Given the imput example:
x_ample = np.ones((3,5))

each column would represent a sample
ie: 
> x_ample[:,0] would be the first weather sample
> x_ample[:,1] would be the second weather sample
etc.

each row would represent a variable
ie:
> x_ample[1,:] would be all the tempertures
> x_ample[2,:] would be all the precipitations
> x_ample[3,:] would be all the biases
etc.
"""

# going to print an instance of a forward pass
print(
    mlp.fw(x_ample)
)
"""
mlp.fw(x_ample)
is a matrix that looks like this:
np.array([[0,0,0,0,0]]) # for now:

again, each column would represent
a prediction of y.

If we had multiple rows, as an output,
it could / would be displayed here.
and we could do some sort of 
verification with those.
""";

[[0. 0. 0. 0. 0.]]


In [572]:
test_layers = mlp.layers.copy()
test_layers

[<MLPLayer: {nodes:3,afunc:<ActivationFunction:Linear>}>,
 <MLPLayer: {nodes:5,afunc:<ActivationFunction:Linear>}>,
 <MLPLayer: {nodes:1,afunc:<ActivationFunction:Relu>}>]

In [68]:
test_weights = mlp.weights.copy()
test_weights

[array([[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]]),
 array([[0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.]]),
 array([[0.],
        [0.],
        [0.],
        [0.],
        [0.]])]

In [88]:
def test_bp(ridx):
    """ridx is the reversed index:
    - 0 refers to the last element
    - 1 refers to the 2nd to last element
    etc.
    """
    reversed_weights = list(reversed(test_weights))
    reversed_layers  = list(reversed(test_weights))
    
    prod_factors = []
    
    for i in range(ridx):
        prod_factors.append(f"reversed_layers[{i}].bp_x(...)")

    prod_factors.append(f"reversed_layers[{ridx}].bp_w(...)")
    return prod_factors

In [90]:
test_bp(0)

['reversed_layers[0].bp_w(...)']

In [92]:
test_bp(1)

['reversed_layers[0].bp_x(...)', 'reversed_layers[1].bp_w(...)']

In [94]:
test_bp(2)

['reversed_layers[0].bp_x(...)',
 'reversed_layers[1].bp_x(...)',
 'reversed_layers[2].bp_w(...)']

In [98]:
test_bp(3)

['reversed_layers[0].bp_x(...)',
 'reversed_layers[1].bp_x(...)',
 'reversed_layers[2].bp_x(...)',
 'reversed_layers[3].bp_w(...)']

In [72]:
list(reversed(test_weights))

[array([[0.],
        [0.],
        [0.],
        [0.],
        [0.]]),
 array([[0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.]]),
 array([[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]])]

In [78]:
for i in range(5):
    print(i)

0
1
2
3
4


In [80]:
for i in range(0):
    print(i)

In [1027]:
for i in range(10):
    print(10-1-i)

9
8
7
6
5
4
3
2
1
0
