# imports

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# data

# pointers
(set pointers for x_train, x_test, y_train, y_test)

In [7]:
# overwrite when ready.....
x_train = np.ones((10,3))
x_test  = np.ones((5,3))
y_train = np.ones((10,1))
y_test  = np.ones((5,1))

In [8]:
# x = np.ones((3,10))
# y = np.ones((1,10))

In [9]:
x = np.array([
     1, 2, 3, 4, 5, 6, 7, 8, 9, 0,
    12, 1,-3,22, 3,-1, 2,31,31, 0,
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
]).reshape(3,-1)
x

array([[ 1,  2,  3,  4,  5,  6,  7,  8,  9,  0],
       [12,  1, -3, 22,  3, -1,  2, 31, 31,  0],
       [ 1,  1,  1,  1,  1,  1,  1,  1,  1,  1]])

In [10]:
y = 2*x[0,:] + np.random.normal(0,0.25,10).reshape(1,-1)
y

array([[ 1.73479592,  3.80050051,  6.01862263,  7.87977243,  9.68051088,
        11.75259277, 13.68534001, 15.83761103, 17.70600538, -0.29470625]])

# define MLP

In [2705]:
class AbstractAF:
    """Abstract Activation Function.
    This is an abstract class used to represent
    activation functions in a MultiLayer Perceptitron.

    Each Activation function must have a name
    and implement the following three functions:
    - fw(w,x)         represents a forward pass through the MLP
    - bp_w(w,x)       represents a dL/dw backprop through the MLP
    - bp_x(w,x)       represents a dL/dh backprop through the MLP <- TODO look into this.....

    This class does not implement any of the three functions.
    Child-classes MUST implement all three functions for 
    backprop to work properly.

    In the current implementation, the following classes are the only valid subclasses:
    - LinearAF
    - ReluAF
    """
    def __init__(self):
        self.name = "Abstract"

    def __repr__(self):
        """Overwrites the representation with class name.
        This function makes the print look cleaner :) 
        """
        return f"<ActivationFunction:{self.name}>"
    
    def fw(self,w,x):
        raise NotImplementedError("Abstract Class cannot run functions.  Please use a subclass.")

    def bp_w(self,w,x):
        raise NotImplementedError("Abstract Class cannot run functions.  Please use a subclass.")

    def bp_x(self,w,x):
        raise NotImplementedError("Abstract Class cannot run functions.  Please use a subclass.")

class MeanSquaredErrorAF(AbstractAF):
    """Mean Squared Error function"""
    def __init__(self):
        super().__init__()
        self.name = "MSE"
        self.axis = 0

    def fw(self,f,y):
        return   np.mean((f-y)**2,axis=self.axis)

    def bp(self,f,y):
        return 2*np.mean((f-y),   axis=self.axis)

class LinearAF(AbstractAF):
    """Linear Activation Function"""
    def __init__(self):
        super().__init__()
        self.name = "Linear"
    
    def fw(self,w,x):
        return w.T.dot(x)

    def bp_w(self,w,x):
        return x

    def bp_x(self,w,x):
        return w

class ReluAF(AbstractAF):
    """Relu Activation Function"""
    def __init__(self):
        super().__init__()
        self.name = "Relu"
        
    def fw(self,w,x):
        return np.maximum(0,w.T.dot(x))

    def bp_w(self,w,x):
        print("wtx:",(w.T.dot(x) > 0).shape)
        print("x:",x.shape,"(expected)")
        return x.dot((w.T.dot(x) > 0).T)

    def bp_x(self,w,x):
        print("wtx:",(w.T.dot(x) > 0).shape)
        print("w:",w.shape,"(expected)")
        return (w).dot(w.T.dot(x) > 0)

In [2742]:
class MLP:
    """MultiLayer Perceptron
    Implementation Notes:
    - input and output layers must be defined explicitly.
    """
    def __init__(self):
        self.layers  = []
        self.weights = []
        self.loss = MeanSquaredErrorAF()

    def add_layer(self,nodes:int,afunc:AbstractAF) -> None:
        """Adds a layer with a given number of nodes
        and a given Abstract Function"""
        self.layers.append(MLPLayer(nodes,afunc))

    def _init_weights(self) -> None:
        """Initialize weights based on added layers"""
        assert len(self.layers) > 1, "layers must be added"

        # reset weights matrix
        self.weights = []

        # get the shape based on existing layers
        for i in range(1,len(self.layers)):
            w_shape = (self.layers[i-1].get_nodes(),
                       self.layers[i  ].get_nodes())
            self.weights.append(np.ones(w_shape)*0.01)

    def fw(self,x:np.array):
        """Performs a forward pass from
        x through n hidden layers to f_w(x)
        by applying an activation function 
        for each layer in the MLP.

        The function also initializes weight
        dimensions, if not done so already.

        Given the input example:
        x_ample = np.ones((3,n))
        
        each column would represent a sample
        ie: 
        > x_ample[:,0]   would be the 1st sample
        > x_ample[:,1]   would be the 2nd sample
        > x_ample[:,n-1] would be the nth sample
        etc.
        
        each row would represent a variable
        ie:
        > x_ample[0,:] would be the 1st parameter
        > x_ample[1,:] would be the 2nd parameter
        > x_ample[2,:] would be the 3rd parameter
        etc.

        The output of this function will generally take the shape:
        (m,n) where n is the number of columns in the input array
        and m is the number of node is the final layer in this MLP.
        In this case, we are predicting one value, how late the
        MBTA will be, and therefore m will always be 1.
        """

        # init weights if not yet done
        if len(self.weights) == 0:
            self._init_weights()

        # initialize x as the hidden value
        # of layer 0 (the input layer)
        self.layers[0].h = x

        # loop through and update x iteratively:
        for i in range(1,len(self.layers)):
            x = self.layers[i].fw(self.weights[i-1],x)

        # return x
        return x
    
    def _bp_list_factors(self,ridx,debug:bool=False):
        """Gets a list of factors to
        generate the corresponding
        weight matrix.
        
        ridx is the reversed index:
        - 0 refers to the last element
        - 1 refers to the 2nd to last element
        etc.
        """
        reversed_weights = list(reversed(self.weights))
        reversed_layers  = list(reversed(self.layers))

        # store factors to prod later 
        prod_factors = []

        # loop through the layers add dh
        for i in range(ridx):
            if debug:
                print(f"""iteration:[{i}]:\n
                layer.h: {reversed_layers[i+1].h.shape}\n
                weight : {reversed_weights[i].shape}\n
                dotable: {...}\n
                """)
            
            # print(f"{reversed_layers[i+1]}.bp_x(...); shape:{reversed_weights[i].shape}")
            prod_factors.append(reversed_layers[i+1].bp_x(reversed_weights[i]))

        # add dw
        # print(f"{reversed_layers[ridx+1]}.bp_w(...); shape:{reversed_weights[ridx].shape}")
        prod_factors.append(reversed_layers[ridx+1].bp_w(reversed_weights[ridx]))

        # return factors
        return prod_factors

    def _bp_dot(self,bp_list,loss,debug:bool=False):
        """bp_list is the list generated from _bp_list_factors()
        loss is the VALUES of loss as a matrix
        """
        prod_dh = loss.copy()
    
        # ignore the last value b/c it's dw not dh
        for i in range(len(bp_list) - 1):
            # perform a cumulative dot product
            # starting from back:
            if debug:
                print(f"""iteration:[{i}]:\n
                bp_list: {bp_list[i].shape}\n
                prod_dh: {prod_dh.shape}\n
                dotable: {bp_list[i][1]==prod_dh.shape[0]}\n
                """)
                
            try:
                prod_dh = bp_list[i].dot(prod_dh)
            except:
                prod_dh = bp_list[i] * (prod_dh)
            
    
        # dot dw with the prod_dh transpose
        dldw = bp_list[-1].dot(prod_dh.T)
        return dldw
    
    def gd(self,
           x:np.array,
           y:np.array,
           eta:float=0.1,
           iters:int=10,
           debug:bool=False
          ):
        # list of errors?
        ls_mse = []
        
        for i in range(iters):
            # compute the fwd pass
            fwp = self.fw(x)
            # compute the loss
            fwl = self.loss.fw(f=fwp,y=y).reshape(1,-1)
            bpl = self.loss.bp(f=fwp,y=y).reshape(1,-1)
            for fidx in range(len(self.weights)):
                ridx = len(self.weights) - fidx - 1
                bpd = self._bp_dot(self._bp_list_factors(fidx),bpl,debug=debug)
                    
                if debug:
                    print(f"shape match: {self.weights[ridx].shape == bpd.shape}")
                    print(f"    self.weights[{ridx}]",self.weights[ridx].shape)
                    print(f"    self._bp_dot[{ridx}]",bpd.shape)

                if bpd.shape == self.weights[ridx].shape:
                    # overwrite the weights if the shapes match:
                    self.weights[ridx] = (self.weights[ridx] - eta * bpd)
                else:
                    # throw error otherwise
                    raise Exception("invalid weight shape"+
                                    f"expected{self.weights[ridx].shape}; got{bpd.shape}")
            
            ls_mse.append(fwl)
        return ls_mse

In [2744]:
class MLPLayer:
    """Represents a single layer in the MLP.
    
    """
    def __init__(self,nodes,afunc):
        self.nodes = int(nodes)
        self.afunc = afunc
        self.h = None

    def __repr__(self):
        """overwrite representation for pretty print"""
        return "<MLPLayer: {nodes:"+f"{self.nodes},afunc:{self.afunc}"+"}>"

    def get_nodes(self):
        return self.nodes+0

    def fw(self,w:np.array,x:np.array):
        """store and return the
        post-activation values 
        of a forward pass."""
        self.h = self.afunc.fw(w=w,x=x)
        return self.h.copy()

    def bp_w(self,w:np.array):
        return self.afunc.bp_w(w=w,x=self.h)

    def bp_x(self,w:np.array):
        return self.afunc.bp_x(w=w,x=self.h)

In [2747]:
x

array([[ 1,  2,  3,  4,  5,  6,  7,  8,  9,  0],
       [12,  1, -3, 22,  3, -1,  2, 31, 31,  0],
       [ 1,  1,  1,  1,  1,  1,  1,  1,  1,  1]])

In [2749]:
y

array([[ 1.73479592,  3.80050051,  6.01862263,  7.87977243,  9.68051088,
        11.75259277, 13.68534001, 15.83761103, 17.70600538, -0.29470625]])

In [2751]:
def assertweights(mlp):
    assert mlp.weights[0].shape == (3,4)
    assert mlp.weights[1].shape == (4,5)
    assert mlp.weights[2].shape == (5,1)

In [2753]:
def assertlayers(mlp):
    assert mlp.layers[0].h.shape == (3,10)
    assert mlp.layers[1].h.shape == (4,10)
    assert mlp.layers[2].h.shape == (5,10)
    assert mlp.layers[3].h.shape == (1,10)

In [2755]:
mlp = MLP()
mlp.add_layer(3,LinearAF()) # input x
mlp.add_layer(4,LinearAF())   # hidden layer #1
mlp.add_layer(5,LinearAF())   # hidden layer #2
mlp.add_layer(1,ReluAF())   # prediction f_w(x)
mlp._init_weights()

# assert weight shape
assertweights(mlp)

# run fw pass and assert shapes:
mlp.fw(x)

# assert weights / matricies shape
assertweights(mlp)
assertlayers(mlp)

# print("# product list for w2")
# bplf0 = mlp._bp_list_factors(0)
# assert len(bplf0) == 1
# assert bplf0[0].shape == (5,10), \
# f"""expected: {(5,10)}; got: {bplf0[0].shape}\n"""   # takes shape of h

# print("# product list for w1")
# bplf1 = mlp._bp_list_factors(1)
# assert len(bplf1) == 2
# assert bplf1[0].shape == (5,1),  \
# f"""expected: {(5,1)};  got: { bplf1[0].shape}\n"""  # takes shape of w
# assert bplf1[1].shape == (4,10), \
# f"""expected: {(4,10)}; got: { bplf1[1].shape}\n"""  # takes shape of h

# print("# product list for w0")
# bplf2 = mlp._bp_list_factors(2)
# assert len(bplf2) == 3
# assert bplf2[0].shape == (5,1),  \
# f"""expected: {(5,1)};  got: { bplf2[0].shape}\n"""  # takes shape of w
# assert bplf2[1].shape == (4,5),  \
# f"""expected: {(4,1)};  got: { bplf2[1].shape}\n"""  # takes shape of w
# assert bplf2[2].shape == (3,10), \
# f"""expected: {(3,10)}; got: { bplf2[2].shape}\n"""  # takes shape of h

mlp.gd(x,y,eta=0.00002,iters=1,debug=True);

shape match: True
    self.weights[2] (5, 1)
    self._bp_dot[2] (5, 1)
iteration:[0]:

                bp_list: (5, 1)

                prod_dh: (1, 10)

                dotable: [False]

                
shape match: True
    self.weights[1] (4, 5)
    self._bp_dot[1] (4, 5)
iteration:[0]:

                bp_list: (5, 1)

                prod_dh: (1, 10)

                dotable: [False]

                
iteration:[1]:

                bp_list: (4, 5)

                prod_dh: (5, 10)

                dotable: [False False False False False]

                
shape match: True
    self.weights[0] (3, 4)
    self._bp_dot[0] (3, 4)


In [2757]:
mlp._bp_list_factors(1)[0].shape

(5, 1)

In [2759]:
mlp._bp_list_factors(1)[1].shape

(4, 10)

In [2762]:
mlp.gd(x,y,eta=0.0002,iters=1000,debug=False);

In [2764]:
x_test = np.array([10,15,-1,
                   22,-1, 3,
                    1, 1, 1,]).reshape(3,-1)
# expect y_test to be ~20, ~30, -2

mlp.fw(x_test)

array([[19.67337392, 29.49631076,  0.        ]])

In [2767]:
mlp.weights

[array([[ 8.68199078e-01,  8.68199078e-01,  8.68199078e-01,
          8.68199078e-01],
        [ 5.83538476e-04,  5.83538476e-04,  5.83538476e-04,
          5.83538476e-04],
        [-2.75651652e-02, -2.75651652e-02, -2.75651652e-02,
         -2.75651652e-02]]),
 array([[0.27259008, 0.27259008, 0.27259008, 0.27259008, 0.27259008],
        [0.27259008, 0.27259008, 0.27259008, 0.27259008, 0.27259008],
        [0.27259008, 0.27259008, 0.27259008, 0.27259008, 0.27259008],
        [0.27259008, 0.27259008, 0.27259008, 0.27259008, 0.27259008]]),
 array([[0.41634832],
        [0.41634832],
        [0.41634832],
        [0.41634832],
        [0.41634832]])]

In [None]:
# example input:
x_ample = np.ones((3,5))

"""
Given the imput example:
x_ample = np.ones((3,5))

each column would represent a sample
ie: 
> x_ample[:,0] would be the first weather sample
> x_ample[:,1] would be the second weather sample
etc.

each row would represent a variable
ie:
> x_ample[1,:] would be all the tempertures
> x_ample[2,:] would be all the precipitations
> x_ample[3,:] would be all the biases
etc.
"""

# going to print an instance of a forward pass
print(
    mlp.fw(x_ample)
)
"""
mlp.fw(x_ample)
is a matrix that looks like this:
np.array([[0,0,0,0,0]]) # for now:

again, each column would represent
a prediction of y.

If we had multiple rows, as an output,
it could / would be displayed here.
and we could do some sort of 
verification with those.
""";

In [None]:
test_layers = mlp.layers.copy()
test_layers

In [None]:
test_weights = mlp.weights.copy()
test_weights

In [None]:
def test_bp(ridx):
    """ridx is the reversed index:
    - 0 refers to the last element
    - 1 refers to the 2nd to last element
    etc.
    """
    reversed_weights = list(reversed(test_weights))
    reversed_layers  = list(reversed(test_weights))
    
    prod_factors = []
    
    for i in range(ridx):
        prod_factors.append(f"reversed_layers[{i}].bp_x(...)")

    prod_factors.append(f"reversed_layers[{ridx}].bp_w(...)")
    return prod_factors

In [None]:
test_bp(0)

In [None]:
test_bp(1)

In [None]:
test_bp(2)

In [None]:
test_bp(3)

In [None]:
list(reversed(test_weights))

In [None]:
for i in range(5):
    print(i)

In [None]:
for i in range(0):
    print(i)

In [None]:
for i in range(10):
    print(10-1-i)

In [2685]:
mlp = MLP()
mlp.add_layer(2,LinearAF()) # input x
mlp.add_layer(2,ReluAF())   # hidden layer #1
mlp.add_layer(1,LinearAF())   # prediction f_w(x)
mlp._init_weights()

# run fw pass and assert shapes:
mlp.fw(x[:2,])

array([[0.0026, 0.0006, 0.    , 0.0052, 0.0016, 0.001 , 0.0018, 0.0078,
        0.008 , 0.    ]])

In [2687]:
mlp.weights

[array([[0.01, 0.01],
        [0.01, 0.01]]),
 array([[0.01],
        [0.01]])]

In [2689]:
mlp.layers

[<MLPLayer: {nodes:2,afunc:<ActivationFunction:Linear>}>,
 <MLPLayer: {nodes:2,afunc:<ActivationFunction:Relu>}>,
 <MLPLayer: {nodes:1,afunc:<ActivationFunction:Linear>}>]

In [2691]:
def bp_fw_list(idx,loss):
    """Use the "forward index"
    to procure the derivative
    of a weight."""
    weight = mlp.weights[idx]
    wshape = weight.shape
    
    # init list and append loss
    bp_list = []
    bp_list.append(loss) # 1xn
    if idx == len(mlp.weights) - 1:
        bp_list.append(mlp.layers[idx].h.T)
    if idx == len(mlp.weights) - 2:
        bp_list.append(mlp.weights[idx])
        bp_list.append(mlp.layers[idx-1].h.T)

    return bp_list

In [2695]:
bp_fw_list(1,np.ones((1,1))) # get w2

[array([[1.]]),
 array([[0.13, 0.13],
        [0.03, 0.03],
        [0.  , 0.  ],
        [0.26, 0.26],
        [0.08, 0.08],
        [0.05, 0.05],
        [0.09, 0.09],
        [0.39, 0.39],
        [0.4 , 0.4 ],
        [0.  , 0.  ]])]

In [2701]:
bp0 = bp_fw_list(0,np.ones((1,1))) # get w1
bp0

[array([[1.]]),
 array([[0.01, 0.01],
        [0.01, 0.01]]),
 array([[0.0026],
        [0.0006],
        [0.    ],
        [0.0052],
        [0.0016],
        [0.001 ],
        [0.0018],
        [0.0078],
        [0.008 ],
        [0.    ]])]