In [6]:
import numpy as np
import pandas as pd

In [362]:
import pandas as pd
import numpy as np 


class NeuralNetwork:
    """
    A small fully-connected feed-forward network whose per-layer state is kept
    in ``parameter_dict``.

    Nodes: an integer sequence with the size of every layer.
    Nodes[0] is the input size (typically, 50), Nodes[-1] is the number of output nodes (typically, 1).
    All other values Nodes[i] are the number of nodes in hidden layer i.

    Activations: a sequence of activation labels ('relu' or 'sigmoid').
    Activations[i-1] is the label used for layer i (i >= 1), so at least
    len(Nodes) - 1 entries are required; extra entries are ignored.
    This allows different activation functions in each layer.

    Attributes set by the constructor:
    Layers: the number of hidden layers, i.e. len(Nodes) - 2.
    rate: the learning rate (0.1).
    parameter_dict: empty until initialize_net() is called.
    """
    def __init__(self,Nodes,Activations):
        self.Layers = len(Nodes) - 2
        self.Nodes = Nodes
        self.Activations = Activations
        self.parameter_dict ={}
        self.rate = 0.1

    def initialize_net(self, seed=None):
        '''
        Build (and store on self.parameter_dict) a fresh set of parameters.

        parameter dict format:
        layer: {w, h, z, delta, activation, gradient}
        plus a 'y_hat' entry holding a random integer placeholder.

        Every layer i >= 1 carries one extra slot for the intercept, so for
        fan_in = Nodes[i-1] + 1 and fan_out = Nodes[i] + 1:
          w, gradient : (fan_in, fan_out) matrices
          h, z, delta : (1, fan_out) row matrices; h[0, 0] starts at 1
        Layer 0 only gets zero placeholders; its 'h' is filled by
        forward_propogate.

        seed: optional integer. When given, values are drawn from a private
        np.random.RandomState(seed) so the result is repeatable. When None
        (the default) the global np.random stream is used, as before.

        Returns the new parameter dict.
        '''
        rng = np.random if seed is None else np.random.RandomState(seed)

        parameter_dict = {k: {'w':0, 'h':0, 'z':0, 'delta':0, 'activation':0, 'gradient':0} for k in range(len(self.Nodes))}
        for i in range(1,len(self.Nodes)):
            # +1 on both sides adds the intercept slot
            fan_in = self.Nodes[i-1] + 1
            fan_out = self.Nodes[i] + 1

            # The draw order (h, w, delta) is kept so a globally seeded run
            # still reproduces the values it produced before this change.
            h = np.matrix(np.append(1, rng.randn(self.Nodes[i])))
            w = np.matrix(rng.randn(fan_in, fan_out))
            z = np.matrix(np.zeros(fan_out))
            delta = np.matrix(rng.randn(fan_out))
            gradient = np.matrix(np.zeros((fan_in, fan_out)))

            activation = self.Activations[i-1]

            parameter_dict[i] = {'w':w, 'h':h, 'z':z, 'delta':delta, 'activation':activation, 'gradient':gradient}
        parameter_dict['y_hat'] = rng.randint(100000,size =1)[0]
        self.parameter_dict = parameter_dict
        return parameter_dict

    def activate(self, z,activation='relu'):
        '''
        Apply the named activation element-wise.

        z: a scalar, ndarray or np.matrix.
        activation: 'relu' or 'sigmoid'.

        Returns a value with the same shape as z.
        Raises ValueError for an unknown activation label.
        '''
        if activation =='relu':
            return np.maximum(z, 0)
        if activation == 'sigmoid':
            # np.exp is element-wise for np.matrix too; `np.e ** -z` is not
            # defined for a matrix exponent.
            return 1 / (1 + np.exp(-np.asanyarray(z)))
        raise ValueError("unknown activation: %r" % (activation,))

    def activate_prime(self, z, activation='relu'):
        '''
        Derivative of the named activation, evaluated element-wise at z.

        z: a scalar, ndarray or np.matrix.
        activation: 'relu' or 'sigmoid'.

        Returns a plain Python number when z holds a single element, so that
        np.apply_along_axis over one-element slices yields a 1-D result;
        otherwise an array with the same shape as z.
        Raises ValueError for an unknown activation label.
        '''
        values = np.asanyarray(z)
        if activation == 'relu':
            slope = (values > 0).astype(int)
        elif activation == 'sigmoid':
            s = 1 / (1 + np.exp(-values))
            # np.multiply, because `*` between np.matrix objects is a matrix product
            slope = np.multiply(s, 1 - s)
        else:
            raise ValueError("unknown activation: %r" % (activation,))
        return slope.item() if slope.size == 1 else slope

    def forward_propogate(self,data):
        '''
        Run one sample through every layer, storing z and h for each.

        data: a row vector (np.matrix or 2-D ndarray) whose length equals the
        number of rows of layer 1's w. With weights from initialize_net that
        is (1, Nodes[0] + 1), the first entry being the intercept input 1.

        For each layer l >= 1: z_l = h_{l-1} . w_l and h_l = activation_l(z_l).

        NOTE(review): the first slot of every computed h is the activation of
        z[0, 0]; it is not reset to a constant 1. Confirm this is the intended
        intercept handling for hidden layers.

        Returns the output layer's h.
        Raises RuntimeError if initialize_net() has not been called.
        '''
        if 0 not in self.parameter_dict:
            raise RuntimeError("call initialize_net() before forward_propogate()")

        self.parameter_dict[0]['h'] = data
        signal = data
        for l in range(1, len(self.Nodes)):
            layer = self.parameter_dict[l]
            layer['z'] = np.dot(signal, layer['w'])
            layer['h'] = self.activate(layer['z'], layer['activation'])
            signal = layer['h']
        return signal

In [363]:
# Layer sizes: 3 inputs, two hidden layers of 2 nodes each, 1 output.
nodes = [3, 2, 2, 1]
# One 'relu' label per entry in `nodes`.
activation = ['relu'] * len(nodes)
# Number of weight matrices, i.e. connections between consecutive layers.
layers = len(nodes) - 1
net = NeuralNetwork(Nodes=nodes, Activations=activation)

In [364]:
# Build a fresh set of per-layer parameters on `net`.
net.initialize_net()
# One random input row: a leading 1 for the intercept followed by nodes[0]
# standard-normal feature values.
data = np.matrix(np.append(1, np.random.randn(nodes[0])))
# Set only 'h' so the layer-0 record keeps the same keys as every other layer
# (the same way the class and the worked example below write the input).
net.parameter_dict[0]['h'] = data

In [319]:
# Hand-worked example: overwrite the network state with small fixed values and
# compute the deltas of layers 2 and 1 from a given output delta.
# NOTE(review): the weight shapes used here (3x2, 2x2, 1x2) have no intercept
# slot, unlike the (Nodes[i-1]+1, Nodes[i]+1) matrices initialize_net creates —
# this cell appears to target the earlier, bias-free layout; confirm before reuse.
data = np.matrix([1,-2,2])
net.parameter_dict[0]['h'] = data
net.parameter_dict[1]['w'] = np.matrix([[3.,1],[2,0],[1,-1]])
net.parameter_dict[2]['w'] = np.matrix([[-2.,4],[0,-3]])
net.parameter_dict[3]['w'] = np.matrix([[5.,2]])
# Pre-activations entered by hand: z1 equals data . w1, and z2 equals relu(z1) . w2.
net.parameter_dict[1]['z'] = np.matrix([1.,-1])
net.parameter_dict[2]['z'] = np.matrix([-2.,4])
# h = activate(z), applied one element at a time (activate defaults to 'relu').
net.parameter_dict[1]['h'] = np.matrix(np.apply_along_axis(net.activate, 0, net.parameter_dict[1]['z']))
net.parameter_dict[2]['h'] = np.matrix(np.apply_along_axis(net.activate, 0, net.parameter_dict[2]['z']))
# Output-layer delta is supplied directly as a 1x1 value.
net.parameter_dict[3]['delta'] = np.array([5.]).reshape((1,1))
w3 = net.parameter_dict[3]['w']
z2 = net.parameter_dict[2]['z']
d3 = net.parameter_dict[3]['delta']
# delta2 = d3 . w3 . diag(activate_prime(z2)).
# w3 is used untransposed here because it was entered as a 1x2 row above,
# whereas w2 is transposed below.
net.parameter_dict[2]['delta'] = np.dot(d3,np.dot(w3, np.diag(np.apply_along_axis(net.activate_prime, 0, z2))))
d2 = net.parameter_dict[2]['delta']
w2 = net.parameter_dict[2]['w']
z1 = net.parameter_dict[1]['z']
# delta1 = d2 . w2^T . diag(activate_prime(z1)).
net.parameter_dict[1]['delta'] = np.dot(d2, np.dot(w2.T, np.diag(np.apply_along_axis(net.activate_prime, 0, z1))))




In [365]:
# Gradient accumulation step.
# Precondition: every layer's 'h' (forward pass) and 'delta' (backward pass)
# are already stored in net.parameter_dict.
# Walks from the last layer down to layer 1 and adds
# rate * h_{l-1}^T . delta_l to that layer's 'gradient' in place.
for l in reversed(range(1, layers + 1)):
    net.parameter_dict[l]['gradient'] += net.rate * np.dot(
        net.parameter_dict[l - 1]['h'].T,
        net.parameter_dict[l]['delta'],
    )

In [361]:
# Display the stored record for layer 1 (w, h, z, delta, activation, gradient).
net.parameter_dict[1]

{'w': matrix([[-0.5026567 , -0.74275387, -1.14755031],
         [-1.7088462 , -1.69649097, -0.39705638],
         [ 0.18734045, -0.63625343,  0.1105143 ],
         [ 0.04401923, -1.02881815, -1.41719269]]),
 'h': matrix([[ 1.        , -0.728575  , -0.74248112]]),
 'z': matrix([[0., 0., 0.]]),
 'delta': matrix([[0.90681126, 1.52612745, 0.50289372]]),
 'activation': 'relu',
 'gradient': matrix([[ 0.09068113,  0.15261275,  0.05028937],
         [-0.02621609, -0.04412065, -0.01453876],
         [ 0.04571724,  0.0769403 ,  0.02535358],
         [ 0.09541183,  0.16057432,  0.05291289]])}

In [347]:
# Display the stored record for layer 1.
# NOTE(review): this cell's execution count predates the current class cell and
# its saved output shows 3x2 weights with no intercept slot, so the output
# appears to come from an earlier version of initialize_net — re-run to refresh.
net.parameter_dict[1]

{'w': matrix([[-1.07491235,  0.79665676],
         [-0.21523174, -0.86358744],
         [ 0.49410561, -1.09611215]]),
 'h': matrix([[-0.62553319,  2.79837561]]),
 'z': matrix([[0., 0.]]),
 'delta': matrix([[-0.43099707,  0.55107776]]),
 'activation': 'relu',
 'gradient': matrix([[0., 0.],
         [0., 0.],
         [0., 0.]])}

In [265]:
# Print w2^T . diag(activate_prime(z1)), the factor that d2 is multiplied by
# when layer 1's delta is computed. Relies on w2 and z1 from the worked-example cell.
print(np.dot(w2.T,np.diag(np.apply_along_axis(net.activate_prime, 0, z1)) ))

[[-2.  0.]
 [ 4.  0.]]


In [369]:
# Show the row layout used for inputs and activations: a 1x10 matrix made of a
# leading 1 followed by nine standard-normal draws.
np.matrix(np.append(1, np.random.randn(9)))

matrix([[ 1.        ,  0.66295077,  0.67952681,  0.92289468, -0.70518447,
          0.42380718, -0.65953213,  0.13498344,  0.57211785, -0.13666202]])