In [1]:
import numpy as np

In [2]:
class EVGONeuralNetwork:
    """Fully connected feed-forward network holding ``t`` candidate weight sets.

    Conventions shared by all methods:

    * ``nnodes[0]`` is the input width, ``nnodes[-1]`` the output width and the
      ``nlayers`` entries in between are the hidden layers.
    * The weights between layer ``i`` and layer ``i + 1`` are one flat list of
      ``(nnodes[i] + 1) * nnodes[i + 1]`` numbers grouped by source node:
      first the bias group, then one group per node of layer ``i``; each
      group holds one weight per node of layer ``i + 1``.
    * ``weights`` is a list of ``t`` such weight sets; ``best_weights`` is the
      single set read by ``forward_prop`` and trained by ``back_prop``.
    * ``z[k]`` / ``h[k]`` hold the pre-activation / activated values of layer
      ``k`` from the latest ``forward_prop`` call, one array per node with one
      entry per observation (``z[0]`` and ``h[0]`` are the input columns).
    """

    def __init__(self, nlayers, nnodes, activations, t):
        """Create the network and draw ``t`` random candidate weight sets.

        Args:
            nlayers: Number of hidden layers.
            nnodes: Node count per layer, input and output included, so its
                length must be ``nlayers + 2``.
            activations: One callable per layer (length ``nlayers + 2``).
                Entry 0 is never called; entry ``k`` is applied to the
                pre-activations of layer ``k``.
            t: Number of candidate weight sets to create (at least 1).

        Raises:
            AssertionError: If ``nnodes`` or ``activations`` has the wrong
                length.
            ValueError: If ``t`` is smaller than 1.
        """
        assert nlayers == len(nnodes) - 2, "nnodes must have nlayers + 2 entries"
        assert nlayers == len(activations) - 2, \
            "activations must have nlayers + 2 entries"
        if t < 1:
            raise ValueError("t must be at least 1")
        self.nlayers = nlayers
        self.nnodes = nnodes
        self.activations = activations
        # initialize_weights() reads self.t, so it has to be set first.
        self.t = t
        self.weights = self.initialize_weights()
        self.best_weights = self.weights[0]
        self.z = []
        self.h = []

    def initialize_weights(self):
        """Return ``t`` weight sets drawn uniformly from [-0.25, 0.25).

        Each set is a list with one flat weight list per pair of consecutive
        layers (see the class docstring for the layout).
        """
        return [
            [
                [
                    np.random.uniform(-0.25, 0.25)
                    for _ in range((self.nnodes[i] + 1) * self.nnodes[i + 1])
                ]
                for i in range(self.nlayers + 1)
            ]
            for _ in range(self.t)
        ]

    def chunker(self, seq, size):
        """Yield consecutive slices of ``seq`` of length ``size``.

        The last slice is shorter when ``len(seq)`` is not a multiple of
        ``size``.
        """
        return (seq[pos:pos + size] for pos in range(0, len(seq), size))

    def forward_prop(self, input_data):
        """Propagate a batch through the network using ``best_weights``.

        Rebuilds ``self.z`` and ``self.h`` as a side effect; ``back_prop``
        relies on those cached values.

        Args:
            input_data: Sequence of observations, each a sequence of
                ``nnodes[0]`` feature values.

        Returns:
            The pre-activation values of the output layer
            (``self.z[nlayers + 1]``): a list with one array per output node,
            each holding one value per observation. The output activation is
            applied to ``self.h`` only, not to the returned values.

        Raises:
            AssertionError: If the first observation does not have
                ``nnodes[0]`` features.
        """
        assert len(input_data[0]) == self.nnodes[0]
        # Work column-wise: one array per input node, one entry per observation.
        columns = [
            np.array([row[k] for row in input_data])
            for k in range(len(input_data[0]))
        ]
        self.z = [columns]
        self.h = [columns]
        # Slot 0 is the constant bias input feeding the bias weight group.
        inputs = [np.repeat(1, len(input_data))] + columns
        for i in range(self.nlayers + 1):
            new_nodes = [0] * self.nnodes[i + 1]
            # Weight group p belongs to source node p (group 0 is the bias);
            # entry j of a group feeds node j of the next layer.
            groups = self.chunker(self.best_weights[i], len(new_nodes))
            for p, group in enumerate(groups):
                for j, weight in enumerate(group):
                    new_nodes[j] = new_nodes[j] + weight * inputs[p]
            self.z.append(new_nodes)
            self.h.append([self.activations[i + 1](v) for v in new_nodes])
            activated = self.h[i + 1]
            inputs = [np.repeat(1, len(activated[0]))] + list(activated)
        return self.z[self.nlayers + 1]

    def back_prop(self, y_pred, y, rate, derivs):
        """Apply one gradient step to ``best_weights``.

        Uses the ``z`` / ``h`` values cached by the latest ``forward_prop``
        call, so that call must have been made on the same batch. The output
        layer is treated as linear: the initial deltas are the raw residuals
        ``y_pred - y[0]``. Weight changes are summed (not averaged) over the
        observations before being scaled by ``rate``.

        Only ``best_weights`` is replaced; the candidate pool in
        ``self.weights`` is left untouched.

        Args:
            y_pred: Output of ``forward_prop`` for the batch.
            y: Indexable whose first element ``y[0]`` holds the targets and
                broadcasts against ``y_pred``.
                NOTE(review): only ``y[0]`` is used - confirm that callers
                pass the targets in that layout.
            rate: Learning rate.
            derivs: ``derivs[k]`` is called on the pre-activations of layer
                ``k``; entries ``0`` to ``nlayers`` are used.
        """
        # y_pred is a plain list of arrays; convert so the subtraction also
        # works when y[0] is not a numpy object.
        deltas = np.asarray(y_pred) - y[0]
        new_weights = []
        for layer in range(self.nlayers, -1, -1):
            # Train the set forward_prop reads, not the pool of candidates.
            layer_weights = self.best_weights[layer]
            n_obs = len(self.h[layer][0])
            sources = [np.repeat(1, n_obs)] + list(self.h[layer])
            updated = []
            idx = 0
            # Same ordering as the flat layout: source node outer, target inner.
            for h in sources:
                for d in deltas:
                    # Sum the weight change across observations.
                    updated.append(layer_weights[idx] - rate * np.sum(h * d))
                    idx += 1
            new_weights.insert(0, updated)
            # Skip the bias group: bias inputs have no upstream delta.
            idx = len(self.z[layer + 1])
            new_deltas = []
            for z in self.z[layer]:
                slope = derivs[layer](z)
                node_delta = 0
                for d in deltas:
                    # Uses the pre-update weights of this layer.
                    node_delta += slope * d * layer_weights[idx]
                    idx += 1
                new_deltas.append(node_delta)
            deltas = new_deltas
        self.best_weights = new_weights

    def gradient_descent(self, data, y_val, rate, batch_size, derivs, tol):
        """Train ``best_weights`` with mini-batch gradient descent.

        Runs whole passes over the data until the mean squared error of two
        consecutive passes differs by at most ``tol``. The error of a batch
        is measured on output node 0 after that batch's weight update.

        Args:
            data: Object exposing ``.values`` (row-sliceable features) and
                ``len()``.
            y_val: Object exposing ``.values``, sliced with the same row
                range as ``data``.
                NOTE(review): the targets used for a batch are the first row
                of that slice (``ys[0]``) - confirm the intended layout of
                ``y_val``.
            rate: Learning rate passed to ``back_prop``.
            batch_size: Number of rows per batch (positive integer).
            derivs: Activation derivatives passed to ``back_prop``.
            tol: Stop once the pass-to-pass change in MSE is ``<= tol``.

        Returns:
            The mean squared error of the last pass (``0`` if no pass ran
            because ``tol`` is at least 100).

        Raises:
            ValueError: If ``batch_size`` is smaller than 1 (this used to
                loop forever).
            ZeroDivisionError: If ``data`` is empty.
        """
        if batch_size < 1:
            raise ValueError("batch_size must be a positive integer")
        features = data.values
        targets = y_val.values
        n_obs = len(data)
        diff = 100
        new_MSE = 0
        while diff > tol:
            old_MSE = new_MSE
            new_MSE = 0
            for start in range(0, n_obs, batch_size):
                # Slicing clamps at the end, so the last batch may be short.
                xs = features[start:start + batch_size]
                ys = targets[start:start + batch_size]
                y_pred = self.forward_prop(xs)
                self.back_prop(y_pred, ys, rate, derivs)
                # Re-run the batch so the error reflects the updated weights.
                new_MSE += np.sum((self.forward_prop(xs)[0] - ys[0]) ** 2)
            new_MSE = new_MSE / n_obs
            diff = abs(old_MSE - new_MSE)
        return new_MSE