In [45]:
import numpy as np

In [91]:
class NeuralNet:
    def __init__(self, layers, activations):
        self.layers = layers
        self.activations = activations

        self.params = self.init_params()

    def init_params(self):
        '''Creates weights for the Neural Net'''
        params = {}

        for n in range(1, len(self.layers)):
            edge = np.sqrt(1/self.layers[n-1])

            # shape (layer, Previous Layer)
            params["weight_l" + str(n)] = np.random.uniform(-edge,
                                                            edge, (self.layers[n], self.layers[n-1]))

            # Bias shape(h,1) - only 1 Bias neuron per layer
            params["bias_l" + str(n)] = np.random.uniform(-edge,
                                                          edge, (self.layers[n], 1))

        return params

    def feedforward(self, input_signal):
        """ Input signal should be 2D array shape : (Rows = input layer size (self.i) , Columns = batch_size)"""

        self.params["output_l0"] = input_signal

        for n in range(1, len(self.layers)):
            self.params["output_l" + str(n)] = (np.dot(self.params["weight_l" + str(
                n)], self.params["output_l" + str(n-1)]) + self.params["bias_l" + str(n)])


nn = NeuralNet(layers=[2, 3, 3, 1], activations=["Relu", "Relu", "Linear"])

input_signal = np.array([[0, 0], [0, 1], [1, 0], [1, 1]]).T
nn.feedforward(input_signal)
 
print("\n".join("{}\n{}".format(k, v) for k, v in nn.params.items()))

weight_l1
[[ 0.69858327 -0.30344136]
 [-0.70299227 -0.42011878]
 [ 0.01695842 -0.43007155]]
bias_l1
[[-0.07624925]
 [ 0.55471414]
 [ 0.05761911]]
weight_l2
[[ 0.19279515  0.50844163 -0.0339121 ]
 [-0.37761449 -0.30753624  0.25153619]
 [-0.12678089 -0.45187239 -0.05443081]]
bias_l2
[[-0.44169107]
 [-0.29700711]
 [-0.44188109]]
weight_l3
[[-0.22563614  0.55779625  0.06987352]]
bias_l3
[[0.56521095]]
output_l0
[[0 0 1 1]
 [0 1 0 1]]
output_l1
[[-0.07624925 -0.37969061  0.62233402  0.31889266]
 [ 0.55471414  0.13459536 -0.14827812 -0.5683969 ]
 [ 0.05761911 -0.37245244  0.07457752 -0.35549403]]
output_l2
[[-0.17630577 -0.43382904 -0.39962794 -0.65715121]
 [-0.4243157  -0.28870866 -0.46764961 -0.33204256]
 [-0.6860104  -0.43429062 -0.45783768 -0.20611789]]
output_l3
[[0.32037624 0.47171244 0.36253753 0.51387374]]


In [None]:
def sigmoid(x):
    return 1 / (1 + np.exp(-x))


def sigmoid_derivative(f):
    return np.multiply(f, (1 - f))


def tanh(x):
    return np.tanh(x)


def tanh_derivative(f):
    return 1 - np.power(np.tanh(f), 2)


def relu(x):
    return np.maximum(0, x)


def relu_derivative(z):
    return (z > 0).astype(int)


def noneActivation(x):
    return x    


def noneActivation_derivative(z):
    return 1    


def mean_squared_error(target, output):
    # print('\n Prediction', output)
    # print(target)
    return np.square(target - output).mean()


class MyNeuralNet:
    def __init__(self, input_neurons, hidden_neurons, output_neurons):
        
        # Neural Net Layers and Nodes
        self.i = input_neurons  #
        self.h = hidden_neurons  #
        self.o = output_neurons  #
        
        # Activation functions and derivatives
        self.activation_h =  relu
        self.activation_o =  noneActivation

        self.activation_derivative_h =  relu_derivative
        self.activation_derivative_o =  noneActivation_derivative
        
        # Loss function
        self.loss_function = mean_squared_error
        self.loss = 0
        self.loss_list = []
        
        # Optimizing
        self.lr = 0.001
        self.optimizer = 'ADAM'
        
        # ADAM Optimizer variables
        self.l2_m = 0
        self.l1_m = 0
        self.l3_m = 0
        
        self.l2_v = 0
        self.l1_v = 0
        self.l3_v = 0

        self.lb2_m = 0
        self.lb1_m = 0
        self.lb3_m = 0

        self.lb2_v = 0
        self.lb1_v = 0
        self.lb3_v = 0

        self.t = 0
        
    
    def init_weights(self):     
        '''Creates weights for the Neural Net'''
        # shape (layer, Previous Layer)
        self.weight_h =  np.random.uniform(-np.sqrt(1/self.i) , np.sqrt(1/self.i) ,(self.h, self.i))
        self.weight_o = np.random.uniform(-np.sqrt(1/self.h) , np.sqrt(1/self.h) ,(self.o, self.h))
        # Bias shape(h,1) - only 1 Bias neuron per layer
        self.bias_h =  np.random.uniform(-np.sqrt(1/self.i) , np.sqrt(1/self.i) ,(self.h,1))
        self.bias_o =  np.random.uniform(-np.sqrt(1/self.h) , np.sqrt(1/self.h) ,(self.o, 1))


    def feedforward(self, input_signal):
        """ Input signal should be 2D array shape : (Rows = input layer size (self.i) , Columns = batch_size)"""
        # dot((R1,C1),(R2,C2)) output shape: (R1,C2), C1 == R2
        output_i = input_signal
#         output_i = np.multiply(input_signal,np.random.binomial(1, 1, input_signal.shape)) / np.full(input_signal.shape,0.9)
                               
        output_h = self.activation_h(np.dot(self.weight_h, output_i) + self.bias_h)
#         output_h = np.multiply(output_h, np.random.binomial(1, 1, output_h.shape)) / np.full(output_h.shape,1) # DROPOUT
       
        output_o = self.activation_o(np.dot(self.weight_o, output_h) + self.bias_o)

        return output_i, output_h, output_o #returns tuple
    
    

    def back_propagation(self, output_i, output_h, output_o, target, batch_size):   
        """ Input signal should be 2D array shape : (Rows = input layer size (self.i) , Columns = batch_size)"""
        # Target shape : ( output layer size , batch_size)
        # for graph
        self.loss = self.loss_function(target, output_o)
        if random.uniform(0, 1) > 0.05:
            self.loss_list.append(self.loss)  
        

        error_o = -(2/len(target))*(target - output_o)
       
        delta_o =  error_o * self.activation_derivative_o(output_o) 
        
        grad_o = np.dot(delta_o,  output_h.T) / batch_size     # Correct
        grad_bias_o = np.mean(delta_o, axis=1, keepdims=True)   # Correct 
        
  
        delta_h = np.dot(self.weight_o.T, delta_o) * self.activation_derivative_h(output_h)        
        
        grad_h = np.dot(delta_h, output_i.T) / batch_size    # Correct
        grad_bias_h = np.mean(delta_h, axis=1, keepdims=True)  # Correct  
     
        return grad_h ,grad_o, grad_bias_h, grad_bias_o

    
    
    def update_weights(self, grad_h ,grad_o, grad_bias_h, grad_bias_o):
        '''Returns (weight_h, weight_o, weight_bias_h,  weight_bias_o)'''             
                
        if self.optimizer == 'SGD':
            self.sgd_optimizer( grad_h, grad_o, grad_bias_h, grad_bias_o)

        elif self.optimizer == 'ADAM':
            self.adam_optimizer(grad_h, grad_o, grad_bias_h, grad_bias_o)


    def predict(self, input_signal):
        """ Input signal should be 2D array shape : (Rows = input layer size (self.i) , Columns = batch_size)"""
        # dot((R1,C1),(R2,C2)) output shape: (R1,C2), C1 == R2
        
        output_i = input_signal
        output_h = self.activation_h(np.dot(self.weight_h, output_i) + self.bias_h)
        output_o = self.activation_o(np.dot(self.weight_o, output_h) + self.bias_o)

        return output_o #returns tuple
    
#         '''Returns only Output Layer array'''
#         _, _, output_o = self.feedforward(input_signal)
#         return output_o
    

        
    def sgd_optimizer(self, grad_h, grad_o, grad_bias_h, grad_bias_o):
        # Update Weights
        self.weight_o = self.weight_o - self.lr * grad_o
        self.weight_h = self.weight_h - self.lr * grad_h

        self.bias_h = self.bias_h - self.lr * grad_bias_h
        self.bias_o = self.bias_o - self.lr * grad_bias_o


    def adam_optimizer(self, grad_h, grad_o, grad_bias_h, grad_bias_o):
        decay_rate_1 = 0.9
        decay_rate_2 = 0.999
        epsilon = 10**(-8)

        g2 = grad_o        
        g0 = grad_h

        gb2 = grad_bias_o
        gb0 = grad_bias_h

        self.t += 1  # Increment Time Step

        # Computing 1st and 2nd moment for each layer
        self.l3_m = self.l3_m * decay_rate_1 + (1 - decay_rate_1) * g2
        self.l1_m = self.l1_m * decay_rate_1 + (1 - decay_rate_1) * g0
        
        self.l3_v = self.l3_v * decay_rate_2 + (1 - decay_rate_2) * np.square(g2)        
        self.l1_v = self.l1_v * decay_rate_2 + (1 - decay_rate_2) * np.square(g0)
        
        self.lb3_m = self.lb3_m * decay_rate_1 + (1 - decay_rate_1) * gb2        
        self.lb1_m = self.lb1_m * decay_rate_1 + (1 - decay_rate_1) * gb0
        
        self.lb3_v = self.lb3_v * decay_rate_2 + (1 - decay_rate_2) * np.square(gb2)        
        self.lb1_v = self.lb1_v * decay_rate_2 + (1 - decay_rate_2) * np.square(gb0)

        # Computing bias-corrected moment
        l3_m_corrected = self.l3_m / (1 - (decay_rate_1 ** self.t))
        l3_v_corrected = self.l3_v / (1 - (decay_rate_2 ** self.t))

        l1_m_corrected = self.l1_m / (1 - (decay_rate_1 ** self.t))
        l1_v_corrected = self.l1_v / (1 - (decay_rate_2 ** self.t))

        lb3_m_corrected = self.lb3_m / (1 - (decay_rate_1 ** self.t))
        lb3_v_corrected = self.lb3_v / (1 - (decay_rate_2 ** self.t))

        lb1_m_corrected = self.lb1_m / (1 - (decay_rate_1 ** self.t))
        lb1_v_corrected = self.lb1_v / (1 - (decay_rate_2 ** self.t))

        # Update Weights
        w2_update = l3_m_corrected / (np.sqrt(l3_v_corrected) + epsilon)        
        w0_update = l1_m_corrected / (np.sqrt(l1_v_corrected) + epsilon)
        b2_update = lb3_m_corrected / (np.sqrt(lb3_v_corrected) + epsilon)        
        b0_update = lb1_m_corrected / (np.sqrt(lb1_v_corrected) + epsilon)

        self.weight_o -= (self.lr * w2_update)        
        self.weight_h -= (self.lr * w0_update)
        self.bias_o -= (self.lr * b2_update)        
        self.bias_h -= (self.lr * b0_update)

        
    def plot_MSE(self):
        y = [self.loss_list[i] for i in range(len(self.loss_list))]
        x = [x for x in range(len(y))]
        plt.plot(x, y)
        plt.xlabel('iterations')
        plt.title('MSE of the NN')
        plt.show()    
