In [2]:
import numpy as np

In [9]:
def sigmoid(x, derivative=False):
    """Logistic sigmoid, or its derivative written in terms of the sigmoid's output.

    Args:
        x: Scalar or NumPy array. With ``derivative=False`` this is the raw
            input; with ``derivative=True`` it is used as an already-activated
            value ``s`` (no sigmoid is applied to it).
        derivative: When true, return ``s * (1 - s)`` instead of the activation.

    Returns:
        ``1 / (1 + exp(-x))`` or, in derivative mode, ``x * (1 - x)``.
    """
    if not derivative:
        return 1 / (1 + np.exp(-x))
    return np.multiply(x, (1 - x))


def relu(x, derivative=False):
    """Rectified linear unit, or its 0/1 derivative mask.

    Args:
        x: NumPy array of values.
        derivative: When true, return an integer array holding 1 where
            ``x > 0`` and 0 elsewhere.

    Returns:
        ``max(0, x)`` element-wise, or the integer mask in derivative mode.
    """
    if not derivative:
        return np.maximum(0, x)
    return (x > 0).astype(int)


def linear(x, derivative=False):
    """Identity activation: returns ``x`` unchanged, or the constant 1 as its derivative."""
    return 1 if derivative else x


def mse(target, output):
    """Mean squared error: the mean over all elements of ``(target - output) ** 2``."""
    squared_residual = np.square(target - output)
    return squared_residual.mean()


In [125]:
class SGD:
    """Plain gradient-descent step applied layer by layer."""

    def __init__(self, learning_rate):
        # Step size multiplied into every gradient.
        self.lr = learning_rate

    def proceed(self, weights, biases, gradients, gradients_b):
        """Apply one descent step to every entry from index 1 onwards.

        Index 0 is skipped in all four lists. The entries of ``weights`` and
        ``biases`` are replaced inside the lists that were passed in, and those
        same lists are returned.

        Args:
            weights: Per-layer weight arrays.
            biases: Per-layer bias arrays.
            gradients: Per-layer weight gradients, indexed like ``weights``.
            gradients_b: Per-layer bias gradients, indexed like ``biases``.

        Returns:
            The ``(weights, biases)`` lists after the update.
        """
        step = self.lr
        for layer_idx in range(1, len(weights)):
            weights[layer_idx] = weights[layer_idx] - step * gradients[layer_idx]
            biases[layer_idx] = biases[layer_idx] - step * gradients_b[layer_idx]
        return weights, biases
    

In [92]:
class Layer:
    """A single dense layer; the input layer is a parameter-free pass-through."""

    def __init__(self, in_features, out_features, activation=None, input_layer=False):
        """Store the layer configuration and draw its initial parameters.

        Args:
            in_features: Number of inputs feeding each neuron (unused for the
                input layer).
            out_features: Number of neurons in this layer.
            activation: Callable applied to the affine output in ``feedforward``.
            input_layer: When true the layer has no weights or biases.
        """
        self.neurons = out_features
        self.activation = activation
        self.input_layer = input_layer

        # Weights are drawn before biases; both use the same uniform range.
        self.weights = self.init_weights(in_features, out_features)
        self.biases = self.init_biases(in_features, out_features)

    def init_weights(self, in_features, out_features):
        """Return an ``(out_features, in_features)`` array drawn uniformly from
        ``[-sqrt(1/in_features), sqrt(1/in_features))``, or ``None`` for the input layer."""
        if not self.input_layer:
            bound = np.sqrt(1/in_features)
            return np.random.uniform(-bound, bound, (out_features, in_features))
        return None

    def init_biases(self, in_features, out_features):
        """Return an ``(out_features, 1)`` bias column drawn from the same range
        as the weights, or ``None`` for the input layer."""
        if not self.input_layer:
            bound = np.sqrt(1/in_features)
            return np.random.uniform(-bound, bound, (out_features, 1))
        return None

    def feedforward(self, in_signal):
        """Return ``activation(weights @ in_signal + biases)``; the input layer
        returns ``in_signal`` untouched."""
        if self.input_layer:
            return in_signal

        pre_activation = np.dot(self.weights, in_signal) + self.biases
        return self.activation(pre_activation)

In [123]:
class NeuralNet1:
    """Feed-forward network assembled from ``Layer`` objects.

    Parameters live on the individual layers (``layer.weights`` and
    ``layer.biases``); the network itself only holds the layer list, the loss
    function and the optimizer.
    """

    def __init__(self, neurons, activations, loss, optimizer):
        """Build the layer stack.

        Args:
            neurons: Layer sizes, input layer first.
            activations: One entry per layer, indexed like ``neurons``; entry 0
                (the input layer) is not used. Each callable must accept a
                ``derivative=True`` keyword, which ``back_propagation`` uses.
            loss: Callable ``loss(target, output)``.
            optimizer: Object with a
                ``proceed(weights, biases, gradients, gradients_b)`` method
                returning the updated ``(weights, biases)`` lists.
        """
        self.layers = self.create_net(neurons, activations)
        self.loss_function = loss
        self.optimizer = optimizer
        # Loss value computed by the most recent back_propagation call.
        self.loss = None

    def create_net(self, neurons, activations):
        """Return the list of layers: a pass-through input layer followed by one
        dense layer per remaining entry of ``neurons``."""
        layers = [Layer(None, neurons[0], input_layer=True)]
        for n in range(1, len(neurons)):
            layers.append(Layer(neurons[n-1], neurons[n], activations[n]))
        return layers

    def feedforward(self, input_signal):
        """Propagate ``input_signal`` through every layer.

        Returns:
            List with one entry per layer: the input itself at index 0, then
            each layer's activated output.
        """
        outputs = [input_signal]
        for i in range(1, len(self.layers)):
            outputs.append(self.layers[i].feedforward(outputs[i-1]))
        return outputs

    def predict(self, input_signal):
        """Return only the output layer's activation for ``input_signal``."""
        return self.feedforward(input_signal)[-1]

    def back_propagation(self, outputs, target):
        """Compute weight and bias gradients for every non-input layer.

        The output error is ``-(2 / len(target)) * (target - output)``. Deltas
        are pushed backwards through each layer's transposed weights and scaled
        by the layer's activation derivative, which is evaluated on the layer's
        *output*. Weight gradients are divided by the batch size and bias
        gradients are averaged over the batch axis.

        Args:
            outputs: Per-layer outputs as returned by ``feedforward``.
            target: Array whose second dimension (``target.shape[1]``) is used
                as the batch size.

        Returns:
            ``(weight_gradients, bias_gradients)``: two lists indexed like
            ``self.layers``, with ``None`` at index 0 (the input layer).

        Side effects:
            Stores the loss for this batch in ``self.loss``.
        """
        batch_size = target.shape[1]
        n_layers = len(self.layers)

        weight_gradients = [None] * n_layers
        bias_gradients = [None] * n_layers
        deltas = [None] * n_layers

        # Keep the loss instead of discarding it so callers can monitor training.
        self.loss = self.loss_function(target, outputs[-1])

        # Output layer error and delta.
        output_error = -(2/len(target))*(target - outputs[-1])
        deltas[-1] = output_error * self.layers[-1].activation(outputs[-1], derivative=True)

        # Hidden layers, walking backwards with negative indices; the input
        # layer (index -n_layers) is excluded.
        for i in range(-2, -n_layers, -1):
            error = np.dot(self.layers[i+1].weights.T, deltas[i+1])
            deltas[i] = error * self.layers[i].activation(outputs[i], derivative=True)

        for i in range(-1, -n_layers, -1):
            weight_gradients[i] = np.dot(deltas[i], outputs[i-1].T) / batch_size
            bias_gradients[i] = np.mean(deltas[i], axis=1, keepdims=True)

        return weight_gradients, bias_gradients

    def update_weights(self, gradients, gradients_b):
        """Run one optimizer step and store the new parameters on the layers.

        The parameters are gathered from the layers into lists indexed like
        ``self.layers`` (the network has no ``weights``/``biases`` attributes of
        its own), passed to ``optimizer.proceed`` and written back to every
        non-input layer.

        Args:
            gradients: Weight gradients as returned by ``back_propagation``.
            gradients_b: Bias gradients as returned by ``back_propagation``.
        """
        weights = [layer.weights for layer in self.layers]
        biases = [layer.biases for layer in self.layers]

        weights, biases = self.optimizer.proceed(weights, biases, gradients, gradients_b)

        for layer, new_w, new_b in zip(self.layers[1:], weights[1:], biases[1:]):
            layer.weights = new_w
            layer.biases = new_b

    def print_params(self):
        """Print the weights and biases of every non-input layer."""
        for layer in self.layers:
            if not layer.input_layer:
                print(layer.weights)
                print(layer.biases, "\n")




In [124]:
# Build a 2-3-4-1 network: ReLU on both hidden layers, sigmoid on the output,
# MSE loss and SGD with learning rate 0.1. activations[0] belongs to the input
# layer and is None.
nn1 = NeuralNet1(neurons=[2, 3, 4, 1],
                 activations=[None, relu, relu, sigmoid],
                 loss=mse,
                 optimizer=SGD(0.1))

# nn1.print_params()
# print(nn1.layers[2].activation)
# XOR truth table. The transpose puts one sample per column, so the input is
# (2, 4) and the target is (1, 4).
input_signal = np.array([[0, 0], [0, 1], [1, 0], [1, 1]]).T
target_signal = np.array([[0], [1], [1], [0]]).T



# One forward pass followed by one backward pass. No weights are updated here;
# the cell's displayed value is the (weight_gradients, bias_gradients) pair.
ff = nn1.feedforward(input_signal)
nn1.back_propagation(ff,target_signal)

([None,
  array([[-0.00415042, -0.00378314],
         [ 0.        ,  0.        ],
         [-0.00063403, -0.00057793]]),
  array([[-0.00673358,  0.        , -0.00246692],
         [ 0.        ,  0.        ,  0.        ],
         [ 0.00257515,  0.        ,  0.00094343],
         [ 0.        ,  0.        ,  0.        ]]),
  array([[ 0.02082788,  0.        , -0.00093487,  0.        ]])],
 [None,
  array([[ 0.01414454],
         [ 0.        ],
         [-0.00137372]]),
  array([[-0.01296496],
         [ 0.        ],
         [ 0.00495823],
         [ 0.        ]]),
  array([[0.02707265]])])

In [103]:
# Scratch check: prints -1, -2, -3 (descending negative indices).
for i in reversed(range(-3, 0)):
    print(i)

-1
-2
-3


In [468]:



    
    
    
    
    

class NeuralNet:
    """Fully connected feed-forward network that owns its weight and bias lists.

    All per-layer lists (``weights``, ``biases``, ``outputs``, ``gradients``,
    ``gradients_b``) are indexed like ``layers``; index 0 is the input layer and
    holds ``None`` for parameters and gradients.
    """

    def __init__(self, layers, activations, loss, learning_rate=0.1):
        """Create the network and draw its initial parameters.

        Args:
            layers: Layer sizes, input layer first.
            activations: One callable per layer, indexed like ``layers``; entry
                0 is unused. Each callable must accept ``derivative=True``.
            loss: Callable ``loss(target, output)``.
            learning_rate: Step size handed to the SGD optimizer (default 0.1).
        """
        self.layers = layers
        self.layers_num = len(self.layers)

        self.activations = activations
        self.loss_function = loss
        # Loss value computed by the most recent back_propagation call.
        self.loss = None

        self.weights, self.biases = self.init_weights()
        self.outputs = [None] * self.layers_num

        self.gradients = [None] * self.layers_num
        self.gradients_b = [None] * self.layers_num

        self.lr = learning_rate
        self.optimizer = SGD(self.lr)

    def init_weights(self):
        """Draw weights and biases for every non-input layer.

        Layer ``n`` gets a ``(layers[n], layers[n-1])`` weight matrix and a
        ``(layers[n], 1)`` bias column, both uniform in
        ``[-sqrt(1/layers[n-1]), sqrt(1/layers[n-1]))``.

        Returns:
            ``(weights, biases)`` lists with ``None`` at index 0.
        """
        weights = [None] * self.layers_num
        biases = [None] * self.layers_num

        for n in range(1, self.layers_num):
            edge = np.sqrt(1/self.layers[n-1])

            # Shape: (this layer, previous layer).
            weights[n] = np.random.uniform(-edge, edge,
                                           (self.layers[n], self.layers[n-1]))

            # One bias per neuron, stored as a column so it broadcasts over the batch.
            biases[n] = np.random.uniform(-edge, edge, (self.layers[n], 1))

        return weights, biases

    def feedforward(self, input_signal):
        """Propagate a batch through the network and cache every layer output.

        Args:
            input_signal: 2D array with rows = input layer size and
                columns = batch size.

        Returns:
            ``self.outputs``: the input at index 0, then each layer's activation.
        """
        self.outputs[0] = input_signal

        for n in range(1, self.layers_num):
            self.outputs[n] = self.activations[n](np.dot(self.weights[n], self.outputs[n-1]) + self.biases[n])

        return self.outputs

    def back_propagation(self, target, batch_size):
        """Compute gradients for every non-input layer from the cached outputs.

        Works for any number of layers. The delta of the output layer is
        propagated backwards through the transposed weights and scaled by each
        layer's activation derivative, which is evaluated on the layer's
        *output*. ``feedforward`` must have been called first so that
        ``self.outputs`` is populated.

        Args:
            target: Array compared against the output layer's activation.
            batch_size: Divisor applied to the weight gradients.

        Returns:
            ``(self.gradients, self.gradients_b)``.

        Side effects:
            Stores the loss for this batch in ``self.loss``.
        """
        # Keep the loss instead of discarding it so callers can monitor training.
        self.loss = self.loss_function(target, self.outputs[-1])

        error_o = -(2/len(target))*(target - self.outputs[-1])
        delta = error_o * self.activations[-1](self.outputs[-1], derivative=True)

        # Walk from the output layer down to the first hidden layer.
        for n in range(self.layers_num - 1, 0, -1):
            self.gradients[n] = np.dot(delta, self.outputs[n-1].T) / batch_size
            self.gradients_b[n] = np.mean(delta, axis=1, keepdims=True)

            if n > 1:
                # Delta for the layer below; the input layer has no parameters.
                delta = np.dot(self.weights[n].T, delta) * self.activations[n-1](self.outputs[n-1], derivative=True)

        return self.gradients, self.gradients_b

    def update_weights(self, gradients, gradients_b):
        """Run one optimizer step and store the updated weights and biases.

        Calls ``optimizer.proceed``, the update method the SGD optimizer exposes.
        """
        self.weights, self.biases = self.optimizer.proceed(self.weights,
                                                           self.biases,
                                                           gradients,
                                                           gradients_b)

    def predict(self, input_signal):
        """Return only the output layer's activation for ``input_signal``."""
        return self.feedforward(input_signal)[-1]

    def print_params(self):
        """Print weights, biases and cached outputs of every non-input layer."""
        print(self.weights[1:])
        print(self.biases[1:])
        print(self.outputs[1:])

# 2-25-20-1 network: ReLU on both hidden layers, sigmoid on the output, MSE loss.
# activations[0] belongs to the input layer and is None.
nn = NeuralNet(layers=[2, 25, 20, 1],
               activations=[None, relu, relu, sigmoid],
               loss=mse)


# XOR truth table. The transpose puts one sample per column, so the input is
# (2, 4) and the target is (1, 4).
input_signal = np.array([[0, 0], [0, 1], [1, 0], [1, 1]]).T
target_signal = np.array([[0], [1], [1], [0]]).T

# print(np.shape(target_signal))
# 1000 full-batch iterations: forward pass, gradients, parameter update.
# The literal 4 is the batch size (the number of columns in input_signal).
for epochs in range(1000):
    nn.feedforward(input_signal)
    gradients = nn.back_propagation(target_signal, 4)
    nn.update_weights(*gradients)
# Print the output-layer activations for the four training samples.
print(nn.predict(input_signal))


# nn.print_params()

[[0.03576583 0.95733211 0.96020589 0.05933778]]


In [210]:
def sigmoid(x, derivative=False):
    """Logistic sigmoid, optionally returning its derivative instead.

    This definition replaces the earlier ``sigmoid(x, derivative=False)`` once
    the cell runs, so it keeps the same optional keyword: networks that call
    ``activation(output, derivative=True)`` keep working regardless of which
    definition is live.

    Args:
        x: Scalar or NumPy array. In derivative mode it is used as an
            already-activated value ``s`` (no sigmoid is applied to it).
        derivative: When true, return ``s * (1 - s)``.

    Returns:
        ``1 / (1 + exp(-x))``, or ``x * (1 - x)`` in derivative mode.
    """
    if derivative:
        return np.multiply(x, (1 - x))
    return 1 / (1 + np.exp(-x))


def sigmoid_derivative(f):
    """Sigmoid derivative written in terms of the sigmoid's output: ``f * (1 - f)``."""
    complement = 1 - f
    return np.multiply(f, complement)


def tanh(x):
    """Hyperbolic tangent activation (thin wrapper around ``np.tanh``)."""
    activated = np.tanh(x)
    return activated


def tanh_derivative(f):
    """Tanh derivative written in terms of the tanh *output*: ``1 - f ** 2``.

    This follows the convention of ``sigmoid_derivative(f)``, where ``f`` is the
    already-activated value. The previous body applied ``tanh`` to ``f`` a
    second time, which yields ``1 - tanh(tanh(z)) ** 2`` when the function is
    given a layer's output.

    Args:
        f: Output of ``tanh`` (scalar or NumPy array).

    Returns:
        ``1 - f ** 2`` element-wise.
    """
    return 1 - np.square(f)


def relu(x, derivative=False):
    """Rectified linear unit, optionally returning its 0/1 derivative mask.

    This definition replaces the earlier ``relu(x, derivative=False)`` once the
    cell runs, so it keeps the same optional keyword: networks that call
    ``activation(output, derivative=True)`` keep working regardless of which
    definition is live.

    Args:
        x: NumPy array of values.
        derivative: When true, return an integer array holding 1 where
            ``x > 0`` and 0 elsewhere.

    Returns:
        ``max(0, x)`` element-wise, or the integer mask in derivative mode.
    """
    if derivative:
        return (x > 0).astype(int)
    return np.maximum(0, x)


def relu_derivative(z):
    """Integer mask of the ReLU derivative: 1 where ``z > 0``, 0 elsewhere."""
    positive = z > 0
    return positive.astype(int)


def noneActivation(x):
    """Identity activation: hands the input back unchanged."""
    passthrough = x
    return passthrough


def noneActivation_derivative(z):
    """Derivative of the identity activation: the constant 1, whatever ``z`` is."""
    slope = 1
    return slope


def mean_squared_error(target, output):
    """Mean over all elements of the squared difference ``(target - output) ** 2``."""
    squared_diff = np.square(target - output)
    return squared_diff.mean()





class MyNeuralNet:
    """Network with one hidden layer (ReLU hidden, sigmoid output, MSE loss).

    The constructor does not create weights: call ``init_weights()`` before
    ``feedforward``, ``predict`` or any update. The optimizer is chosen through
    the ``optimizer`` attribute, which must be ``'SGD'`` or ``'ADAM'``.
    """

    def __init__(self, input_neurons, hidden_neurons, output_neurons):
        # Layer sizes.
        self.i = input_neurons
        self.h = hidden_neurons
        self.o = output_neurons

        # Activation functions and their derivatives. The derivatives are
        # called with the layer *outputs* in back_propagation.
        self.activation_h = relu
        self.activation_o = sigmoid

        self.activation_derivative_h = relu_derivative
        self.activation_derivative_o = sigmoid_derivative

        # Loss function, most recent loss, and the per-call loss history that
        # plot_MSE draws.
        self.loss_function = mean_squared_error
        self.loss = 0
        self.loss_list = []

        # Optimizer settings.
        self.lr = 0.1
        self.optimizer = 'SGD'

        # Adam state. The l1_*/lb1_* attributes track the hidden layer's
        # weights/biases and the l3_*/lb3_* attributes the output layer's;
        # the l2_*/lb2_* attributes are not read by adam_optimizer.
        self.l2_m = 0
        self.l1_m = 0
        self.l3_m = 0

        self.l2_v = 0
        self.l1_v = 0
        self.l3_v = 0

        self.lb2_m = 0
        self.lb1_m = 0
        self.lb3_m = 0

        self.lb2_v = 0
        self.lb1_v = 0
        self.lb3_v = 0

        # Adam time step, incremented on every adam_optimizer call.
        self.t = 0

    def init_weights(self):
        '''Creates weights and biases for the Neural Net.

        Weight shape is (layer, previous layer) and bias shape is (layer, 1).
        Values are uniform in [-sqrt(1/fan_in), sqrt(1/fan_in)), where fan_in
        is the size of the previous layer.
        '''
        edge_h = np.sqrt(1/self.i)
        edge_o = np.sqrt(1/self.h)

        # Draw order (weight_h, weight_o, bias_h, bias_o) is kept so that
        # seeded runs reproduce the same parameters.
        self.weight_h = np.random.uniform(-edge_h, edge_h, (self.h, self.i))
        self.weight_o = np.random.uniform(-edge_o, edge_o, (self.o, self.h))
        self.bias_h = np.random.uniform(-edge_h, edge_h, (self.h, 1))
        self.bias_o = np.random.uniform(-edge_o, edge_o, (self.o, 1))

    def feedforward(self, input_signal):
        """Run a forward pass.

        Args:
            input_signal: 2D array with rows = input layer size (``self.i``)
                and columns = batch size.

        Returns:
            Tuple ``(output_i, output_h, output_o)`` holding the input, the
            hidden activation and the output activation.
        """
        output_i = input_signal
        output_h = self.activation_h(np.dot(self.weight_h, output_i) + self.bias_h)
        output_o = self.activation_o(np.dot(self.weight_o, output_h) + self.bias_o)

        return output_i, output_h, output_o

    def back_propagation(self, output_i, output_h, output_o, target, batch_size):
        """Compute the gradients for both layers.

        Args:
            output_i, output_h, output_o: The tuple returned by ``feedforward``.
            target: Array with shape (output layer size, batch size).
            batch_size: Divisor applied to the weight gradients.

        Returns:
            Tuple ``(grad_h, grad_o, grad_bias_h, grad_bias_o)``.

        Side effects:
            Stores the loss in ``self.loss`` and appends it to
            ``self.loss_list`` (the history ``plot_MSE`` draws).
        """
        self.loss = self.loss_function(target, output_o)
        self.loss_list.append(self.loss)

        error_o = -(2/len(target))*(target - output_o)

        delta_o = error_o * self.activation_derivative_o(output_o)

        grad_o = np.dot(delta_o, output_h.T) / batch_size
        grad_bias_o = np.mean(delta_o, axis=1, keepdims=True)

        delta_h = np.dot(self.weight_o.T, delta_o) * self.activation_derivative_h(output_h)

        grad_h = np.dot(delta_h, output_i.T) / batch_size
        grad_bias_h = np.mean(delta_h, axis=1, keepdims=True)

        return grad_h, grad_o, grad_bias_h, grad_bias_o

    def update_weights(self, grad_h, grad_o, grad_bias_h, grad_bias_o):
        '''Apply one optimizer step in place; nothing is returned.

        Raises:
            ValueError: if ``self.optimizer`` is neither 'SGD' nor 'ADAM'
                (previously such a value made every update a silent no-op).
        '''
        if self.optimizer == 'SGD':
            self.sgd_optimizer(grad_h, grad_o, grad_bias_h, grad_bias_o)
        elif self.optimizer == 'ADAM':
            self.adam_optimizer(grad_h, grad_o, grad_bias_h, grad_bias_o)
        else:
            raise ValueError("Unknown optimizer %r; expected 'SGD' or 'ADAM'"
                             % (self.optimizer,))

    def predict(self, input_signal):
        """Return only the output layer's activation (an array, not a tuple).

        Args:
            input_signal: 2D array with rows = input layer size (``self.i``)
                and columns = batch size.
        """
        return self.feedforward(input_signal)[-1]

    def sgd_optimizer(self, grad_h, grad_o, grad_bias_h, grad_bias_o):
        """Plain gradient-descent step: ``param = param - lr * grad``."""
        self.weight_o = self.weight_o - self.lr * grad_o
        self.weight_h = self.weight_h - self.lr * grad_h

        self.bias_h = self.bias_h - self.lr * grad_bias_h
        self.bias_o = self.bias_o - self.lr * grad_bias_o

    @staticmethod
    def _adam_direction(m, v, grad, t, decay_rate_1=0.9, decay_rate_2=0.999,
                        epsilon=10**(-8)):
        """Advance one parameter's Adam moments and return ``(m, v, direction)``.

        ``direction`` is the bias-corrected first moment divided by the square
        root of the bias-corrected second moment plus ``epsilon``.
        """
        m = m * decay_rate_1 + (1 - decay_rate_1) * grad
        v = v * decay_rate_2 + (1 - decay_rate_2) * np.square(grad)

        m_corrected = m / (1 - (decay_rate_1 ** t))
        v_corrected = v / (1 - (decay_rate_2 ** t))

        return m, v, m_corrected / (np.sqrt(v_corrected) + epsilon)

    def adam_optimizer(self, grad_h, grad_o, grad_bias_h, grad_bias_o):
        """Adam step (decay rates 0.9 / 0.999, epsilon 1e-8), applied in place."""
        self.t += 1  # Increment time step

        self.l3_m, self.l3_v, w_o_update = self._adam_direction(
            self.l3_m, self.l3_v, grad_o, self.t)
        self.l1_m, self.l1_v, w_h_update = self._adam_direction(
            self.l1_m, self.l1_v, grad_h, self.t)
        self.lb3_m, self.lb3_v, b_o_update = self._adam_direction(
            self.lb3_m, self.lb3_v, grad_bias_o, self.t)
        self.lb1_m, self.lb1_v, b_h_update = self._adam_direction(
            self.lb1_m, self.lb1_v, grad_bias_h, self.t)

        # In-place updates, as before: the existing arrays are modified.
        self.weight_o -= (self.lr * w_o_update)
        self.weight_h -= (self.lr * w_h_update)
        self.bias_o -= (self.lr * b_o_update)
        self.bias_h -= (self.lr * b_h_update)

    def plot_MSE(self):
        """Plot the recorded loss history against the iteration index."""
        # NOTE(review): `plt` is not imported in the visible notebook cells;
        # confirm `matplotlib.pyplot` is imported as `plt` before calling this.
        y = list(self.loss_list)
        x = list(range(len(y)))
        plt.plot(x, y)
        plt.xlabel('iterations')
        plt.title('MSE of the NN')
        plt.show()


In [392]:
# 2-25-1 network. The constructor does not create weights, so init_weights()
# has to be called before the first forward pass.
nnl = MyNeuralNet(2, 25, 1)
nnl.init_weights()


# XOR truth table. The transpose puts one sample per column, so the input is
# (2, 4) and the target is (1, 4).
input_signal = np.array([[0, 0], [0, 1], [1, 0], [1, 1]]).T
target_signal = np.array([[0], [1], [1], [0]]).T

# print(np.shape(target_signal))
# 1000 full-batch iterations. feedforward returns (input, hidden, output),
# which is unpacked straight into back_propagation; the literal 4 is the
# batch size (the number of columns in input_signal).
for epochs in range(1000):
    ff = nnl.feedforward(input_signal)
    gradients = nnl.back_propagation(*ff,target_signal, 4)
    nnl.update_weights(*gradients)
# Print the output-layer activations for the four training samples.
print(nnl.predict(input_signal))

[[0.13122523 0.90812181 0.88557497 0.09602993]]
