## Reference taken from:
https://github.com/TheIndependentCode/Neural-Network

In [31]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

In [32]:
#Loss function 
def mse(y_true, y_pred):
    """Return the mean squared error between predictions and targets.

    The difference ``y_pred - y_true`` is squared element-wise and then
    averaged over every element with ``np.mean``.
    """
    residual = y_pred - y_true
    squared = residual ** 2
    return np.mean(squared)

def mse_prime(y_true, y_pred):
    """Return the gradient of the mean squared error w.r.t. ``y_pred``.

    Each element is ``2 * (y_pred - y_true)`` divided by the number of
    elements in ``y_true`` (read from its ``size`` attribute).
    """
    doubled_residual = 2 * (y_pred - y_true)
    return doubled_residual / y_true.size

In [33]:
class Layer():
    """Fully connected layer computing ``weights . inputs + bias``.

    ``weights`` has shape ``(n_outputs, n_inputs)`` and ``bias`` has shape
    ``(n_outputs, 1)``.
    """

    def __init__(self, n_inputs, n_outputs):
        """Draw small random weights and start every bias at one."""
        weight_shape = (n_outputs, n_inputs)
        # Standard-normal samples scaled down by a factor of 0.10.
        self.weights = 0.10 * np.random.randn(*weight_shape)
        self.bias = np.ones((n_outputs, 1))

    def forward_propagation(self, inputs):
        """Store ``inputs`` and return the layer's affine output.

        ``inputs`` is presumably a column of shape ``(n_inputs, 1)`` so that
        it lines up with the ``(n_outputs, 1)`` bias -- confirm with callers.
        """
        self.input = inputs
        weighted_sum = np.dot(self.weights, inputs)
        self.output = weighted_sum + self.bias
        return self.output

    def backward_propagation(self, output_gradient, learning_rate):
        """Apply one gradient-descent step and return the input gradient.

        ``output_gradient`` is the loss gradient w.r.t. this layer's output.
        The returned value is the loss gradient w.r.t. this layer's input,
        computed from the weights as they were before the update.
        """
        # Both gradients are derived before any parameter is modified.
        grad_wrt_input = np.dot(self.weights.T, output_gradient)
        grad_wrt_weights = np.dot(output_gradient, self.input.T)

        # In-place updates keep the same underlying parameter arrays.
        self.weights -= learning_rate * grad_wrt_weights
        self.bias -= learning_rate * output_gradient
        return grad_wrt_input

In [34]:
class Activation():
    """Layer that applies a function and, on the way back, its derivative.

    The layer has no trainable parameters; it only wraps the two callables
    handed to the constructor.
    """

    def __init__(self, activation_func, activation_prime_func):
        """Remember the activation callable and its derivative callable."""
        self.activation, self.activation_prime = (
            activation_func,
            activation_prime_func,
        )

    def forward_propagation(self, inputs):
        """Store ``inputs`` and return the activation applied to them."""
        self.input = inputs
        activated = self.activation(inputs)
        self.output = activated
        return activated

    def backward_propagation(self, output_gradient, learning_rate):
        """Return ``output_gradient`` scaled by the derivative at the input.

        ``learning_rate`` is accepted for interface compatibility and is not
        used here.
        """
        local_slope = self.activation_prime(self.input)
        # Element-wise product of the incoming gradient and the local slope.
        return np.multiply(output_gradient, local_slope)
        
        

In [35]:
#Activation functions
class Sigmoid(Activation):
    """Activation layer using the logistic sigmoid ``1 / (1 + exp(-z))``."""

    def __init__(self):
        """Register the sigmoid and its derivative with the base class."""

        def sigmoid(z):
            """Return the element-wise logistic function of ``z``."""
            # The denominator must be 1 + exp(-z); a minus sign there yields
            # values outside (0, 1) and a division by zero at z = 0.
            return 1 / (1 + np.exp(-z))

        def sigmoid_prime(z):
            """Return the element-wise derivative, ``s * (1 - s)``."""
            s = sigmoid(z)
            return s * (1 - s)

        super().__init__(sigmoid, sigmoid_prime)

   

In [45]:
def train(network, X_train, y_train, loss, loss_prime, epochs=1000, learning_rate=0.01, verbose=True):
    """Train ``network`` in place, updating after every single sample.

    For each epoch and each ``(x, y)`` pair, the sample is pushed through
    every layer's ``forward_propagation``, the loss is accumulated, and the
    gradient from ``loss_prime`` is pushed back through the layers in
    reverse order via ``backward_propagation``.

    Args:
        network: sequence of layer objects exposing ``forward_propagation``
            and ``backward_propagation(grad, learning_rate)``.
        X_train: training inputs, iterated in step with ``y_train``.
        y_train: training targets.
        loss: callable ``loss(y_true, y_pred)`` returning the sample loss.
        loss_prime: callable ``loss_prime(y_true, y_pred)`` returning the
            gradient of the loss w.r.t. the prediction.
        epochs: number of passes over the training data.
        learning_rate: step size handed to every layer's backward pass.
        verbose: when true, print the mean loss after every epoch.

    Returns:
        None. The layers are modified through their own methods.
    """

    def run_forward(sample):
        # Each layer consumes the previous layer's output.
        signal = sample
        for layer in network:
            signal = layer.forward_propagation(signal)
        return signal

    def run_backward(gradient):
        # Walk from the last layer to the first; each layer returns the
        # gradient w.r.t. its own input.
        for layer in reversed(network):
            gradient = layer.backward_propagation(gradient, learning_rate)

    for epoch_index in range(epochs):
        running_loss = 0
        for sample, target in zip(X_train, y_train):
            prediction = run_forward(sample)
            running_loss += loss(target, prediction)
            run_backward(loss_prime(target, prediction))

        # Mean loss over the training set for this epoch.
        running_loss /= len(X_train)

        if verbose:
            print("{}/{} error={}".format(epoch_index + 1, epochs, running_loss))
            
            

In [51]:
def predict(network, x):
    """Return the network's output for input ``x``.

    ``x`` is fed to the first layer's ``forward_propagation`` and each
    result is passed on to the next layer; the final layer's result is
    returned. An empty ``network`` returns ``x`` unchanged.
    """
    signal = x
    for stage in network:
        signal = stage.forward_propagation(signal)
    return signal
        

In [38]:

# Four two-bit inputs, each stored as a (2, 1) column.
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]]).reshape(4, 2, 1)

# Targets for the logical AND of the two bits, each stored as a (1, 1) array.
# (XOR targets would be [[0], [1], [1], [0]] instead.)
Y = np.array([[0], [0], [0], [1]]).reshape(4, 1, 1)

# 2 inputs -> 3 hidden units -> 1 output, with a sigmoid after each layer.
network = [Layer(2, 3), Sigmoid(), Layer(3, 1), Sigmoid()]

train(network, X, Y, mse, mse_prime, epochs=1000, learning_rate=0.01)