# Introduction
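This notebook implements a small fully connected neural network from scratch with NumPy, then runs a forward pass, a single backpropagation step, and a second forward pass on a two-sample batch.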

In [1]:
import numpy as np


In [42]:
class Network:
    """Fully connected feed-forward network trained by gradient descent.

    Every layer computes ``a = activation(a_prev @ weight + bias)``. The
    intermediate values of the most recent forward pass are cached in
    ``buffered_op`` (keys ``'a0'``, ``'h1'``, ``'a1'``, ...) and are read
    back by :meth:`backpropagation`.

    Attributes:
        architecture: Layer sizes, input layer first.
        lr: Learning rate used by :meth:`backpropagation`.
        parameters: One ``{'weight': ..., 'bias': ...}`` dict per layer;
            weights have shape ``(in_nodes, out_nodes)`` and biases
            ``(1, out_nodes)``.
        activation: One activation callable per layer.
        prime_funcs: The matching derivative callable per layer.
        buffered_op: Cache of the last forward pass.
    """

    # Wording kept identical to the original validation error.
    _ACTIVATION_ERROR = "activation Value should be either relu or sigmoid"

    def __init__(self, architecture, activation='sigmoid', learning_rate=0.001):
        """Create the layers described by *architecture*.

        Args:
            architecture: Sequence of layer sizes, e.g. ``[5, 3, 1]``.
            activation: ``'relu'`` or ``'sigmoid'`` (case and surrounding
                whitespace are ignored) applied to every layer, or a
                list/tuple with one such name per layer.
            learning_rate: Step size for the gradient-descent update.

        Raises:
            ValueError: If an activation name is not recognised, or a
                per-layer list does not have one entry per layer.
            TypeError: If *activation* is neither a string nor a
                list/tuple of strings.
        """
        self.architecture = architecture
        self.lr = learning_rate
        self.parameters = []
        self.buffered_op = {}

        n_layers = max(len(architecture) - 1, 0)
        if isinstance(activation, str):
            # Resolve once so an invalid name is rejected even with no layers.
            pairs = [self._resolve_activation(activation)] * n_layers
        elif isinstance(activation, (list, tuple)):
            if len(activation) != n_layers:
                raise ValueError(
                    "expected one activation per layer (%d), got %d"
                    % (n_layers, len(activation))
                )
            pairs = [self._resolve_activation(name) for name in activation]
        else:
            # Previously this case left self.activation unset and only
            # failed later, inside forward_pass, with an AttributeError.
            raise TypeError(
                "activation must be a string or a list/tuple of strings"
            )
        self.activation = [func for func, _ in pairs]
        self.prime_funcs = [prime for _, prime in pairs]

        for in_nodes, out_nodes in zip(architecture[:-1], architecture[1:]):
            self.parameters.append(
                self.create_weights_and_biases(in_nodes, out_nodes)
            )

    def _resolve_activation(self, name):
        """Return the ``(function, derivative)`` pair for an activation name."""
        if not isinstance(name, str):
            raise TypeError(
                "activation must be a string or a list/tuple of strings"
            )
        key = name.lower().strip()
        if key == 'relu':
            return self.relu, self.relu_prime
        if key == 'sigmoid':
            return self.sigmoid, self.sigmoid_prime
        raise ValueError(self._ACTIVATION_ERROR)

    def create_weights_and_biases(self, in_nodes, out_nodes):
        """Return freshly initialised parameters for one layer.

        Weights are drawn from a zero-mean normal distribution with standard
        deviation ``sqrt(2 / (in_nodes + out_nodes))`` (Glorot/Xavier-style
        scaling); biases start at zero.

        Returns:
            Dict with ``'weight'`` of shape ``(in_nodes, out_nodes)`` and
            ``'bias'`` of shape ``(1, out_nodes)``.
        """
        std = np.sqrt(2.0 / (in_nodes + out_nodes))
        w = np.random.normal(loc=0, scale=std, size=(in_nodes, out_nodes))
        b = np.zeros((1, out_nodes))
        return {'weight': w, 'bias': b}

    def sigmoid(self, x):
        """Element-wise logistic function ``1 / (1 + exp(-x))``."""
        return 1 / (1 + np.exp(-x))

    def relu(self, x):
        """Element-wise ``max(0, x)``; accepts scalars and arrays."""
        # The scalar form `0 if x < 0 else x` raises on arrays.
        return np.maximum(0, x)

    def sigmoid_prime(self, x):
        """Derivative of :meth:`sigmoid` evaluated at *x*."""
        s = self.sigmoid(x)
        return s * (1 - s)

    def relu_prime(self, x):
        """Derivative of :meth:`relu`: 1 where ``x > 0``, otherwise 0."""
        return np.where(np.asarray(x) > 0, 1.0, 0.0)

    def forward_pass(self, x):
        """Propagate *x* through every layer and cache the intermediates.

        Args:
            x: Array-like of shape ``(batch, n_inputs)``; a 1-D input is
                treated as a single sample.

        Returns:
            The activation of the last layer, shape ``(batch, n_outputs)``.
            With no layers the (reshaped) input is returned unchanged.
        """
        x = np.asarray(x)
        if x.ndim == 1:
            x = x.reshape((1, -1))
        self.buffered_op['a0'] = x

        a = x
        layers = zip(self.parameters, self.activation)
        for i, (parameter, actv_func) in enumerate(layers, start=1):
            h = np.matmul(a, parameter['weight']) + parameter['bias']
            a = actv_func(h)
            self.buffered_op['h' + str(i)] = h
            self.buffered_op['a' + str(i)] = a
        return a

    def loss(self, y_cap, y):
        """Return the element-wise squared error ``(y - y_cap) ** 2``.

        No averaging is performed here; take the mean of the result to
        obtain the mean squared error.
        """
        return (y - y_cap) ** 2

    def loss_prime(self, y_cap, y):
        """Derivative of :meth:`loss` with respect to the prediction."""
        return -2 * (y - y_cap)

    def backpropagation(self, y_cap, y):
        """Run one gradient-descent step on all weights and biases.

        Must be called after :meth:`forward_pass` on the same batch, because
        the cached pre-activations and activations in ``buffered_op`` are
        used to compute the gradients. Parameters are updated in place.

        Args:
            y_cap: Network output for the batch, shape ``(batch, n_outputs)``.
            y: Target values. A target with the same number of elements but
                a different shape (e.g. 1-D) is reshaped to match *y_cap*.
        """
        y_cap = np.asarray(y_cap)
        y = np.asarray(y)
        if y.shape != y_cap.shape and y.size == y_cap.size:
            # Without this a (batch,) target broadcasts against a
            # (batch, 1) prediction into a (batch, batch) error matrix.
            y = y.reshape(y_cap.shape)

        batch_size = y_cap.shape[0]
        layer_error = self.loss_prime(y_cap, y)
        gradients = []

        # Walk the layers from last to first. All gradients are collected
        # before any update so each layer's error uses pre-update weights.
        for layer_idx in range(len(self.parameters), 0, -1):
            prime_func = self.prime_funcs[layer_idx - 1]
            h_cur = self.buffered_op['h' + str(layer_idx)]
            a_prev = self.buffered_op['a' + str(layer_idx - 1)]

            error_term = layer_error * prime_func(h_cur)
            # Shaped (in_nodes, out_nodes), the same layout as the weights.
            del_w = np.matmul(a_prev.T, error_term)
            del_b = error_term.sum(axis=0)
            gradients.append((del_w, del_b))

            # Error propagated back to the previous layer's activations.
            weight = self.parameters[layer_idx - 1]['weight']
            layer_error = np.matmul(error_term, weight.T)

        gradients.reverse()

        step = self.lr / batch_size
        for parameter, (del_w, del_b) in zip(self.parameters, gradients):
            parameter['weight'] -= step * del_w
            parameter['bias'] -= step * del_b
            
            

In [43]:
# 5 inputs -> 3 hidden units -> 1 output, default sigmoid activation.
nn = Network(architecture=[5, 3, 1], learning_rate=0.1)

In [44]:
# Batch of two samples with five features each, one row per sample.
ip = np.array(
    [
        [1, 3, 5, 7, 9],
        [2, 3, 4, 1, 1],
    ]
)
# Prediction before any training step.
op = nn.forward_pass(ip)
print(op)

[[0.41128373]
 [0.44070925]]


In [53]:
# Target output for each of the two samples.
exp_op = np.array([[1.0], [0.0]])
# One gradient-descent step using the prediction from the last forward pass.
nn.backpropagation(y_cap=op, y=exp_op)

In [54]:
# Same two-sample batch as before, re-evaluated after the update step.
ip = np.array(
    [
        [1, 3, 5, 7, 9],
        [2, 3, 4, 1, 1],
    ]
)
op = nn.forward_pass(ip)
print(op)

[[0.44039955]
 [0.45590399]]
