In [1]:
import numpy as np

In [153]:
class Layer:

    def relu(self,X):
        return np.maximum(0,X)
    
    def linear(self,X):
        return X
    
    def sigmoid(self,X):
        return 1/(1 + np.exp(-X))

    def __init__(self,unit_num,act_func,input_size) -> None:


        self.input_size = input_size
        self.unit_num = unit_num
        self.act_func = act_func
        self.weights = np.random.rand(unit_num,input_size)*0.2
        self.bias = np.zeros((unit_num,1))



    def forward_prop(self,inp):
        if self.act_func == "sigmoid":
            
            linear_output = np.dot(self.weights,inp) + self.bias

            activation_output = self.sigmoid(linear_output)

            return linear_output,activation_output
        
        elif self.act_func == "relu":
            linear_output = np.dot(self.weights,inp) + self.bias

            activation_output = self.relu(linear_output)

            return linear_output, activation_output

    def __str__(self) -> str:
        return f"Layer with {self.unit_num} units and {self.act_func} activation function."

In [170]:
class NN:

    def __init__(self) -> None:
        self.layers = []
        return None
    
    def add_layer(self,input_size,activation_function,layer_size):

        if (len(self.layers) == 0) or (input_size == self.layers[-1].unit_num):
            new_layer = Layer(unit_num=layer_size,act_func = activation_function,input_size=input_size)
            self.layers.append(new_layer)
            print("Added a layer!")
        else:
            raise Exception("Input Size don't match with the last layer's output size!")



        return None
    
    def __str__(self) -> str:

        print(f"Neural Network with {len(self.layers)} layers.")
        for layer in self.layers:
            print(layer)
        
        return ""


    def forward_propagation(self,X):
        caches = [self.layers[0].forward_prop(X)]
       



        for i in range(1,len(self.layers)):
            caches.append(self.layers[i].forward_prop(caches[i-1][1]))
            #print(f"Output of layer {i+1}:\n{caches[i]}")
            #print(f"Shape of layer {i}: {len(caches[i])}")
        
        return caches


    def sigmoid_back(self,dA,cache):
        Z = cache
        s= 1/(1+np.exp(-Z))

        dZ = dA * s * (1-s)
        

        return dZ
    

    def relu_back(seld,dA,cache):
        

        Z = cache
        

        dZ = np.copy(dA)

        dZ[Z <= 0] = 0

        return dZ

            


    def linear_backward(self,dZ,W,b,A_prev):

        m = len(A_prev)

        dW = (1/m) * np.dot(dZ,A_prev.T)

        db = (1/m) * np.sum(dZ,axis=0,keepdims=True)
        

        dA_prev = np.dot(W.T,dZ)

        return dA_prev, db, dW
    


    def linear_activation_backward(self,dA,activation,cache,W,b):



        linear_cache, activation_cache = cache

        if activation == "sigmoid":
            dZ = self.sigmoid_back(dA,activation_cache)
            

            dA_prev, dW, db = self.linear_backward(dZ,W,b,linear_cache)

        elif activation =="relu":

            dZ = self.relu_back(dA,activation_cache)

            dA_prev, dW, db = self.linear_backward(dZ,W,b,linear_cache)
        

        return dA_prev, dW, db    
    
    



    def compute_cost(self,cost_func,Y,yhat,epsilon):

        if cost_func =="crossentropy":
            
            cost = np.sum((1-Y) * (-np.log(1-yhat + epsilon)) - Y * (np.log(yhat + epsilon)))/len(Y)
            print(cost)
            return cost
        

    
    


    def backprop(self,Y,caches):


        '''
        AL: output of forward propagation
        Y : true values
        caches:evlayersery cache of 
        
        
        '''
        AL = caches[-1][1]
        grads = {}

        L = len(self.layers)

        

        dAL = - (np.divide(Y, AL) - np.divide(1 - Y, 1 - AL))

        


        current_cache = caches[L-1]

        

        grads["dA" + str(L-1)], grads["dW" + str(L)],grads["db" + str(L)] = self.linear_activation_backward(dA = dAL,
                                                               activation=self.layers[-1].act_func,
                                                               cache=current_cache,
                                                               W = self.layers[-1].weights,
                                                               b = self.layers[-1].bias)
        
        


        for l in reversed(range(L-1)):

            current_cache = caches[l]
            dA_prev_temp, dW_temp, db_temp = self.linear_activation_backward(dA = grads["dA" + str(l+1)],
                                                                             activation = self.layers[l].act_func,
                                                                             cache = current_cache,
                                                                             W = self.layers[l].weights,
                                                                             b = self.layers[l].bias)
            grads["dA" + str(l)] = dA_prev_temp
            grads["dW" + str(l + 1)] = dW_temp
            grads["db" + str(l + 1)] = db_temp
            print(grads["db" + str(l + 1)].shape)

            

        return grads
    

    
    def update_params(self, grads,alpha):

        
        '''
        alpha: learning rate
        '''
        L = len(self.layers)

        

    # Update rule for each parameter
        for l in range(len(self.layers)):
            self.layers[l].weights = self.layers[l].weights - alpha * grads["dW" + str(l+1)]
            self.layers[l].bias = self.layers[l].bias - alpha * grads["db" + str(l+1)]
            print(f"Weights shape: {self.layers[l].weights.shape} --- Bias shape: {self.layers[l].bias.shape}")
            print(self.layers[l].bias)

        
        return "Done!"






In [171]:
X = np.array([[1],[2],[3]])
Y = np.array([[0]])


deneme = NN()
deneme.add_layer(3,"sigmoid",5)
deneme.add_layer(5,"sigmoid",10)
deneme.add_layer(10,"sigmoid",4)
deneme.add_layer(4,"relu",1)


Added a layer!
Added a layer!
Added a layer!
Added a layer!


In [172]:
grads = deneme.backprop(Y,deneme.forward_propagation(X))

(4, 4)
(10, 10)
(5, 5)


In [173]:
deneme.update_params(grads,0.05)

Weights shape: (5, 3) --- Bias shape: (5, 5)
[[-4.60111850e-06 -2.92874357e-06 -5.11275961e-06 -1.62946238e-06
  -1.33110472e-06]
 [-3.90363456e-06 -2.48477509e-06 -4.33771595e-06 -1.38245204e-06
  -1.12932244e-06]
 [-5.63654920e-06 -3.58782484e-06 -6.26332947e-06 -1.99615482e-06
  -1.63065508e-06]
 [-1.75180205e-06 -1.11507213e-06 -1.94660119e-06 -6.20391658e-07
  -5.06796765e-07]
 [-3.80380008e-06 -2.42122759e-06 -4.22677995e-06 -1.34709618e-06
  -1.10044030e-06]]
Weights shape: (10, 5) --- Bias shape: (10, 10)
[[-2.83182960e-06 -4.14496866e-06 -5.72684691e-06 -3.58046000e-06
  -2.51225985e-06 -4.69905945e-06 -5.35638770e-06 -3.05930766e-06
  -2.76775351e-06 -3.80182570e-06]
 [-2.89264714e-06 -4.23398772e-06 -5.84983904e-06 -3.65735544e-06
  -2.56621418e-06 -4.79997839e-06 -5.47142370e-06 -3.12501061e-06
  -2.82719493e-06 -3.88347528e-06]
 [-3.28432336e-06 -4.80728693e-06 -6.64193110e-06 -4.15257628e-06
  -2.91369005e-06 -5.44991504e-06 -6.21227678e-06 -3.54814980e-06
  -3.21000866e-

'Done!'

In [115]:
deneme.forward_propagation(X)[-1][1]

ValueError: operands could not be broadcast together with shapes (10,5) (10,10) 

In [93]:
for i in reversed(range(2)):
    print(i)

1
0
