In [1]:
import numpy as np

In [153]:
class Layer:

    def relu(self,X):
        return np.maximum(0,X)
    
    def linear(self,X):
        return X
    
    def sigmoid(self,X):
        return 1/(1 + np.exp(-X))

    def __init__(self,unit_num,act_func,input_size) -> None:


        self.input_size = input_size
        self.unit_num = unit_num
        self.act_func = act_func
        self.weights = np.random.rand(unit_num,input_size)*0.2
        self.bias = np.zeros((unit_num,1))



    def forward_prop(self,inp):
        if self.act_func == "sigmoid":
            
            linear_output = np.dot(self.weights,inp) + self.bias

            activation_output = self.sigmoid(linear_output)

            return linear_output,activation_output
        
        elif self.act_func == "relu":
            linear_output = np.dot(self.weights,inp) + self.bias

            activation_output = self.relu(linear_output)

            return linear_output, activation_output

    def __str__(self) -> str:
        return f"Layer with {self.unit_num} units and {self.act_func} activation function."

In [317]:
class NN:

    def __init__(self) -> None:
        self.layers = []
        return None
    
    def add_layer(self,input_size,activation_function,layer_size):

        if (len(self.layers) == 0) or (input_size == self.layers[-1].unit_num):
            new_layer = Layer(unit_num=layer_size,act_func = activation_function,input_size=input_size)
            self.layers.append(new_layer)
            print("Added a layer!")
        else:
            raise Exception("Input Size don't match with the last layer's output size!")



        return None
    
    def __str__(self) -> str:

        print(f"Neural Network with {len(self.layers)} layers.")
        for layer in self.layers:
            print(layer)
        
        return ""


    def forward_propagation(self,X):
        caches = [self.layers[0].forward_prop(X)]
       



        for i in range(1,len(self.layers)):
            caches.append(self.layers[i].forward_prop(caches[i-1][1]))
            #print(f"Output of layer {i+1}:\n{caches[i]}")
            #print(f"Shape of layer {i}: {len(caches[i])}")
        
        return caches


    def sigmoid_back(self,dA,cache):
        Z = cache
        s= 1/(1+np.exp(-Z))

        dZ = dA * s * (1-s)

        #print(f"dZ shape after sigmoid back: {dZ.shape}")
        

        return dZ
    

    def relu_back(seld,dA,cache):
        

        Z = cache
        

        dZ = np.copy(dA)

        dZ[Z <= 0] = 0

        return dZ
    




    def linear_backward(self,dZ,W,b,A_prev):

        m = len(A_prev)

        dW = (1/m) * np.dot(A_prev.T,dZ)

        db = (1/m) * np.sum(dZ,axis=1,keepdims=True)
        

        dA_prev = np.dot(W.T,dZ)


        '''
        print(f"A_prev shape: {A_prev.shape}")

        print(f"dA_prev shape: {dA_prev.shape}")

        print(f"dW shape: {dW.shape}")
        '''

        return dA_prev, db, dW
    


    def linear_activation_backward(self,dA,activation,cache,W,b):



        linear_cache, activation_cache = cache

        if activation == "sigmoid":
            dZ = self.sigmoid_back(dA,activation_cache)

 
            

            dA_prev, db, dW = self.linear_backward(dZ,W,b,linear_cache)



        elif activation =="relu":

            dZ = self.relu_back(dA,activation_cache)

            dA_prev, db,dW = self.linear_backward(dZ,W,b,linear_cache)
        

        return dA_prev, dW, db    
    
    



    def compute_cost(self,cost_func,Y,yhat,epsilon):

        if cost_func =="crossentropy":
            
            cost = np.sum((1-Y) * (-np.log(1-yhat + epsilon)) - Y * (np.log(yhat + epsilon)))/len(Y)
            print(cost)
            return cost
        else:
            return f"No such a function as {cost_func}!"
        

    
    


    def backprop(self,Y,caches):


        '''
        AL: output of forward propagation
        Y : true values
        caches:evlayersery cache of 
        
        
        '''
        AL = caches[-1][1]
        grads = {}

        L = len(self.layers)

        

        dAL = - (np.divide(Y, AL) - np.divide(1 - Y, 1 - AL))

        


        current_cache = caches[L-1]

        #print(f"FOR LAYER {L-1}")

        grads["dA" + str(L-1)], grads["dW" + str(L)],grads["db" + str(L)] = self.linear_activation_backward(dA = dAL,
                                                               activation=self.layers[-1].act_func,
                                                               cache=current_cache,
                                                               W = self.layers[-1].weights,
                                                               b = self.layers[-1].bias)
        
        


        for l in reversed(range(L-1)):
            #print(f"FOR LAYER {l}")
            current_cache = caches[l]
            dA_prev_temp, dW_temp, db_temp = self.linear_activation_backward(dA = grads["dA" + str(l+1)],
                                                                             activation = self.layers[l].act_func,
                                                                             cache = current_cache,
                                                                             W = self.layers[l].weights,
                                                                             b = self.layers[l].bias)
            grads["dA" + str(l)] = dA_prev_temp
            grads["dW" + str(l + 1)] = dW_temp
            grads["db" + str(l + 1)] = db_temp

            

            
            

            

        return grads
    

    
    def update_params(self, grads,alpha):

        
        '''
        alpha: learning rate
        '''
        L = len(self.layers)

        

    # Update rule for each parameter
        for l in range(len(self.layers)):
            self.layers[l].weights = self.layers[l].weights - alpha * grads["dW" + str(l+1)]
            
            self.layers[l].bias = self.layers[l].bias - alpha * grads["db" + str(l+1)]
            #print(f"Weights shape: {self.layers[l].weights.shape} --- Bias shape: {self.layers[l].bias.shape}")
            #print(self.layers[l].bias)

        
        return None
    

    def train(self,X,Y,alpha,max_iter):

        for iteration in range(max_iter):
            forward_cache = self.forward_propagation(X)
            backprop_grads = self.backprop(Y,forward_cache)
            self.update_params(backprop_grads,alpha)
            if iteration % 100 == 0:
                print(f"COST AFTER ITERATION {iteration}: {self.compute_cost('crossentropy',Y,forward_cache[-1][1],1e-8)}")

        

        print("Training is done!")

        return None






In [330]:
X = np.array([[1,3,4,5],[2,5,6,7],[3,4,2,3]])
Y = np.array([[0,1,0,0]]).T

print(X.shape)
print(Y.shape)


deneme = NN()
deneme.add_layer(3,"sigmoid",5)
deneme.add_layer(5,"sigmoid",10)
deneme.add_layer(10,"sigmoid",4)
deneme.add_layer(4,"relu",1)



(3, 4)
(4, 1)
Added a layer!
Added a layer!
Added a layer!
Added a layer!


In [331]:
deneme.train(X,Y,0.001,1000)

IndexError: boolean index did not match indexed array along dimension 0; dimension is 4 but corresponding boolean dimension is 1

In [308]:
deneme.update_params(grads,0.01)

Weights shape: (5, 3) --- Bias shape: (5, 1)
[[-1.18983029e-05]
 [-9.75405443e-06]
 [-1.01124261e-05]
 [-1.16817030e-05]
 [-1.33332967e-05]]
Weights shape: (10, 5) --- Bias shape: (10, 1)
[[-2.24131476e-05]
 [-2.09274964e-05]
 [-3.39392104e-05]
 [-2.03393579e-05]
 [-1.97018183e-05]
 [-2.18419711e-05]
 [-2.49509209e-05]
 [-2.81303147e-05]
 [-2.18176365e-05]
 [-2.42924932e-05]]
Weights shape: (4, 10) --- Bias shape: (4, 1)
[[-0.00055712]
 [-0.0007068 ]
 [-0.00068634]
 [-0.00050055]]
Weights shape: (1, 4) --- Bias shape: (1, 1)
[[-0.06656332]]


'Done!'

In [309]:
deneme.compute_cost("crossentropy",Y,deneme.forward_propagation(X)[-1][0],1e-8)

0.37867960997375827


0.37867960997375827

In [93]:
for i in reversed(range(2)):
    print(i)

1
0
