In [1]:
import numpy as np

In [2]:
class Layer:

    def relu(self,X):
        return np.maximum(0,X)
    
    def linear(self,X):
        return X
    
    def sigmoid(self,X):
        return 1/(1 + np.exp(-X))

    def __init__(self,unit_num,act_func,input_size) -> None:


        self.input_size = input_size
        self.unit_num = unit_num
        self.act_func = act_func
        self.weights = np.random.rand(unit_num,input_size)*0.2
        self.bias = np.zeros((unit_num,1))



    def forward_prop(self,inp):
        if self.act_func == "sigmoid":
            
            linear_output = np.dot(self.weights,inp) + self.bias

            activation_output = self.sigmoid(linear_output)

            return linear_output,activation_output
        
        elif self.act_func == "relu":
            linear_output = np.dot(self.weights,inp) + self.bias

            activation_output = self.relu(linear_output)

            return linear_output, activation_output

    def __str__(self) -> str:
        return f"Layer with {self.unit_num} units and {self.act_func} activation function."

In [14]:
class NN:

    def __init__(self,X,Y) -> None:
        self.X = X
        self.Y = Y
        self.layers = []
        return None
    
    def add_layer(self,input_size,activation_function,layer_size):


        if (len(self.layers) == 0) and (input_size != self.X.shape[0]):
            raise Exception("First layer's input size should match with the number of features!")
        
        elif len(self.layers) == 0:
            new_layer = Layer(unit_num=layer_size,act_func = activation_function,input_size=input_size)
            self.layers.append(new_layer)
            print("Added a layer!")
            return None

        elif input_size == self.layers[-1].unit_num:
            new_layer = Layer(unit_num=layer_size,act_func = activation_function,input_size=input_size)
            self.layers.append(new_layer)
            print("Added a layer!")
            return None
            
        else:
            raise Exception("Input Size don't match with the last layer's output size!")



        return None
    
    def __str__(self) -> str:

        print(f"Neural Network with {len(self.layers)} layers.")
        for _,layer in enumerate(self.layers):
            print(f"LAYER {_}: {layer}")
        
        return ""


    def forward_propagation(self,X):
        caches = [self.layers[0].forward_prop(X)]
       



        for i in range(1,len(self.layers)):
            caches.append(self.layers[i].forward_prop(caches[i-1][1]))
            #print(f"Output of layer {i+1}:\n{caches[i]}")
            #print(f"Shape of layer {i}: {len(caches[i])}")
        
        return caches


    def sigmoid_back(self,dA,cache):
        Z = cache
        s= 1/(1+np.exp(-Z))

        dZ = dA * s * (1-s)

        #print(f"dZ shape after sigmoid back: {dZ.shape}")
        

        return dZ
    

    def relu_back(seld,dA,cache):
        

        Z = cache
        

        dZ = np.copy(dA)

        dZ[Z <= 0] = 0

        return dZ
    




    def linear_backward(self,dZ,W,b,A_prev):

        m = A_prev.shape[1]

        dW = (1/m) * np.dot(dZ,A_prev.T)

        db = (1/m) * np.sum(dZ,axis=1,keepdims=True)
        

        dA_prev = np.dot(W.T,dZ)


        '''
        print(f"A_prev shape: {A_prev.shape}")

        print(f"dA_prev shape: {dA_prev.shape}")

        print(f"dW shape: {dW.shape}")
        '''

        return dA_prev, db, dW
    


    def linear_activation_backward(self,dA,activation,cache,W,b,l):



        linear_cache, activation_cache = cache

        if activation == "sigmoid":
            dZ = self.sigmoid_back(dA,activation_cache)
            print(f"dZ shape of layer {l}: {dZ.shape}")
            dA_prev, db, dW = self.linear_backward(dZ,W,b,linear_cache)



        elif activation =="relu":

            dZ = self.relu_back(dA,activation_cache)
            print(f"dZ shape of layer {l}: {dZ.shape}")

            dA_prev, db,dW = self.linear_backward(dZ,W,b,linear_cache)
        

        return dA_prev, dW, db    
    
    



    def compute_cost(self,cost_func,yhat,epsilon):

        if cost_func =="crossentropy":
            
            cost = np.sum((1-self.Y) * (-np.log(1-yhat + epsilon)) - self.Y * (np.log(yhat + epsilon)))/len(self.Y)
            print(cost)
            return cost
        else:
            return f"No such a function as {cost_func}!"
        

    def predict(self,X):

        predictions = self.forward_propagation(X)[-1][-1]
        return predictions

    
    


    def backprop(self,Y,caches):


        '''
        AL: output of forward propagation
        Y : true values
        caches:evlayersery cache of 
        
        
        '''
        AL = caches[-1][1]
        grads = {}

        L = len(self.layers)

        

        dAL = - (np.divide(Y, AL) - np.divide(1 - Y, 1 - AL))



        current_cache = caches[L-1]

        #print(f"FOR LAYER {L-1}")

        grads["dA" + str(L-1)], grads["dW" + str(L)],grads["db" + str(L)] = self.linear_activation_backward(dA = dAL,
                                                               activation=self.layers[-1].act_func,
                                                               cache=current_cache,
                                                               W = self.layers[-1].weights,
                                                               b = self.layers[-1].bias,l=L-1)
        


        for l in reversed(range(L-1)):
            print(l)
            
            current_cache = caches[l]
            dA_prev_temp, dW_temp, db_temp = self.linear_activation_backward(dA = grads["dA" + str(l+1)],
                                                                             activation = self.layers[l].act_func,
                                                                             cache = current_cache,
                                                                             W = self.layers[l].weights,
                                                                             b = self.layers[l].bias,l=l)
            grads["dA" + str(l)] = dA_prev_temp
            grads["dW" + str(l+1)] = dW_temp
            grads["db" + str(l+1)] = db_temp

            

            
        for key in grads.keys():
            print(f"{key} shape: {grads[key].shape}")
            

            

        return grads

    
    def update_params(self, grads,alpha):

        
        '''
        alpha: learning rate
        '''
        L = len(self.layers)

        

    # Update rule for each parameter
        for l in range(len(self.layers)):
            print(f"Weights shape: {self.layers[l].weights.shape} --- Bias shape: {self.layers[l].bias.shape}")
            print(f"grads[dW{str(l+1)}] shape: {grads['dW' + str(l+1)].shape}")
            self.layers[l].weights = self.layers[l].weights - alpha * grads["dW" + str(l+1)]
            
            self.layers[l].bias = self.layers[l].bias - alpha * grads["db" + str(l+1)]



        
        return None
    

    def train(self,X,Y,alpha,max_iter):

        for iteration in range(max_iter):
            forward_cache = self.forward_propagation(X)
            backprop_grads = self.backprop(Y,forward_cache)
            self.update_params(backprop_grads,alpha)
            if iteration % 100 == 0:
                print(f"COST AFTER ITERATION {iteration}: {self.compute_cost('crossentropy',Y,forward_cache[-1][1],1e-8)}")

        

        print("Training is done!")

        return None






In [15]:
X = np.array([[1,3,4,5],[2,5,6,7],[3,4,2,3]])
Y = np.array([[0,1,0,1]])

print(f"X SET SHAPE: {X.shape}")
print(f"Y SET SHAPE: {Y.shape}")


deneme = Layer(4,"sigmoid",3)
deneme2 = Layer(10,"sigmoid",4)


X SET SHAPE: (3, 4)
Y SET SHAPE: (1, 4)


In [16]:
nn_deneme = NN(X,Y)

In [17]:
nn_deneme.add_layer(3,"sigmoid",10)
nn_deneme.add_layer(10,"sigmoid",20)
nn_deneme.add_layer(20,"relu",4)
nn_deneme.add_layer(4,"sigmoid",1)

Added a layer!
Added a layer!
Added a layer!
Added a layer!


In [18]:
nn_deneme.backprop(Y,nn_deneme.forward_propagation(X))

dZ shape of layer 3: (1, 4)
2
dZ shape of layer 2: (4, 4)
1
dZ shape of layer 1: (20, 4)
0
dZ shape of layer 0: (10, 4)
dA3 shape: (4, 4)
dW4 shape: (1, 1)
db4 shape: (1, 1)
dA2 shape: (20, 4)
dW3 shape: (4, 4)
db3 shape: (4, 1)
dA1 shape: (10, 4)
dW2 shape: (20, 20)
db2 shape: (20, 1)
dA0 shape: (3, 4)
dW1 shape: (10, 10)
db1 shape: (10, 1)


{'dA3': array([[ 0.12258291, -0.05407539,  0.12521231, -0.05382131],
        [ 0.10621257, -0.04685389,  0.10849082, -0.04663374],
        [ 0.12670104, -0.05589203,  0.12941878, -0.05562942],
        [ 0.10514294, -0.04638204,  0.10739825, -0.04616411]]),
 'dW4': array([[0.16575724]]),
 'db4': array([[0.20550736]]),
 'dA2': array([[ 0.03955636, -0.01744962,  0.04040484, -0.01736763],
        [ 0.04836273, -0.0213344 ,  0.0494001 , -0.02123416],
        [ 0.04818614, -0.0212565 ,  0.04921973, -0.02115663],
        [ 0.04829863, -0.02130613,  0.04933463, -0.02120602],
        [ 0.02593854, -0.01144235,  0.02649492, -0.01138859],
        [ 0.02539087, -0.01120076,  0.0259355 , -0.01114813],
        [ 0.06116327, -0.02698115,  0.06247522, -0.02685437],
        [ 0.05292664, -0.0233477 ,  0.05406191, -0.023238  ],
        [ 0.03878284, -0.0171084 ,  0.03961473, -0.01702801],
        [ 0.05853995, -0.02582391,  0.05979563, -0.02570258],
        [ 0.04170076, -0.01839559,  0.04259524, -0.018