In [2]:
import sys

import numpy as np  # the definitions below reference numpy as `np`
import numpy as npz

sys.path.append('../')
from utils import *

In [4]:
def backpropagation_step(dA: np.ndarray, cache: dict, activation: str = 'relu') -> tuple:
    """
    Performs a backpropagation step for a single layer, computes gradients.

    Parameters
    ----------
    dA : np.ndarray
        Gradient computed earlier, w.r.t. this layer's activation output.

    cache : dictionary
        Values stored during the forward pass; the keys 'A_prev', 'W'
        and 'Z' are read here.

    activation : str
        Name of the layer's activation function: 'relu' (default)
        or 'sigmoid'.

    Returns
    -------
    dA_prev : np.ndarray
        Gradient of the cost w.r.t the activation of the previous layer

    dW : np.ndarray
        Gradient of the cost w.r.t W

    db : np.ndarray
         Gradient of the cost w.r.t b

    Raises
    ------
    ValueError
        If `activation` is neither 'relu' nor 'sigmoid'.
    """
    A_prev, W, Z = cache['A_prev'], cache['W'], cache['Z']
    # Averaging factor; presumably the number of examples (columns of A_prev)
    # - TODO confirm against the forward pass.
    m = A_prev.shape[1]

    # NOTE(review): relu_backward / sigmoid_backward are defined outside this
    # file view. If they already return dZ (i.e. multiply by dA internally),
    # the extra `dA *` below applies dA twice - confirm against their definitions.
    if activation == 'relu':
        dZ = dA * relu_backward(dA, Z)
    elif activation == 'sigmoid':
        dZ = dA * sigmoid_backward(dA, Z)
    else:
        # Without this branch dZ would stay unbound and surface later as an
        # opaque UnboundLocalError.
        raise ValueError(
            f"Unsupported activation {activation!r}; expected 'relu' or 'sigmoid'."
        )

    dW = dZ @ A_prev.T / m
    db = np.sum(dZ, axis=1, keepdims=True) / m
    dA_prev = W.T @ dZ

    return dA_prev, dW, db

In [None]:
def backpropagation(AL: np.ndarray, y: np.ndarray, caches: list, number_of_layers: int) -> dict:
    """
    Performs a full backpropagation.

    The last layer is processed with the 'sigmoid' activation and every
    earlier layer with 'relu'.

    Parameters
    ----------
    AL : np.ndarray
        Output of the forward propagation

    y : np.ndarray
        Target prediction values; reshaped to the shape of AL

    caches : list
        List of caches from the forward propagation; caches[i] is used
        for layer i + 1

    number_of_layers : int
        A number of layers, must be at least 1

    Returns
    -------
    grads : dictionary
        A dictionary of gradients from every step of BP, stored under the
        keys "dA<i-1>", "dW<i>" and "db<i>" for every layer i

    Raises
    ------
    ValueError
        If `number_of_layers` is smaller than 1.
    """
    if number_of_layers < 1:
        # A non-positive count would make caches[number_of_layers - 1] wrap
        # around to the end of the list and produce keys such as "dA-1".
        raise ValueError(
            f"number_of_layers must be at least 1, got {number_of_layers}"
        )

    grads = {}
    y = y.reshape(AL.shape)

    # Matches the derivative of the binary cross-entropy loss w.r.t. AL.
    dAL = -(y / AL - (1 - y) / (1 - AL))

    # Output layer uses the sigmoid activation.
    current_cache = caches[number_of_layers - 1]
    dA_prev_temp, dW_temp, db_temp = backpropagation_step(dAL, current_cache, 'sigmoid')
    grads[f"dA{number_of_layers - 1}"] = dA_prev_temp
    grads[f"dW{number_of_layers}"] = dW_temp
    grads[f"db{number_of_layers}"] = db_temp

    # Remaining layers, walked from the last one back to the first; each step
    # consumes the dA produced by the layer after it.
    for layer in reversed(range(number_of_layers - 1)):
        current_cache = caches[layer]
        dA_prev_temp, dW_temp, db_temp = backpropagation_step(dA_prev_temp, current_cache, 'relu')
        grads[f"dA{layer}"] = dA_prev_temp
        grads[f"dW{layer + 1}"] = dW_temp
        grads[f"db{layer + 1}"] = db_temp

    return grads