In [78]:
import numpy as np
# Seed NumPy's global RNG so the np.random.randn weight initialisation below
# produces the same values on every Restart & Run All.
np.random.seed(1)

In [86]:
def sigmoid(Z : np.ndarray) -> np.ndarray:
    """Apply the logistic sigmoid element-wise.

    Args:
        Z: Pre-activation values.

    Returns:
        Array of the same shape as ``Z`` holding ``1 / (1 + exp(-Z))``.
    """
    # The original body referenced an undefined lowercase ``z``.
    a = 1/(1+np.exp(-Z))
    return a

def relu(Z : np.ndarray) -> np.ndarray:
    """Apply the rectified linear unit element-wise.

    Args:
        Z: Pre-activation values.

    Returns:
        Array of the same shape as ``Z`` with every negative entry replaced by 0.
    """
    # Return annotation corrected: ``np.array`` is a factory function, the
    # type is ``np.ndarray`` (as used by the sibling activations).
    a = np.maximum(0,Z)
    return a

def tanh(Z : np.ndarray) -> np.ndarray:
    """Element-wise hyperbolic tangent of ``Z`` (thin wrapper over ``np.tanh``)."""
    return np.tanh(Z)

def d_sigmoid(Z : np.ndarray) -> np.ndarray:
    """Derivative of the logistic sigmoid with respect to its input.

    Args:
        Z: Pre-activation values.

    Returns:
        Array of the same shape as ``Z`` holding ``s * (1 - s)`` where
        ``s = 1 / (1 + exp(-Z))``.
    """
    # Evaluate the sigmoid once and reuse it. The original wrote
    # ``sigmoid(Z)(1-sigmoid(Z))``, which tries to *call* the array because
    # the multiplication operator was missing.
    s = 1/(1+np.exp(-Z))
    d_g = s*(1-s)
    return d_g

def d_relu(Z : np.ndarray) -> np.ndarray:
    """Derivative of ReLU with respect to its input.

    Args:
        Z: Pre-activation values. Not modified.

    Returns:
        Float array of the same shape as ``Z``: 1.0 where ``Z > 0`` and 0.0
        elsewhere (the derivative at exactly 0 is taken to be 0).
    """
    # The original chained assignment ``d_g = Z[Z<=0] = 0`` bound d_g to the
    # scalar 0 and zeroed entries of the caller's Z in place. Build a fresh
    # 0/1 mask instead.
    d_g = (Z > 0).astype(float)
    return d_g

def d_tanh(Z : np.ndarray) -> np.ndarray:
    """Derivative of tanh with respect to its input.

    Args:
        Z: Pre-activation values.

    Returns:
        Array of the same shape as ``Z`` holding ``1 - tanh(Z)**2``.
    """
    # The original called np.power with a single argument (missing exponent),
    # which raises TypeError.
    t = np.tanh(Z)
    d_g = 1-np.power(t,2)
    return d_g

In [80]:
def initialize_parameters(layer_dims : list[int]) -> dict[str,np.ndarray]:
    """Create the weight and bias arrays for every layer of the network.

    Args:
        layer_dims: Unit counts per layer, input layer first.

    Returns:
        Dict with keys ``"W1".."W{L-1}"`` and ``"b1".."b{L-1}"`` where
        ``L = len(layer_dims)``. ``W{l}`` has shape
        ``(layer_dims[l], layer_dims[l-1])`` drawn from ``np.random.randn``
        and scaled by 0.01; ``b{l}`` is a zero column of shape
        ``(layer_dims[l], 1)``.
    """
    params = {}

    # Walk consecutive (fan_in, fan_out) pairs; layer numbering starts at 1.
    size_pairs = zip(layer_dims, layer_dims[1:])
    for idx, (fan_in, fan_out) in enumerate(size_pairs, start=1):
        params[f"W{idx}"] = np.random.randn(fan_out, fan_in) * 0.01
        params[f"b{idx}"] = np.zeros((fan_out, 1))

    return params
    

In [87]:
def fwd_layer_computation(W : np.ndarray,b : np.ndarray, X : np.ndarray, activation : str)-> tuple[np.ndarray, tuple[np.ndarray, np.ndarray, np.ndarray]]:
    """Run one layer forward: linear step followed by an activation.

    Args:
        W: Weight matrix of the layer.
        b: Bias of the layer, broadcast-added to ``W @ X``.
        X: Input to the layer (network input or previous layer's activation).
        activation: One of ``"sigmoid"``, ``"relu"`` or ``"tanh"``.

    Returns:
        ``(A, cache)`` where ``A`` is the activated output and ``cache`` is
        the tuple ``(W, b, Z)`` with ``Z = W @ X + b``.

    Raises:
        ValueError: If ``activation`` is not one of the supported names.
    """
    # The original read shapes from an undefined ``w`` and fed an undefined
    # ``z`` to the activations; both locals were otherwise unused.
    Z = np.dot(W,X) + b

    if activation == "sigmoid":
        A = sigmoid(Z)
    elif activation == "relu":
        A = relu(Z)
    elif activation == "tanh":
        A = tanh(Z)
    else:
        # Fail loudly instead of hitting an unbound ``A`` below.
        raise ValueError(f"Unsupported activation: {activation!r}")

    cache = (W,b,Z)

    return A,cache

In [93]:
def forward_propogation(parameters : dict[str,np.ndarray], X : np.ndarray) -> tuple[np.ndarray, list]:
    """Run the full forward pass: ReLU hidden layers, sigmoid output layer.

    Args:
        parameters: Dict holding ``"W{l}"`` and ``"b{l}"`` for ``l = 1..L``;
            ``L`` is taken as ``len(parameters) // 2``.
        X: Network input, fed to layer 1.

    Returns:
        ``(AL, caches)`` where ``AL`` is the output-layer activation and
        ``caches`` is a list with one entry per layer, in layer order. Each
        entry is ``(cache, A)``: ``cache`` is whatever
        ``fwd_layer_computation`` returned for that layer and ``A`` is that
        layer's output activation.

    Raises:
        ValueError: If ``parameters`` does not describe at least one layer.
    """
    L = len(parameters)//2
    if L < 1:
        raise ValueError("parameters must contain at least one W/b pair")

    A = X
    caches = []

    # Hidden layers 1..L-1 use ReLU. The original indexed with an undefined
    # ``i`` instead of the loop variable.
    for l in range(1,L):
        W = parameters[f"W{l}"]
        b = parameters[f"b{l}"]

        A,cache = fwd_layer_computation(W,b,A,"relu")
        caches.append((cache,A))

    # Output layer must use its own parameters W{L}/b{L}; the original reused
    # whatever W and b were left over from the last hidden layer.
    W = parameters[f"W{L}"]
    b = parameters[f"b{L}"]
    AL,cache = fwd_layer_computation(W,b,A,"sigmoid")
    caches.append((cache,AL))

    return AL,caches
    
    

In [94]:
def compute_cost(AL : np.ndarray,Y : np.ndarray) -> float:
    """Binary cross-entropy cost averaged over the examples.

    Args:
        AL: Predicted probabilities; examples are indexed along axis 1.
        Y: Ground-truth labels (0 or 1), same shape as ``AL``.

    Returns:
        ``-(1/m) * sum(Y*log(AL) + (1-Y)*log(1-AL))`` as a Python float,
        where ``m = AL.shape[1]``.
    """
    m = AL.shape[1]

    # Keep predictions strictly inside (0, 1) so log() never sees 0.
    eps = 1e-15
    AL_safe = np.clip(AL, eps, 1 - eps)

    # Element-wise products, not np.dot: both operands share one shape, so a
    # matrix product is a shape error. The two terms are added; the original
    # subtracted the second one.
    log_likelihood = Y*np.log(AL_safe) + (1-Y)*np.log(1-AL_safe)
    J = -(1/m)*np.sum(log_likelihood)

    return float(J)

In [95]:
def bkwd_layer_propogation(dA : np.ndarray, Z : np.ndarray, W : np.ndarray, A : np.ndarray, activation : str) -> tuple[dict[str,np.ndarray], np.ndarray]:
    """Back-propagate through one layer (activation, then linear step).

    Args:
        dA: Gradient of the cost with respect to this layer's output
            activation.
        Z: This layer's pre-activation values.
        W: This layer's weight matrix.
        A: Activation that was fed INTO this layer during the forward pass
            (the previous layer's output, or the network input for layer 1).
            Examples are indexed along axis 1.
        activation: One of ``"sigmoid"``, ``"relu"`` or ``"tanh"``.

    Returns:
        ``(grad, dA_prev)`` where ``grad`` has keys ``"dW"`` (same shape as
        ``W``) and ``"db"`` (one column per layer unit), both averaged over
        the ``m = A.shape[1]`` examples, and ``dA_prev`` is the gradient with
        respect to ``A``, to be passed to the previous layer.

    Raises:
        ValueError: If ``activation`` is not one of the supported names.
    """
    grad = {}

    if activation == "sigmoid":
        dZ = np.multiply(dA,d_sigmoid(Z))
    elif activation == "relu":
        dZ = np.multiply(dA,d_relu(Z))
    elif activation == "tanh":
        dZ = np.multiply(dA,d_tanh(Z))
    else:
        # Fail loudly instead of hitting an unbound ``dZ`` below.
        raise ValueError(f"Unsupported activation: {activation!r}")

    m = A.shape[1]

    # Average over examples here so callers can apply the gradients directly.
    dW = np.dot(dZ,A.T)/m
    # keepdims keeps db a column so it lines up with the bias shape.
    db = np.sum(dZ,axis=1,keepdims=True)/m
    # Gradient flowing to the previous layer goes through W, not dW.
    dA_prev = np.dot(W.T,dZ)

    grad["dW"] = dW
    grad["db"] = db

    return grad, dA_prev

In [96]:
def backpropogation(caches, Y, X=None):
    """Compute gradients for every layer (sigmoid output, ReLU hidden layers).

    Args:
        caches: List with one entry per layer, in layer order. Each entry is
            ``(linear_cache, A)`` where ``linear_cache`` is indexed as
            ``(W, b, Z)`` and ``A`` is that layer's output activation.
        Y: Ground-truth labels, same shape as the last layer's activation.
        X: Network input that was fed to layer 1. ``caches`` only stores layer
            outputs, so the input needed for the first layer's weight gradient
            must be supplied here. Keyword-optional for signature
            compatibility, but required in practice.

    Returns:
        Dict with keys ``"dW{l}"`` and ``"db{l}"`` for ``l = 1..len(caches)``.

    Raises:
        ValueError: If ``caches`` is empty or ``X`` is not provided.
    """
    L = len(caches)
    if L == 0:
        raise ValueError("caches must contain at least one layer")
    if X is None:
        raise ValueError("X (the network input) is required to compute layer-1 gradients")

    grads = {}

    # Gradient of the cross-entropy cost with respect to the output activation.
    AL = caches[L-1][1]
    dA = -np.divide(Y,AL) + np.divide((1-Y),(1-AL))

    # Walk layers L..1. caches[l-1] belongs to layer l. The original loop
    # stopped before cache index 0 and labelled gradients with the cache
    # index rather than the layer number.
    for l in range(L,0,-1):
        linear_cache = caches[l-1][0]
        W = linear_cache[0]
        Z = linear_cache[2]

        # The weight gradient needs the activation that ENTERED layer l: the
        # previous layer's output, or X for the first layer. The original
        # passed the layer's own output instead.
        A_prev = X if l == 1 else caches[l-2][1]
        activation = "sigmoid" if l == L else "relu"

        grad,dA = bkwd_layer_propogation(dA,Z,W,A_prev,activation)
        grads[f"dW{l}"] = grad["dW"]
        grads[f"db{l}"] = grad["db"]

    return grads

In [97]:
def update_parameters(parameters, grads, learning_rate):
    """Apply one gradient-descent step to every layer's weights and biases.

    Args:
        parameters: Dict holding ``"W{l}"`` and ``"b{l}"`` for ``l = 1..L``;
            ``L`` is taken as ``len(parameters) // 2``. Updated in place (the
            entries are rebound to new arrays).
        grads: Dict holding ``"dW{l}"`` and ``"db{l}"`` for the same layers.
            ``db{l}`` may be flat or a column; it is reshaped to match
            ``b{l}``.
        learning_rate: Step size multiplying each gradient.

    Returns:
        The same ``parameters`` dict, after the update.
    """
    # Integer division: the original ``/`` produced a float, which range()
    # rejects.
    L = len(parameters)//2

    for l in range(1,L+1):
        W = parameters[f"W{l}"]
        b = parameters[f"b{l}"]

        dW = grads[f"dW{l}"]
        # Align db with b so a flat (n,) gradient cannot broadcast into (n, n).
        db = np.reshape(grads[f"db{l}"], b.shape)

        # Plain gradient descent. The original ignored learning_rate and
        # collapsed dW with a sum over axis 1.
        parameters[f"W{l}"] = W - learning_rate*dW
        # The original stored b under the "W" key, overwriting the weights.
        parameters[f"b{l}"] = b - learning_rate*db

    return parameters
    
    
    