<a href="https://colab.research.google.com/github/kareemelhawaryy/AMIT/blob/main/backward_forward_propagations.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np

In [2]:
def sigmoid(Z):
    """Element-wise logistic function 1 / (1 + exp(-Z)).

    Computed as ``exp(-log(1 + exp(-Z)))`` through ``np.logaddexp`` so that
    large-magnitude inputs never overflow: the naive ``np.exp(-Z)`` overflows
    to ``inf`` (and emits a RuntimeWarning) once ``-Z`` exceeds roughly 709
    for float64.

    Args:
        Z: scalar or NumPy array of pre-activations.

    Returns:
        Values in [0, 1] with the same shape as ``Z``.
    """
    # logaddexp(0, -Z) == log(exp(0) + exp(-Z)) == log(1 + exp(-Z)),
    # evaluated without ever forming exp() of a large positive number.
    return np.exp(-np.logaddexp(0, -Z))

def sigmoid_derivative(A):
    """Slope of the sigmoid expressed through its output.

    Note the argument is the sigmoid *output* ``A`` (not the pre-activation),
    so the derivative is simply ``A * (1 - A)``.

    Args:
        A: scalar or array of sigmoid activations.

    Returns:
        ``A * (1 - A)``, element-wise.
    """
    complement = 1 - A
    return A * complement

def relu(Z):
    """Rectified linear unit: element-wise maximum of 0 and ``Z``.

    Args:
        Z: scalar or array of pre-activations.

    Returns:
        ``Z`` with every negative entry replaced by 0.
    """
    floor = 0
    return np.maximum(floor, Z)

def relu_derivative(Z):
    """Mask marking where the ReLU is active.

    Args:
        Z: scalar or array of pre-activations.

    Returns:
        The result of ``Z > 0`` (a boolean mask for array input); entries
        equal to 0 are treated as inactive.
    """
    is_active = Z > 0
    return is_active

In [3]:
def initialize_parameters(layer_dims, seed=1, scale=0.01):
    """Create weight matrices and zero bias vectors for a dense network.

    Args:
        layer_dims: sequence of layer sizes, input layer first. Consecutive
            pairs ``(layer_dims[l - 1], layer_dims[l])`` define layer ``l``.
        seed: value passed to ``np.random.seed`` before sampling. Defaults to
            1 (the previously hard-coded value). Pass ``None`` to leave the
            global NumPy random state untouched.
        scale: multiplier applied to the standard-normal weight samples.
            Defaults to 0.01 (the previously hard-coded value).

    Returns:
        dict with keys ``'W1'..'WL'`` of shape ``(layer_dims[l], layer_dims[l - 1])``
        and ``'b1'..'bL'`` of shape ``(layer_dims[l], 1)`` filled with zeros.
        Fewer than two layer sizes yields an empty dict.

    Side effects:
        Reseeds NumPy's *global* random state unless ``seed`` is ``None``.

    NOTE(review): the sample run recorded in this notebook stays at cost
    0.6931 for every printed iteration; the very small default ``scale`` is a
    likely contributor -- TODO confirm by trying a larger value.
    """
    if seed is not None:
        np.random.seed(seed)

    parameters = {}
    layer_pairs = zip(layer_dims, layer_dims[1:])
    for l, (fan_in, fan_out) in enumerate(layer_pairs, start=1):
        parameters['W' + str(l)] = np.random.randn(fan_out, fan_in) * scale
        parameters['b' + str(l)] = np.zeros((fan_out, 1))
    return parameters

In [4]:
def forward_propagation(X, parameters):
    """Run the network forward: ReLU on every hidden layer, sigmoid on the last.

    Args:
        X: input activations; each layer computes ``np.dot(W, A) + b``.
        parameters: dict holding ``'W1'..'WL'`` and ``'b1'..'bL'``; the layer
            count is taken as ``len(parameters) // 2``.

    Returns:
        Tuple ``(AL, caches)`` where ``AL`` is the sigmoid output of the final
        layer and ``caches`` is a list with one ``(A, W, b, Z)`` tuple per
        layer, in layer order (``A`` being that layer's output activation).
    """
    num_layers = len(parameters) // 2

    # Pair each layer index with its activation: ReLU for layers
    # 1..L-1, sigmoid for the output layer L.
    schedule = [(idx, relu) for idx in range(1, num_layers)]
    schedule.append((num_layers, sigmoid))

    caches = []
    current = X
    for idx, activate in schedule:
        W = parameters[f"W{idx}"]
        b = parameters[f"b{idx}"]
        Z = np.dot(W, current) + b
        current = activate(Z)
        caches.append((current, W, b, Z))

    return current, caches

In [10]:
def compute_cost(AL, Y):
    """Binary cross-entropy between predictions ``AL`` and labels ``Y``.

    Args:
        AL: predicted probabilities, broadcast-compatible with ``Y``.
        Y: labels; ``Y.shape[1]`` is used as the number of examples.

    Returns:
        The summed cross-entropy divided by ``Y.shape[1]``.
    """
    num_examples = Y.shape[1]
    eps = 1e-8  # added inside both logs so log(0) is never evaluated
    positive_term = Y * np.log(AL + eps)
    negative_term = (1 - Y) * np.log(1 - AL + eps)
    total = np.sum(positive_term + negative_term)
    return -total / num_examples
def backward_propagation(X, Y, caches, parameters):
    """Compute weight and bias gradients for every layer.

    The output layer uses ``dZ = AL - Y``; every earlier layer multiplies the
    gradient flowing back by ``relu_derivative(Z)``.

    Args:
        X: network input; ``X.shape[1]`` is used as the number of examples.
        Y: labels, subtracted from the final activation.
        caches: list of ``(A, W, b, Z)`` tuples, one per layer, in layer order.
        parameters: dict providing ``'W<l>'`` for propagating the gradient to
            the previous layer.

    Returns:
        dict with ``'dW<l>'`` and ``'db<l>'`` for each layer ``l`` in
        ``1..len(caches)``.
    """
    num_layers = len(caches)
    m = X.shape[1]
    AL, _, _, _ = caches[-1]

    gradients = {}
    output_idx = num_layers - 1
    dZ = AL - Y  # output-layer error term
    upstream = None

    # Walk the layers from the output back to the first one (0-based index).
    for idx in range(output_idx, -1, -1):
        if idx != output_idx:
            _, _, _, Z = caches[idx]
            dZ = upstream * relu_derivative(Z)

        # Activations that fed this layer: the raw input for the first layer,
        # otherwise the previous layer's cached output.
        layer_input = X if idx == 0 else caches[idx - 1][0]

        layer = idx + 1  # parameter/gradient keys are 1-based
        gradients[f"dW{layer}"] = (1 / m) * np.dot(dZ, layer_input.T)
        gradients[f"db{layer}"] = (1 / m) * np.sum(dZ, axis=1, keepdims=True)
        upstream = np.dot(parameters[f"W{layer}"].T, dZ)

    return gradients

In [11]:
def update_parameters(parameters, grads, learning_rate):
    """Apply one gradient-descent step to every weight and bias.

    Args:
        parameters: dict with ``'W<l>'`` / ``'b<l>'`` entries; updated with
            ``-=``, so the passed dict (and NumPy arrays in it) are modified.
        grads: dict with matching ``'dW<l>'`` / ``'db<l>'`` entries.
        learning_rate: step size multiplying each gradient.

    Returns:
        The same ``parameters`` dict that was passed in.
    """
    num_layers = len(parameters) // 2
    for layer in range(1, num_layers + 1):
        for prefix in ("W", "b"):
            key = f"{prefix}{layer}"
            parameters[key] -= learning_rate * grads["d" + key]
    return parameters

def model(X, Y, layers_dims, learning_rate=0.1, num_iterations=100):
    """Train the network with plain gradient descent and return its parameters.

    Each iteration runs a forward pass, evaluates the cost, back-propagates,
    and applies one parameter update. The cost measured *before* that
    iteration's update is printed on iterations 0, 10, 20, ...

    Args:
        X: training inputs passed to ``forward_propagation``.
        Y: training labels passed to ``compute_cost`` / ``backward_propagation``.
        layers_dims: layer sizes handed to ``initialize_parameters``.
        learning_rate: step size for ``update_parameters``.
        num_iterations: number of gradient-descent iterations.

    Returns:
        The parameter dict after the final update.
    """
    weights = initialize_parameters(layers_dims)

    for i in range(num_iterations):
        # Forward pass and loss for the current weights.
        predictions, layer_caches = forward_propagation(X, weights)
        cost = compute_cost(predictions, Y)

        # Backward pass followed by the gradient step.
        gradients = backward_propagation(X, Y, layer_caches, weights)
        weights = update_parameters(weights, gradients, learning_rate)

        if not i % 10:
            print(f"Iteration {i}, cost: {cost:.4f}")

    return weights

In [12]:
# XOR truth table: each column of X is one example, Y holds its label.
X = np.array([
    [0, 0, 1, 1],
    [0, 1, 0, 1],
])
Y = np.array([[0, 1, 1, 0]])

# Layer sizes: 2 inputs -> 4 units -> 3 units -> 1 output.
layers_dims = [2, 4, 3, 1]

model(
    X,
    Y,
    layers_dims,
    learning_rate=1.0,
    num_iterations=100,
)

Iteration 0, cost: 0.6931
Iteration 10, cost: 0.6931
Iteration 20, cost: 0.6931
Iteration 30, cost: 0.6931
Iteration 40, cost: 0.6931
Iteration 50, cost: 0.6931
Iteration 60, cost: 0.6931
Iteration 70, cost: 0.6931
Iteration 80, cost: 0.6931
Iteration 90, cost: 0.6931


{'W1': array([[ 0.01624345, -0.00556695],
        [-0.00528172, -0.01072969],
        [ 0.0068055 , -0.02301539],
        [ 0.01744811, -0.01129289]]),
 'b1': array([[ 5.51886960e-04],
        [ 0.00000000e+00],
        [ 5.86297704e-06],
        [-2.83649428e-05]]),
 'W2': array([[ 0.00242212, -0.0024937 ,  0.01355014, -0.0219077 ],
        [-0.00322417, -0.00384054,  0.01133769, -0.01099891],
        [-0.00107705, -0.00877858,  0.00132463,  0.00692215]]),
 'b2': array([[ 0.00137573],
        [-0.0014309 ],
        [ 0.00112877]]),
 'W3': array([[-0.01192305,  0.01144724,  0.00974575]]),
 'b3': array([[3.96225887e-06]])}