<a href="https://colab.research.google.com/github/l1f21bsee0018/DEEP-LEARNING/blob/main/EX%2304%20vanishing_gradient_sigmoid_relu_120325.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np

# Section banner for the cell that defines the activation functions.
print("=== [1] Activation Functions Defined ===")

def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))`` of *x*.

    Accepts a scalar or a NumPy array; arrays are processed element-wise.

    The value is evaluated as ``exp(-log(1 + exp(-x)))`` through
    ``np.logaddexp`` instead of the textbook formula.  The textbook form
    calls ``np.exp(-x)`` directly, which overflows for strongly negative
    ``x`` and emits a RuntimeWarning; this form saturates cleanly at 0
    and 1 without ever building that huge intermediate.
    """
    # logaddexp(0, -x) == log(exp(0) + exp(-x)) == log(1 + exp(-x)).
    return np.exp(-np.logaddexp(0, -x))

def sigmoid_derivative(x):
    """Return the derivative of the sigmoid at *x*: ``s * (1 - s)`` with ``s = sigmoid(x)``."""
    s = sigmoid(x)
    return s * (1 - s)

def relu(x):
    """Return the rectified linear unit of *x*: the element-wise maximum of 0 and *x*."""
    rectified = np.maximum(0, x)
    return rectified

def relu_derivative(x):
    """Return 1 where *x* is strictly positive and 0 elsewhere (including at ``x == 0``)."""
    is_positive = x > 0
    return np.where(is_positive, 1, 0)

# Confirmation message printed once the four activation helpers are defined.
print("Sigmoid, Sigmoid Derivative, ReLU, and ReLU Derivative functions are ready.\n")


=== [1] Activation Functions Defined ===
Sigmoid, Sigmoid Derivative, ReLU, and ReLU Derivative functions are ready.



In [None]:
print("=== [2] Initialization ===")

# All three layers start from the same weight and a zero bias.
w1 = w2 = w3 = 0.5
b1 = b2 = b3 = 0

# Single training example: the input and its true label.
x, y = 1, 1

print(f"Initial Weights: w1 = {w1}, w2 = {w2}, w3 = {w3}")
print(f"Biases: b1 = {b1}, b2 = {b2}, b3 = {b3}")
print(f"Input: x = {x}, True Label: y = {y}\n")


=== [2] Initialization ===
Initial Weights: w1 = 0.5, w2 = 0.5, w3 = 0.5
Biases: b1 = 0, b2 = 0, b3 = 0
Input: x = 1, True Label: y = 1



In [None]:
print("=== [3] Forward Pass: Sigmoid Network ===")

# Layer 1: affine pre-activation followed by the sigmoid.
z1_sigmoid = w1 * x + b1
a1_sigmoid = sigmoid(z1_sigmoid)

# Layer 2: consumes the layer-1 activation.
z2_sigmoid = w2 * a1_sigmoid + b2
a2_sigmoid = sigmoid(z2_sigmoid)

# Output layer: its activation is the network's prediction.
z3_sigmoid = w3 * a2_sigmoid + b3
y_pred_sigmoid = sigmoid(z3_sigmoid)

# Half squared error between the true label and the prediction.
residual_sigmoid = y - y_pred_sigmoid
loss_sigmoid = 0.5 * residual_sigmoid ** 2

forward_summary_sigmoid = f"a1: {a1_sigmoid:.4f}, a2: {a2_sigmoid:.4f}, y_pred: {y_pred_sigmoid:.4f}, Loss: {loss_sigmoid:.4f}\n"
print(forward_summary_sigmoid)


=== [3] Forward Pass: Sigmoid Network ===
a1: 0.6225, a2: 0.5772, y_pred: 0.5717, Loss: 0.0917



In [None]:
print("=== [4] Backward Pass: Sigmoid Network ===")

# Output layer: the chain rule gives dL/dz3, which is shared by dL/dw3 and dL/da2.
dL_dy_pred_sigmoid = -(y - y_pred_sigmoid)
dy_pred_dz3_sigmoid = sigmoid_derivative(z3_sigmoid)
dL_dz3_sigmoid = dL_dy_pred_sigmoid * dy_pred_dz3_sigmoid
dL_dw3_sigmoid = dL_dz3_sigmoid * a2_sigmoid

# Layer 2: propagate through w3, then through the layer-2 sigmoid.
dL_da2_sigmoid = dL_dz3_sigmoid * w3
da2_dz2_sigmoid = sigmoid_derivative(z2_sigmoid)
dL_dz2_sigmoid = dL_da2_sigmoid * da2_dz2_sigmoid
dL_dw2_sigmoid = dL_dz2_sigmoid * a1_sigmoid

# Layer 1: propagate through w2, then through the layer-1 sigmoid.
dL_da1_sigmoid = dL_dz2_sigmoid * w2
da1_dz1_sigmoid = sigmoid_derivative(z1_sigmoid)
dL_dw1_sigmoid = dL_da1_sigmoid * da1_dz1_sigmoid * x

backward_summary_sigmoid = f"dL/dw3: {dL_dw3_sigmoid:.4f}, dL/dw2: {dL_dw2_sigmoid:.4f}, dL/dw1: {dL_dw1_sigmoid:.4f}\n"
print(backward_summary_sigmoid)


=== [4] Backward Pass: Sigmoid Network ===
dL/dw3: -0.0605, dL/dw2: -0.0080, dL/dw1: -0.0015



In [None]:
print("=== [5] Forward Pass: ReLU Network ===")

# Layer 1: affine pre-activation followed by ReLU.
z1_relu = w1 * x + b1
a1_relu = relu(z1_relu)

# Layer 2: consumes the layer-1 activation.
z2_relu = w2 * a1_relu + b2
a2_relu = relu(z2_relu)

# Output layer: its activation is the network's prediction.
z3_relu = w3 * a2_relu + b3
y_pred_relu = relu(z3_relu)

# Half squared error between the true label and the prediction.
residual_relu = y - y_pred_relu
loss_relu = 0.5 * residual_relu ** 2

forward_summary_relu = f"a1: {a1_relu:.4f}, a2: {a2_relu:.4f}, y_pred: {y_pred_relu:.4f}, Loss: {loss_relu:.4f}\n"
print(forward_summary_relu)


=== [5] Forward Pass: ReLU Network ===
a1: 0.5000, a2: 0.2500, y_pred: 0.1250, Loss: 0.3828



In [None]:
print("=== [6] Backward Pass: ReLU Network ===")

# Output layer: the chain rule gives dL/dz3, which is shared by dL/dw3 and dL/da2.
dL_dy_pred_relu = -(y - y_pred_relu)
dy_pred_dz3_relu = relu_derivative(z3_relu)
dL_dz3_relu = dL_dy_pred_relu * dy_pred_dz3_relu
dL_dw3_relu = dL_dz3_relu * a2_relu

# Layer 2: propagate through w3, then through the layer-2 ReLU.
dL_da2_relu = dL_dz3_relu * w3
da2_dz2_relu = relu_derivative(z2_relu)
dL_dz2_relu = dL_da2_relu * da2_dz2_relu
dL_dw2_relu = dL_dz2_relu * a1_relu

# Layer 1: propagate through w2, then through the layer-1 ReLU.
dL_da1_relu = dL_dz2_relu * w2
da1_dz1_relu = relu_derivative(z1_relu)
dL_dw1_relu = dL_da1_relu * da1_dz1_relu * x

backward_summary_relu = f"dL/dw3: {dL_dw3_relu:.4f}, dL/dw2: {dL_dw2_relu:.4f}, dL/dw1: {dL_dw1_relu:.4f}\n"
print(backward_summary_relu)


=== [6] Backward Pass: ReLU Network ===
dL/dw3: -0.2188, dL/dw2: -0.2188, dL/dw1: -0.2188



In [None]:
print("=== [7] Final Comparison: Sigmoid vs ReLU Gradients ===")

# Collect each network's report lines, then emit them one per line.
sigmoid_report = [
    "--- Sigmoid Gradients ---",
    f"dL/dw3: {dL_dw3_sigmoid:.4f}",
    f"dL/dw2: {dL_dw2_sigmoid:.4f}",
    f"dL/dw1: {dL_dw1_sigmoid:.4f}",
]
print(*sigmoid_report, sep="\n")

# The leading newline in the header keeps a blank line between the two reports.
relu_report = [
    "\n--- ReLU Gradients ---",
    f"dL/dw3: {dL_dw3_relu:.4f}",
    f"dL/dw2: {dL_dw2_relu:.4f}",
    f"dL/dw1: {dL_dw1_relu:.4f}",
]
print(*relu_report, sep="\n")


=== [7] Final Comparison: Sigmoid vs ReLU Gradients ===
--- Sigmoid Gradients ---
dL/dw3: -0.0605
dL/dw2: -0.0080
dL/dw1: -0.0015

--- ReLU Gradients ---
dL/dw3: -0.2188
dL/dw2: -0.2188
dL/dw1: -0.2188
