##### Importing the necessary packages

In [4]:
import numpy as np

##### Defining the required functions

In [5]:
# Sigmoid activation function
def sigmoid(x):
    """Numerically stable logistic sigmoid, 1 / (1 + exp(-x)).

    Parameters
    ----------
    x : scalar or array-like
        Pre-activation value(s).

    Returns
    -------
    NumPy scalar for scalar input, ndarray of the same shape otherwise,
    with every value in [0, 1].
    """
    x = np.asarray(x)
    # exp() is only ever evaluated on non-positive values, so it cannot
    # overflow (the naive form computes exp(-x), which blows up for very
    # negative x and emits a RuntimeWarning).
    e = np.exp(-np.abs(x))
    # x >= 0: 1 / (1 + exp(-x));  x < 0: exp(x) / (1 + exp(x)) -- same function.
    out = np.where(x >= 0, 1 / (1 + e), e / (1 + e))
    # Indexing with () unwraps a 0-d result to a NumPy scalar and leaves
    # arrays with one or more dimensions untouched.
    return out[()]

# ReLU activation function
def relu(x):
    """Rectified linear unit: element-wise maximum of 0 and ``x``."""
    floor = 0
    return np.maximum(floor, x)

##### Input features and target output

In [6]:
# One training example: two input features and its binary target label
x = np.array(
    [0.5, 0.8]
)
y = 1

##### Initial weights and biases

In [7]:
# Hidden layer parameters: W1 is 3x2 (one row per hidden unit, one column
# per input feature) and b1 holds one bias per hidden unit.
W1 = np.array(
    [
        [0.2, -0.3],
        [0.4, 0.1],
        [-0.5, 0.2],
    ]
)
b1 = np.array([0.1, -0.2, 0.1])

# Output layer parameters: one weight per hidden unit and a scalar bias.
W2 = np.array([0.3, -0.4, 0.5])
b2 = 0.1

#### Forward Pass

In [8]:
# Hidden layer: affine transform of the input followed by ReLU
z1 = W1.dot(x) + b1
a1 = relu(z1)

# Output layer: affine transform of the hidden activations followed by sigmoid
z2 = W2.dot(a1) + b2
y_hat = sigmoid(z2)

# Report every intermediate value of the forward pass
print(f"Hidden layer input (z1): {z1}")
print(f"Hidden layer activations (a1): {a1}")
print(f"Output layer input (z2): {z2}")
print(f"Output (y_hat): {y_hat}")

Hidden layer input (z1): [-0.04  0.08  0.01]
Hidden layer activations (a1): [0.   0.08 0.01]
Output layer input (z2): 0.07300000000000001
Output (y_hat): 0.5182418997957381


##### Calculate the loss:
##### - compute the binary cross-entropy loss between the predicted output and the target output

In [9]:

# Binary cross-entropy loss function
def binary_cross_entropy_loss(y_true, y_pred, eps=1e-12):
    """Binary cross-entropy between target(s) and predicted probability(ies).

    Computes -(y_true * log(p) + (1 - y_true) * log(1 - p)), where p is
    ``y_pred`` clipped to [eps, 1 - eps].

    Parameters
    ----------
    y_true : scalar or array-like
        Target label(s).
    y_pred : scalar or array-like
        Predicted probability(ies).
    eps : float, optional
        Clipping margin that keeps both logarithms finite.

    Returns
    -------
    Loss value(s), element-wise for array inputs.
    """
    # Without clipping, a prediction of exactly 0 or 1 evaluates log(0),
    # which yields -inf and, multiplied by a zero coefficient, NaN.
    p = np.clip(y_pred, eps, 1 - eps)
    return -(y_true * np.log(p) + (1 - y_true) * np.log(1 - p))

# Loss of the current prediction y_hat against the target y
loss = binary_cross_entropy_loss(y_true=y, y_pred=y_hat)

print(f"Binary cross-entropy loss: {loss}")

Binary cross-entropy loss: 0.6573131577049656


##### Backward pass
##### - compute the gradients of the loss with respect to the output layer weights and biases
##### - compute the gradients of the loss with respect to the hidden layer weights and biases

In [10]:
# Derivatives of the activation functions
def sigmoid_derivative(x):
    """Derivative of the sigmoid at ``x``: sigmoid(x) * (1 - sigmoid(x))."""
    s = sigmoid(x)
    return s * (1 - s)


def relu_derivative(x):
    """Derivative of ReLU: 1 where ``x`` is strictly positive, 0 elsewhere."""
    is_positive = np.greater(x, 0)
    return np.where(is_positive, 1, 0)

# Binary cross-entropy loss function
# NOTE: this re-definition shadows the function of the same name defined in
# the loss cell earlier in the notebook; when the notebook is run top to
# bottom this copy is redundant and the two must be kept in sync.
def binary_cross_entropy_loss(y_true, y_pred, eps=1e-12):
    """Binary cross-entropy between target(s) and predicted probability(ies).

    Computes -(y_true * log(p) + (1 - y_true) * log(1 - p)), where p is
    ``y_pred`` clipped to [eps, 1 - eps].

    Parameters
    ----------
    y_true : scalar or array-like
        Target label(s).
    y_pred : scalar or array-like
        Predicted probability(ies).
    eps : float, optional
        Clipping margin that keeps both logarithms finite.

    Returns
    -------
    Loss value(s), element-wise for array inputs.
    """
    # Without clipping, a prediction of exactly 0 or 1 evaluates log(0),
    # which yields -inf and, multiplied by a zero coefficient, NaN.
    p = np.clip(y_pred, eps, 1 - eps)
    return -(y_true * np.log(p) + (1 - y_true) * np.log(1 - p))

# Backward pass (chain rule, from the output back towards the input)

# Output pre-activation: error signal dL/dz2
dL_dz2 = y_hat - y

# Output layer parameters
dL_dW2 = a1 * dL_dz2
dL_db2 = dL_dz2

# Propagate the error signal back to the hidden activations
dL_da1 = W2 * dL_dz2

# Through the ReLU: the gradient is zeroed wherever z1 was not positive
dL_dz1 = relu_derivative(z1) * dL_da1

# Hidden layer parameters; the broadcasted product is the outer product
# of dL_dz1 with x, one row per hidden unit
dL_dW1 = dL_dz1[:, np.newaxis] * x[np.newaxis, :]
dL_db1 = dL_dz1

print(f"Gradient of loss with respect to W2: {dL_dW2}")
print(f"Gradient of loss with respect to b2: {dL_db2}")
print(f"Gradient of loss with respect to W1: {dL_dW1}")
print(f"Gradient of loss with respect to b1: {dL_db1}")


Gradient of loss with respect to W2: [-0.         -0.03854065 -0.00481758]
Gradient of loss with respect to b2: -0.4817581002042619
Gradient of loss with respect to W1: [[-0.         -0.        ]
 [ 0.09635162  0.15416259]
 [-0.12043953 -0.19270324]]
Gradient of loss with respect to b1: [-0.          0.19270324 -0.24087905]


#### Update the weights and biases:
#### - Use the gradients computed in the backward pass to update the weights and biases of both layers using gradient descent with a learning rate of 0.1

#### After completing these steps, provide the updated weights and biases of both layers and discuss the implications of the updates on the network's performance

In [11]:
# Learning rate
learning_rate = 0.1

# One gradient-descent step: theta_new = theta - learning_rate * dL/dtheta.
# The results are bound to new names instead of updating W1/b1/W2/b2 with
# in-place `-=`, so this cell is idempotent: re-running it always starts
# from the same parameters rather than applying the same gradients again,
# and the values of W1/b1/W2/b2 used by earlier cells stay valid.
# To run further iterations, rebind explicitly (e.g. W1 = W1_new) and
# recompute the forward and backward passes first.
W1_new = W1 - learning_rate * dL_dW1
b1_new = b1 - learning_rate * dL_db1
W2_new = W2 - learning_rate * dL_dW2
b2_new = b2 - learning_rate * dL_db2

print("Updated weights and biases after one iteration:")
print(f"W1: {W1_new}")
print(f"b1: {b1_new}")
print(f"W2: {W2_new}")
print(f"b2: {b2_new}")

# Forward pass with updated weights and biases to check new output and loss
z1_new = np.dot(W1_new, x) + b1_new
a1_new = relu(z1_new)
z2_new = np.dot(W2_new, a1_new) + b2_new
y_hat_new = sigmoid(z2_new)
new_loss = binary_cross_entropy_loss(y, y_hat_new)

print(f"New hidden layer input (z1): {z1_new}")
print(f"New hidden layer activations (a1): {a1_new}")
print(f"New output layer input (z2): {z2_new}")
print(f"New output (y_hat): {y_hat_new}")
print(f"New binary cross-entropy loss: {new_loss}")

Updated weights and biases after one iteration:
W1: [[ 0.2        -0.3       ]
 [ 0.39036484  0.08458374]
 [-0.48795605  0.21927032]]
b1: [ 0.1        -0.21927032  0.12408791]
W2: [ 0.3        -0.39614594  0.50048176]
b2: 0.1481758100204262
New hidden layer input (z1): [-0.04        0.04357909  0.05552614]
New hidden layer activations (a1): [0.         0.04357909 0.05552614]
New output layer input (z2): 0.15870195200089976
New output (y_hat): 0.5395924239414079
New binary cross-entropy loss: 0.6169411948853357
