# A9 : Neural Networks

---

### 1. Calculate the outputs of the given data samples

In [1]:
import math

# Network weights, addressed by position in calculate_output:
#   w[0], w[2], w[4] -> first hidden sum  (x1 weight, x2 weight, constant term)
#   w[1], w[3], w[5] -> second hidden sum (x1 weight, x2 weight, constant term)
#   w[6], w[7], w[8] -> output sum        (two hidden weights, constant term)
w = [
    1.5, 0.5,      # x1 weights into the two hidden sums
    0.5, 1.5,      # x2 weights into the two hidden sums
    -1.5, -0.5,    # constant terms of the two hidden sums
    -1, 1, -0.5,   # output sum: hidden weights, then constant term
]

# Activation functions
def sigmoid(x):
    """Return the logistic sigmoid 1 / (1 + e**(-x)) of a scalar.

    Args:
        x: A real number.

    Returns:
        A float in [0, 1].

    For strongly negative ``x`` the term ``math.exp(-x)`` exceeds the float
    range and raises ``OverflowError``; the sigmoid's limit there is 0, so
    that value is returned instead of letting the error propagate.
    """
    try:
        return 1 / (1 + math.exp(-x))
    except OverflowError:
        # exp(-x) is too large to represent, so 1 / (1 + exp(-x)) rounds to 0.
        return 0.0

def Hardlim(x):
    """Hard-limit step: 1 when ``x`` is non-negative, otherwise 0."""
    if x >= 0:
        return 1
    return 0

def ReLU(x):
    """Rectified linear unit: ``x`` when it is positive, otherwise the int 0."""
    if x > 0:
        return x
    return 0

# Calculate the output with the given activation function
def calculate_output(x1, x2, activation_function, weights=None):
    """Evaluate the network (two inputs, two hidden sums, one output).

    Args:
        x1: First input value.
        x2: Second input value.
        activation_function: Callable applied to both hidden sums and to the
            output sum.
        weights: Optional sequence of nine numbers laid out as
            ``[w_x1_h1, w_x1_h2, w_x2_h1, w_x2_h2, b_h1, b_h2,
            w_h1_out, w_h2_out, b_out]``. When omitted, the notebook-level
            list ``w`` is used, which is the original behaviour.

    Returns:
        The activation of the output sum.
    """
    if weights is None:
        # Keep existing three-argument callers working against the global.
        weights = w

    s1 = weights[0] * x1 + weights[2] * x2 + weights[4]
    s2 = weights[1] * x1 + weights[3] * x2 + weights[5]
    s3 = (
        weights[6] * activation_function(s1)
        + weights[7] * activation_function(s2)
        + weights[8]
    )
    return activation_function(s3)

# Function to display results
def display_results(activation_function, function_name):
    """Print the network output for every binary input pair.

    Args:
        activation_function: Callable forwarded to ``calculate_output``.
        function_name: Label shown in the printed header line.
    """
    print(f"\nall activation functions are [{function_name}]\n")
    # Enumerate (0,0), (0,1), (1,0), (1,1) in that order.
    input_pairs = [(first, second) for first in range(2) for second in range(2)]
    for x1, x2 in input_pairs:
        y = calculate_output(x1, x2, activation_function)
        print(f"x1 = {x1}, x2 = {x2}, y = {y}")

# Print the output table once per activation function, in this order
for activation, label in ((Hardlim, "Hardlim"), (sigmoid, "sigmoid"), (ReLU, "ReLU")):
    display_results(activation, label)



all activation functions are [Hardlim]

x1 = 0, x2 = 0, y = 0
x1 = 0, x2 = 1, y = 1
x1 = 1, x2 = 0, y = 0
x1 = 1, x2 = 1, y = 0

all activation functions are [sigmoid]

x1 = 0, x2 = 0, y = 0.42436377623451826
x1 = 0, x2 = 1, y = 0.4905304217782711
x1 = 1, x2 = 0, y = 0.3775406687981454
x1 = 1, x2 = 1, y = 0.42436377623451826

all activation functions are [ReLU]

x1 = 0, x2 = 0, y = 0
x1 = 0, x2 = 1, y = 0.5
x1 = 1, x2 = 0, y = 0
x1 = 1, x2 = 1, y = 0.5


---

### 2. Update the weights of the given neural network using Gradient Descent method

- We have one training sample (1,1).
- The initial weights are w1=1, w2 = 1. 
- The learning rate is 0.1. 
- The activation function is the sigmoid function.
- The Loss function is the MSE.

In [2]:
import numpy as np

# Sigmoid function and its derivative
def sigmoid(x):
    """Logistic sigmoid 1 / (1 + exp(-x)), evaluated with ``np.exp``."""
    denominator = 1 + np.exp(-x)
    return 1 / denominator

def sigmoid_derivative(sigmoid_output):
    """Sigmoid slope expressed through the sigmoid's own output.

    Args:
        sigmoid_output: A value already produced by the sigmoid (not the
            pre-activation).

    Returns:
        ``sigmoid_output * (1 - sigmoid_output)``.
    """
    complement = 1 - sigmoid_output
    return sigmoid_output * complement

# Derivative of the MSE loss function
def mse_loss_derivative(y_true, y_pred):
    """Return the prediction error ``y_pred - y_true``.

    This is the derivative with respect to ``y_pred`` of the half squared
    error ``0.5 * (y_pred - y_true) ** 2``.
    NOTE(review): a squared error without the 1/2 factor would give twice
    this value; confirm which loss convention the exercise intends.
    """
    error = y_pred - y_true
    return error

# Single function for forward pass, backward pass, and calculating gradients
def calculate_grad(x, y_true, W1, W2, learning_rate):
    """Gradients of the loss with respect to W1 and W2 for one sample.

    The network is ``y_pred = sigmoid(W2 * sigmoid(W1 * x))``.

    Args:
        x: Input value.
        y_true: Target value.
        W1: Weight from the input to the hidden unit.
        W2: Weight from the hidden unit to the output.
        learning_rate: Accepted for signature compatibility; not used here.

    Returns:
        Tuple ``(grad_W1, grad_W2)``.
    """
    # Forward pass
    hidden = sigmoid(W1 * x)
    y_pred = sigmoid(W2 * hidden)

    # Error signal at the output pre-activation
    output_delta = mse_loss_derivative(y_true, y_pred) * sigmoid_derivative(y_pred)

    # Chain rule: W2 sees the hidden activation, W1 sees the input
    grad_W2 = output_delta * hidden
    grad_W1 = output_delta * W2 * sigmoid_derivative(hidden) * x

    return grad_W1, grad_W2

def train_step(x, y_true, W1, W2, learning_rate):
    """Apply one gradient-descent update for a single training sample.

    Args:
        x: Input value.
        y_true: Target value.
        W1: Current first-layer weight.
        W2: Current second-layer weight.
        learning_rate: Step size multiplying each gradient.

    Returns:
        Tuple ``(W1, W2)`` after the update.
    """
    gradients = calculate_grad(x, y_true, W1, W2, learning_rate)
    W1 -= learning_rate * gradients[0]
    W2 -= learning_rate * gradients[1]
    return W1, W2

# Initial weights, learning rate and the single training sample
W1, W2 = 1, 1
learning_rate = 0.1
x, y_true = 1, 1

# First update, starting from the initial weights
first_update_W1, first_update_W2 = train_step(x, y_true, W1, W2, learning_rate)

# Second update, continuing from the result of the first
second_update_W1, second_update_W2 = train_step(
    x, y_true, first_update_W1, first_update_W2, learning_rate
)

# Leave W1 / W2 holding the latest weights
W1, W2 = second_update_W1, second_update_W2

print(f"First update: W1 = {first_update_W1}, W2 = {first_update_W2}")
print(f"Second update: W1 = {second_update_W1}, W2 = {second_update_W2}")


First update: W1 = 1.0014015358528543, W2 = 1.0052113052936016
Second update: W1 = 1.0028035692263884, W2 = 1.0104027514671299


---

### 3. Update the weights of the given neural network using Gradient Descent method

- We have two training samples (1,1) and (0,0).
- The initial weights are w1=1, w2 = 1. 
- The learning rate is 0.1. 
- The activation function is the sigmoid function.
- The Loss function is the MSE.

In [3]:
import numpy as np

# Sigmoid function and its derivative
def sigmoid(x):
    """Logistic sigmoid of ``x``: 1 / (1 + exp(-x)), using ``np.exp``."""
    exp_neg_x = np.exp(-x)
    return 1 / (1 + exp_neg_x)

def sigmoid_derivative(sigmoid_output):
    """Derivative of the sigmoid, given the sigmoid's output value.

    Returns ``sigmoid_output * (1 - sigmoid_output)``; the argument is the
    activation itself, not the pre-activation.
    """
    one_minus_output = 1 - sigmoid_output
    return sigmoid_output * one_minus_output

# Derivative of the MSE loss function
def mse_loss_derivative(y_true, y_pred):
    """Return ``y_pred - y_true``, the loss gradient with respect to ``y_pred``.

    That expression is the derivative of ``0.5 * (y_pred - y_true) ** 2``.
    NOTE(review): without the 1/2 factor the gradient would be doubled;
    confirm the loss convention expected by the exercise.
    """
    difference = y_pred - y_true
    return difference

# Single function for forward pass, backward pass, and calculating gradients
def calculate_grad(x, y_true, W1, W2, learning_rate):
    """Compute per-sample gradients for ``sigmoid(W2 * sigmoid(W1 * x))``.

    Args:
        x: Input value of the sample.
        y_true: Target value of the sample.
        W1: Input-to-hidden weight.
        W2: Hidden-to-output weight.
        learning_rate: Unused; kept so existing call sites stay valid.

    Returns:
        Tuple ``(grad_W1, grad_W2)`` of loss gradients.
    """
    # Forward pass
    a1 = sigmoid(W1 * x)
    y_pred = sigmoid(W2 * a1)

    # Backward pass: factor shared by both gradients
    delta = mse_loss_derivative(y_true, y_pred) * sigmoid_derivative(y_pred)

    grad_W2 = delta * a1
    # Continue back through W2 and the hidden sigmoid to reach W1
    grad_W1 = delta * W2 * sigmoid_derivative(a1) * x

    return grad_W1, grad_W2

def train_step(x, y_true, W1, W2, learning_rate):
    """Apply one batch gradient-descent update over all training samples.

    Args:
        x: Indexable collection of input values.
        y_true: Indexable collection of targets, aligned with ``x``.
        W1: Current first-layer weight.
        W2: Current second-layer weight.
        learning_rate: Step size multiplying the accumulated gradients.

    Returns:
        Tuple ``(W1, W2)`` after the update.

    NOTE(review): per-sample gradients are summed, not averaged over the
    batch; confirm this matches the intended loss definition.
    """
    # Every sample's gradient is evaluated at the same (pre-update) weights.
    per_sample_grads = [
        calculate_grad(x[idx], y_true[idx], W1, W2, learning_rate)
        for idx in range(len(x))
    ]

    total_grad_W1 = 0
    total_grad_W2 = 0
    for sample_grad_W1, sample_grad_W2 in per_sample_grads:
        total_grad_W1 += sample_grad_W1
        total_grad_W2 += sample_grad_W2

    # Single update using the accumulated gradients
    W1 -= learning_rate * total_grad_W1
    W2 -= learning_rate * total_grad_W2

    return W1, W2

# Initial weights and learning rate
W1, W2 = 1, 1
learning_rate = 0.1

# Training samples: x[i] is the input whose target is y[i]
x = [1, 0]
y = [1, 0]

# First update, using both samples in one step
W1, W2 = train_step(x, y, W1, W2, learning_rate)

print(f"First update: W1 = {W1}")




First update: W1 = 1.0014015358528543


---
### 4. Update the weights of the given neural network using Gradient Descent method

- We have one training sample (1,1).
- The initial weights are all 1.
- The learning rate is 0.1. 
- The activation function is the sigmoid function.
- The Loss function is the MSE.

In [4]:
import numpy as np

# Sigmoid function and its derivative
def sigmoid(x):
    """Return 1 / (1 + exp(-x)), the logistic sigmoid, via ``np.exp``."""
    decay = np.exp(-x)
    result = 1 / (1 + decay)
    return result

def sigmoid_derivative(sigmoid_output):
    """Slope of the sigmoid at a point, given the sigmoid's value there.

    Args:
        sigmoid_output: The activation (sigmoid output), not its input.

    Returns:
        ``sigmoid_output * (1 - sigmoid_output)``.
    """
    remainder = 1 - sigmoid_output
    slope = sigmoid_output * remainder
    return slope

# Derivative of the MSE loss function
def mse_loss_derivative(y_true, y_pred):
    """Gradient of the loss with respect to the prediction: ``y_pred - y_true``.

    Equals the derivative of ``0.5 * (y_pred - y_true) ** 2``.
    NOTE(review): a loss defined without the 1/2 factor would double this;
    confirm the convention the exercise expects.
    """
    residual = y_pred - y_true
    return residual

# Function to perform a forward pass
def forward_pass(x, weights):
    """Run the five-weight network forward and return every activation.

    Topology: ``x -> h1`` through w1; ``h1 -> h2`` through w2 and
    ``h1 -> h3`` through w3; ``(h2, h3) -> y_pred`` through w4 and w5.
    Every unit applies ``sigmoid``.

    Args:
        x: Input value.
        weights: Sequence of exactly five weights ``(w1, w2, w3, w4, w5)``.

    Returns:
        Tuple ``(h1, h2, h3, y_pred)``.
    """
    w1, w2, w3, w4, w5 = weights

    h1 = sigmoid(w1 * x)
    # h2 and h3 both read from h1
    h2, h3 = sigmoid(w2 * h1), sigmoid(w3 * h1)
    output_sum = w4 * h2 + w5 * h3

    return h1, h2, h3, sigmoid(output_sum)

# Function to compute gradients using backward pass
def backward_pass(x, y_true, weights, h1, h2, h3, y_pred):
    """Backpropagate the loss through the five-weight network.

    Args:
        x: Input value used in the forward pass.
        y_true: Target value.
        weights: Sequence of exactly five weights ``(w1, w2, w3, w4, w5)``.
        h1: Activation of the first hidden unit.
        h2: Activation of the hidden unit fed by w2.
        h3: Activation of the hidden unit fed by w3.
        y_pred: Network output from the forward pass.

    Returns:
        Tuple ``(grad_w1, grad_w2, grad_w3, grad_w4, grad_w5)``.
    """
    # w1's own value is not needed; only the input x enters its gradient.
    _, w2, w3, w4, w5 = weights

    # Error signal at the output unit's pre-activation
    delta_out = mse_loss_derivative(y_true, y_pred) * sigmoid_derivative(y_pred)

    # Output-layer weights multiply h2 and h3 respectively
    grad_w4 = delta_out * h2
    grad_w5 = delta_out * h3

    # Error signals at the pre-activations of h2 and h3
    delta_h2 = delta_out * w4 * sigmoid_derivative(h2)
    delta_h3 = delta_out * w5 * sigmoid_derivative(h3)

    # w2 and w3 both multiply h1
    grad_w2 = delta_h2 * h1
    grad_w3 = delta_h3 * h1

    # h1 feeds both h2 and h3, so its loss gradient sums the two paths
    d_loss_h1 = delta_h2 * w2 + delta_h3 * w3

    # Through h1's sigmoid and on to w1, which multiplies x
    grad_w1 = d_loss_h1 * sigmoid_derivative(h1) * x

    return grad_w1, grad_w2, grad_w3, grad_w4, grad_w5

# Function to perform a single training step
def train_step(x, y_true, weights, learning_rate):
    """Perform one gradient-descent update of all five weights.

    Args:
        x: Input value of the training sample.
        y_true: Target value of the training sample.
        weights: Current weights ``(w1, w2, w3, w4, w5)``.
        learning_rate: Step size multiplying each gradient.

    Returns:
        A new list with the updated weights, in the same order.
    """
    # forward_pass returns (h1, h2, h3, y_pred), the order backward_pass expects
    activations = forward_pass(x, weights)
    grads = backward_pass(x, y_true, weights, *activations)

    updated_weights = []
    for weight, grad in zip(weights, grads):
        updated_weights.append(weight - learning_rate * grad)

    return updated_weights

# All five weights start at 1
weights = [1] * 5
learning_rate = 0.1

# Training sample: input and target
x, y_true = 1, 1

# First update; only w1 is reported
weights = train_step(x, y_true, weights, learning_rate)
first_update_w1 = weights[0]

print(f"First update: w1 = {first_update_w1}")

First update: w1 = 1.0002902919772119
