<a href="https://colab.research.google.com/github/olaf-ys/Forward-and-Backward-Propagation-in-MLP/blob/main/Forward%26Backward_Propagation_with_PyTorch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim

# Define a simple MLP model
class SimpleMLP(nn.Module):
    """Tiny multilayer perceptron: 2 inputs -> 1 hidden unit -> 1 output.

    A sigmoid is applied after each of the two linear layers, so the network
    output always lies strictly between 0 and 1.
    """

    def __init__(self):
        super().__init__()
        # Input -> hidden: 2 features in, 1 unit out (bias included by default).
        self.layer0 = nn.Linear(2, 1)
        # Hidden -> output: 1 unit in, 1 unit out (bias included by default).
        self.layer1 = nn.Linear(1, 1)
        # One stateless sigmoid module, reused after both linear layers.
        self.activation = nn.Sigmoid()

    def forward(self, x):
        """Run the forward pass.

        Args:
            x: Tensor whose last dimension has size 2.

        Returns:
            Tensor with the same leading dimensions as ``x`` and a last
            dimension of size 1.
        """
        hidden = self.activation(self.layer0(x))
        return self.activation(self.layer1(hidden))

# Create a model instance
model = SimpleMLP()

# Overwrite the random initialisation with fixed values so the printed numbers
# are reproducible and can be checked by hand. no_grad() keeps these in-place
# writes out of the autograd graph.
with torch.no_grad():
    for layer in (model.layer0, model.layer1):
        layer.weight.fill_(0.5)
        layer.bias.fill_(1.0)

# Input sample and regression target.
# The tensor is deliberately not named `input`: that would shadow Python's
# built-in input() for the rest of the notebook session.
input_tensor = torch.tensor([1.0, 2.0])
target = torch.tensor([0.4])

# Define the loss function and optimizer
criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=0.1)

# Forward pass: run once and reuse the result for both the printout and the
# loss, instead of building a second identical graph.
output = model(input_tensor)
print("Output of the MLP:", output.item())
loss = criterion(output, target)

# Backward pass and one SGD step
optimizer.zero_grad()
loss.backward()
optimizer.step()

# The gradients are still populated here even though step() has already run.
print("\nGradients after backward:")
for name, param in model.named_parameters():
    print(f"{name} grad: {param.grad}")

# Output updated weights and biases. detach() gives a view without autograd
# tracking and is the recommended replacement for the legacy `.data` attribute.
print("\nUpdated weights and biases:")
for name, param in model.named_parameters():
    print(name, param.detach())

Output of the MLP: 0.8118491768836975

Gradients after backward:
layer0.weight grad: tensor([[0.0044, 0.0088]])
layer0.bias grad: tensor([0.0044])
layer1.weight grad: tensor([[0.1163]])
layer1.bias grad: tensor([0.1258])

Updated weights and biases:
layer0.weight tensor([[0.4996, 0.4991]])
layer0.bias tensor([0.9996])
layer1.weight tensor([[0.4884]])
layer1.bias tensor([0.9874])


In [3]:
import numpy as np

def sigmoid(x):
    """Logistic function 1 / (1 + e^(-x)), applied element-wise.

    Accepts a scalar or a NumPy array and returns a value of matching shape.
    """
    denominator = 1 + np.exp(-x)
    return 1 / denominator

def sigmoid_derivative(x):
    """Derivative of the sigmoid, expressed through the sigmoid's OUTPUT.

    ``x`` must already be an activation a = sigmoid(z), not the pre-activation
    z: the function returns a * (1 - a), which equals d(sigmoid)/dz at z.
    Works element-wise on scalars and NumPy arrays.
    """
    complement = 1 - x
    return x * complement

# Initialize weights and biases. Weight matrices are laid out as
# (units_out, units_in), so a layer's pre-activation is W @ a_prev + b.
W0 = np.array([[0.5, 0.5]])  # Weights from input to hidden layer, shape (1, 2)
b0 = np.array([1.0])         # Biases from input to hidden layer, shape (1,)
W1 = np.array([[0.5]])       # Weights from hidden to output layer, shape (1, 1)
b1 = np.array([1.0])         # Biases from hidden to output layer, shape (1,)

# Input data
input_data = np.array([1.0, 2.0])

# Target data
target = np.array([0.4])

# Learning rate
lr = 0.1

# Forward propagation
Z1 = np.dot(W0, input_data) + b0
A1 = sigmoid(Z1)
Z2 = np.dot(W1, A1) + b1
A2 = sigmoid(Z2)

# Error at the output layer: dL/dZ2 for L = mean((A2 - target)^2).
# The 1/A2.size factor comes from the mean; it is exactly 1 for a single
# output but keeps the formula correct if the output layer is widened.
delta_2 = 2 * (A2 - target) / A2.size * sigmoid_derivative(A2)

# Backpropagate the error to the hidden layer: dL/dZ1 = (W1^T @ delta_2) * s'(Z1).
# A matrix product (not an element-wise one) is required here so the result
# has one entry per hidden unit for any layer width.
# This must be computed BEFORE W1 is updated below.
delta_1 = np.dot(W1.T, delta_2) * sigmoid_derivative(A1)

# Gradient-descent parameter update. Each weight gradient is the outer product
# of the layer's error and the activations feeding that layer.
W1 -= np.outer(lr * delta_2, A1)
b1 -= lr * delta_2
W0 -= lr * np.outer(delta_1, input_data)
b0 -= lr * delta_1

# Output the results
print("Updated weights and biases from input to hidden layer:")
print("W0:", W0)
print("b0:", b0)
print("Updated weights and biases from hidden to output layer:")
print("W1:", W1)
print("b1:", b1)

Updated weights and biases from input to hidden layer:
W0: [[0.49955898 0.49911796]]
b0: [0.99955898]
Updated weights and biases from hidden to output layer:
W1: [[0.48837245]]
b1: [0.987418]
