<a href="https://colab.research.google.com/github/olaf-ys/Forward-and-Backward-Propagation-in-MLP/blob/main/Forward%26Backward_Propagation_with_PyTorch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [43]:
import torch
import torch.nn as nn
import torch.optim as optim

# 定义一个简单的MLP模型
class SimpleMLP(nn.Module):
    def __init__(self):
        super(SimpleMLP, self).__init__()
        # 第一层到第二层的线性变换，输入维度为2，输出维度为1，包含偏置
        self.layer0 = nn.Linear(2, 1)
        # 第二层到第三层的线性变换，输入维度为1，输出维度为1，包含偏置
        self.layer1 = nn.Linear(1, 1)
        # Sigmoid激活函数
        self.activation = nn.Sigmoid()

    def forward(self, x):
        # 通过第一层
        x = self.layer0(x)
        x = self.activation(x)
        # 通过第二层
        x = self.layer1(x)
        x = self.activation(x)
        return x

# 创建模型实例
model = SimpleMLP()

# 初始化权重和偏置
with torch.no_grad():
    model.layer0.weight.fill_(0.5)
    model.layer0.bias.fill_(1.0)
    model.layer1.weight.fill_(0.5)
    model.layer1.bias.fill_(1.0)

# 输入数据
input = torch.tensor([1.0, 2.0])

# 进行一次前向传播
output = model(input)
print("Output of the MLP:", output.item())

target = torch.tensor([0.4])

# 定义损失函数和优化器
criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=0.1)

# 前向传播
output = model(input)
loss = criterion(output, target)

# 反向传播和优化
optimizer.zero_grad()
loss.backward()
optimizer.step()

# 反向传播后查看梯度
print("\nGradients after backward:")
for name, param in model.named_parameters():
    print(f"{name} grad: {param.grad}")

# 输出更新后的权重和偏置
print("\nUpdated weights and biases:")
for name, param in model.named_parameters():
    print(name, param.data)

Output of the MLP: 0.8118491768836975

Gradients after backward:
layer0.weight grad: tensor([[0.0044, 0.0088]])
layer0.bias grad: tensor([0.0044])
layer1.weight grad: tensor([[0.1163]])
layer1.bias grad: tensor([0.1258])

Updated weights and biases:
layer0.weight tensor([[0.4996, 0.4991]])
layer0.bias tensor([0.9996])
layer1.weight tensor([[0.4884]])
layer1.bias tensor([0.9874])


In [50]:
import numpy as np

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def sigmoid_derivative(x):
    return x * (1 - x)

# 初始化权重和偏置
W0 = np.array([[0.5, 0.5]])  # 权重从输入到隐藏层
b0 = np.array([1.0])         # 偏置从输入到隐藏层
W1 = np.array([[0.5]])       # 权重从隐藏到输出层
b1 = np.array([1.0])         # 偏置从隐藏到输出层

# 输入数据
input_data = np.array([1.0, 2.0])

# 目标数据
target = np.array([0.4])

# 学习率
lr = 0.1

# 前向传播
Z1 = np.dot(W0, input_data) + b0
A1 = sigmoid(Z1)
Z2 = np.dot(W1, A1) + b1
A2 = sigmoid(Z2)

# 计算输出层的误差
delta_2 = (A2 - target) * sigmoid_derivative(A2)

# 反向传播误差到前一层
delta_1 = delta_2 * W1 * sigmoid_derivative(A1)

# 梯度更新
W1 -= lr * delta_2 * A1.T
b1 -= lr * delta_2
W0 -= lr * np.dot(delta_1, input_data.reshape(1, -1))
b0 -= lr * delta_1.squeeze()

# 输出计算结果
print("Updated weights and biases from input to hidden layer:")
print("W0:", W0)
print("b0:", b0)
print("Updated weights and biases from hidden to output layer:")
print("W1:", W1)
print("b1:", b1)

Updated weights and biases from input to hidden layer:
W0: [[0.49977949 0.49955898]]
b0: [0.99977949]
Updated weights and biases from hidden to output layer:
W1: [[0.49418622]]
b1: [0.993709]
