In [29]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import numpy as np
import os

# Load and preprocess the data.
# ToTensor converts a PIL image / NumPy array into a tensor and rescales the
# pixel values from [0, 255] to [0.0, 1.0]. No Normalize step is applied here,
# so the network sees inputs in [0, 1].
transform = transforms.Compose([transforms.ToTensor()])

# torchvision.datasets.MNIST loads the MNIST dataset (downloaded into ./data
# when missing); the training split is built first, then the test split.
train_dataset, test_dataset = (
    torchvision.datasets.MNIST(root='./data', train=is_train, download=True, transform=transform)
    for is_train in (True, False)
)

# Mini-batches of 100 samples: shuffled for training, fixed order for testing.
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=100, shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=100, shuffle=False)

In [30]:
# 定义神经网络模型
class Net(nn.Module):
    """Fully connected classifier: flatten -> Linear(784, 30) -> ReLU -> Dropout(0.5) -> Linear(30, 10).

    forward() returns the output of the last linear layer (10 values per sample)
    without applying any softmax.
    """

    def __init__(self):
        super().__init__()
        # Layers are registered in the same order in which forward() applies them.
        self.flatten = nn.Flatten()        # each 28*28 image becomes a 784-vector
        self.fc1 = nn.Linear(28 * 28, 30)  # 784 -> 30 hidden units
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.5)     # drops 50% of the hidden activations in training mode
        self.fc2 = nn.Linear(30, 10)       # 30 hidden units -> 10 outputs

    def forward(self, x):
        """Run one forward pass and return the 10 output scores per sample."""
        hidden = self.relu(self.fc1(self.flatten(x)))
        return self.fc2(self.dropout(hidden))
    
# Instantiate the model, then define the loss function and the optimizer.
model = Net()
criterion = nn.CrossEntropyLoss()  # cross-entropy loss
optimizer = optim.Adam(params=model.parameters(), lr=1e-2)

In [31]:
# Train the model; after every epoch report the mean training loss and the
# accuracy on the test set.
num_epochs = 50

for epoch in range(num_epochs):
    model.train()  # training mode, so that Dropout is active in the forward pass
    running_loss = 0.0
    for inputs, labels in train_loader:
        # Disabled experiment (add_noise is not defined in the cells shown here):
        # inputs_noise1 = add_noise(inputs)
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    # Mean of the per-batch losses over the epoch.
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}')

    # Evaluate on the test set.
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():  # no gradients needed for evaluation: faster and uses less memory
        for inputs, labels in test_loader:
            outputs = model(inputs)
            # Predicted class = index of the largest output score. argmax replaces
            # the legacy torch.max(outputs.data, 1) idiom and its unused first value.
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    # Report the number of images actually evaluated instead of a hard-coded count.
    print(f'Accuracy of the network on the {total} test images: {100 * correct / total:.2f}%')

print('Finished Training')

Epoch [1/50], Loss: 0.7598
Accuracy of the network on the 10000 test images: 91.19%
Epoch [2/50], Loss: 0.6512
Accuracy of the network on the 10000 test images: 91.67%
Epoch [3/50], Loss: 0.6284
Accuracy of the network on the 10000 test images: 92.73%
Epoch [4/50], Loss: 0.6247
Accuracy of the network on the 10000 test images: 92.35%
Epoch [5/50], Loss: 0.6181
Accuracy of the network on the 10000 test images: 92.38%
Epoch [6/50], Loss: 0.6072
Accuracy of the network on the 10000 test images: 92.70%
Epoch [7/50], Loss: 0.6016
Accuracy of the network on the 10000 test images: 92.96%
Epoch [8/50], Loss: 0.6008
Accuracy of the network on the 10000 test images: 93.29%
Epoch [9/50], Loss: 0.5912
Accuracy of the network on the 10000 test images: 93.10%
Epoch [10/50], Loss: 0.5911
Accuracy of the network on the 10000 test images: 92.93%
Epoch [11/50], Loss: 0.5896
Accuracy of the network on the 10000 test images: 93.31%
Epoch [12/50], Loss: 0.5827
Accuracy of the network on the 10000 test imag

In [28]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import numpy as np
import math
# Load and preprocess the data: ToTensor is the only transform applied.
transform = transforms.Compose([transforms.ToTensor()])
train_dataset, test_dataset = (
    torchvision.datasets.MNIST(root='./data', train=is_train, download=True, transform=transform)
    for is_train in (True, False)
)

# Mini-batches of 100 samples: shuffled for training, fixed order for testing.
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=100, shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=100, shuffle=False)

class Custom(nn.Module):
    """Activation with one learnable parameter sigma: x * (1 + erf(x / sigma)) / 2.

    With sigma = sqrt(2) this is the exact (erf-based) GELU.

    Args:
        init_sigma: Optional initial value for sigma. When None (the default)
            sigma is drawn from a standard normal distribution, exactly as
            before. Because sigma is a divisor, that draw can be negative or
            very close to zero, so passing an explicit non-zero value is the
            safer choice.

    Raises:
        ValueError: If init_sigma is zero (the activation divides by sigma).
    """

    def __init__(self, init_sigma=None):
        super().__init__()
        if init_sigma is None:
            # Backward-compatible default: a single standard-normal sample.
            initial = torch.randn(1)
        else:
            sigma0 = float(init_sigma)
            if sigma0 == 0.0:
                raise ValueError("init_sigma must be non-zero")
            initial = torch.full((1,), sigma0)
        self.sigma = nn.Parameter(initial)  # one learnable parameter, shape (1,)

    def forward(self, input):
        """Apply the activation element-wise and return the result."""
        return input * (1 + torch.erf(input / self.sigma)) / 2

class NetGELU(nn.Module):
    """Fully connected classifier: flatten -> Linear(784, 30) -> Custom -> Dropout(0.5) -> Linear(30, 10).

    Uses the Custom activation (with its learnable sigma) on the hidden layer.
    forward() returns the output of the last linear layer (10 values per sample).
    """

    def __init__(self):
        super().__init__()
        # Layers are registered in the same order in which forward() applies them.
        self.flatten = nn.Flatten()        # each 28*28 image becomes a 784-vector
        self.fc1 = nn.Linear(28 * 28, 30)  # 784 -> 30 hidden units
        self.custom1 = Custom()            # custom activation layer
        self.dropout = nn.Dropout(0.5)     # drops 50% of the hidden activations in training mode
        self.fc2 = nn.Linear(30, 10)       # 30 hidden units -> 10 outputs

    def forward(self, x):
        """Run one forward pass and return the 10 output scores per sample."""
        activated = self.custom1(self.fc1(self.flatten(x)))
        return self.fc2(self.dropout(activated))

# Instantiate the model on the GPU when one is available, otherwise on the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model = NetGELU()
model.to(device)

# Replace the randomly initialised sigma of the custom activation with the
# fixed starting value 4 (in place, so the optimizer below sees the same parameter).
z = model.custom1.sigma
nn.init.constant_(z, val=4)

# Loss function and optimizer.
criterion = nn.CrossEntropyLoss()  # cross-entropy loss
optimizer = optim.Adam(params=model.parameters(), lr=1e-2)

num_epochs = 50
#std_dev = 0.01  # 高斯噪声的标准差

# Train the model; after every epoch report the mean training loss and the
# accuracy on the test set.
for epoch in range(num_epochs):
    model.train()  # training mode, so that Dropout is active in the forward pass
    running_loss = 0.0
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        optimizer.zero_grad()
        loss.backward()
        # Disabled experiment: add Gaussian noise to the gradients before the
        # optimizer step. `std_dev` must be defined before re-enabling it.
        # with torch.no_grad():
        #     for param in model.parameters():
        #         param.grad += std_dev * torch.randn(param.grad.shape).to(param.grad.device)
        optimizer.step()
        running_loss += loss.item()

    # Mean of the per-batch losses over the epoch.
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}')

    # Evaluate on the test set.
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():  # no gradients needed for evaluation: faster and uses less memory
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            # Predicted class = index of the largest output score. argmax replaces
            # the legacy torch.max(outputs.data, 1) idiom and its unused first value.
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    # Report the number of images actually evaluated instead of a hard-coded count.
    print(f'Accuracy of the network on the {total} test images: {100 * correct / total:.2f}%')

print('Finished Training')

Epoch [1/50], Loss: 0.5303
Accuracy of the network on the 10000 test images: 93.23%
Epoch [2/50], Loss: 0.4128
Accuracy of the network on the 10000 test images: 94.13%
Epoch [3/50], Loss: 0.3888
Accuracy of the network on the 10000 test images: 93.90%
Epoch [4/50], Loss: 0.3725
Accuracy of the network on the 10000 test images: 94.29%
Epoch [5/50], Loss: 0.3646
Accuracy of the network on the 10000 test images: 95.05%
Epoch [6/50], Loss: 0.3523
Accuracy of the network on the 10000 test images: 94.81%
Epoch [7/50], Loss: 0.3488
Accuracy of the network on the 10000 test images: 94.85%
Epoch [8/50], Loss: 0.3447
Accuracy of the network on the 10000 test images: 95.34%
Epoch [9/50], Loss: 0.3426
Accuracy of the network on the 10000 test images: 95.14%
Epoch [10/50], Loss: 0.3356
Accuracy of the network on the 10000 test images: 95.37%
Epoch [11/50], Loss: 0.3330
Accuracy of the network on the 10000 test images: 95.51%
Epoch [12/50], Loss: 0.3314
Accuracy of the network on the 10000 test imag