In [None]:
import os
import torch
import torchvision
from torchvision import transforms
import datasets
from torch.utils.tensorboard import SummaryWriter
import torch.nn as nn
import matplotlib.pyplot as plt

# Preprocessing pipelines.
# Both convert the input image to a tensor and then normalize the single
# grayscale channel with mean 0.5 and std 0.5 (roughly the range [-1, 1]).

# Evaluation pipeline: deterministic preprocessing only.
test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5], std=[0.5]),
])

# Training pipeline: the same preprocessing followed by two independent
# random flips, each applied with probability 0.5.
# NOTE(review): flipping digits may not be label-preserving (e.g. 6 vs 9);
# confirm this augmentation is intended.
train_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5], std=[0.5]),
    transforms.RandomVerticalFlip(p=0.5),
    transforms.RandomHorizontalFlip(p=0.5),
])

# Training split.
train_data = torchvision.datasets.MNIST(
    root='./data/',             # directory holding the dataset files
    train=True,                 # use the training split
    transform=train_transform,  # preprocessing + augmentation
    # download=True skips the download when the files are already present,
    # and also covers the case where './data/' exists but is empty.
    download=True,
)

# Preview 16 randomly chosen training images in a 4x4 grid.
figure = plt.figure(figsize=(8, 8))
sample_idx = torch.randint(len(train_data), size=(16,))
for i, pict_index in enumerate(sample_idx.tolist()):
    # Index with the sampled position, not the loop counter; otherwise the
    # grid always shows the first 16 images of the dataset.
    img, label = train_data[pict_index]
    figure.add_subplot(4, 4, i + 1)
    plt.title(str(label))
    plt.imshow(img.squeeze(), cmap="gray")
plt.show()

# Test split.
test_data = torchvision.datasets.MNIST(
    root='./data/',             # directory holding the dataset files
    train=False,                # use the test split
    transform=test_transform,   # deterministic preprocessing only
    download=True,
)

# Mini-batch size shared by both loaders (128 images per batch).
Batch_size = 128

# Training batches are reshuffled on every pass over the data.
train_data_loader = torch.utils.data.DataLoader(
    train_data, batch_size=Batch_size, shuffle=True
)

# Test batches keep the dataset order.
test_data_loader = torch.utils.data.DataLoader(
    test_data, batch_size=Batch_size, shuffle=False
)
        
# 构建卷积神经网络模型---LeNet-5
class Model(torch.nn.Module):
    """LeNet-5 style CNN producing 10 raw class scores (logits) per image.

    Layout: two 5x5 convolution + ReLU + 2x2 max-pool stages, followed by
    three fully connected layers with dropout (p=0.3) after the first two.
    ``fc1`` expects 256 flattened features, which is what the convolutional
    stages produce for a 1x28x28 input (16 channels of 4x4).
    """

    def __init__(self):
        super(Model, self).__init__()
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.relu1 = nn.ReLU()
        self.pool1 = nn.MaxPool2d(2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.relu2 = nn.ReLU()
        self.pool2 = nn.MaxPool2d(2)
        self.fc1 = nn.Linear(256, 120)
        self.relu3 = nn.ReLU()
        self.fc2 = nn.Linear(120, 84)
        self.relu4 = nn.ReLU()
        self.fc3 = nn.Linear(84, 10)
        # Kept so the module structure is unchanged for existing code; it is
        # deliberately not applied in forward() (see there).
        self.relu5 = nn.ReLU()
        self.dp = torch.nn.Dropout(p=0.3)

    def forward(self, x):
        """Return unnormalized class scores of shape (batch, 10).

        Args:
            x: Image batch; the first dimension is treated as the batch
                dimension and everything after the conv stages is flattened.
        """
        y = self.relu1(self.conv1(x))
        y = self.pool1(y)
        y = self.relu2(self.conv2(y))
        y = self.pool2(y)
        # Flatten everything except the batch dimension.
        y = y.view(y.shape[0], -1)
        y = self.relu3(self.fc1(y))
        y = self.dp(y)
        y = self.relu4(self.fc2(y))
        y = self.dp(y)
        # No activation on the output layer: CrossEntropyLoss expects raw
        # logits, and a ReLU here would clamp every negative score to zero
        # and block its gradient.
        return self.fc3(y)

def _count_correct(model, data_loader, device):
    """Return how many samples in ``data_loader`` the model classifies correctly.

    Gradients are disabled for the whole pass. The caller is responsible for
    switching the model between eval and train mode.
    """
    correct = 0
    with torch.no_grad():
        for batch in data_loader:
            images = batch[0].to(device)
            labels = batch[1].to(device)
            # Index of the largest score in each row is the predicted class.
            _, predicted = torch.max(model(images), 1)
            correct += (predicted == labels).sum().item()
    return correct


# Optimizers to compare; each one trains a freshly initialized model.
optims = ['Adagrad', 'RMSprop', 'Adam', 'SGD']

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# torch.save does not create missing parent directories.
os.makedirs('./model', exist_ok=True)

for optim in optims:
    model = Model().to(device)

    # Training mode: dropout is active.
    model.train()

    # Build the optimizer; every candidate uses the same learning rate.
    if optim == 'Adagrad':
        optimizer = torch.optim.Adagrad(model.parameters(), lr=0.005, lr_decay=0, weight_decay=0, initial_accumulator_value=0, eps=1e-10)
    elif optim == 'RMSprop':
        optimizer = torch.optim.RMSprop(model.parameters(), lr=0.005, alpha=0.99, eps=1e-08, weight_decay=0, momentum=0, centered=False)
    elif optim == 'Adam':
        optimizer = torch.optim.Adam(model.parameters(), lr=0.005, betas=(0.9, 0.999), eps=1e-08, weight_decay=0, amsgrad=False)
    elif optim == 'SGD':
        optimizer = torch.optim.SGD(model.parameters(), lr=0.005, momentum=0, dampening=0, weight_decay=0, nesterov=False)
    else:
        raise ValueError('optimizer error')

    # One TensorBoard run directory per optimizer.
    writer = SummaryWriter(f'./log/{optim}')

    loss_func = torch.nn.CrossEntropyLoss()

    # Number of passes over the training set.
    n_epochs = 10

    best_test_acc = 0.
    steps_per_epoch = len(train_data_loader)

    for epoch in range(n_epochs):
        train_loss = 0.    # sum of per-sample losses over the epoch
        train_correct = 0  # correctly classified training samples
        print("Epoch {}/{}".format(epoch, n_epochs))
        print("--------------------------------------------")
        for step, sample in enumerate(train_data_loader):
            # Step index that keeps increasing across epochs, so points from
            # different epochs do not overwrite each other in TensorBoard.
            global_step = epoch * steps_per_epoch + step

            img = sample[0].to(device)
            label = sample[1].to(device)

            # One row of 10 class scores per image.
            outputs = model(img)
            _, pred = torch.max(outputs.detach(), 1)

            loss = loss_func(outputs, label)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            batch_loss = loss.item()
            writer.add_scalar('Train Loss', batch_loss, global_step)

            # CrossEntropyLoss returns the batch mean; weight it by the batch
            # size so the epoch average below is a true per-sample mean.
            train_loss += batch_loss * label.size(0)
            train_correct += (pred == label).sum().item()

            if step % 100 == 0:
                # Periodic evaluation on the full test set, without touching
                # the weights.
                model.eval()
                test_correct = _count_correct(model, test_data_loader, device)
                model.train()

                test_acc = 100 * test_correct / len(test_data)
                writer.add_scalar('test_acc', test_acc, global_step)

                if test_acc > best_test_acc:
                    best_test_acc = test_acc
                    # Keep only the best-performing weights for this optimizer.
                    torch.save(model.state_dict(), f"./model/{optim}_model.pk")

        # Mean loss per training sample and training accuracy in percent.
        train_loss_avg = train_loss / len(train_data)
        train_acc = 100 * train_correct / len(train_data)

        print("Loss is:{:.2f}, Train Accuracy is:{:.2f}%".format(train_loss_avg, train_acc))
        print("--------------------------------------------")
        print()

    # Flush pending events and release the event file for this run.
    writer.close()



In [None]:
import os
import torch
import torchvision
from torchvision import transforms
import datasets
from torch.utils.tensorboard import SummaryWriter
import torch.nn as nn
import random
import numpy as np


def seed_everything(seed=27):
    """Seed every random number generator used in this notebook.

    Seeds Python's ``random`` module, NumPy and PyTorch (CPU and CUDA), and
    configures cuDNN for repeatable behaviour.

    Args:
        seed: Integer seed applied to all generators.
    """
    random.seed(seed)
    # Exported for child processes; the hash seed of the interpreter that is
    # already running is not changed by this assignment.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    # Some cuDNN routines are non-deterministic unless explicitly asked to be
    # deterministic.
    torch.backends.cudnn.deterministic = True
    # Benchmark mode may pick different algorithms from run to run, so keep
    # it switched off for reproducibility.
    torch.backends.cudnn.benchmark = False

# Fix all random seeds before any data or model is created.
seed_everything()

# Preprocessing pipelines.
# Both convert the input image to a tensor and then normalize the single
# grayscale channel with mean 0.5 and std 0.5 (roughly the range [-1, 1]).

# Evaluation pipeline: deterministic preprocessing only.
test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5], std=[0.5]),
])

# Training pipeline: the same preprocessing followed by two independent
# random flips, each applied with probability 0.5.
# NOTE(review): flipping digits may not be label-preserving (e.g. 6 vs 9);
# confirm this augmentation is intended.
train_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5], std=[0.5]),
    transforms.RandomVerticalFlip(p=0.5),
    transforms.RandomHorizontalFlip(p=0.5),
])

# Training split.
train_data = torchvision.datasets.MNIST(
    root='./data/',             # directory holding the dataset files
    train=True,                 # use the training split
    transform=train_transform,  # preprocessing + augmentation
    # download=True skips the download when the files are already present,
    # and also covers the case where './data/' exists but is empty.
    download=True,
)

# Test split.
test_data = torchvision.datasets.MNIST(
    root='./data/',             # directory holding the dataset files
    train=False,                # use the test split
    transform=test_transform,   # deterministic preprocessing only
    download=True,
)

# Mini-batch size shared by both loaders (32 images per batch).
Batch_size = 32

# Training batches are reshuffled on every pass over the data.
train_data_loader = torch.utils.data.DataLoader(
    train_data, batch_size=Batch_size, shuffle=True
)

# Test batches keep the dataset order.
test_data_loader = torch.utils.data.DataLoader(
    test_data, batch_size=Batch_size, shuffle=False
)
        
# 构建卷积神经网络模型---LeNet-5
class Model(torch.nn.Module):
    """LeNet-5 style CNN producing 10 raw class scores (logits) per image.

    Layout: two 5x5 convolution + ReLU + 2x2 max-pool stages, followed by
    three fully connected layers with dropout (p=0.3) after the first two.
    ``fc1`` expects 256 flattened features, which is what the convolutional
    stages produce for a 1x28x28 input (16 channels of 4x4).
    """

    def __init__(self):
        super(Model, self).__init__()
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.relu1 = nn.ReLU()
        self.pool1 = nn.MaxPool2d(2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.relu2 = nn.ReLU()
        self.pool2 = nn.MaxPool2d(2)
        self.fc1 = nn.Linear(256, 120)
        self.relu3 = nn.ReLU()
        self.fc2 = nn.Linear(120, 84)
        self.relu4 = nn.ReLU()
        self.fc3 = nn.Linear(84, 10)
        # Kept so the module structure is unchanged for existing code; it is
        # deliberately not applied in forward() (see there).
        self.relu5 = nn.ReLU()
        self.dp = torch.nn.Dropout(p=0.3)

    def forward(self, x):
        """Return unnormalized class scores of shape (batch, 10).

        Args:
            x: Image batch; the first dimension is treated as the batch
                dimension and everything after the conv stages is flattened.
        """
        y = self.relu1(self.conv1(x))
        y = self.pool1(y)
        y = self.relu2(self.conv2(y))
        y = self.pool2(y)
        # Flatten everything except the batch dimension.
        y = y.view(y.shape[0], -1)
        y = self.relu3(self.fc1(y))
        y = self.dp(y)
        y = self.relu4(self.fc2(y))
        y = self.dp(y)
        # No activation on the output layer: CrossEntropyLoss expects raw
        # logits, and a ReLU here would clamp every negative score to zero
        # and block its gradient.
        return self.fc3(y)

def _count_correct(model, data_loader, device):
    """Return how many samples in ``data_loader`` the model classifies correctly.

    Gradients are disabled for the whole pass. The caller is responsible for
    switching the model between eval and train mode.
    """
    correct = 0
    with torch.no_grad():
        for batch in data_loader:
            images = batch[0].to(device)
            labels = batch[1].to(device)
            # Index of the largest score in each row is the predicted class.
            _, predicted = torch.max(model(images), 1)
            correct += (predicted == labels).sum().item()
    return correct


# Learning rates to sweep; each one trains a freshly initialized model.
lrs = [1e-4, 3e-4, 5e-4, 1e-3, 3e-3]

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# torch.save does not create missing parent directories.
os.makedirs('./model', exist_ok=True)

for lr in lrs:
    model = Model().to(device)

    # Training mode: dropout is active.
    model.train()

    # One TensorBoard run directory per learning rate.
    writer = SummaryWriter(f'./log/lr2/{lr}')

    optimizer = torch.optim.Adam(model.parameters(), lr=lr, betas=(0.9, 0.999), eps=1e-08, weight_decay=0, amsgrad=False)

    loss_func = torch.nn.CrossEntropyLoss()

    # Number of passes over the training set.
    n_epochs = 50

    best_test_acc = 0.

    for epoch in range(n_epochs):
        train_loss = 0.    # sum of per-sample losses over the epoch
        train_correct = 0  # correctly classified training samples
        for sample in train_data_loader:
            img = sample[0].to(device)
            label = sample[1].to(device)

            # One row of 10 class scores per image.
            outputs = model(img)
            _, pred = torch.max(outputs.detach(), 1)

            loss = loss_func(outputs, label)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # CrossEntropyLoss returns the batch mean; weight it by the batch
            # size so the epoch average below is a true per-sample mean.
            train_loss += loss.item() * label.size(0)
            train_correct += (pred == label).sum().item()

        # Mean loss per training sample and training accuracy in percent.
        train_loss_avg = train_loss / len(train_data)
        train_acc = 100 * train_correct / len(train_data)

        writer.add_scalar('Train Loss', train_loss_avg, epoch)
        writer.add_scalar('train_acc', train_acc, epoch)

        # L2 norm over all parameter gradients. These are the gradients left
        # by the last mini-batch of the epoch; parameters without a gradient
        # are skipped.
        grads = [p.grad.detach().flatten() for p in model.parameters() if p.grad is not None]
        grad_norm = torch.cat(grads).norm(2).item() if grads else 0.0
        writer.add_scalar('Gradient norm', grad_norm, epoch)

        # Evaluate on the test set once per epoch.
        model.eval()
        test_correct = _count_correct(model, test_data_loader, device)
        model.train()

        # Compute the accuracy before comparing it with the best value seen
        # so far; it must not be read before it is assigned.
        test_acc = 100 * test_correct / len(test_data)
        writer.add_scalar('test_acc', test_acc, epoch)

        if test_acc > best_test_acc:
            best_test_acc = test_acc
            # Keep only the best-performing weights for this learning rate.
            torch.save(model.state_dict(), f"./model/{lr}_model.pk")

    # Flush pending events and release the event file for this run.
    writer.close()

