In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import numpy as np
 
# 数据下载和标准化
transform = transforms.Compose([transforms.ToTensor(),
                                transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
trainloader = DataLoader(trainset, batch_size=4, shuffle=True, num_workers=0)
test_set = torchvision.datasets.CIFAR10(root='./data', train=False, download=False, transform=transform)
testloader = DataLoader(test_set, batch_size=4, shuffle=False, num_workers=0)
 
classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')
 
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 

 
 
class CNNNet_1(nn.Module):
    """
    构建卷积神经网络
    搭建 两个conv1 conv2 层 两个pool 层 两个全连接层
    """
    def __init__(self):
        super(CNNNet_1, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=16, kernel_size=5, stride=1)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=36, kernel_size=3, stride=1)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.fc1 = nn.Linear(1296, 128)
        self.fc2 = nn.Linear(128, 10)
 
    def forward(self, x):
        x = self.pool1(F.relu(self.conv1(x)))
        x = self.pool2(F.relu(self.conv2(x)))
        # print(x.shape)
        x = x.view(-1, 36 * 6 * 6)
        x = F.relu(self.fc2(F.relu(self.fc1(x))))
        return x
 
class CNNNet_2(nn.Module):
    #增加卷积层和池化层，加深网络结构
    def __init__(self):
        super(CNNNet_2, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=16, kernel_size=5, stride=1)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, stride=1)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv3 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1)
        self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.fc1 = nn.Linear(64 * 4 * 4, 256)
        self.fc2 = nn.Linear(256, 10)

    def forward(self, x):
        x = self.pool1(F.relu(self.conv1(x)))
        x = self.pool2(F.relu(self.conv2(x)))
        x = self.pool3(F.relu(self.conv3(x)))
        x = x.view(-1, 64 * 4 * 4)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

class CNNNet_3(nn.Module):
    #增加卷积核大小和步长，减小卷积层个数，增加全连接层个数
    def __init__(self):
        super(CNNNet_3, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=7, stride=2)
        self.pool1 = nn.MaxPool2d(kernel_size=3, stride=2)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=5, stride=2)
        self.pool2 = nn.MaxPool2d(kernel_size=3, stride=2)
        self.fc1 = nn.Linear(2304, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, 10)

    def forward(self, x):
        x = self.pool1(F.relu(self.conv1(x)))
        x = self.pool2(F.relu(self.conv2(x)))
        x = x.view(-1, 2304)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

Files already downloaded and verified


我们知道，学习率是神经⽹络训练中⼀个⽐较重要的超参数，学习率的好坏，在⼀定程度上能
对神经⽹络最终的学习效果造成影响。我们希望你设计实验探究不同学习率（1e-6~1e-1，⼗倍
率变化）对最终结果的影响，如何理解它对模型训练造成的影响？

In [3]:
net = CNNNet_1()
net = net.to(device)
# print("net:",net)
 
# 训练模型
# 选择优化器
import torch.optim as optim
 
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.1, momentum=0.9)
 
for epoch in range(10):
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # 获取训练数据
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)
 
        # 权重参数梯度清零
        optimizer.zero_grad()
 
        # 正向和反向传播
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
 
        # 显示损失值
        running_loss += loss.item()
        if i % 2000 == 1999:  # print every 2000 mini-batches
            print('[%d,%5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0
print("Finished Training")
 
# 预测模型
correct = 0
tatal = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        images, labels = images.to(device), labels.to(device)
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        tatal += labels.size(0)
        correct += (predicted == labels).sum().item()
print('Accuracy of the netwaork on the 10000 test images: %d %%' % (100 * correct / tatal))
 
#各种类别的准确率
class_correct = list(0 for i in range(10))
class_total = list(0 for i in range(10))
with torch.no_grad():
    for data in testloader:
        images, labels = data
        images, labels = images.to(device), labels.to(device)
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        c = (predicted == labels).squeeze()
        for i in range(4):
            label = labels[i]
            class_correct[label] += c[i].item()
            class_total[label] += 1
 
for i in range(10):
    print('Accuracy of %5s: %2d %%' % (classes[i], 100 * class_correct[i] / class_total[i]))

[1, 2000] loss: 2.303
[1, 4000] loss: 2.303
[1, 6000] loss: 2.303
[1, 8000] loss: 2.303
[1,10000] loss: 2.303
[1,12000] loss: 2.303
[2, 2000] loss: 2.303
[2, 4000] loss: 2.303
[2, 6000] loss: 2.303
[2, 8000] loss: 2.303
[2,10000] loss: 2.303
[2,12000] loss: 2.303
[3, 2000] loss: 2.303
[3, 4000] loss: 2.303
[3, 6000] loss: 2.303
[3, 8000] loss: 2.303
[3,10000] loss: 2.303
[3,12000] loss: 2.303
[4, 2000] loss: 2.303
[4, 4000] loss: 2.303
[4, 6000] loss: 2.303
[4, 8000] loss: 2.303
[4,10000] loss: 2.303
[4,12000] loss: 2.303
[5, 2000] loss: 2.303
[5, 4000] loss: 2.303
[5, 6000] loss: 2.303
[5, 8000] loss: 2.303
[5,10000] loss: 2.303
[5,12000] loss: 2.303
[6, 2000] loss: 2.303
[6, 4000] loss: 2.303
[6, 6000] loss: 2.303
[6, 8000] loss: 2.303
[6,10000] loss: 2.303
[6,12000] loss: 2.303
[7, 2000] loss: 2.303
[7, 4000] loss: 2.303
[7, 6000] loss: 2.303
[7, 8000] loss: 2.303
[7,10000] loss: 2.303
[7,12000] loss: 2.303
[8, 2000] loss: 2.303
[8, 4000] loss: 2.303
[8, 6000] loss: 2.303
[8, 8000] 

In [4]:
net = CNNNet_1()
net = net.to(device)
# print("net:",net)
 
# 训练模型
# 选择优化器
import torch.optim as optim
 
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.01, momentum=0.9)  
 
for epoch in range(10):
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # 获取训练数据
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)
 
        # 权重参数梯度清零
        optimizer.zero_grad()
 
        # 正向和反向传播
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
 
        # 显示损失值
        running_loss += loss.item()
        if i % 2000 == 1999:  # print every 2000 mini-batches
            print('[%d,%5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0
print("Finished Training")
 
# 预测模型
correct = 0
tatal = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        images, labels = images.to(device), labels.to(device)
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        tatal += labels.size(0)
        correct += (predicted == labels).sum().item()
print('Accuracy of the netwaork on the 10000 test images: %d %%' % (100 * correct / tatal))
 
#各种类别的准确率
class_correct = list(0 for i in range(10))
class_total = list(0 for i in range(10))
with torch.no_grad():
    for data in testloader:
        images, labels = data
        images, labels = images.to(device), labels.to(device)
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        c = (predicted == labels).squeeze()
        for i in range(4):
            label = labels[i]
            class_correct[label] += c[i].item()
            class_total[label] += 1
 
for i in range(10):
    print('Accuracy of %5s: %2d %%' % (classes[i], 100 * class_correct[i] / class_total[i]))

[1, 2000] loss: 2.088
[1, 4000] loss: 1.872
[1, 6000] loss: 1.831
[1, 8000] loss: 1.836
[1,10000] loss: 1.826
[1,12000] loss: 1.788
[2, 2000] loss: 1.796
[2, 4000] loss: 1.795
[2, 6000] loss: 1.772
[2, 8000] loss: 1.791
[2,10000] loss: 1.800
[2,12000] loss: 1.795
[3, 2000] loss: 1.799
[3, 4000] loss: 1.769
[3, 6000] loss: 1.769
[3, 8000] loss: 1.786
[3,10000] loss: 1.801
[3,12000] loss: 1.806
[4, 2000] loss: 1.780
[4, 4000] loss: 1.827
[4, 6000] loss: 1.812
[4, 8000] loss: 1.780
[4,10000] loss: 1.819
[4,12000] loss: 1.812
[5, 2000] loss: 1.793
[5, 4000] loss: 1.791
[5, 6000] loss: 1.833
[5, 8000] loss: 1.807
[5,10000] loss: 1.803
[5,12000] loss: 1.884
[6, 2000] loss: 1.873
[6, 4000] loss: 1.816
[6, 6000] loss: 1.831
[6, 8000] loss: 1.865
[6,10000] loss: 1.844
[6,12000] loss: 1.851
[7, 2000] loss: 1.804
[7, 4000] loss: 1.803
[7, 6000] loss: 1.801
[7, 8000] loss: 1.800
[7,10000] loss: 1.808
[7,12000] loss: 1.845
[8, 2000] loss: 1.797
[8, 4000] loss: 1.793
[8, 6000] loss: 1.934
[8, 8000] 

In [5]:
net = CNNNet_1()
net = net.to(device)
# print("net:",net)
 
# 训练模型
# 选择优化器
import torch.optim as optim
 
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)  
for epoch in range(10):
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # 获取训练数据
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)
 
        # 权重参数梯度清零
        optimizer.zero_grad()
 
        # 正向和反向传播
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
 
        # 显示损失值
        running_loss += loss.item()
        if i % 2000 == 1999:  # print every 2000 mini-batches
            print('[%d,%5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0
print("Finished Training")
 
# 预测模型
correct = 0
tatal = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        images, labels = images.to(device), labels.to(device)
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        tatal += labels.size(0)
        correct += (predicted == labels).sum().item()
print('Accuracy of the netwaork on the 10000 test images: %d %%' % (100 * correct / tatal))
 
#各种类别的准确率
class_correct = list(0 for i in range(10))
class_total = list(0 for i in range(10))
with torch.no_grad():
    for data in testloader:
        images, labels = data
        images, labels = images.to(device), labels.to(device)
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        c = (predicted == labels).squeeze()
        for i in range(4):
            label = labels[i]
            class_correct[label] += c[i].item()
            class_total[label] += 1
 
for i in range(10):
    print('Accuracy of %5s: %2d %%' % (classes[i], 100 * class_correct[i] / class_total[i]))

[1, 2000] loss: 2.006
[1, 4000] loss: 1.629
[1, 6000] loss: 1.503
[1, 8000] loss: 1.446
[1,10000] loss: 1.349
[1,12000] loss: 1.272
[2, 2000] loss: 1.176
[2, 4000] loss: 1.155
[2, 6000] loss: 1.139
[2, 8000] loss: 1.102
[2,10000] loss: 1.077
[2,12000] loss: 1.061
[3, 2000] loss: 0.945
[3, 4000] loss: 0.948
[3, 6000] loss: 0.944
[3, 8000] loss: 0.938
[3,10000] loss: 0.943
[3,12000] loss: 0.931
[4, 2000] loss: 0.798
[4, 4000] loss: 0.791
[4, 6000] loss: 0.832
[4, 8000] loss: 0.829
[4,10000] loss: 0.841
[4,12000] loss: 0.837
[5, 2000] loss: 0.701
[5, 4000] loss: 0.712
[5, 6000] loss: 0.723
[5, 8000] loss: 0.750
[5,10000] loss: 0.749
[5,12000] loss: 0.752
[6, 2000] loss: 0.596
[6, 4000] loss: 0.588
[6, 6000] loss: 0.647
[6, 8000] loss: 0.674
[6,10000] loss: 0.664
[6,12000] loss: 0.680
[7, 2000] loss: 0.511
[7, 4000] loss: 0.556
[7, 6000] loss: 0.559
[7, 8000] loss: 0.579
[7,10000] loss: 0.613
[7,12000] loss: 0.616
[8, 2000] loss: 0.436
[8, 4000] loss: 0.469
[8, 6000] loss: 0.500
[8, 8000] 

In [6]:
net = CNNNet_1()
net = net.to(device)
# print("net:",net)
 
# 训练模型
# 选择优化器
import torch.optim as optim
 
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.0001, momentum=0.9)  
for epoch in range(10):
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # 获取训练数据
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)
 
        # 权重参数梯度清零
        optimizer.zero_grad()
 
        # 正向和反向传播
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
 
        # 显示损失值
        running_loss += loss.item()
        if i % 2000 == 1999:  # print every 2000 mini-batches
            print('[%d,%5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0
print("Finished Training")
 
# 预测模型
correct = 0
tatal = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        images, labels = images.to(device), labels.to(device)
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        tatal += labels.size(0)
        correct += (predicted == labels).sum().item()
print('Accuracy of the netwaork on the 10000 test images: %d %%' % (100 * correct / tatal))
 
#各种类别的准确率
class_correct = list(0 for i in range(10))
class_total = list(0 for i in range(10))
with torch.no_grad():
    for data in testloader:
        images, labels = data
        images, labels = images.to(device), labels.to(device)
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        c = (predicted == labels).squeeze()
        for i in range(4):
            label = labels[i]
            class_correct[label] += c[i].item()
            class_total[label] += 1
 
for i in range(10):
    print('Accuracy of %5s: %2d %%' % (classes[i], 100 * class_correct[i] / class_total[i]))

[1, 2000] loss: 2.302
[1, 4000] loss: 2.298
[1, 6000] loss: 2.294
[1, 8000] loss: 2.285
[1,10000] loss: 2.268
[1,12000] loss: 2.217
[2, 2000] loss: 2.127
[2, 4000] loss: 2.063
[2, 6000] loss: 2.010
[2, 8000] loss: 1.947
[2,10000] loss: 1.912
[2,12000] loss: 1.877
[3, 2000] loss: 1.827
[3, 4000] loss: 1.783
[3, 6000] loss: 1.775
[3, 8000] loss: 1.769
[3,10000] loss: 1.755
[3,12000] loss: 1.734
[4, 2000] loss: 1.660
[4, 4000] loss: 1.619
[4, 6000] loss: 1.583
[4, 8000] loss: 1.577
[4,10000] loss: 1.582
[4,12000] loss: 1.546
[5, 2000] loss: 1.520
[5, 4000] loss: 1.461
[5, 6000] loss: 1.376
[5, 8000] loss: 1.359
[5,10000] loss: 1.337
[5,12000] loss: 1.323
[6, 2000] loss: 1.291
[6, 4000] loss: 1.287
[6, 6000] loss: 1.278
[6, 8000] loss: 1.265
[6,10000] loss: 1.272
[6,12000] loss: 1.252
[7, 2000] loss: 1.208
[7, 4000] loss: 1.207
[7, 6000] loss: 1.205
[7, 8000] loss: 1.194
[7,10000] loss: 1.170
[7,12000] loss: 1.189
[8, 2000] loss: 1.156
[8, 4000] loss: 1.117
[8, 6000] loss: 1.129
[8, 8000] 

In [8]:
net = CNNNet_1()
net = net.to(device)
# print("net:",net)
 
# 训练模型
# 选择优化器
import torch.optim as optim
 
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.00001, momentum=0.9) 
 
for epoch in range(10):
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # 获取训练数据
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)
 
        # 权重参数梯度清零
        optimizer.zero_grad()
 
        # 正向和反向传播
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
 
        # 显示损失值
        running_loss += loss.item()
        if i % 2000 == 1999:  # print every 2000 mini-batches
            print('[%d,%5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0
print("Finished Training")
 
# 预测模型
correct = 0
tatal = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        images, labels = images.to(device), labels.to(device)
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        tatal += labels.size(0)
        correct += (predicted == labels).sum().item()
print('Accuracy of the netwaork on the 10000 test images: %d %%' % (100 * correct / tatal))
 
# 各种类别的准确率
class_correct = list(0 for i in range(10))
class_total = list(0 for i in range(10))
with torch.no_grad():
    for data in testloader:
        images, labels = data
        images, labels = images.to(device), labels.to(device)
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        c = (predicted == labels).squeeze()
        for i in range(4):
            label = labels[i]
            class_correct[label] += c[i].item()
            class_total[label] += 1
 
for i in range(10):
    print('Accuracy of %5s: %2d %%' % (classes[i], 100 * class_correct[i] / class_total[i]))

[1, 2000] loss: 2.300
[1, 4000] loss: 2.298
[1, 6000] loss: 2.296
[1, 8000] loss: 2.292
[1,10000] loss: 2.289
[1,12000] loss: 2.287
[2, 2000] loss: 2.282
[2, 4000] loss: 2.279
[2, 6000] loss: 2.276
[2, 8000] loss: 2.271
[2,10000] loss: 2.264
[2,12000] loss: 2.257
[3, 2000] loss: 2.248
[3, 4000] loss: 2.242
[3, 6000] loss: 2.231
[3, 8000] loss: 2.221
[3,10000] loss: 2.214
[3,12000] loss: 2.201
[4, 2000] loss: 2.191
[4, 4000] loss: 2.172
[4, 6000] loss: 2.167
[4, 8000] loss: 2.148
[4,10000] loss: 2.147
[4,12000] loss: 2.129
[5, 2000] loss: 2.109
[5, 4000] loss: 2.096
[5, 6000] loss: 2.094
[5, 8000] loss: 2.087
[5,10000] loss: 2.083
[5,12000] loss: 2.066
[6, 2000] loss: 2.072
[6, 4000] loss: 2.041
[6, 6000] loss: 2.046
[6, 8000] loss: 2.056
[6,10000] loss: 2.038
[6,12000] loss: 2.039
[7, 2000] loss: 2.036
[7, 4000] loss: 2.013
[7, 6000] loss: 2.031
[7, 8000] loss: 2.008
[7,10000] loss: 1.999
[7,12000] loss: 1.998
[8, 2000] loss: 1.993
[8, 4000] loss: 1.989
[8, 6000] loss: 1.982
[8, 8000] 

In [9]:
net = CNNNet_1()
net = net.to(device)
# print("net:",net)
 
# 训练模型
# 选择优化器
import torch.optim as optim
 
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.000001, momentum=0.9) 
 
for epoch in range(10):
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # 获取训练数据
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)
 
        # 权重参数梯度清零
        optimizer.zero_grad()
 
        # 正向和反向传播
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
 
        # 显示损失值
        running_loss += loss.item()
        if i % 2000 == 1999:  # print every 2000 mini-batches
            print('[%d,%5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0
print("Finished Training")
 
# 预测模型
correct = 0
tatal = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        images, labels = images.to(device), labels.to(device)
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        tatal += labels.size(0)
        correct += (predicted == labels).sum().item()
print('Accuracy of the netwaork on the 10000 test images: %d %%' % (100 * correct / tatal))
 
#各种类别的准确率
class_correct = list(0 for i in range(10))
class_total = list(0 for i in range(10))
with torch.no_grad():
    for data in testloader:
        images, labels = data
        images, labels = images.to(device), labels.to(device)
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        c = (predicted == labels).squeeze()
        for i in range(4):
            label = labels[i]
            class_correct[label] += c[i].item()
            class_total[label] += 1
 
for i in range(10):
    print('Accuracy of %5s: %2d %%' % (classes[i], 100 * class_correct[i] / class_total[i]))

[1, 2000] loss: 2.301
[1, 4000] loss: 2.302
[1, 6000] loss: 2.302
[1, 8000] loss: 2.301
[1,10000] loss: 2.301
[1,12000] loss: 2.302
[2, 2000] loss: 2.301
[2, 4000] loss: 2.301
[2, 6000] loss: 2.301
[2, 8000] loss: 2.301
[2,10000] loss: 2.301
[2,12000] loss: 2.301
[3, 2000] loss: 2.300
[3, 4000] loss: 2.300
[3, 6000] loss: 2.301
[3, 8000] loss: 2.301
[3,10000] loss: 2.301
[3,12000] loss: 2.301
[4, 2000] loss: 2.301
[4, 4000] loss: 2.301
[4, 6000] loss: 2.301
[4, 8000] loss: 2.300
[4,10000] loss: 2.300
[4,12000] loss: 2.300
[5, 2000] loss: 2.301
[5, 4000] loss: 2.300
[5, 6000] loss: 2.301
[5, 8000] loss: 2.300
[5,10000] loss: 2.299
[5,12000] loss: 2.300
[6, 2000] loss: 2.300
[6, 4000] loss: 2.299
[6, 6000] loss: 2.300
[6, 8000] loss: 2.299
[6,10000] loss: 2.300
[6,12000] loss: 2.300
[7, 2000] loss: 2.299
[7, 4000] loss: 2.299
[7, 6000] loss: 2.300
[7, 8000] loss: 2.299
[7,10000] loss: 2.299
[7,12000] loss: 2.299
[8, 2000] loss: 2.299
[8, 4000] loss: 2.299
[8, 6000] loss: 2.299
[8, 8000] 

学习率较小（比如1e-6）时，参数更新的步幅很小，模型收敛速度较慢导致训练时间过长，甚至陷入局部最优解中无法跳出。但是，当学习率过大（比如1e-1）时，参数更新的步幅较大，模型可能会在训练过程中发生震荡或无法收敛。

数据对于⽹络的训练⾄关重要，⽽cifar10却⼜不是⼀个⾜够⼤的数据集，请对已有数据进⾏⼀
些简单操作如翻转、平移、添加噪声等操作进⾏数据增⼴，并研究各种增强⽅法和增强数据占
总数据的⽐例对于结果的影响，并探寻是否真的有必要进⾏数据增强操作？为什么会出现这种
现象？

In [None]:
#day5
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import numpy as np
 
# 数据下载和标准化
transform = transforms.Compose([transforms.RandomHorizontalFlip(),
                                transforms.ColorJitter(),
                                transforms.ToTensor(),
                                transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
transform_1 = transforms.Compose([
                                transforms.ToTensor(),
                                transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
trainloader = DataLoader(trainset, batch_size=4, shuffle=True, num_workers=0)
test_set = torchvision.datasets.CIFAR10(root='./data', train=False, download=False, transform_1)
testloader = DataLoader(test_set, batch_size=4, shuffle=False, num_workers=0)
 
classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')
 
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 

 
 
class CNNNet_1(nn.Module):
    """
    构建卷积神经网络
    搭建 两个conv1 conv2 层 两个pool 层 两个全连接层
    """
    def __init__(self):
        super(CNNNet_1, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=16, kernel_size=5, stride=1)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=36, kernel_size=3, stride=1)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.fc1 = nn.Linear(1296, 128)
        self.fc2 = nn.Linear(128, 10)
 
    def forward(self, x):
        x = self.pool1(F.relu(self.conv1(x)))
        x = self.pool2(F.relu(self.conv2(x)))
        # print(x.shape)
        x = x.view(-1, 36 * 6 * 6)
        x = F.relu(self.fc2(F.relu(self.fc1(x))))
        return x
 
class CNNNet_2(nn.Module):
    #增加卷积层和池化层，加深网络结构
    def __init__(self):
        super(CNNNet_2, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=16, kernel_size=5, stride=1)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, stride=1)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv3 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1)
        self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.fc1 = nn.Linear(64 * 4 * 4, 256)
        self.fc2 = nn.Linear(256, 10)

    def forward(self, x):
        x = self.pool1(F.relu(self.conv1(x)))
        x = self.pool2(F.relu(self.conv2(x)))
        x = self.pool3(F.relu(self.conv3(x)))
        x = x.view(-1, 64 * 4 * 4)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

class CNNNet_3(nn.Module):
    #增加卷积核大小和步长，减小卷积层个数，增加全连接层个数
    def __init__(self):
        super(CNNNet_3, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=7, stride=2)
        self.pool1 = nn.MaxPool2d(kernel_size=3, stride=2)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=5, stride=2)
        self.pool2 = nn.MaxPool2d(kernel_size=3, stride=2)
        self.fc1 = nn.Linear(2304, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, 10)

    def forward(self, x):
        x = self.pool1(F.relu(self.conv1(x)))
        x = self.pool2(F.relu(self.conv2(x)))
        x = x.view(-1, 2304)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x


net = CNNNet_1()
net = net.to(device)
# print("net:",net)
 
# 训练模型
# 选择优化器
import torch.optim as optim
 
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)  # 优化器的不同，将导致学习过拟合现象。
 
for epoch in range(10):
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # 获取训练数据
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)
 
        # 权重参数梯度清零
        optimizer.zero_grad()
 
        # 正向和反向传播
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
 
        # 显示损失值
        running_loss += loss.item()
        if i % 2000 == 1999:  # print every 2000 mini-batches
            print('[%d,%5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0
print("Finished Training")
 
# 预测模型
correct = 0
tatal = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        images, labels = images.to(device), labels.to(device)
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        tatal += labels.size(0)
        correct += (predicted == labels).sum().item()
print('Accuracy of the netwaork on the 10000 test images: %d %%' % (100 * correct / tatal))
 
# 特别看一下各种类别的准确率
class_correct = list(0 for i in range(10))
class_total = list(0 for i in range(10))
with torch.no_grad():
    for data in testloader:
        images, labels = data
        images, labels = images.to(device), labels.to(device)
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        c = (predicted == labels).squeeze()
        for i in range(4):
            label = labels[i]
            class_correct[label] += c[i].item()
            class_total[label] += 1
 
for i in range(10):
    print('Accuracy of %5s: %2d %%' % (classes[i], 100 * class_correct[i] / class_total[i]))

Files already downloaded and verified
[1, 2000] loss: 2.156
[1, 4000] loss: 1.705
[1, 6000] loss: 1.515
[1, 8000] loss: 1.423
[1,10000] loss: 1.336
[1,12000] loss: 1.287
[2, 2000] loss: 1.199
[2, 4000] loss: 1.142
[2, 6000] loss: 1.145
[2, 8000] loss: 1.091
[2,10000] loss: 1.071
[2,12000] loss: 1.041
[3, 2000] loss: 0.983
[3, 4000] loss: 0.965
[3, 6000] loss: 0.979
[3, 8000] loss: 0.956
[3,10000] loss: 0.933
[3,12000] loss: 0.926
[4, 2000] loss: 0.862
[4, 4000] loss: 0.865
[4, 6000] loss: 0.850
[4, 8000] loss: 0.862
[4,10000] loss: 0.867
[4,12000] loss: 0.849
[5, 2000] loss: 0.770
[5, 4000] loss: 0.775
[5, 6000] loss: 0.789
[5, 8000] loss: 0.790
[5,10000] loss: 0.793
[5,12000] loss: 0.801
[6, 2000] loss: 0.724
[6, 4000] loss: 0.706
[6, 6000] loss: 0.743
[6, 8000] loss: 0.760
[6,10000] loss: 0.736
[6,12000] loss: 0.742
[7, 2000] loss: 0.684
[7, 4000] loss: 0.662
[7, 6000] loss: 0.707
[7, 8000] loss: 0.677
[7,10000] loss: 0.714
[7,12000] loss: 0.699
[8, 2000] loss: 0.636
[8, 4000] loss: 

数据增广是一种常见的数据预处理技术，可以有效提高模型的泛化能力和鲁棒性。对于较小的数据集，进行数据增广可以进一步扩充训练集，减少过拟合的风险。


经过你前⼏天的探索，你已经知道了在不同的模型学习状态下相同学习率对于模型的影响不尽
相同，⽽⼀个动态的学习率可以很⼤程度上改善这个问题，所以请你设计⾄少两种动态学习率
⽅案并验证它们的有效性（附代码实现）

自动更新学习率OneCycleLR

In [11]:
#day5
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import numpy as np
import torch.optim as optim
from torch.optim.lr_scheduler import OneCycleLR


# 数据下载和标准化
transform = transforms.Compose([transforms.ToTensor(),
                                transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
trainloader = DataLoader(trainset, batch_size=4, shuffle=True, num_workers=0)
test_set = torchvision.datasets.CIFAR10(root='./data', train=False, download=False, transform=transform)
testloader = DataLoader(test_set, batch_size=4, shuffle=False, num_workers=0)
 
classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')
 
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 

 
 
class CNNNet_1(nn.Module):
    """
    构建卷积神经网络
    搭建 两个conv1 conv2 层 两个pool 层 两个全连接层
    """
    def __init__(self):
        super(CNNNet_1, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=16, kernel_size=5, stride=1)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=36, kernel_size=3, stride=1)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.fc1 = nn.Linear(1296, 128)
        self.fc2 = nn.Linear(128, 10)
 
    def forward(self, x):
        x = self.pool1(F.relu(self.conv1(x)))
        x = self.pool2(F.relu(self.conv2(x)))
        # print(x.shape)
        x = x.view(-1, 36 * 6 * 6)
        x = F.relu(self.fc2(F.relu(self.fc1(x))))
        return x
 

net = CNNNet_1()
net = net.to(device)
# print("net:",net)
 
# 训练模型
# 选择优化器
import torch.optim as optim
 
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)  # 优化器的不同，将导致学习过拟合现象。
scheduler = OneCycleLR(optimizer, max_lr=0.1, epochs=10, steps_per_epoch=len(trainloader))#这个调度器在整个训练过程中会让学习率在一个循环中先上升到一个最大值 0.1，然后再下降回初始值，
#这个过程会持续 10 个 epoch，并且它会在每个 epoch 结束时更新学习率。其中，steps_per_epoch 表示每个 epoch 中包含的训练步数，len(trainloader) 则表示训练数据集的大小除以 batch_size 后的结果，即训练集中总共有多少个 batch。
'''
它分为三个阶段：warm-up 阶段、持续下降阶段和 cool-down 阶段。

在 warm-up 阶段，学习率从初始值线性地增加到一个较高的最大值。这样做可以帮助模型迅速学习到全局信息，并避免陷入局部最优解。

在持续下降阶段，学习率保持在最大值的水平上。这样做的目的是确保模型能够在接近最大学习率的情况下进行充分的探索和学习，以便更好地逼近最优解。

最后，在 cool-down 阶段，学习率逐渐减小到一个较小的最小值，以便模型更稳定地收敛。这个阶段的目标是细化模型参数，提高模型的准确性。'''
for epoch in range(10):
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # 获取训练数据
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)
 
        # 权重参数梯度清零
        optimizer.zero_grad()
 
        # 正向和反向传播
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
 
        # 显示损失值
        running_loss += loss.item()
        if i % 2000 == 1999:  # print every 2000 mini-batches
            print('[%d,%5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0
    scheduler.step()
print("Finished Training")
 
# 预测模型
correct = 0
tatal = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        images, labels = images.to(device), labels.to(device)
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        tatal += labels.size(0)
        correct += (predicted == labels).sum().item()
print('Accuracy of the netwaork on the 10000 test images: %d %%' % (100 * correct / tatal))
 
# 各种类别的准确率
class_correct = list(0 for i in range(10))
class_total = list(0 for i in range(10))
with torch.no_grad():
    for data in testloader:
        images, labels = data
        images, labels = images.to(device), labels.to(device)
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        c = (predicted == labels).squeeze()
        for i in range(4):
            label = labels[i]
            class_correct[label] += c[i].item()
            class_total[label] += 1
 
for i in range(10):
    print('Accuracy of %5s: %2d %%' % (classes[i], 100 * class_correct[i] / class_total[i]))

Files already downloaded and verified
[1, 2000] loss: 2.099
[1, 4000] loss: 1.840
[1, 6000] loss: 1.771
[1, 8000] loss: 1.732
[1,10000] loss: 1.709
[1,12000] loss: 1.705
[2, 2000] loss: 1.678
[2, 4000] loss: 1.682
[2, 6000] loss: 1.659
[2, 8000] loss: 1.675
[2,10000] loss: 1.681
[2,12000] loss: 1.689
[3, 2000] loss: 1.640
[3, 4000] loss: 1.616
[3, 6000] loss: 1.613
[3, 8000] loss: 1.636
[3,10000] loss: 1.657
[3,12000] loss: 1.660
[4, 2000] loss: 1.620
[4, 4000] loss: 1.633
[4, 6000] loss: 1.642
[4, 8000] loss: 1.665
[4,10000] loss: 1.640
[4,12000] loss: 1.664
[5, 2000] loss: 1.580
[5, 4000] loss: 1.660
[5, 6000] loss: 1.616
[5, 8000] loss: 1.665
[5,10000] loss: 1.733
[5,12000] loss: 1.722
[6, 2000] loss: 1.671
[6, 4000] loss: 1.665
[6, 6000] loss: 1.658
[6, 8000] loss: 1.674
[6,10000] loss: 1.673
[6,12000] loss: 1.695
[7, 2000] loss: 1.630
[7, 4000] loss: 1.681
[7, 6000] loss: 1.694
[7, 8000] loss: 1.674
[7,10000] loss: 1.651
[7,12000] loss: 1.659
[8, 2000] loss: 1.676
[8, 4000] loss: 

学习率衰减方案

In [4]:
#day5
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import numpy as np
import torch.optim as optim
from torch.optim.lr_scheduler import StepLR

# 数据下载和标准化
transform = transforms.Compose([transforms.ToTensor(),
                                transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
trainloader = DataLoader(trainset, batch_size=4, shuffle=True, num_workers=0)
test_set = torchvision.datasets.CIFAR10(root='./data', train=False, download=False, transform=transform)
testloader = DataLoader(test_set, batch_size=4, shuffle=False, num_workers=0)
 
classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')
 
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 

 
 
class CNNNet_1(nn.Module):
    """
    构建卷积神经网络
    搭建 两个conv1 conv2 层 两个pool 层 两个全连接层
    """
    def __init__(self):
        super(CNNNet_1, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=16, kernel_size=5, stride=1)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=36, kernel_size=3, stride=1)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.fc1 = nn.Linear(1296, 128)
        self.fc2 = nn.Linear(128, 10)
 
    def forward(self, x):
        x = self.pool1(F.relu(self.conv1(x)))
        x = self.pool2(F.relu(self.conv2(x)))
        # print(x.shape)
        x = x.view(-1, 36 * 6 * 6)
        x = F.relu(self.fc2(F.relu(self.fc1(x))))
        return x
 

net = CNNNet_1()
net = net.to(device)
# print("net:",net)
 
# 训练模型
# 选择优化器
import torch.optim as optim
 
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)  # 优化器的不同，将导致学习过拟合现象。
scheduler = StepLR(optimizer, step_size=3, gamma=0.1)  # 每经过3个epoch后，学习率乘以gamma

for epoch in range(10):
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # 获取训练数据
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)
 
        # 权重参数梯度清零
        optimizer.zero_grad()
 
        # 正向和反向传播
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
 
        # 显示损失值
        running_loss += loss.item()
        if i % 2000 == 1999:  # print every 2000 mini-batches
            print('[%d,%5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0
    scheduler.step()
print("Finished Training")
 
# 预测模型
correct = 0
tatal = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        images, labels = images.to(device), labels.to(device)
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        tatal += labels.size(0)
        correct += (predicted == labels).sum().item()
print('Accuracy of the netwaork on the 10000 test images: %d %%' % (100 * correct / tatal))
 
# 各种类别的准确率
class_correct = list(0 for i in range(10))
class_total = list(0 for i in range(10))
with torch.no_grad():
    for data in testloader:
        images, labels = data
        images, labels = images.to(device), labels.to(device)
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        c = (predicted == labels).squeeze()
        for i in range(4):
            label = labels[i]
            class_correct[label] += c[i].item()
            class_total[label] += 1
 
for i in range(10):
    print('Accuracy of %5s: %2d %%' % (classes[i], 100 * class_correct[i] / class_total[i]))

Files already downloaded and verified
[1, 2000] loss: 2.070
[1, 4000] loss: 1.663
[1, 6000] loss: 1.510
[1, 8000] loss: 1.398
[1,10000] loss: 1.334
[1,12000] loss: 1.270
[2, 2000] loss: 1.168
[2, 4000] loss: 1.146
[2, 6000] loss: 1.140
[2, 8000] loss: 1.096
[2,10000] loss: 1.073
[2,12000] loss: 1.038
[3, 2000] loss: 0.955
[3, 4000] loss: 0.934
[3, 6000] loss: 0.932
[3, 8000] loss: 0.927
[3,10000] loss: 0.921
[3,12000] loss: 0.927
[4, 2000] loss: 0.702
[4, 4000] loss: 0.682
[4, 6000] loss: 0.671
[4, 8000] loss: 0.654
[4,10000] loss: 0.639
[4,12000] loss: 0.636
[5, 2000] loss: 0.586
[5, 4000] loss: 0.615
[5, 6000] loss: 0.617
[5, 8000] loss: 0.611
[5,10000] loss: 0.632
[5,12000] loss: 0.633
[6, 2000] loss: 0.567
[6, 4000] loss: 0.586
[6, 6000] loss: 0.583
[6, 8000] loss: 0.573
[6,10000] loss: 0.596
[6,12000] loss: 0.581
[7, 2000] loss: 0.540
[7, 4000] loss: 0.539
[7, 6000] loss: 0.530
[7, 8000] loss: 0.517
[7,10000] loss: 0.556
[7,12000] loss: 0.536
[8, 2000] loss: 0.515
[8, 4000] loss: 

 众所周知，卷积核是卷积层的核⼼，尝试不同卷积核⼤⼩下的实验情况，特别注意两个极限情
况（极⼩卷积核和极⼤卷积核），理解并叙述在不同的核⼤⼩情况下等效于我们对输⼊矩阵进
⾏了何种操作？

In [5]:
#day5
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import numpy as np
import torch.optim as optim

 
# 数据下载和标准化
transform = transforms.Compose([transforms.ToTensor(),
                                transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
trainloader = DataLoader(trainset, batch_size=4, shuffle=True, num_workers=0)
test_set = torchvision.datasets.CIFAR10(root='./data', train=False, download=False, transform=transform)
testloader = DataLoader(test_set, batch_size=4, shuffle=False, num_workers=0)
 
classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')
 
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 

class CNNNet_1(nn.Module):
    """
    构建卷积神经网络
    搭建 三个conv1 conv2 conv3 层 和两个pool 层 两个全连接层
    """
    def __init__(self):
        super(CNNNet_1, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=16, kernel_size=5, stride=1)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=36, kernel_size=3, stride=1)
        self.conv3 = nn.Conv2d(in_channels=36, out_channels=64, kernel_size=1, stride=1)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.fc1 = nn.Linear(64 * 6 * 6, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        x = self.pool1(F.relu(self.conv1(x)))
        x = self.pool2(F.relu(self.conv2(x)))
        x = F.relu(self.conv3(x))
        x = x.view(-1, 64 * 6 * 6)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x
    
# 小卷积核实验
net_small_kernel = CNNNet_1()
optimizer = optim.SGD(net_small_kernel.parameters(), lr=0.001, momentum=0.9)
criterion = nn.CrossEntropyLoss()


for epoch in range(10):
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # 获取训练数据
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)

        # 权重参数梯度清零
        optimizer.zero_grad()

        # 正向和反向传播
        outputs = net_small_kernel(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # 显示损失值
        running_loss += loss.item()
        if i % 2000 == 1999:
            print('[%d,%5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0

correct = 0
tatal = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        images, labels = images.to(device), labels.to(device)
        outputs = net_small_kernel(images)
        _, predicted = torch.max(outputs.data, 1)
        tatal += labels.size(0)
        correct += (predicted == labels).sum().item()
print('Accuracy of the netwaork on the 10000 test images: %d %%' % (100 * correct / tatal))




Files already downloaded and verified
[1, 2000] loss: 2.134
[1, 4000] loss: 1.742
[1, 6000] loss: 1.562
[1, 8000] loss: 1.472
[1,10000] loss: 1.405
[1,12000] loss: 1.324
[2, 2000] loss: 1.249
[2, 4000] loss: 1.232
[2, 6000] loss: 1.197
[2, 8000] loss: 1.143
[2,10000] loss: 1.124
[2,12000] loss: 1.098
[3, 2000] loss: 0.978
[3, 4000] loss: 1.004
[3, 6000] loss: 0.983
[3, 8000] loss: 0.957
[3,10000] loss: 0.969
[3,12000] loss: 0.952
[4, 2000] loss: 0.836
[4, 4000] loss: 0.848
[4, 6000] loss: 0.830
[4, 8000] loss: 0.830
[4,10000] loss: 0.823
[4,12000] loss: 0.833
[5, 2000] loss: 0.694
[5, 4000] loss: 0.699
[5, 6000] loss: 0.741
[5, 8000] loss: 0.727
[5,10000] loss: 0.735
[5,12000] loss: 0.758
[6, 2000] loss: 0.580
[6, 4000] loss: 0.622
[6, 6000] loss: 0.628
[6, 8000] loss: 0.669
[6,10000] loss: 0.669
[6,12000] loss: 0.638
[7, 2000] loss: 0.491
[7, 4000] loss: 0.516
[7, 6000] loss: 0.546
[7, 8000] loss: 0.550
[7,10000] loss: 0.580
[7,12000] loss: 0.596
[8, 2000] loss: 0.409
[8, 4000] loss: 

大核部分，单独拿出来训练原因在小卷积核实验中，最后一个池化层的输出大小为64 * 6 * 6，用x.view(-1, 64 * 6 * 6)重塑张量形状但在大卷积核实验中，卷积层的参数设置改变了，导致最后一个池化层的输出大小不再是64 * 6 * 6，这部分代码没问题，有问题的拿出去在另一个day6(2)里了，即大核部分，不知道为啥，在这个文件里总是莫名奇妙消失掉

In [None]:
现在有⼀个模拟⼈⼯标注的cifar10数据集，但是因为标注⼈员的疏忽，有很⼩⽐例的验证数据被标注错位，请你使⽤你已经训练好的模型想办法找出这些错误数据，并简单提供查找的⽅法和相关信息（请不要尝试⼈⼯审阅，相信你的模型），数据链接：https://dian-new-member-1307282200.cos.ap-nanjing.myqcloud.com/test_batch

In [22]:
import torch
import torchvision
from torchvision import transforms
import requests
from PIL import Image
import pickle
import numpy as np

# # 下载数据集
# url = "https://dian-new-member-1307282200.cos.ap-nanjing.myqcloud.com/test_batch"
# response = requests.get(url, stream=True)
# response.raise_for_status()

# # 保存数据集到本地
# with open("test_batch", "wb") as file:
#     file.write(response.content)


# 使用pickle从二进制文件中读取数据
with open('./data/cifar-10-batches-py/test_batch', 'rb') as f:
    data = pickle.load(f)

# 将数据集转化为CIFAR-10格式，并将其替换test_batch
test_batch = {'data': data, 'labels': np.zeros((10000,), dtype=np.uint8)}

# 保存结果为二进制文件
with open('test_batch', 'wb') as f:
    pickle.dump(test_batch, f)



class CNNNet_1(nn.Module):
    """
    构建卷积神经网络
    搭建 两个conv1 conv2 层 两个pool 层 两个全连接层
    """
    def __init__(self):
        super(CNNNet_1, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=16, kernel_size=5, stride=1)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=36, kernel_size=3, stride=1)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.fc1 = nn.Linear(1296, 128)
        self.fc2 = nn.Linear(128, 10)
 
    def forward(self, x):
        x = self.pool1(F.relu(self.conv1(x)))
        x = self.pool2(F.relu(self.conv2(x)))
        # print(x.shape)
        x = x.view(-1, 36 * 6 * 6)
        x = F.relu(self.fc2(F.relu(self.fc1(x))))
        return x
 

# 加载模型
net = CNNNet_1()
net.load_state_dict(torch.load('model.pth'))

# 错误数据的索引列表
misclassified_indices = []

# 对验证数据集进行推断并查找错误数据
with torch.no_grad():
    for i, (inputs, labels) in enumerate(testloader):
        inputs = inputs.to(device)
        labels = labels.to(device)

        outputs = net(inputs)
        _, predicted = torch.max(outputs, 1)

        if predicted != labels:
            misclassified_indices.append(i)

# 输出错误数据的数量与索引
print("错误数据数量：", len(misclassified_indices))
print("错误数据索引：", misclassified_indices)


错误数据数量： 3236
错误数据索引： [4, 6, 20, 22, 24, 25, 26, 30, 32, 35, 36, 37, 47, 52, 53, 55, 56, 57, 58, 59, 61, 62, 64, 69, 70, 71, 76, 81, 85, 87, 99, 100, 106, 108, 109, 110, 114, 118, 120, 125, 126, 127, 128, 129, 135, 139, 140, 143, 145, 147, 154, 156, 158, 162, 163, 165, 167, 169, 171, 172, 176, 183, 184, 188, 190, 192, 195, 201, 206, 210, 211, 214, 215, 216, 221, 223, 226, 227, 228, 229, 230, 233, 236, 238, 239, 245, 246, 247, 249, 253, 254, 257, 258, 264, 266, 271, 273, 275, 277, 279, 284, 287, 295, 302, 305, 306, 308, 309, 310, 312, 314, 321, 323, 324, 327, 328, 339, 340, 346, 354, 355, 356, 359, 368, 370, 376, 378, 388, 398, 399, 405, 406, 409, 416, 418, 421, 422, 426, 427, 428, 430, 432, 434, 438, 439, 441, 444, 445, 450, 455, 456, 459, 466, 474, 478, 483, 485, 494, 503, 508, 515, 518, 519, 525, 526, 531, 532, 539, 548, 549, 550, 552, 555, 556, 558, 565, 567, 569, 573, 577, 580, 591, 618, 621, 627, 628, 629, 630, 631, 632, 633, 637, 639, 642, 647, 649, 655, 658, 661, 663, 665, 668, 6