In [47]:
import torch
import torch.nn as nn

import torchvision as tv

In [48]:
class Net(nn.Module):
    """LeNet-5 style CNN for single-channel images.

    Three convolutional stages (Conv2d -> BatchNorm2d -> ReLU, with 2x2 max
    pooling after the first two) are followed by two fully connected layers.
    The network returns per-class log-probabilities.

    The first linear layer expects 120 features, so the third stage must
    produce a 120x1x1 map; that is the case for 28x28 inputs
    (28 -> 28 -> 14 -> 10 -> 5 -> 1 along each spatial axis).

    Args:
        num_clases: Number of output classes, i.e. the width of the last
            linear layer. The misspelled name is kept so existing callers
            that pass it by keyword keep working.
    """

    def __init__(self, num_clases=10):
        super(Net, self).__init__()

        # padding=2 keeps the spatial size unchanged through the 5x5 convolution.
        self.c1 = nn.Sequential(
            nn.Conv2d(1, 6, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(6),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )

        self.c2 = nn.Sequential(
            nn.Conv2d(6, 16, kernel_size=5),
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )

        self.c3 = nn.Sequential(
            nn.Conv2d(16, 120, kernel_size=5),
            nn.BatchNorm2d(120),
            nn.ReLU()
        )

        self.fc1 = nn.Sequential(
            nn.Linear(120, 84),
            nn.ReLU()
        )

        self.fc2 = nn.Sequential(
            # Honour the requested class count instead of a hard-coded 10.
            nn.Linear(84, num_clases),
            # After flattening, classes live on dim 1; naming the dim avoids
            # the implicit-dimension deprecation warning.
            nn.LogSoftmax(dim=1)
        )

    # Forward pass; the backward pass is driven by loss.backward().
    def forward(self, x):
        """Map a batch of images to log-probabilities of shape (batch, num_clases)."""
        out = self.c1(x)
        out = self.c2(out)
        out = self.c3(out)
        # Flatten everything except the batch dimension for the linear layers.
        out = out.reshape(out.size(0), -1)
        out = self.fc1(out)
        out = self.fc2(out)
        return out

# Instantiate the network, move it to the GPU, and print its layer layout.
# `model` is a notebook-level global that the training cell below reads.
model = Net().cuda()
print(model)


Net(
  (c1): Sequential(
    (0): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): BatchNorm2d(6, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (c2): Sequential(
    (0): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
    (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (c3): Sequential(
    (0): Conv2d(16, 120, kernel_size=(5, 5), stride=(1, 1))
    (1): BatchNorm2d(120, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
  )
  (fc1): Sequential(
    (0): Linear(in_features=120, out_features=84, bias=True)
    (1): ReLU()
  )
  (fc2): Sequential(
    (0): Linear(in_features=84, out_features=10, bias=True)
    (1): LogSoftmax()
  )
)


In [49]:
# Gather the model's parameter tensors, then report how many there are
# and the shape of the first one.
params = [*model.parameters()]
print(len(params))
print(params[0].shape)


16
torch.Size([6, 1, 5, 5])


In [51]:
def load_data(root="./", batch_size=128):
    """Build the MNIST training and test data loaders.

    Args:
        root: Directory the dataset is stored in (and downloaded to).
        batch_size: Number of samples per batch for both loaders.

    Returns:
        A ``(trainloader, testloader)`` tuple of ``DataLoader`` objects that
        yield ``(image_tensor, label)`` batches.
    """
    to_tensor = tv.transforms.ToTensor()
    train_dataset = tv.datasets.MNIST(root, download=True, transform=to_tensor)
    # No download flag here, as in the original; presumably this relies on
    # the files fetched by the training-set constructor above.
    test_dataset = tv.datasets.MNIST(root, train=False, transform=to_tensor)
    trainloader = torch.utils.data.DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
    # Evaluation does not benefit from shuffling; a fixed order keeps the
    # reported metric reproducible for a given set of weights.
    testloader = torch.utils.data.DataLoader(test_dataset, shuffle=False, batch_size=batch_size)
    return trainloader, testloader

# Network training
def trans(num_epochs, net=None, loader=None):
    """Train a network with Adam (lr=1e-4) and cross-entropy loss.

    Args:
        num_epochs: Number of full passes over the training batches.
        net: Module to train. Defaults to the notebook-level ``model``.
        loader: Iterable of ``(data, label)`` batches. Defaults to the
            notebook-level ``trainloader``.
    """
    net = model if net is None else net
    loader = trainloader if loader is None else loader

    # Train on whatever device the network's parameters already live on.
    device = next(net.parameters()).device
    # Make sure layers such as BatchNorm run in training mode, even if an
    # earlier evaluation left the network in eval mode.
    net.train()

    # Cross-entropy loss. It applies log-softmax itself; re-normalizing
    # outputs that are already log-probabilities leaves them unchanged.
    lossfunc = torch.nn.CrossEntropyLoss().to(device)

    # Adam options:
    #   params       - iterable of parameters (or parameter-group dicts) to optimize
    #   lr           - learning rate (default 1e-3)
    #   betas        - coefficients for the running averages of the gradient
    #                  and its square (default (0.9, 0.999))
    #   eps          - term added to the denominator for numerical stability
    #                  (default 1e-8)
    #   weight_decay - L2 penalty (default 0)
    optimizer = torch.optim.Adam(net.parameters(), 1e-4)
    for epoch in range(num_epochs):
        for data, label in loader:
            data, label = data.to(device), label.to(device)
            optimizer.zero_grad()
            outputs = net(data)
            loss = lossfunc(outputs, label)
            # Compute gradients.
            loss.backward()
            # Update the weights.
            optimizer.step()

def test_model(model, testloader):
    result = []
    for i,(data,label) in enumerate(testloader):
        data,label = data.cuda(),label.cuda()
        outputs = model(data)
        result.append(acc(outputs,label))
        count = i
    result = sum(result) / len(result)
    return result


# Accuracy
def acc(outputs, label):
    """Return the fraction of rows in ``outputs`` whose arg-max along dim 1 equals ``label``."""
    # max(dim=1) returns (values, indices); the indices are the predicted classes.
    predicted = outputs.max(dim=1)[1]
    matches = predicted.float().eq(label.float())
    return matches.float().mean().item()





# Hyperparameter: train for 10 epochs
num_epochs = 10


# Build the loaders as notebook-level globals; by default trans() reads the
# module-level `model` and `trainloader`.
trainloader, testloader = load_data()
trans(num_epochs)
# Evaluate the trained network on the test loader and report the accuracy.
res = test_model(model, testloader)
print('accuracy:', res)
# Save the network weights
torch.save(model.state_dict(), "./base.pt")

  input = module(input)


accuracy: 0.9879351265822784


## 输入32*32（本笔记本中输入为28*28的MNIST图像，c1卷积使用padding=2，等价于32*32输入）
## c1:6个5*5步长为1的卷积核 --> 输出6个((32-5)/1+1) = 28,即6个28*28特征图
## c2:2*2池化 --> 输出6个 14*14特征图
## c3:16个5*5卷积核卷积 --> 输出16个((14-5)/1+1) = 10,即16个10*10特征图
## c4:2*2池化 --> 输出16个 5*5特征图
## fc1:120个5*5步长为1的卷积核 --> 输出(5x5x16+1)x120 = 48120个连接
## fc2:全连接 训练参数：84*(120+1)=10164
## output:全连接10个节点代表0-9
