### 关于神经网络的理论知识(包括卷积神经网络)
参考：https://nndl.github.io/nndl-book.pdf

In [1]:
# 1 导入需要的内容
import torch
from torch import nn, optim
from torch.utils.data import DataLoader
from torchvision.datasets import mnist
import numpy as np
import matplotlib.pyplot as plt
from torchvision import transforms



In [2]:
# 2 Image preprocessing
# Convert each image to a tensor, then normalize it with mean 0.5 and
# std 0.5 (a single value each, i.e. one channel).
data_tf = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5], std=[0.5]),
])


In [3]:
# 3 Model: four convolution blocks followed by a stack of three Linear layers.
# Uses batch normalization, dropout and ReLU activations.
class SelfMadeNN(nn.Module):
    """Small convolutional classifier.

    The flattened feature size is hard-coded to 4 * 4 * 128, which matches
    single-channel 28x28 inputs (see the size arithmetic in ``__init__``).

    Args:
        n_hidden_1: Width of the first fully connected hidden layer.
        n_hidden_2: Width of the second fully connected hidden layer.
        out_dim: Number of output scores produced per sample.
    """

    def __init__(self, n_hidden_1, n_hidden_2, out_dim):
        super().__init__()

        # Each 3x3 convolution has no padding and stride 1, so it shrinks
        # the spatial size by 2; each 2x2 max-pool with stride 2 halves it.
        # 28 -> 26
        self.layer1 = self._conv_block(1, 16)
        # 26 -> 24, pooled to 12
        self.layer2 = self._conv_block(16, 32, pool=True)
        # 12 -> 10
        self.layer3 = self._conv_block(32, 64)
        # 10 -> 8, pooled to 4
        self.layer4 = self._conv_block(64, 128, pool=True)

        flat_features = 4 * 4 * 128
        self.layer5 = nn.Sequential(
            nn.Dropout(),
            nn.Linear(flat_features, n_hidden_1),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(n_hidden_1, n_hidden_2),
            nn.ReLU(inplace=True),
            nn.Linear(n_hidden_2, out_dim),
        )

    @staticmethod
    def _conv_block(in_channels, out_channels, pool=False):
        """Build Conv2d -> BatchNorm2d -> ReLU, optionally followed by MaxPool2d.

        Batch normalization sits between the convolution and the activation.
        """
        stages = [
            nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=3),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
        ]
        if pool:
            stages.append(nn.MaxPool2d(kernel_size=2, stride=2))
        return nn.Sequential(*stages)

    def forward(self, x):
        """Run the convolution blocks, flatten per sample, apply the Linear stack."""
        # Convolutional feature extractor
        for conv_stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            x = conv_stage(x)

        # Collapse everything except the batch dimension into one vector
        flat = x.view(x.size(0), -1)
        return self.layer5(flat)



In [7]:
# 4 Hyperparameters
# Layer widths handed to the model constructor
hidden1_nodes = 100
hidden2_nodes = 200
output_nodes = 10

# Training settings
num_epoch = 10
batch_size = 64
learning_rate = 1e-3
momentum = 0.9

In [8]:
# Instantiate the model with the hyperparameters above; the bare expression
# makes the notebook print the module's layer structure as the cell output.
SelfMadeNN(hidden1_nodes, hidden2_nodes, output_nodes)

SelfMadeNN(
  (layer1): Sequential(
    (0): Conv2d(1, 16, kernel_size=(3, 3), stride=(1, 1))
    (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
  )
  (layer2): Sequential(
    (0): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1))
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (layer3): Sequential(
    (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
  )
  (layer4): Sequential(
    (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1))
    (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
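  (layer5): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=2048, out_features=100, bias=True)
    (2): ReLU(inplace=True)
    (3): Dropout(p=0.5, inplace=False)
    (4): Linear(in_features=100, out_features=200, bias=True)
    (5): ReLU(inplace=True)
    (6): Linear(in_features=200, out_features=10, bias=True)
  )
)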

In [9]:
# Plotting
def plot_image(acces, eval_losses, eval_acces):
    """Show three line charts, one figure per metric history.

    Args:
        acces: Sequence of training accuracies, plotted under 'train acc'.
        eval_losses: Sequence of test losses, plotted under 'test loss'.
        eval_acces: Sequence of test accuracies, plotted under 'test acc'.
    """
    charts = (
        (acces, 'train acc'),
        (eval_losses, 'test loss'),
        (eval_acces, 'test acc'),
    )
    for history, caption in charts:
        # x axis is simply the index of each recorded value
        steps = np.arange(len(history))
        plt.plot(steps, history)
        plt.title(caption)
        plt.show()

In [11]:
def _run_epoch(model, loader, criterion, optimizer=None):
    """Run one pass over ``loader`` and return ``(mean_loss, accuracy)``.

    When ``optimizer`` is given, parameters are updated after every batch;
    otherwise the pass is evaluation-only and gradient tracking is disabled.
    The caller is responsible for switching ``model.train()`` / ``model.eval()``.

    Both metrics are weighted by batch size so that a smaller final batch
    does not count as much as a full one. This assumes ``criterion`` returns
    the mean loss over the batch (the default for ``nn.CrossEntropyLoss``).

    Returns:
        Tuple of per-sample mean loss and fraction of correct predictions;
        ``(nan, nan)`` if the loader yields no samples.
    """
    training = optimizer is not None
    loss_sum = 0.0
    num_correct = 0
    num_seen = 0

    # Autograd graphs are only needed when parameters are being updated.
    with torch.set_grad_enabled(training):
        for im, label in loader:
            # Forward pass
            out = model(im)
            loss = criterion(out, label)

            if training:
                # Backward pass and parameter update
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            batch_n = label.size(0)
            # Undo the per-batch mean so batches of different size combine correctly
            loss_sum += loss.item() * batch_n
            _, pred = out.max(1)
            num_correct += (pred == label).sum().item()
            num_seen += batch_n

    if num_seen == 0:
        return float("nan"), float("nan")
    return loss_sum / num_seen, num_correct / num_seen


def train_and_test(batch_size=64, learning_rate=0.001, momentum=0.9, num_epoch=10,
                   hidden1_nodes=100, hidden2_nodes=200, output_nodes=10):
    """Train ``SelfMadeNN`` on MNIST, evaluate after every epoch, then plot.

    Downloads MNIST into ``./data`` if needed, trains with SGD + momentum and
    cross-entropy loss, prints per-epoch train/test loss and accuracy, and
    finally calls ``plot_image`` with the training-accuracy, test-loss and
    test-accuracy histories.

    Args:
        batch_size: Number of samples per mini-batch for both loaders.
        learning_rate: SGD learning rate.
        momentum: SGD momentum factor.
        num_epoch: Number of passes over the training set.
        hidden1_nodes: Width of the model's first fully connected hidden layer.
        hidden2_nodes: Width of the model's second fully connected hidden layer.
        output_nodes: Number of model outputs (classes).
    """
    # 1 Prepare the datasets
    # 1.1 Download (if necessary) and load them
    train_set = mnist.MNIST('./data', train=True, transform=data_tf, download=True)
    test_set = mnist.MNIST('./data', train=False, transform=data_tf, download=True)
    print(train_set[0][0].shape)
    # 1.2 Split into mini-batches; only the training data is shuffled
    train_data = DataLoader(train_set, batch_size=batch_size, shuffle=True)
    test_data = DataLoader(test_set, batch_size=batch_size, shuffle=False)

    # 2 Build the model, loss function and optimizer
    model = SelfMadeNN(hidden1_nodes, hidden2_nodes, output_nodes)
    criterion = nn.CrossEntropyLoss()
    # Stochastic gradient descent with momentum
    optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=momentum)

    # 3 Train, evaluating on the test set after every epoch
    acces = []
    eval_losses = []
    eval_acces = []

    for e in range(num_epoch):
        model.train()
        train_loss, train_acc = _run_epoch(model, train_data, criterion, optimizer)
        acces.append(train_acc)

        # Switch to inference mode before measuring test performance
        model.eval()
        eval_loss, eval_acc = _run_epoch(model, test_data, criterion)
        eval_losses.append(eval_loss)
        eval_acces.append(eval_acc)

        print('epoch: {}, Train Loss: {:.6f}, Train Acc: {:.6f}, Eval Loss: {:.6f}, Eval Acc: {:.6f}'
              .format(e, train_loss, train_acc, eval_loss, eval_acc))

    print("train and test-------------------Done!")
    plot_image(acces, eval_losses, eval_acces)


### 由于本地运行比较慢，下图结果是使用 Google Colab 运行得到的
![jupyter](./demo30.png)