In [1]:
# 代码实现了搭建了网络训练的基本框架
# 在此代码的基础上，需要实现下面的内容（请留意TODO）：
# 1、实现全连接神经网络的前向传播和反向传播（不使用torch.nn搭建网络，不使用backward方法进行反传）
# 2、实现交叉熵损失函数(不使用torch.nn.CrossEntropyLoss)
# 3、实现带动量的SGD优化器（不使用torch.optim.SGD）
# 代码可根据自己需要修改，实现上述内容即可
#  提示：
# 在实现过程中，可使用xxx.shape观察网络和数据的维度
# 可以将自己实现的输出与pytorch函数的输出进行比较(如损失函数与优化器)，观察自己的模块是否正常工作

import numpy as np
import matplotlib.pyplot as plt
import torch
from torchvision.datasets import MNIST
import torchvision.transforms as transforms

In [2]:
torch.set_default_tensor_type(torch.DoubleTensor)

In [3]:
def relu(x):
    y = x.clone()
    y[y<0] = 0
    return y

In [4]:
def crossEntropy(tensor1, tensor2):
    dim = tensor1.dim() - 1
    return - torch.sum(tensor1 * torch.log(tensor2 + 1e-10), dim=dim)

def softmax(tensor, dim=None):
    if dim is None:
        dim = tensor.dim() - 1
        
    max_values, _ = torch.max(tensor, dim=dim, keepdim=True)
    e = torch.exp(tensor-max_values)
    return e / torch.sum(e, dim=dim, keepdim=True)

In [5]:
# TODO：在这里你需要实现一些类来实现上述三个内容
# 类的设计并无具体要求，能实现所需功能即可
# 比如，可以考虑先构建单层全连接层Layer类，再组成整体网络Net类
# 可单独设置Loss类与SGD类，也可以将这些功能的实现放到Net类中

# 一种可能的类的设计为
from torch import nn
from torch.nn import functional as F
class Net(nn.Module):  # TODO:在这里实现全连接神经网络
    def __init__(self, input_size, hidden_size1, hidden_size2, output_size, initial=None):
        super(Net, self).__init__()
        self.w1 = torch.randn((input_size, hidden_size1), dtype=torch.float64)
        self.b1 = torch.randn((hidden_size1,), dtype=torch.float64)
        self.w2 = torch.randn((hidden_size1, hidden_size2), dtype=torch.float64)
        self.b2 = torch.randn((hidden_size2,), dtype=torch.float64)
        self.w3 = torch.randn((hidden_size2, output_size), dtype=torch.float64)
        self.b3 = torch.randn((output_size,), dtype=torch.float64)
        
        nn.init.kaiming_normal_(self.w1)
        nn.init.kaiming_normal_(self.w2)
        nn.init.kaiming_normal_(self.w3)
        
        # 梯度
        self.w1_grad = torch.zeros_like(self.w1, dtype=torch.float64)
        self.b1_grad = torch.zeros_like(self.b1, dtype=torch.float64)
        self.w2_grad = torch.zeros_like(self.w2, dtype=torch.float64)
        self.b2_grad = torch.zeros_like(self.b2, dtype=torch.float64)
        self.w3_grad = torch.zeros_like(self.w3, dtype=torch.float64)
        self.b3_grad = torch.zeros_like(self.b3, dtype=torch.float64)
        
        # 前一次迭代的梯度
        self.w1_t = torch.zeros_like(self.w1, dtype=torch.float64)
        self.b1_t = torch.zeros_like(self.b1, dtype=torch.float64)
        self.w2_t = torch.zeros_like(self.w2, dtype=torch.float64)
        self.b2_t = torch.zeros_like(self.b2, dtype=torch.float64)
        self.w3_t = torch.zeros_like(self.w3, dtype=torch.float64)
        self.b3_t = torch.zeros_like(self.b3, dtype=torch.float64)        
        
    def __call__(self, data):
        if torch.is_tensor(data):
            self.input = data.double()
        else:
            self.input = torch.tensor(data, dtype=torch.float64)
            
        self.h1 = torch.mm(self.input, self.w1) + self.b1
        self.h1_relu = relu(self.h1)
        
        self.h2 = torch.mm(self.h1_relu, self.w2) + self.b2
        self.h2_relu = relu(self.h2)
        
        self.output = torch.mm(self.h2_relu, self.w3) + self.b3
        
        return self.output
    
    def loss(self, label):
        if not torch.is_tensor(label):
            self.label = torch.tensor(label)
        else:
            self.label = label
            
        catogery = self.output.shape[1]
        onehot = torch.zeros((label.shape[0], catogery), dtype=torch.float64)
        self.onehot_label = onehot.scatter(1, label.unsqueeze(1), 1.0)
        
        loss = crossEntropy(self.onehot_label, softmax(self.output))

        return torch.mean(loss)     
    
    def backward(self):
        # 记录上一次迭代的梯度
        self.w1_t, self.b1_t = self.w1_grad, self.b1_grad
        self.w2_t, self.b2_t = self.w2_grad, self.b2_grad
        self.w3_t, self.b3_t = self.w3_grad, self.b3_grad
        
        output_grad = (softmax(self.output)  - self.onehot_label) / self.output.shape[0]
        
        
        self.w3_grad = torch.mm(self.h2_relu.permute(1, 0), output_grad)
        self.b3_grad = torch.sum(output_grad, dim=0)
        
        h2_relu_grad = torch.mm(output_grad, self.w3.permute(1, 0))
        h2_grad = h2_relu_grad.clone()
        h2_grad[self.h2 < 0] = 0
        
        self.w2_grad = torch.mm(self.h1_relu.permute(1, 0), h2_grad)
        self.b2_grad = torch.sum(h2_grad, dim=0)
        
        h1_relu_grad = torch.mm(h2_grad, self.w2.permute(1, 0))
        h1_grad = h1_relu_grad.clone()
        h1_grad[self.h1 < 0] = 0
        
        self.w1_grad = torch.mm(self.input.permute(1, 0), h1_grad)
        self.b1_grad = torch.sum(h1_grad, dim=0)
        
        
    def SGD(self, lrate, gama):
        self.w1 -= gama * lrate * self.w1_t + lrate * self.w1_grad
        self.b1 -= gama * lrate * self.b1_t +lrate * self.b1_grad
        self.w2 -= gama *  lrate * self.w2_t + lrate * self.w2_grad
        self.b2 -= gama * lrate * self.b2_t + lrate * self.b2_grad
        self.w3 -= gama *  lrate * self.w3_t + lrate * self.w3_grad
        self.b3 -= gama * lrate * self.b3_t + lrate * self.b3_grad

        

In [7]:
# 对训练过程的准确率和损失画图
def training_process(train_loss, train_acc, test_acc):
    shape = train_loss.shape[0]
    epoch = np.arange(1, shape+1)

    plt.plot(epoch, test_acc, label="testAcc")
    plt.plot(epoch, train_acc, label="trainAcc")
    plt.xlabel("epoch")
    plt.ylabel("accuracy")
    plt.title("accuracy on train set and test set")
    plt.legend()
    plt.show()

    plt.plot(epoch, train_loss, label="loss")
    plt.xlabel("epoch")
    plt.ylabel("loss")
    plt.title("loss on train set")
    plt.legend()
    plt.show()

In [8]:
if __name__ == "__main__":
    
    # 可直接使用这组超参数进行训练，也可以自己尝试调整
    lr = 0.02  # 学习率
    epoch = 20  # 迭代次数
    batch_size = 128  # 每一批次的大小
    gama = 0.9
    
    # 对数据集图片做标准化并转为tensor
    transform_train = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,))
    ])  # 对训练集的transform
    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,))
    ])  # 对测试集的transform

    # 借助torchvision中的函数读取MNIST，请将参数root换为自己数据存放的路径，或者设置download=True下载数据集
    # 读MNIST训练集
    path = "G:\\grade3_second\\artificial neural networks\\lab\Lab_weeks 3-4\\week3"
    trainSet = MNIST(root=path, train=True, transform=transform_train, download=False)
    trainLoader = torch.utils.data.DataLoader(trainSet, batch_size=batch_size, shuffle=True, num_workers=4, drop_last=True)
    # 读MNIST测试集
    testSet = MNIST(root=path, train=False, transform=transform_test, download=False)
    testLoader = torch.utils.data.DataLoader(testSet, batch_size=batch_size, shuffle=False, num_workers=4, drop_last=True)


In [12]:
    # 训练数据的记录
    train_acc = np.zeros(epoch)
    test_acc = np.zeros(epoch)
    train_loss = np.zeros(epoch)
    
    # TODO：在这里对你实现的类进行实例化，之后开始对模型进行训练
    input_size = 784
    hidden_size1 = 128
    hidden_size2 = 128
    output_size = 10
    net = Net(input_size, hidden_size1, hidden_size2, output_size)  # 具体的实例化根据你的实现而定，此处只做示意（包括下面两行）
 
    # 重复训练epoch次
    for epo in range(epoch):
        epoch_loss = 0  # 当前epoch的损失
        correct1 = 0  # 当前epoch的训练集准确率
        correct2 = 0  # 当前epoch的测试集准确率

        # 训练阶段
        # 利用每个mini-batch对网络进行更新
        for index, (data, label) in enumerate(trainLoader):  # 从trainLoader读取一个mini-batch
            
            # index是当前mini-batch的序号，data是图像，label是标签，data和label都有batch_size个
            data = data.view(data.size(0), -1)  # 展开，将输入的维度从[batch_size, 1, 28, 28]变成[batch_size, 784]
            output = net(data)  # TODO：完成前向传播，其中net是你实现的三层全连接神经网络，具体调用形式根据你的实现而定（包括下面三个）

            # 计算训练集准确率，output是网络的输出，维度应为[batch_size, 10]
            _, prediction = torch.max(output, 1)
            correct1 += (prediction == label).sum()
            loss = net.loss(label)
            net.backward()
            net.SGD(lr, gama)
            epoch_loss += loss.item()
            
            if index % 100 == 99:
                print('%d epoch %d index: acc: %f loss:%f' % (epo, index, correct1.item() *100 / ((index + 1) * batch_size), epoch_loss / (index + 1)))

        # 测试阶段
        # 测试时不需要tensor的梯度，可调用no_grad关掉梯度
#         with torch.no_grad():
#             for index, (data, label) in enumerate(testLoader):# 从testLoader读取一个mini-batch
#                 data = data.view(data.size(0), -1)
#                 output = net(data)  # 与上面对前向传播的实现保持一致

#                 # 计算测试集准确率
#                 _, prediction = torch.max(output.data, 1)
#                 correct2 += (prediction == label).sum()

        # 计算训练集和测试集准确率
        epoch_train_acc = (int(correct1) * 100 / 60000)
        epoch_test_acc = (int(correct2) * 100 / 10000)

        # 输出当前epoch的信息
        print("-------%2d-------" % epo)
        print("Epoch loss: %4.2f" % epoch_loss)
        print("Train acc: %3.2f%%" % epoch_train_acc)
        print("Test acc: %3.2f%%" % epoch_test_acc)
        print()

        # 记录loss和accuracy
        train_acc[epo] = epoch_train_acc
        test_acc[epo] = epoch_test_acc
        train_loss[epo] = epoch_loss

        # 至此当前epoch结束

0 epoch 99 index: acc: 61.570312 loss:1.824845
0 epoch 199 index: acc: 72.082031 loss:1.188068
0 epoch 299 index: acc: 76.966146 loss:0.936535
0 epoch 399 index: acc: 79.750000 loss:0.796455
------- 0-------
Epoch loss: 340.74
Train acc: 81.11%
Test acc: 0.00%

1 epoch 99 index: acc: 90.734375 loss:0.299952
1 epoch 199 index: acc: 90.964844 loss:0.295352
1 epoch 299 index: acc: 91.348958 loss:0.285275
1 epoch 399 index: acc: 91.679688 loss:0.276909
------- 1-------
Epoch loss: 126.38
Train acc: 91.75%
Test acc: 0.00%

2 epoch 99 index: acc: 93.335938 loss:0.223295
2 epoch 199 index: acc: 93.445312 loss:0.218423
2 epoch 299 index: acc: 93.770833 loss:0.208506
2 epoch 399 index: acc: 93.755859 loss:0.207511
------- 2-------
Epoch loss: 96.39
Train acc: 93.63%
Test acc: 0.00%

3 epoch 99 index: acc: 94.664062 loss:0.180838
3 epoch 199 index: acc: 94.601562 loss:0.181904
3 epoch 299 index: acc: 94.627604 loss:0.180526
3 epoch 399 index: acc: 94.654297 loss:0.177265
------- 3-------
Epoch l

In [16]:
#         测试阶段
#         测试时不需要tensor的梯度，可调用no_grad关掉梯度
with torch.no_grad():
    for index, (data, label) in enumerate(testLoader):# 从testLoader读取一个mini-batch
        data = data.view(data.size(0), -1)
        output = net(data)  # 与上面对前向传播的实现保持一致

        # 计算测试集准确率
        _, prediction = torch.max(output.data, 1)
        correct2 += (prediction == label).sum()
epoch_test_acc = (int(correct2) * 100 / 10000)
print("Test acc: %3.2f%%" % epoch_test_acc)

Test acc: 96.23%
