[使用 VisualDL 可视化模型，数据和训练-使用文档-PaddlePaddle深度学习平台](https://www.paddlepaddle.org.cn/documentation/docs/zh/guides/advanced/visualdl_usage_cn.html)

运行该代码后，将会创建一个./runs/mnist_experiment 文件夹，用于存储写入到 VisualDL 的数据。
可以在训练程序执行前、中、后任意一个阶段，启动 VisualDL 的可视化服务、读取数据、并进入浏览器查看。启动命令为：
visualdl --logdir ./runs/mnist_experiment --model ./runs/mnist_experiment/model.pdmodel --host 0.0.0.0 --port 8040
--logdir：与使用 LogWriter 时指定的参数相同。
--model：（可选）为保存的网络模型结构文件。
--host：指定服务的 IP 地址。
--port：指定服务监听的端口号。
在命令行中输入上述命令启动服务后，可以在浏览器中输入 http://localhost:8040 (也可以查看 ip 地址，将 localhost 换成 ip)进行查看。
如果是在 AI Studio 上运行训练程序，可以在模型训练结束后，参考如下界面设置日志文件所在目录和模型文件，然后启动 VisualDL 的可视化服务。

In [8]:
import os
import random

import numpy as np
# 加载飞桨相关库
import paddle
from paddle.nn import Conv2D, MaxPool2D, Linear
import paddle.nn.functional as F
# 从 visualdl 库中引入 LogWriter 类
from visualdl import LogWriter
# Create the LogWriter object and point it at the log directory via `logdir`;
# per the tutorial, the directory is created if the given path does not exist.
logwriter = LogWriter(logdir='./runs/mnist_experiment')

In [3]:

# Data loading
class MNISTDataset(paddle.io.Dataset):
    """Wrap ``paddle.vision.datasets.MNIST`` and normalise each sample.

    Every item is returned as ``(image, label)`` where the image is a
    ``float32`` array reshaped to ``[1, 28, 28]`` and divided by 255, and
    the label is an ``int64`` array reshaped to ``[1]``.
    """

    def __init__(self, mode='train'):
        # `mode` is forwarded unchanged to the underlying MNIST dataset.
        self.mnist_data = paddle.vision.datasets.MNIST(mode=mode)

    def __len__(self):
        # The wrapper exposes exactly as many samples as the wrapped dataset.
        return len(self.mnist_data)

    def __getitem__(self, idx):
        raw_image, raw_label = self.mnist_data[idx]
        # Reshape to a single-channel 28x28 layout and divide by 255.
        pixels = np.reshape(raw_image, [1, 28, 28]).astype(np.float32) / 255
        # Labels are returned as a length-1 int64 array.
        target = np.reshape(raw_label, [1]).astype(np.int64)
        return (pixels, target)

In [4]:

# Training batches: 16 samples each, reshuffled by the loader.
train_loader = paddle.io.DataLoader(
    MNISTDataset(mode='train'),
    batch_size=16,
    shuffle=True,
)
# Test batches: 16 samples each, kept in dataset order.
test_loader = paddle.io.DataLoader(
    MNISTDataset(mode='test'),
    batch_size=16,
    shuffle=False,
)

Cache file C:\Users\ljc\.cache\paddle\dataset\mnist\t10k-images-idx3-ubyte.gz not found, downloading https://dataset.bj.bcebos.com/mnist/t10k-images-idx3-ubyte.gz 
Begin to download

Download finished
Cache file C:\Users\ljc\.cache\paddle\dataset\mnist\t10k-labels-idx1-ubyte.gz not found, downloading https://dataset.bj.bcebos.com/mnist/t10k-labels-idx1-ubyte.gz 
Begin to download
..
Download finished


In [5]:

# Network structure for MNIST digit recognition
class MNIST(paddle.nn.Layer):
    """Small CNN: two conv + ReLU + max-pool stages followed by one linear layer.

    With the 1x28x28 inputs produced by ``MNISTDataset``, each 5x5 convolution
    (stride 1, padding 2) keeps the spatial size and each 2x2 pooling halves
    it (28 -> 14 -> 7), so the flattened feature size is 20 * 7 * 7 = 980,
    matching ``in_features`` of the final layer.
    """

    def __init__(self):
        super().__init__()

        # First convolution: 1 input channel -> 20 output channels,
        # 5x5 kernel, stride 1, padding 2.
        self.conv1 = Conv2D(
            in_channels=1,
            out_channels=20,
            kernel_size=5,
            stride=1,
            padding=2,
        )
        # First pooling stage: 2x2 window, stride 2.
        self.max_pool1 = MaxPool2D(kernel_size=2, stride=2)
        # Second convolution: 20 -> 20 channels, 5x5 kernel, stride 1, padding 2.
        self.conv2 = Conv2D(
            in_channels=20,
            out_channels=20,
            kernel_size=5,
            stride=1,
            padding=2,
        )
        # Second pooling stage: 2x2 window, stride 2.
        self.max_pool2 = MaxPool2D(kernel_size=2, stride=2)
        # Fully connected layer producing 10 outputs (one per digit class).
        self.fc = Linear(in_features=980, out_features=10)

    def forward(self, inputs):
        """Run the forward pass and return the output of the linear layer.

        Each convolution is followed by ReLU and max pooling. No softmax is
        applied here; the output of ``self.fc`` is returned as-is.
        """
        stage1 = self.max_pool1(F.relu(self.conv1(inputs)))
        stage2 = self.max_pool2(F.relu(self.conv2(stage1)))
        # Flatten everything except the leading (batch) dimension.
        flattened = paddle.reshape(stage2, [stage2.shape[0], -1])
        return self.fc(flattened)

In [6]:

# Create the model
model = MNIST()


# Set up the optimizer: SGD over all of the model's parameters
opt = paddle.optimizer.SGD(learning_rate=0.001, parameters=model.parameters())
EPOCH_NUM = 10
for epoch_id in range(EPOCH_NUM):
    model.train()
    # Iterate the loader directly, the same way the test loader is iterated below.
    for batch_id, data in enumerate(train_loader):
        # Unpack one batch
        images, labels = data

        # Forward pass
        predicts = model(images)

        # Compute the loss and reduce it to the batch average
        loss = F.cross_entropy(predicts, labels)
        avg_loss = paddle.mean(loss)

        # Print the current loss every 200 batches
        if batch_id % 200 == 0:
            print("epoch: {}, batch: {}, loss is: {}".format(epoch_id, batch_id, avg_loss.numpy()))

        # Backward pass
        avg_loss.backward()
        # Apply the parameter update
        opt.step()
        # Clear gradients before the next batch
        opt.clear_grad()

    # Evaluate the model after each epoch
    model.eval()
    accuracies = []
    losses = []
    # Gradients are not needed for validation; disabling them avoids
    # building an autograd graph for every test batch.
    with paddle.no_grad():
        for batch_id, data in enumerate(test_loader):
            # Unpack one batch
            images, labels = data
            # Forward pass
            predicts = model(images)
            # Loss on this batch
            loss = F.cross_entropy(predicts, labels)
            # Accuracy on this batch
            acc = paddle.metric.accuracy(predicts, labels)
            accuracies.append(acc.numpy())
            losses.append(loss.numpy())

    # Note: this rebinds avg_loss from the last training tensor to the
    # mean validation loss for the epoch.
    avg_acc, avg_loss = np.mean(accuracies), np.mean(losses)
    print("[validation]After epoch {}: accuracy/loss: {}/{}".format(epoch_id, avg_acc, avg_loss))

# Save the model parameters
paddle.save(model.state_dict(), 'mnist.pdparams')


epoch: 0, batch: 0, loss is: [2.4059968]
epoch: 0, batch: 200, loss is: [1.7189382]
epoch: 0, batch: 400, loss is: [1.0198879]
epoch: 0, batch: 600, loss is: [1.0962524]
epoch: 0, batch: 800, loss is: [0.6769724]
epoch: 0, batch: 1000, loss is: [0.65520746]
epoch: 0, batch: 1200, loss is: [0.5241337]
epoch: 0, batch: 1400, loss is: [0.4246518]
epoch: 0, batch: 1600, loss is: [0.43745375]
epoch: 0, batch: 1800, loss is: [0.22160484]
epoch: 0, batch: 2000, loss is: [0.17028359]
epoch: 0, batch: 2200, loss is: [0.70554364]
epoch: 0, batch: 2400, loss is: [0.2472116]
epoch: 0, batch: 2600, loss is: [0.41409796]
epoch: 0, batch: 2800, loss is: [0.1965942]
epoch: 0, batch: 3000, loss is: [0.47332242]
epoch: 0, batch: 3200, loss is: [0.21878421]
epoch: 0, batch: 3400, loss is: [0.16712691]
epoch: 0, batch: 3600, loss is: [0.06845479]
[validation]After epoch 0: accuracy/loss: 0.927299976348877/0.26732635498046875
epoch: 1, batch: 0, loss is: [0.53019166]
epoch: 1, batch: 200, loss is: [0.19599

In [9]:
# Log the first 9 training images to VisualDL as a sample of the inputs
dataset = MNISTDataset(mode='train')
image_matrix = []
for i in range(9):
    image, label = dataset[i]
    # The dataset yields CHW arrays; move the channel axis last (HWC)
    # before handing the image to VisualDL.
    image_matrix.append(np.transpose(image, (1, 2, 0)))
# Combine the nine images into one grid (intended as 3x3 per the tutorial;
# rows=-1 presumably lets VisualDL pick the row count -- TODO confirm).
logwriter.add_image_matrix(
    tag='input_images',
    step=1,
    imgs=image_matrix,
    rows=-1,
)