### 保存载入体系介绍

>在保存的时候出现下面的错误，不知道什么原因：
>
><font color="red">RuntimeError: (NotFound) No Input(Filter) found for Conv operator.
  [Hint: Expected ctx->HasInput("Filter") == true, but received ctx->HasInput("Filter"):0 != true:1.] (at C:\home\workspace\Paddle_release\paddle/fluid/operators/conv_op.cc:42)
  [operator < conv2d > error]</font>
>

#### 基础API保存载入体系

>训练场景：使用paddle.save/load
>
>推理部署：使用paddle.jit.save/load(动态图)；paddle.static.save/load_inference_model(静态图)
>

#### 高层API保存载入体系

>paddle.Model.fit(训练接口，同时带有保存参数功能)
>
>paddle.Model.save
>
>paddle.Model.load

### 训练调优场景的（模型、参数）保存与载入

>训练阶段主要保存模型的参数，当模型的训练时间很长时，为了防止意外中断需要保存模型训练的参数
>

#### 动态图参数保存载入

>使用 paddle.save/load 结合Layer和Optimizer的state_dict达成目的，此处的state_dict是对象的持久化参数的载体，dict的key为参数名，value为参数真实值

In [1]:
import numpy as np
import pandas as pd
import paddle
import paddle.nn as nn
import paddle.optimizer as opt
import warnings
warnings.filterwarnings("ignore")

In [2]:
# Batch size, number of batches per epoch, and number of training epochs.
BATCH_SIZE, BATCH_NUM, EPOCH_NUM = 16, 4, 4

# Length of each flat input sample and number of target classes.
IMAGE_SIZE, CLASS_NUM = 784, 10

In [3]:
# Define a random dataset.
class RandomDateset(paddle.io.Dataset):
    """Synthetic dataset that produces a new random (image, label) pair per access.

    Args:
        num_samples: Value reported by ``len()``; the index passed to
            ``__getitem__`` is ignored because every sample is random.
    """

    def __init__(self, num_samples):
        self.num_samples = num_samples

    def __getitem__(self, idx):
        """Return a float32 vector of length IMAGE_SIZE and an int64 label of shape (1,)."""
        image = np.random.random([IMAGE_SIZE]).astype('float32')
        # np.random.randint excludes its upper bound, so the bound must be
        # CLASS_NUM (not CLASS_NUM - 1) for the last class to be generated.
        label = np.random.randint(0, CLASS_NUM, (1,)).astype('int64')
        return image, label

    def __len__(self):
        """Return the configured number of samples."""
        return self.num_samples

In [4]:
# Build the network by subclassing nn.Layer.
class LinearNet(nn.Layer):
    """Single fully connected layer mapping IMAGE_SIZE features to CLASS_NUM outputs."""

    def __init__(self):
        super().__init__()
        self._linear = nn.Linear(IMAGE_SIZE, CLASS_NUM)

    def forward(self, x):
        """Apply the linear layer to ``x`` and return the result."""
        out = self._linear(x)
        return out


In [5]:
# Define the training function.
def train(layer, loader, loss_fn, opt):
    """Train ``layer`` for EPOCH_NUM epochs, printing loss and accuracy per batch.

    Args:
        layer: Callable network; invoked as ``layer(x)`` to get predictions.
        loader: Callable returning an iterable of batches, where each batch
            is indexable as ``(inputs, labels)``.
        loss_fn: Callable invoked as ``loss_fn(predicts, y)``; its result
            must support ``backward()`` and ``numpy()``.
        opt: Optimizer exposing ``step()`` and ``clear_grad()``.
    """
    # The parameter used to be misspelled ``loass_fn``, so the body silently
    # picked up a global ``loss_fn`` instead of the argument passed in.
    for epoch_id in range(EPOCH_NUM):
        for batch_id, data in enumerate(loader()):
            x = data[0]
            y = data[1]
            predicts = layer(x)
            loss = loss_fn(predicts, y)
            acc = paddle.metric.accuracy(predicts, y)
            loss.backward()
            opt.step()
            # Reset gradients so they do not carry over into the next batch.
            opt.clear_grad()
            print(f"epoch_id:{epoch_id},batch_id:{batch_id},loss:{loss.numpy()},acc:{acc.numpy()}")

In [6]:
# Create the network, the loss function, and the optimizer.
layer = LinearNet()
loss_fn = paddle.nn.CrossEntropyLoss()
adam = paddle.optimizer.Adam(
    learning_rate=0.01,
    parameters=layer.parameters(),
)
# Create the dataset and its data loader used for training.
dataset = RandomDateset(BATCH_NUM * BATCH_SIZE)
loader = paddle.io.DataLoader(
    dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    drop_last=True,
    num_workers=2,
)



In [7]:
# Run the training loop with the network, loader, loss function and Adam optimizer created above.
train(layer,loader,loss_fn,adam)

epoch_id:0,batch_id:0,loss:[2.5168338],acc:[0.0625]
epoch_id:0,batch_id:1,loss:[4.1976337],acc:[0.125]
epoch_id:0,batch_id:2,loss:[4.9551334],acc:[0.125]
epoch_id:0,batch_id:3,loss:[3.608651],acc:[0.0625]
epoch_id:1,batch_id:0,loss:[4.1102242],acc:[0.0625]
epoch_id:1,batch_id:1,loss:[3.0134223],acc:[0.0625]
epoch_id:1,batch_id:2,loss:[3.402636],acc:[0.125]
epoch_id:1,batch_id:3,loss:[3.0109348],acc:[0.25]
epoch_id:2,batch_id:0,loss:[3.4844694],acc:[0.]
epoch_id:2,batch_id:1,loss:[5.342682],acc:[0.0625]
epoch_id:2,batch_id:2,loss:[2.6492777],acc:[0.125]
epoch_id:2,batch_id:3,loss:[2.6705422],acc:[0.0625]
epoch_id:3,batch_id:0,loss:[2.9829288],acc:[0.1875]
epoch_id:3,batch_id:1,loss:[2.3503506],acc:[0.25]
epoch_id:3,batch_id:2,loss:[2.9422102],acc:[0.125]
epoch_id:3,batch_id:3,loss:[2.7587576],acc:[0.125]


In [8]:
# Save the network state dict and the optimizer state dict to disk.
paddle.save(
    layer.state_dict(),
    "linear_net.pdparams",
)
paddle.save(
    adam.state_dict(),
    "adam.pdopt",
)

In [9]:
# Load the saved network and optimizer state dicts back from disk.
layer_state_dict, opt_state_dict = (
    paddle.load("linear_net.pdparams"),
    paddle.load("adam.pdopt"),
)

In [10]:
# Apply the loaded state dicts to the network and to the optimizer.
layer.set_state_dict(
    layer_state_dict,
)
adam.set_state_dict(
    opt_state_dict,
)

### 中断训练与恢复训练案例

>基于手写数字识别模型学习paddle如何保存及加载模型
>
>

In [None]:
import paddle
import paddle.nn.functional as F
from paddle.nn import Layer
from paddle.vision.datasets import MNIST
from paddle.metric import Accuracy
from paddle.nn import Conv2D,MaxPool2D,Linear
from paddle.static import InputSpec
from paddle.vision.transforms import ToTensor

paddle.__version__

#### 加载数据，数据预处理

In [None]:
# MNIST training and test splits; ToTensor is applied to every image.
train_dataset = MNIST(
    mode="train",
    transform=ToTensor(),
)
test_dataset = MNIST(
    mode="test",
    transform=ToTensor(),
)

#### 构建模型

In [None]:
class MyModel(Layer):
    """LeNet-style CNN: two conv/pool stages followed by three linear layers.

    The first linear layer expects 16*5*5 features, which matches a
    1x28x28 input after the two conv/pool stages. Parameter names are
    unchanged, so existing checkpoints still load.
    """

    def __init__(self):
        super(MyModel, self).__init__()
        self.conv1 = Conv2D(in_channels=1, out_channels=6, kernel_size=5, stride=1, padding=2)
        self.max_pool1 = MaxPool2D(kernel_size=2, stride=2)
        self.conv2 = Conv2D(in_channels=6, out_channels=16, kernel_size=5, stride=1)
        self.max_pool2 = MaxPool2D(kernel_size=2, stride=2)
        self.linear1 = Linear(in_features=16*5*5, out_features=120)
        self.linear2 = Linear(in_features=120, out_features=84)
        self.linear3 = Linear(in_features=84, out_features=10)

    def forward(self, x):
        """Return the 10-way output of ``linear3`` for the input batch ``x``."""
        x = self.conv1(x)
        x = F.relu(x)
        x = self.max_pool1(x)
        x = self.conv2(x)
        # The activation belongs after conv2. It used to sit after max_pool1,
        # where it was a no-op on already non-negative values, which left
        # conv2 without any non-linearity.
        x = F.relu(x)
        x = self.max_pool2(x)
        # Flatten everything except the batch axis before the linear layers.
        x = paddle.flatten(x, start_axis=1, stop_axis=-1)
        x = self.linear1(x)
        x = F.relu(x)
        x = self.linear2(x)
        x = F.relu(x)
        x = self.linear3(x)
        return x

#### 模型训练配置

In [None]:
# The specs must describe what the network really consumes: MyModel starts
# with Conv2D(in_channels=1) and its first linear layer expects 16*5*5
# features, i.e. 1x28x28 images; CrossEntropyLoss takes int64 class indices.
# The previous [None,784] / [None,10] float32 specs did not match the model.
inputs = InputSpec([None, 1, 28, 28], 'float32', 'inputs')
labels = InputSpec([None, 1], 'int64', 'labels')
model = paddle.Model(MyModel(), inputs, labels)

optim = paddle.optimizer.Adam(learning_rate=0.001, parameters=model.parameters())
model.prepare(
    optim,
    paddle.nn.CrossEntropyLoss(),
    Accuracy()
)

# save_dir makes fit() write checkpoints into 'mnist_checkpoint' during training.
model.fit(
    train_dataset,
    test_dataset,
    epochs=3,
    batch_size=64,
    save_dir='mnist_checkpoint',
    verbose=1
)

#### 上述模型在第2次训练被中断，现在使用load加载已训练完成的参数，继续训练

In [None]:
# The specs must describe what the network really consumes: MyModel starts
# with Conv2D(in_channels=1) and its first linear layer expects 16*5*5
# features, i.e. 1x28x28 images; CrossEntropyLoss takes int64 class indices.
inputs = InputSpec([None, 1, 28, 28], 'float32', 'x')
labels = InputSpec([None, 1], 'int64', 'label')
model = paddle.Model(MyModel(), inputs, labels)

optim = paddle.optimizer.Adam(learning_rate=0.001, parameters=model.parameters())

# prepare() must run before load(): load() can only restore the optimizer
# state once an optimizer has been attached to the model. Loading first
# restores the network parameters but silently skips the optimizer state.
model.prepare(
    optim,
    paddle.nn.CrossEntropyLoss(),
    Accuracy()
)

# The path passed to load() is the file prefix shared by the parameter
# and optimizer files.
model.load(path="./mnist_checkpoint/0")

model.fit(
    train_dataset,
    test_dataset,
    # epochs is reduced to 2 because one epoch has already been completed.
    epochs=2,
    batch_size=64,
    save_dir='mnist_checkpoint',
    verbose=1
)

可以看到 loss 和 acc 与中断之前的数据相差不大

#### 保存模型的参数、优化器信息以及推理部署需要的参数与文件

In [None]:
# To save the training parameters instead, use:
# model.save("mnist_model/test")

# Save the parameters and files needed for inference deployment.
model.save(
    "mnist_model/test",
    training=False,
)