In [1]:
import paddle.fluid as fluid
import paddle
import numpy as np
import os
import math
from paddle.fluid.debugger import draw_block_graphviz

In [2]:
# 预训练模型保存路径
# 来自：https://github.com/PaddlePaddle/models/tree/develop/fluid/image_classification#supported-models-and-performances
pretrained_model_path = "se_resnext_50/129"

In [3]:
# 根据program加载参数
def load_pretrained_params(exe, program):

    # 只加载实际存在的
    def if_exist(var):
        path = os.path.join(pretrained_model_path, var.name)
        exist = os.path.exists(path)
        if exist:
                print("Load", path)
        return exist
    
    fluid.io.load_vars(exe, pretrained_model_path, predicate=if_exist, main_program=program)

In [4]:
# 预训练模型定义，已去掉不需要的层
# 来自：
class SE_ResNeXt50():
    
    def __init__(self):
        self.variables = []
    
    def net(self, input, class_dim=1000):
        cardinality = 32
        reduction_ratio = 16
        depth = [3, 4, 6] #, 3]
        num_filters = [128, 256, 512, 1024]
        
        conv = self.conv_bn_layer(
            input=input,
            num_filters=64,
            filter_size=7,
            stride=2,
            act='relu')
        conv = fluid.layers.pool2d(
            input=conv,
            pool_size=3,
            pool_stride=2,
            pool_padding=1,
            pool_type='max')
        
        for block in range(len(depth)):
            for i in range(depth[block]):
                conv = self.bottleneck_block(
                    input=conv,
                    num_filters=num_filters[block],
                    stride=2 if i == 0 and block != 0 else 1,
                    cardinality=cardinality,
                    reduction_ratio=reduction_ratio)
        pool = fluid.layers.pool2d( input=conv, pool_size=7, pool_type='avg', global_pooling=True)            
        return pool

    def shortcut(self, input, ch_out, stride):
        ch_in = input.shape[1]
        if ch_in != ch_out or stride != 1:
            filter_size = 1
            return self.conv_bn_layer(input, ch_out, filter_size, stride)
        else:
            return input

    def bottleneck_block(self, input, num_filters, stride, cardinality,
                         reduction_ratio):
        conv0 = self.conv_bn_layer(
            input=input, num_filters=num_filters, filter_size=1, act='relu')
        conv1 = self.conv_bn_layer(
            input=conv0,
            num_filters=num_filters,
            filter_size=3,
            stride=stride,
            groups=cardinality,
            act='relu')
        conv2 = self.conv_bn_layer(
            input=conv1, num_filters=num_filters * 2, filter_size=1, act=None)
        scale = self.squeeze_excitation(
            input=conv2,
            num_channels=num_filters * 2,
            reduction_ratio=reduction_ratio)

        short = self.shortcut(input, num_filters * 2, stride)

        return fluid.layers.elementwise_add(x=short, y=scale, act='relu')

    def conv_bn_layer(self,
                      input,
                      num_filters,
                      filter_size,
                      stride=1,
                      groups=1,
                      act=None):
        conv = fluid.layers.conv2d(
            input=input,
            num_filters=num_filters,
            filter_size=filter_size,
            stride=stride,
            padding=(filter_size - 1) / 2,
            groups=groups,
            act=None,
            bias_attr=False)
        self.variables.append(conv)
        bn = fluid.layers.batch_norm(input=conv, act=act)
        self.variables.append(bn)
        return bn

    def squeeze_excitation(self, input, num_channels, reduction_ratio):
        pool = fluid.layers.pool2d(
            input=input, pool_size=0, pool_type='avg', global_pooling=True)
        stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0)
        squeeze = fluid.layers.fc(input=pool,
                                  size=num_channels / reduction_ratio,
                                  act='relu',
                                  param_attr=fluid.param_attr.ParamAttr(
                                      initializer=fluid.initializer.Uniform(
                                          -stdv, stdv)))
        self.variables.append(squeeze)
        stdv = 1.0 / math.sqrt(squeeze.shape[1] * 1.0)
        excitation = fluid.layers.fc(input=squeeze,
                                     size=num_channels,
                                     act='sigmoid',
                                     param_attr=fluid.param_attr.ParamAttr(
                                         initializer=fluid.initializer.Uniform(
                                             -stdv, stdv)))
        self.variables.append(excitation)
        scale = fluid.layers.elementwise_mul(x=input, y=excitation, axis=0)
        return scale

In [5]:
# 自定义模型
def network(image, train_base_model=False):
  
    # 预训练模型
    base_model = SE_ResNeXt50().net(image)
    # 控制是否训练base_model
    base_model.stop_gradient = not train_base_model 
    
    # 复制一个只包含base_model的program，放便只加载需要的参数
    base_model_program = fluid.default_main_program().clone() 
    
    # 添加新的层
    y = base_model
    y = fluid.layers.fc(base_model, size=10, act='softmax')
    
    return y, base_model_program

In [6]:
use_cuda = fluid.core.is_compiled_with_cuda()
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()

In [7]:
# 训练新加的层
main = fluid.Program()
startup = fluid.Program()
exe = fluid.Executor(place)

with fluid.unique_name.guard():
    with fluid.program_guard(main, startup):

            # 预测
            image = fluid.layers.data('image', [3, 32, 32], dtype='float32')
            predict, base_model_program = network(image, False) # 只训练新加的层
           
            # 损失
            label = fluid.layers.data('label', [1], dtype='int64')
            loss = fluid.layers.cross_entropy(label=label, input=predict)
            loss = fluid.layers.mean(loss)
            
            # Metric
            acc = fluid.layers.accuracy(predict, label)
            
            # Feeder
            feeder = fluid.data_feeder.DataFeeder([image, label], place)
            reader = feeder.decorate_reader(paddle.batch(paddle.dataset.cifar.train10(), 128), None)

            # opt
            optimizer = fluid.optimizer.Adam(learning_rate=0.001)
            optimizer.minimize(loss)

            # 初始化变量
            exe.run(startup)

            # 加载参数
            load_pretrained_params(exe, base_model_program)

            optimized_main = fluid.transpiler.memory_optimize(main, print_log=False)

           
            for epoch in range(10):
                for batch, data in enumerate(reader()):
                    result = exe.run(fetch_list=[loss, acc], feed=data)
                    if batch % 100 == 0:
                        print("Epoch %d Batch %d Loss %f Acc %f" % (epoch, batch, result[0][0], result[1][0]))

('Load', 'se_resnext_50/129/batch_norm_10.w_1')
('Load', 'se_resnext_50/129/batch_norm_33.w_2')
('Load', 'se_resnext_50/129/batch_norm_33.w_0')
('Load', 'se_resnext_50/129/conv2d_33.w_0')
('Load', 'se_resnext_50/129/batch_norm_17.w_1')
('Load', 'se_resnext_50/129/batch_norm_32.w_0')
('Load', 'se_resnext_50/129/batch_norm_31.w_0')
('Load', 'se_resnext_50/129/batch_norm_28.b_0')
('Load', 'se_resnext_50/129/batch_norm_25.b_0')
('Load', 'se_resnext_50/129/batch_norm_31.w_2')
('Load', 'se_resnext_50/129/batch_norm_31.w_1')
('Load', 'se_resnext_50/129/batch_norm_26.w_0')
('Load', 'se_resnext_50/129/batch_norm_31.b_0')
('Load', 'se_resnext_50/129/conv2d_31.w_0')
('Load', 'se_resnext_50/129/fc_17.w_0')
('Load', 'se_resnext_50/129/fc_16.b_0')
('Load', 'se_resnext_50/129/fc_16.w_0')
('Load', 'se_resnext_50/129/fc_8.w_0')
('Load', 'se_resnext_50/129/batch_norm_42.w_2')
('Load', 'se_resnext_50/129/batch_norm_30.w_1')
('Load', 'se_resnext_50/129/conv2d_1.w_0')
('Load', 'se_resnext_50/129/conv2d_30.

Epoch 0 Batch 0 Loss 2.311845 Acc 0.054688
Epoch 0 Batch 100 Loss 2.132499 Acc 0.453125
Epoch 0 Batch 200 Loss 1.953558 Acc 0.539062
Epoch 0 Batch 300 Loss 1.840348 Acc 0.546875
Epoch 1 Batch 0 Loss 1.777899 Acc 0.515625
Epoch 1 Batch 100 Loss 1.705209 Acc 0.609375
Epoch 1 Batch 200 Loss 1.572993 Acc 0.570312
Epoch 1 Batch 300 Loss 1.525481 Acc 0.578125
Epoch 2 Batch 0 Loss 1.530093 Acc 0.539062
Epoch 2 Batch 100 Loss 1.480704 Acc 0.632812
Epoch 2 Batch 200 Loss 1.379631 Acc 0.593750
Epoch 2 Batch 300 Loss 1.353684 Acc 0.593750
Epoch 3 Batch 0 Loss 1.393383 Acc 0.546875
Epoch 3 Batch 100 Loss 1.345412 Acc 0.625000
Epoch 3 Batch 200 Loss 1.266868 Acc 0.593750
Epoch 3 Batch 300 Loss 1.247394 Acc 0.625000
Epoch 4 Batch 0 Loss 1.308322 Acc 0.570312
Epoch 4 Batch 100 Loss 1.255022 Acc 0.640625
Epoch 4 Batch 200 Loss 1.193787 Acc 0.601562
Epoch 4 Batch 300 Loss 1.174929 Acc 0.640625
Epoch 5 Batch 0 Loss 1.250353 Acc 0.593750
Epoch 5 Batch 100 Loss 1.189938 Acc 0.656250
Epoch 5 Batch 200 Loss

In [8]:
# 保存全部网络参数
fluid.io.save_params(exe, "cifar10.model", main_program=main)

In [None]:
# 训练整个网络
main = fluid.Program()
startup = fluid.Program()
exe = fluid.Executor(place)

with fluid.unique_name.guard():
    with fluid.program_guard(main, startup):

            # 预测
            image = fluid.layers.data('image', [3, 32, 32], dtype='float32')
            predict, base_model_program = network(image, True)
            
            # 损失
            label = fluid.layers.data('label', [1], dtype='int64')
            loss = fluid.layers.cross_entropy(label=label, input=predict)
            loss = fluid.layers.mean(loss)
            
            # Metric
            acc = fluid.layers.accuracy(predict, label)
            
            test_program = main.clone(for_test=True)
            
            # Feeder
            feeder = fluid.data_feeder.DataFeeder([image, label], place)
            reader = feeder.decorate_reader(paddle.batch(paddle.dataset.cifar.train10(), 128), None)
            
            test_feeder = fluid.data_feeder.DataFeeder([image, label], place)
            test_reader = test_feeder.decorate_reader(paddle.batch(paddle.dataset.cifar.test10(), 128), None)  

            # opt
            optimizer = fluid.optimizer.Adam(learning_rate=0.0001)
            optimizer.minimize(loss)

            # 初始化变量
            exe.run(startup)

            # 加载参数
            fluid.io.load_params(exe, "cifar10.model", main_program=main)

            optimized_main = fluid.transpiler.memory_optimize(main, print_log=False)

           
            for epoch in range(10):
                # 训练
                for batch, data in enumerate(reader()):
                    result = exe.run(fetch_list=[loss, acc], feed=data)
                    if batch % 100 == 0:
                        print("Epoch %d Batch %d Loss %f Acc %f" % (epoch, batch, result[0][0], result[1][0]))
                fluid.io.save_params(exe, "cifar10.model", main_program=main)
                
                # 测试
                test_acc = []
                for test_data in test_reader():
                    result = exe.run(test_program, fetch_list=[acc], feed=test_data)
                    test_acc.append(result[0][0])
                
                print("Epoch %d Test Acc %f" % (epoch, np.mean(test_acc)))

Epoch 0 Batch 0 Loss 1.110775 Acc 0.640625
Epoch 0 Batch 100 Loss 0.675765 Acc 0.781250
Epoch 0 Batch 200 Loss 0.561026 Acc 0.804688
Epoch 0 Batch 300 Loss 0.399023 Acc 0.843750
Epoch 0 Test Acc 0.838241
Epoch 1 Batch 0 Loss 0.339029 Acc 0.875000
Epoch 1 Batch 100 Loss 0.324468 Acc 0.875000
Epoch 1 Batch 200 Loss 0.180062 Acc 0.914062
Epoch 1 Batch 300 Loss 0.110104 Acc 0.968750
Epoch 1 Test Acc 0.843349
Epoch 2 Batch 0 Loss 0.078147 Acc 0.992188
Epoch 2 Batch 100 Loss 0.078254 Acc 0.976562
Epoch 2 Batch 200 Loss 0.013229 Acc 1.000000
Epoch 2 Batch 300 Loss 0.021719 Acc 0.992188
Epoch 2 Test Acc 0.846955
Epoch 3 Batch 0 Loss 0.007396 Acc 1.000000
Epoch 3 Batch 100 Loss 0.008246 Acc 1.000000
Epoch 3 Batch 200 Loss 0.012331 Acc 1.000000
Epoch 3 Batch 300 Loss 0.010200 Acc 1.000000
Epoch 3 Test Acc 0.844050
Epoch 4 Batch 0 Loss 0.034095 Acc 0.976562
Epoch 4 Batch 100 Loss 0.043675 Acc 0.984375
Epoch 4 Batch 200 Loss 0.059676 Acc 0.984375
Epoch 4 Batch 300 Loss 0.028524 Acc 1.000000
Epoch 