Add Mbv3 Demo in Full Quant (PaddlePaddle#1413)
RachelXu7 committed Sep 6, 2022
1 parent 371fdc4 commit d3ba08b
Showing 9 changed files with 784 additions and 59 deletions.
1 change: 1 addition & 0 deletions example/auto_compression/image_classification/infer.py
@@ -218,6 +218,7 @@ def eval(self):
results.append([top_1, top_5])

result = np.mean(np.array(results), axis=0)
t.update()
print('Evaluation result: {}'.format(result[0]))


72 changes: 38 additions & 34 deletions example/auto_compression/image_classification/run.py
@@ -18,6 +18,7 @@
import functools
from functools import partial
import math
from tqdm import tqdm

import numpy as np
import paddle
@@ -87,40 +88,43 @@ def eval_function(exe, compiled_test_program, test_feed_names, test_fetch_list):
resize_size=resize_size)

results = []
print('Evaluating...')
for batch_id, (image, label) in enumerate(val_loader):
# top1_acc, top5_acc
if len(test_feed_names) == 1:
image = np.array(image)
label = np.array(label).astype('int64')
pred = exe.run(compiled_test_program,
feed={test_feed_names[0]: image},
fetch_list=test_fetch_list)
pred = np.array(pred[0])
label = np.array(label)
sort_array = pred.argsort(axis=1)
top_1_pred = sort_array[:, -1:][:, ::-1]
top_1 = np.mean(label == top_1_pred)
top_5_pred = sort_array[:, -5:][:, ::-1]
acc_num = 0
for i in range(len(label)):
if label[i][0] in top_5_pred[i]:
acc_num += 1
top_5 = float(acc_num) / len(label)
results.append([top_1, top_5])
else:
# eval "eval model", which inputs are image and label, output is top1 and top5 accuracy
image = np.array(image)
label = np.array(label).astype('int64')
result = exe.run(
compiled_test_program,
feed={test_feed_names[0]: image,
test_feed_names[1]: label},
fetch_list=test_fetch_list)
result = [np.mean(r) for r in result]
results.append(result)
if batch_id % 100 == 0:
print('Eval iter: ', batch_id)
with tqdm(
total=len(val_loader),
bar_format='Evaluation stage, Run batch:|{bar}| {n_fmt}/{total_fmt}',
ncols=80) as t:
for batch_id, (image, label) in enumerate(val_loader):
# top1_acc, top5_acc
if len(test_feed_names) == 1:
image = np.array(image)
label = np.array(label).astype('int64')
pred = exe.run(compiled_test_program,
feed={test_feed_names[0]: image},
fetch_list=test_fetch_list)
pred = np.array(pred[0])
label = np.array(label)
sort_array = pred.argsort(axis=1)
top_1_pred = sort_array[:, -1:][:, ::-1]
top_1 = np.mean(label == top_1_pred)
top_5_pred = sort_array[:, -5:][:, ::-1]
acc_num = 0
for i in range(len(label)):
if label[i][0] in top_5_pred[i]:
acc_num += 1
top_5 = float(acc_num) / len(label)
results.append([top_1, top_5])
else:
# eval "eval model", which inputs are image and label, output is top1 and top5 accuracy
image = np.array(image)
label = np.array(label).astype('int64')
result = exe.run(compiled_test_program,
feed={
test_feed_names[0]: image,
test_feed_names[1]: label
},
fetch_list=test_fetch_list)
result = [np.mean(r) for r in result]
results.append(result)
t.update()
result = np.mean(np.array(results), axis=0)
return result[0]

54 changes: 29 additions & 25 deletions example/full_quantization/detection/run.py
@@ -17,6 +17,7 @@
import numpy as np
import argparse
import paddle
from tqdm import tqdm
from ppdet.core.workspace import load_config, merge_config
from ppdet.core.workspace import create
from ppdet.metrics import COCOMetric, VOCMetric, KeyPointTopDownCOCOEval
@@ -78,31 +79,34 @@ def convert_numpy_data(data, metric):

def eval_function(exe, compiled_test_program, test_feed_names, test_fetch_list):
metric = global_config['metric']
for batch_id, data in enumerate(val_loader):
data_all = convert_numpy_data(data, metric)
data_input = {}
for k, v in data.items():
if isinstance(global_config['input_list'], list):
if k in test_feed_names:
data_input[k] = np.array(v)
elif isinstance(global_config['input_list'], dict):
if k in global_config['input_list'].keys():
data_input[global_config['input_list'][k]] = np.array(v)
outs = exe.run(compiled_test_program,
feed=data_input,
fetch_list=test_fetch_list,
return_numpy=False)
res = {}
for out in outs:
v = np.array(out)
if len(v.shape) > 1:
res['bbox'] = v
else:
res['bbox_num'] = v

metric.update(data_all, res)
if batch_id % 100 == 0:
print('Eval iter:', batch_id)
with tqdm(
total=len(val_loader),
bar_format='Evaluation stage, Run batch:|{bar}| {n_fmt}/{total_fmt}',
ncols=80) as t:
for batch_id, data in enumerate(val_loader):
data_all = convert_numpy_data(data, metric)
data_input = {}
for k, v in data.items():
if isinstance(global_config['input_list'], list):
if k in test_feed_names:
data_input[k] = np.array(v)
elif isinstance(global_config['input_list'], dict):
if k in global_config['input_list'].keys():
data_input[global_config['input_list'][k]] = np.array(v)
outs = exe.run(compiled_test_program,
feed=data_input,
fetch_list=test_fetch_list,
return_numpy=False)
res = {}
for out in outs:
v = np.array(out)
if len(v.shape) > 1:
res['bbox'] = v
else:
res['bbox_num'] = v

metric.update(data_all, res)
t.update()
metric.accumulate()
metric.log()
map_res = metric.get_results()
106 changes: 106 additions & 0 deletions example/full_quantization/image_classification/README.md
@@ -0,0 +1,106 @@
# Full Quantization Example for Image Classification Models

Contents:
- [1. Introduction](#1-introduction)
- [2. Benchmark](#2-benchmark)
- [3. Full Quantization Workflow](#3-full-quantization-workflow)
- [3.1 Prepare the Environment](#31-prepare-the-environment)
- [3.2 Prepare the Dataset](#32-prepare-the-dataset)
- [3.3 Prepare the Inference Model](#33-prepare-the-inference-model)
- [3.4 Run Full Quantization and Export the Model](#34-run-full-quantization-and-export-the-model)
- [4. Deployment](#4-deployment)
- [4.1 Paddle Lite On-Device Deployment](#41-paddle-lite-on-device-deployment)
- [5. FAQ](#5-faq)


## 1. Introduction
This example uses the image classification model MobileNetV3_large_x1_0 to show how to apply full quantization to a PaddleClas inference model. The full-quantization strategy used here combines quantization-aware training with distillation.

## 2. Benchmark

### PaddleClas Models

| Model | Strategy | Top-1 Acc | GPU Latency (ms) | ARM CPU Latency (ms) | Config | Inference Model |
|:------:|:------:|:------:|:------:|:------:|:------:|:------:|
| MobileNetV3_large_x1_0 | Baseline | 75.32 | - | - | - | [Model](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/MobileNetV3_large_x1_0_infer.tar) |
| MobileNetV3_large_x1_0 | Full quantization | 74.41 | - | - | [Config](./configs/MobileNetV3_large_x1_0/qat_dis.yaml) | [Model](https://paddle-slim-models.bj.bcebos.com/act/MobileNetV3_large_x1_0_QAT.tar) |


## 3. Full Quantization Workflow

#### 3.1 Prepare the Environment

- python >= 3.6
- PaddlePaddle >= 2.3 (can be downloaded and installed from the [PaddlePaddle website](https://www.paddlepaddle.org.cn/install/quick?docurl=/documentation/docs/zh/install/pip/linux-pip.html))
- PaddleSlim >= 2.3

Install PaddlePaddle:
```shell
# CPU
pip install paddlepaddle
# GPU
pip install paddlepaddle-gpu
```

Install PaddleSlim:
```shell
pip install paddleslim
```

#### 3.2 Prepare the Dataset
By default this example runs the full-quantization experiment on the ImageNet-1k dataset. If your data is not in ImageNet-1k format, refer to the [PaddleClas data preparation guide](https://github.com/PaddlePaddle/PaddleClas/blob/release/2.3/docs/zh_CN/data_preparation/classification_dataset.md). Place the downloaded dataset under `./ILSVRC2012` in the current directory.


#### 3.3 Prepare the Inference Model
An inference model consists of two files, `model.pdmodel` and `model.pdiparams`: the `pdmodel` file holds the model structure and the `pdiparams` file holds the weights.

Note: legacy files named `__model__` and `__params__` correspond to `model.pdmodel` and `model.pdiparams`, respectively.

Inference models can be downloaded directly from the [PaddleClas pre-trained model zoo](https://github.com/PaddlePaddle/PaddleClas/blob/release/2.3/docs/zh_CN/algorithm_introduction/ImageNet_models.md). The example below fetches the MobileNetV3_large_x1_0 model:

```shell
wget https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/MobileNetV3_large_x1_0_infer.tar
tar -xf MobileNetV3_large_x1_0_infer.tar
```
You can also export an inference model yourself by following the [PaddleClas documentation](https://github.com/PaddlePaddle/PaddleClas/blob/release/2.3/docs/zh_CN/inference_deployment/export_model.md).
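
If you want to sanity-check the downloaded model before quantizing it, the minimal sketch below loads it with the Paddle Inference API and runs one random batch. The directory and file names match the archive extracted above; the random input is only a placeholder, and this snippet is not part of the example scripts.

```python
# Sanity-check sketch: load the inference model and run a dummy batch.
import numpy as np
import paddle.inference as paddle_infer

config = paddle_infer.Config('MobileNetV3_large_x1_0_infer/inference.pdmodel',
                             'MobileNetV3_large_x1_0_infer/inference.pdiparams')
predictor = paddle_infer.create_predictor(config)

input_handle = predictor.get_input_handle(predictor.get_input_names()[0])
input_handle.copy_from_cpu(np.random.rand(1, 3, 224, 224).astype('float32'))  # dummy image batch
predictor.run()

output_handle = predictor.get_output_handle(predictor.get_output_names()[0])
print(output_handle.copy_to_cpu().shape)  # expect (1, 1000) ImageNet-1k logits
```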

#### 3.4 Run Full Quantization and Export the Model

The full-quantization example is launched through the run.py script, which uses the ```paddleslim.auto_compression.AutoCompression``` interface to run quantization-aware training and distillation on the model. Set the model path, dataset path, and the distillation, quantization, and training parameters in the config file; once the configuration is done, full quantization can be started.
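
As a rough illustration of what run.py does internally, the sketch below wires the config file and data pipeline into `AutoCompression`. The import path and argument names follow the other auto-compression examples in this repository and may differ between PaddleSlim versions; `train_dataloader` and `eval_function` stand for the ImageNet reader and the accuracy callback that run.py builds and are not reproduced here.

```python
# Sketch of the AutoCompression call made by run.py (assumed API, not the full script).
from paddleslim.common import load_config as load_slim_config
from paddleslim.auto_compression import AutoCompression


def compress(train_dataloader, eval_function,
             config_path='./configs/mobilenetv3_large_qat_dis.yaml',
             save_dir='./save_quant_mobilev3/'):
    all_config = load_slim_config(config_path)      # parses Global/Distillation/Quantization/TrainConfig
    global_config = all_config["Global"]
    ac = AutoCompression(
        model_dir=global_config["model_dir"],       # e.g. MobileNetV3_large_x1_0_infer
        model_filename=global_config["model_filename"],
        params_filename=global_config["params_filename"],
        save_dir=save_dir,                          # where the quantized model is written
        config=all_config,
        train_dataloader=train_dataloader,          # ImageNet training dataloader built in run.py
        eval_callback=eval_function)                # top-1 accuracy callback built in run.py
    ac.compress()                                   # runs quantization-aware training + distillation
```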

**Single-GPU launch**

```shell
export CUDA_VISIBLE_DEVICES=0
python run.py --save_dir='./save_quant_mobilev3/' --config_path='./configs/mobilenetv3_large_qat_dis.yaml'
```

**Multi-GPU launch**

Image classification training usually involves a large amount of data; the ImageNet-22k dataset, for example, contains about 14 million images. Training on a single GPU would be very time-consuming, whereas distributed training achieves a nearly linear speedup.

```shell
export CUDA_VISIBLE_DEVICES=0,1,2,3
python -m paddle.distributed.launch run.py --save_dir='./save_quant_mobilev3/' --config_path='./configs/mobilenetv3_large_qat_dis.yaml'
```
In multi-GPU training, the training task is split across several trainer nodes, each of which reads data, runs the forward pass, and computes the backward gradients; the computed gradients are uploaded to a server node. Once the server node has received the gradients from all trainers, it aggregates them, updates the parameters, and sends the updated parameters back to the trainers, which then start the next round of training. One step of multi-GPU training processes ```batch size * num gpus``` samples: with a per-card ```batch size``` of 32, a single card processes 32 samples per step, while four cards at ```batch size``` 32 process 128.

Note that the ```learning rate``` scales linearly with the ```batch size```. Here the single-card ```batch size``` is 32 and the corresponding ```learning rate``` is 0.015; if the ```batch size``` is reduced by a factor of 4 to 8, the ```learning rate``` must also be divided by 4, and when training on multiple cards with a per-card ```batch size``` of 32, the ```learning rate``` must be multiplied by the number of cards. In short, whenever you change the ```batch size``` or the number of training cards, adjust the ```learning rate``` accordingly.
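
As a worked example of that linear-scaling rule (the 0.015 base value comes from the note above; the helper below is only an illustration, not part of the example scripts):

```python
# Worked example of the linear learning-rate scaling rule described above.
def scaled_lr(base_lr=0.015, base_batch=32, batch_size=32, num_cards=1):
    effective_batch = batch_size * num_cards   # samples processed per training step
    return base_lr * effective_batch / base_batch

print(scaled_lr())                  # 0.015   -> single card, batch size 32
print(scaled_lr(batch_size=8))      # 0.00375 -> batch size shrunk 4x, so lr / 4
print(scaled_lr(num_cards=4))       # 0.06    -> 4 cards at batch size 32, so lr * 4
```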

**Evaluate accuracy**

The validation accuracy of the model can be read from the training log. To evaluate the accuracy again, set the directory of the model to be evaluated and its model and parameter file names (```model_dir, model_filename, params_filename```) in the config file ```./configs/eval.yaml```, then run:

```shell
export CUDA_VISIBLE_DEVICES=0
python eval.py --config_path='./configs/eval.yaml'
```


## 4. Deployment

#### 4.1 Paddle Lite On-Device Deployment
For on-device deployment with Paddle Lite, refer to:
- [Paddle Lite deployment](https://github.com/PaddlePaddle/PaddleClas/blob/develop/docs/zh_CN/inference_deployment/paddle_lite_deploy.md)

## 5. FAQ
@@ -0,0 +1,7 @@
model_dir: ./MobileNetV3_large_x1_0_infer
model_filename: inference.pdmodel
params_filename: inference.pdiparams
batch_size: 128
data_dir: ./ILSVRC2012_data_demo/ILSVRC2012/
img_size: 224
resize_size: 256
@@ -0,0 +1,32 @@
Global:
input_name: inputs
model_dir: MobileNetV3_large_x1_0_infer
model_filename: inference.pdmodel
params_filename: inference.pdiparams
batch_size: 128
data_dir: ./ILSVRC2012_data_demo/ILSVRC2012/

Distillation:
alpha: 1.0
loss: soft_label
Quantization:
use_pact: true
activation_bits: 8
activation_quantize_type: moving_average_abs_max
weight_quantize_type: channel_wise_abs_max
not_quant_pattern:
- skip_quant
quantize_op_types:
- conv2d
- depthwise_conv2d
- matmul
weight_bits: 8
TrainConfig:
epochs: 2
eval_iter: 5000
learning_rate: 0.001
optimizer_builder:
optimizer:
type: Momentum
weight_decay: 0.00002
origin_metric: 0.7896