# FaceAlignment-2D人脸对齐案例

本案例将基于Helen数据集讲述如何在mindspore中进行2d人脸对齐

## 1 准备环节

### 1.1 导入模块

导入模块需要用到部分src中的文件，校验时请保持该notebook与src文件夹平级。

In [1]:
import os
import cv2
import csv
import time
import math
import json
import numpy as np
import scipy.io as scio
from typing import List

import mindspore as ms
import mindspore.nn as nn
import mindspore.dataset as ds
from mindspore import load_checkpoint, load_param_into_net, Tensor
from mindspore.mindrecord import FileWriter
from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, Callback

### 1.2 环境配置

使用GRAPH模式进行实验并使用GPU环境。

In [None]:
ms.set_context(mode=ms.GRAPH_MODE, device_target='GPU', save_graphs=False)

### 1.3 数据集准备

#### 1.3.1 下载数据集

下载案例所用到的[人脸对齐数据集](http://www.ifp.illinois.edu/~vuongle2/helen/)，该数据集包含2,330个图像，其中，2,000个图像位于训练集，330个位于验证集，每一张图像都有194个关键点的标注。网页中给出了一些下载链接，包括训练（Train images）和测试用图像（Test images）还有标注（Annotation）,我们需要在网页中下载上述数据。

#### 1.3.2 下载Bounding Box标注

下载案例所用到的[BoundingBox标注](https://ibug.doc.ic.ac.uk/media/uploads/competitions/bounding_boxes.zip)。该链接指向的文件包含bounding_boxes_helen_trainset.mat和bounding_boxes_helen_testset.mat，其中含有对每个人脸的BoundingBox标注，可以用于对原始照片的裁剪。

#### 1.3.3 下载完成后需要将数据集目录构建成如下形式

```text
├── Helen/
    ├── annotation/
        ├── 1.txt
        ├── 2.txt
        ├── 3.txt
        ├── ...
    ├── train/
        ├── 232194_1.jpg
        ├── 1629243_1.jpg
        ├── 1681766_1.jpg
        ├── ...
    ├── bounding_boxes_helen_trainset.mat
    ├── trainname.txt
```


#### 1.3.4 我不知道如何构建数据集

你可以从这个链接获取我已经按照上述结构整理好的数据集: https://pan.baidu.com/s/1rFjm2BEL1F9N-y3MMs2o3Q     (访问密码 hele)
里面有Helen_192及其db文件，以及有Helen.zip。Helen.zip解压得到上述目录结构，而Helen_192是整理好的mindrecord文件(裁剪，无数据增强)，如果使用这个mindrecord文件可以跳过数据集制作过程。

## 2. 处理数据

### 2.1 在py文件中定义参数

数据集相关处理参数如下：
    img_size:将图片统一处理到的边长。
    dataset_side_data_enhance:数据集侧的数据增强，主要采用旋转方法对图片进行四向旋转，启用后会同时旋转标记而不是简单的仅处理图片。
    dataset_target_path:输出文件名，任意修改，但是当文件夹下有同名文件的时候，新的数据集将不会创建且不会覆盖。
    clip:是否裁剪。不裁剪的话不会使用boundingbox文件处理图片。

In [None]:
# Dataset-side preprocessing options for the Helen dataset.
dataset_config = {
    'img_size': 192,  # side length every image is resized to
    # Kept as a string on purpose: the preprocessing helpers compare this flag against 'True'.
    'dataset_side_data_enhance': 'False',
    'dataset_target_path': 'Helen_192_no_enhance_do_clip',  # output MindRecord file name
    'clip': True  # crop each face with its bounding box before resizing
}  # Helen Dataset


完整的配置文件如下所示：

In [None]:
# Full experiment configuration: dataset options followed by model/training options.
config = {

    # Helen Dataset
    'img_size': 192,
    # Kept as a string on purpose: the preprocessing helpers compare this flag against 'True'.
    'dataset_side_data_enhance': 'False',
    'dataset_target_path': 'Helen_192_no_enhance_do_clip',
    'clip': True,

    # FaceAlignment Config
    'num_classes': 388,  # passed as output_channel to the network (194 points * 2 coordinates)
    'batch_size': 4,
    'epoch_size': 1000,
    'warmup_epochs': 4,
    'lr': 0.0001,  # used as the peak learning rate (lr_max) of the schedule
    'momentum': 0.9,
    'weight_decay': 0.00004,
    'loss_scale': 1024,  # shared by the loss scale manager and the optimizer
    'save_checkpoint': True,
    'save_checkpoint_epochs': 10,  # multiplied by steps per epoch to get the save interval in steps
    'keep_checkpoint_max': 500,
    'save_checkpoint_path': "./checkpoint",
    'export_format': "MINDIR",
    'export_file': "FaceAlignment_2D"
}

### 2.2 制作mindrecord数据集

这一步的主要原因是数据集处理时间较长，所以处理成mindrecord方便随时读取

In [None]:
def to_mindrocord(img_size, output_path, clip, dataset_side_data_enhance=False):
    """
    Write the Helen dataset to a MindRecord file.

    Args:
        img_size(int): Every image is resized to [img_size, img_size, 3].
        output_path(str): Path of the MindRecord file to create.
        clip(bool): Crop each picture with its bounding box before resizing or not.
        dataset_side_data_enhance(bool or str): Rotation augmentation switch, forwarded
            unchanged to read_helen. Default: False.

    Returns:
        None. A MindRecord file with the two columns ['label', 'image'] is generated
        at 'output_path'.

    Examples:
        >>> to_mindrocord(192, '/mnt/Helen_192', True, True)
    """
    finalpictures, annotations = read_helen(img_size, dataset_side_data_enhance, clip)

    writer = FileWriter(file_name=output_path, shard_num=1)
    cv_schema = {"image": {"type": "float32", "shape": [img_size, img_size, 3]},
                 "label": {"type": "float32", "shape": [1, 388]}}
    writer.add_schema(cv_schema, "Face Alignment Dataset")

    # Write exactly the samples read_helen produced instead of relying on a
    # hard-coded 2000/8000 count that had to be kept in sync with the flag.
    pending = []
    for index, (image, label) in enumerate(zip(finalpictures, annotations)):
        pending.append({'label': label, 'image': image})
        # Flush in small batches so the pending list never grows large.
        if index % 10 == 0:
            writer.write_raw_data(pending)
            pending = []
    if pending:
        writer.write_raw_data(pending)
    writer.commit()

def read_helen(img_size, dataset_side_data_enhance=False, clip=False):
    """
    Read the Helen training data from disk and build the in-memory dataset.

    Args:
        img_size(int): Every image is resized to [img_size, img_size, 3].
        dataset_side_data_enhance(bool or str): When True (or the string 'True', as used
            by the config dict), each sample is additionally stored rotated by 90, 180
            and 270 degrees together with correspondingly rotated annotations.
            Default: False.
        clip(bool): Crop each picture with its ground-truth bounding box before
            resizing or not. Default: False.

    Returns:
        finalpictures: list of float32 arrays, each of shape [img_size, img_size, 3].
        annotations: list of float32 arrays holding the landmark coordinates relative to
            the resized picture, one (x, y) row per landmark.

    Raises:
        ValueError: If an image name does not match the bounding-box entry at the same index.
        FileNotFoundError: If an image listed in trainname.txt cannot be read.
    """
    root_dir = "src/process_datasets/Helen/"
    with open(root_dir + "trainname.txt") as file:
        filename = [item.replace("\n", "") for item in file]
    bounding_box = scio.loadmat(root_dir + "bounding_boxes_helen_trainset.mat").get("bounding_boxes")[0]

    # The config stores this flag as the string 'True'/'False'; accept a real bool as well.
    enhance = dataset_side_data_enhance in (True, 'True')

    finalpictures = []
    annotations = []
    # The Helen training split contains 2000 images.
    for i in range(0, 2000):
        image_name = filename[i] + ".jpg"
        if image_name != bounding_box[i][0][0][0][0]:
            raise ValueError("Bounding box entry {} does not belong to image {}".format(i, image_name))

        img_path = root_dir + "train/" + image_name
        img = cv2.imread(img_path, flags=1)
        if img is None:
            # cv2.imread signals failure by returning None rather than raising.
            raise FileNotFoundError("Cannot read image: " + img_path)
        annotation = read_csv(root_dir + "annotation/" + str(i + 1) + ".txt")

        if clip:
            ground_truth_box = bounding_box[i][0][0][2][0].astype(np.int32)
            final_pic = picture_clip(img, ground_truth_box)
            final_pic, new_annotation = picture_resize(final_pic, annotation, ground_truth_box[0],
                                                       ground_truth_box[1], img_size)
        else:
            final_pic, new_annotation = picture_resize(img, annotation, 0, 0, img_size)
        final_pic = final_pic.astype(np.float32)
        new_annotation = new_annotation.astype(np.float32)

        if enhance:
            # Rotate the picture and move the landmarks accordingly.
            pic_1 = cv2.rotate(final_pic, cv2.ROTATE_90_CLOCKWISE)
            anno_1 = new_annotation.copy()
            anno_1[:, 0] = img_size - new_annotation[:, 1]
            anno_1[:, 1] = new_annotation[:, 0].copy()
            pic_2 = cv2.rotate(final_pic, cv2.ROTATE_180)
            anno_2 = new_annotation.copy()
            anno_2[:, 0] = img_size - new_annotation[:, 0]
            anno_2[:, 1] = img_size - new_annotation[:, 1]
            pic_3 = cv2.rotate(final_pic, cv2.ROTATE_90_COUNTERCLOCKWISE)
            anno_3 = new_annotation.copy()
            anno_3[:, 0] = new_annotation[:, 1].copy()
            anno_3[:, 1] = img_size - new_annotation[:, 0]
            finalpictures.append(pic_1)
            annotations.append(anno_1.astype(np.float32))
            finalpictures.append(pic_2)
            annotations.append(anno_2.astype(np.float32))
            finalpictures.append(pic_3)
            annotations.append(anno_3.astype(np.float32))
        finalpictures.append(final_pic)
        annotations.append(new_annotation.astype(np.float32))
    return finalpictures, annotations

def read_csv(path):
    """
    Load a comma-separated annotation file into a float array.

    Args :
        path(str): Helen Annotation TXT File Path

    Returns :
        result(numpy.ndarray): All rows except the first one, converted to float.
        For Helen Dataset, output shape is (194, 2).
    """
    with open(path) as handle:
        rows = list(csv.reader(handle, delimiter=','))
    # The first row is not coordinate data and is dropped.
    return np.array(rows[1:], dtype=float)

def picture_clip(pic, box):
    """
    Cut the bounding-box region out of a picture.

    Input :
        pic(ndarray) : Picture at any size, indexed as [row, column, ...]
        box(ndarray) : Box in [xMin,yMin,xMax,yMax]

    Output : An independent copy of the clipped region
    Example :
        >>> picture_clip(pic, [1, 5, 65, 97])
    """
    left, top, right, bottom = (int(box[k]) for k in range(4))
    # Rows are selected by y, columns by x.
    return pic[top:bottom, left:right].copy()


def picture_resize(picture, annotation, x0, y0, target_size):
    """
    Resize a picture and adjust its annotation to the resized picture.

    The picture is resized to a square and scaled to the [0, 1] range; the annotation
    is shifted by the crop origin and scaled by the same ratios as the picture.
    The annotation passed in is left untouched; a new array is returned.

    Input :
        picture : CV2 Picture [ H, W, C ]
        annotation : Marked Points , Absolute Position , [(x1,y1),(x2,y2)...]
        x0 : Bounding Box's Left Upper Corner's Position on X axis
        y0 : Bounding Box's Left Upper Corner's Position on Y axis
        target_size : Will Resize Image To (target_size, target_size)

    Output :
        Picture : Resized Picture, pixel values divided by 255
        annotation : annotations , But Relative Position , Relate to Resized Picture

    Examples:
        >>> picture_resize(img, annotation, 10, 20, 192)
    """
    y_ratio = target_size / picture.shape[0]
    x_ratio = target_size / picture.shape[1]
    img_resized = cv2.resize(picture, (target_size, target_size)) / 255

    # Work on a copy so the caller's annotation array is not modified in place.
    new_annotation = np.array(annotation, copy=True)
    if not np.issubdtype(new_annotation.dtype, np.floating):
        # Integer coordinates would be truncated by the scaling below.
        new_annotation = new_annotation.astype(float)
    new_annotation[:, 0] = (new_annotation[:, 0] - x0) * x_ratio
    new_annotation[:, 1] = (new_annotation[:, 1] - y0) * y_ratio

    return img_resized, new_annotation

In [6]:
# to_mindrocord(config['img_size'], config['dataset_target_path'], config['clip'], dataset_side_data_enhance=config['dataset_side_data_enhance'])

处理完毕后可以在同级目录下找到mindrecord及其db文件，加载数据集就可以简单的用MindDataset语句进行加载，要读取的列有两列，分别是image和label，num_parallel_workers和shuffle按照需求选择。读取后进行数据转换以及其他操作

In [6]:
def data_load(mindrecord_path, do_train, batch_size=1, repeat_num=1, count_number=False, num_worker=4, shuffle=None):
    """
    Load a MindRecord file and build the input pipeline.

    Args:
        mindrecord_path(str): Path of the MindRecord file.
        do_train(bool): When True the 'label' column is additionally cast to float32.
        batch_size(int): Batch size, incomplete batches are dropped. Default: 1.
        repeat_num(int): Number of times the dataset is repeated. Default: 1.
        count_number(bool): Iterate once over the raw data to count the samples. Default: False.
        num_worker(int): Number of parallel workers for reading and mapping. Default: 4.
        shuffle: Forwarded to MindDataset. Default: None.

    Returns:
        (dataset, count): the prepared dataset and the counted number of samples
        (0 when count_number is False).
    """
    dataset = ds.MindDataset(mindrecord_path, columns_list=["image", "label"],
                             num_parallel_workers=num_worker, shuffle=shuffle)

    count = 0
    if count_number:
        print("Calculating Size")
        for _ in dataset.create_dict_iterator(output_numpy=True):
            count += 1
        print("Got {} samples in Total, Load Successful".format(count))

    to_float32 = ds.transforms.c_transforms.TypeCast(ms.float32)
    image_ops = [
        ds.vision.c_transforms.Normalize(mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
                                         std=[0.229 * 255, 0.224 * 255, 0.225 * 255]),
        ds.vision.c_transforms.HWC2CHW(),
        to_float32,
    ]

    # The image pipeline is the same for training and evaluation.
    dataset = dataset.map(operations=image_ops, input_columns="image", num_parallel_workers=num_worker)
    if do_train:
        dataset = dataset.map(operations=to_float32, input_columns="label", num_parallel_workers=num_worker)

    # Shuffle, batch and repeat, in this order.
    dataset = dataset.shuffle(buffer_size=1000)
    dataset = dataset.batch(batch_size, drop_remainder=True)
    dataset = dataset.repeat(repeat_num)

    return dataset, count

In [8]:
dataset, count = data_load('Helen_192_no_enhance_do_clip', True, batch_size=1, repeat_num=1, count_number=False, num_worker=4, shuffle=None)
print('dataset size is : \n', dataset.get_dataset_size())

dataset size is : 
 2000


## 3 训练准备

### 3.1 网络定义

完成数据集创建与读取以后就开始着手网络定义。FaceAlignment所用的网络由样例onnx文件包含的网络描述直接得出。这里直接引入模型定义并实例化

In [None]:
network_config = [
    # in_channels, out_channels, kernel_size, stride, padding, dilation, group
    [3, 16, 3, 2, 1, 1, 1],
    [16, 16, 3, 1, 1, 1, 16],
    [16, 32, 1, 1, 0, 1, 1],
    [32, 32, 3, 2, 1, 1, 32],
    [32, 64, 1, 1, 0, 1, 1],
    [64, 64, 3, 1, 1, 1, 64],
    [64, 64, 1, 1, 0, 1, 1],
    [64, 64, 3, 2, 1, 1, 64],
    [64, 128, 1, 1, 0, 1, 1],
    [128, 128, 3, 1, 1, 1, 128],
    [128, 128, 1, 1, 0, 1, 1],
    [128, 128, 3, 2, 1, 1, 128],
    [128, 256, 1, 1, 0, 1, 1],
    [256, 256, 3, 1, 1, 1, 256],
    [256, 256, 1, 1, 0, 1, 1],
    [256, 256, 3, 1, 1, 1, 256],
    [256, 256, 1, 1, 0, 1, 1],
    [256, 256, 3, 1, 1, 1, 256],
    [256, 256, 1, 1, 0, 1, 1],
    [256, 256, 3, 1, 1, 1, 256],
    [256, 256, 1, 1, 0, 1, 1],
    [256, 256, 3, 1, 1, 1, 256],
    [256, 256, 1, 1, 0, 1, 1],
    [256, 256, 3, 2, 1, 1, 256],
    [256, 512, 1, 1, 0, 1, 1],
    [512, 512, 3, 1, 1, 1, 512],
    [512, 512, 1, 1, 0, 1, 1],
    [512, 64, 3, 2, 1, 1, 1]
]

class Facealignment2d(nn.Cell):
    """
    Model definition for the 2D face alignment task.

    The network is a stack of Conv2d + BatchNorm2d + PReLU groups built from the
    module-level 'network_config', followed by flattening and one Dense layer.
    Model structure and layer names are directly translated from the given ONNX file.

    Args:
        output_channel (int) - Should be number of alignment points * 2, this input is 388 for Helen dataset.

    Inputs:
        x(Tensor): Input image batch; the Dense layer size is computed for 3 x 192 x 192 images.

    Outputs:
        x(Tensor): Predicted coordinates produced by the final Dense layer, 'output_channel'
        values per image. Each point takes 2 channels.

    Supported Platforms:
        ``Ascend`` ``GPU``

    """

    def __init__(self, output_channel):
        super(Facealignment2d, self).__init__()
        self.network_config = network_config
        self.features = self._make_layer(network_config, output_channel)

    def construct(self, x):
        """
        Define forward pass: run the input through the sequential feature stack.
        """
        x = self.features(x)
        return x

    def _make_layer(self, cfg: List[List[int]], output_channel: int) -> nn.SequentialCell:
        '''
        Make layer for model 'FaceAlignment2d'.

        Args:
            cfg: Model layer config, like 'network_config' above. Each row is
                [in_channels, out_channels, kernel_size, stride, padding, dilation, group].
            output_channel(int) : Should be number of alignment points * 2, this input is 388 for Helen dataset.

        Returns:
            SequentialCell, Contains layers generated With 'cfg'

        Examples:
            >>>_make_layer(network_config, 388)
        '''
        layers = []
        for v in cfg:
            # One Conv2d + BatchNorm2d + PReLU group per config row; the same padding is used on all four sides.
            layers += [nn.Conv2d(in_channels=v[0], out_channels=v[1],
                                 kernel_size=v[2], stride=v[3],
                                 pad_mode="pad",
                                 padding=(v[4], v[4], v[4], v[4]),
                                 dilation=v[5], group=v[6]),
                       nn.BatchNorm2d(num_features=v[1]),
                       nn.PReLU()]
        # Dense input size = last conv out_channels * kernel_size^2 (64 * 3 * 3 = 576 for network_config).
        # NOTE(review): this equals the flattened feature size only while the last conv
        # leaves a 3x3 map, which holds for 192x192 inputs with network_config; confirm
        # before changing the input size or the config.
        out_channels = cfg[-1][1] * cfg[-1][2] * cfg[-1][2]
        # NOTE(review): nn.Flatten appears twice; the second one looks redundant, but removing
        # it would shift the Dense layer's position inside the SequentialCell, which presumably
        # changes its parameter name in saved checkpoints. Verify before touching.
        layers += [nn.Flatten(), nn.Flatten(), nn.Dense(in_channels=out_channels, out_channels=output_channel)]
        return nn.SequentialCell(layers)

In [1]:
net = Facealignment2d(output_channel=config['num_classes'])

NameError: name 'Facealignment2d' is not defined

### 3.2 损失函数

由于任务是完成194个点（388通道，每个点的横纵坐标各对应一个通道）的回归任务，损失函数使用预测坐标与真实坐标之间的均方误差（MSE）即可，如下。

In [10]:
class MSELoss(nn.LossBase):
    """
    Thin wrapper that exposes nn.MSELoss as a LossBase cell.

    Inputs:
        logit: Network prediction.
        label: Ground truth.

    Outputs:
        The value computed by nn.MSELoss for (logit, label).

    Examples:
        >>> MSELoss()
    """

    def __init__(self):
        super().__init__()
        self.mse = nn.MSELoss()

    def construct(self, logit, label):
        """Delegate the loss computation to the wrapped nn.MSELoss."""
        return self.mse(logit, label)

loss = MSELoss()
loss_scale = ms.FixedLossScaleManager(
        config['loss_scale'], drop_overflow_update=False)

### 3.3学习率和优化器

学习率变化和优化器定义如下

In [11]:
def get_lr(global_step, lr_init, lr_end, lr_max, warmup_epochs, total_epochs, steps_per_epoch):
    """
    Build the per-step learning rate schedule: linear warmup followed by cosine decay.

    Args:
        global_step(int): index of the first step to keep; earlier steps are cut off
        lr_init(float): learning rate at the first warmup step
        lr_end(float): learning rate the cosine decay ends at
        lr_max(float): learning rate reached at the end of warmup
        warmup_epochs(int): number of warmup epochs
        total_epochs(int): total epoch of training
        steps_per_epoch(int): steps of one epoch, value is dataset.get_dataset_size()

    Returns:
        np.array, float32 learning rates from global_step to the last step

    Examples:
        >>> get_lr(0, 0, 0, 0.0001, 4, 1000, 8000)

    """
    total_steps = steps_per_epoch * total_epochs
    warmup_steps = steps_per_epoch * warmup_epochs

    def _rate_at(step):
        """Return the non-negative learning rate of a single step."""
        if step < warmup_steps:
            rate = lr_init + (lr_max - lr_init) * step / warmup_steps
        else:
            cosine = math.cos(math.pi * (step - warmup_steps) / (total_steps - warmup_steps))
            rate = lr_end + (lr_max - lr_end) * (1. + cosine) / 2.
        return 0.0 if rate < 0.0 else rate

    schedule = np.array([_rate_at(step) for step in range(total_steps)]).astype(np.float32)
    return schedule[global_step:]

epoch_size = config['epoch_size']
step_size = dataset.get_dataset_size()
lr = ms.Tensor(get_lr(global_step=0, lr_init=0, lr_end=0, lr_max=config['lr'], warmup_epochs=config['warmup_epochs'], total_epochs=epoch_size, steps_per_epoch=step_size))

优化器选择使用动量优化器

In [12]:
opt = nn.Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), lr, config['momentum'], config['weight_decay'], config['loss_scale'])

### 3.4 Monitor定义

In [13]:
class Monitor(Callback):
    """
    Monitor loss and time.

    Prints loss, duration and learning rate after every step and a summary after
    every epoch.

    Args:
        lr_init (numpy array): per-step learning rates, indexed by global step.
            Default: None, in which case the learning rate is reported as nan.

    Returns:
        None

    Examples:
        >>> Monitor(lr_init=ms.Tensor([0.05]*100).asnumpy())
    """

    def __init__(self, lr_init=None):
        super(Monitor, self).__init__()
        self.lr_init = lr_init
        # len(None) would raise, so the documented default must be handled explicitly.
        self.lr_init_len = 0 if lr_init is None else len(lr_init)
        self.losses = []
        self.epoch_time = time.time()
        self.step_time = time.time()

    def epoch_begin(self, run_context):
        """ Reset loss array and timer"""
        self.losses = []
        self.epoch_time = time.time()

    def epoch_end(self, run_context):
        """ Calculate epoch time and epoch average loss"""
        cb_params = run_context.original_args()
        epoch_mseconds = (time.time() - self.epoch_time) * 1000
        per_step_mseconds = epoch_mseconds / cb_params.batch_num
        print("epoch time: {:5.3f}, per step time: {:5.3f}, avg loss: {:5.3f}".format(epoch_mseconds, per_step_mseconds, np.mean(self.losses)))

    def step_begin(self, run_context):
        """ Record step time"""
        self.step_time = time.time()

    def step_end(self, run_context):
        """ Calculate step time and step average loss"""
        cb_params = run_context.original_args()
        step_mseconds = (time.time() - self.step_time) * 1000
        step_loss = cb_params.net_outputs

        if isinstance(step_loss, (tuple, list)) and isinstance(step_loss[0], ms.Tensor):
            step_loss = step_loss[0]
        if isinstance(step_loss, ms.Tensor):
            step_loss = np.mean(step_loss.asnumpy())

        self.losses.append(step_loss)
        cur_step_in_epoch = (cb_params.cur_step_num - 1) % cb_params.batch_num

        # Only index the schedule when it actually covers the current step.
        step_index = cb_params.cur_step_num - 1
        if 0 <= step_index < self.lr_init_len:
            cur_lr = float(self.lr_init[step_index])
        else:
            cur_lr = float('nan')

        # The learning rate is printed in scientific notation: with three fixed decimals
        # a rate such as 1e-4 would always show up as 0.000.
        print("epoch: [{:3d}/{:3d}], step:[{:5d}/{:5d}], loss:[{:5.3f}/{:5.3f}], time:[{:5.3f}], lr:[{:.3e}]".format(
            cb_params.cur_epoch_num -
            1, cb_params.epoch_num, cur_step_in_epoch, cb_params.batch_num, step_loss,
            np.mean(self.losses), step_mseconds, cur_lr))


### 3.5模型包装

这一步包装好训练用的模型，定义好callback，确定ckpt保存位置

In [14]:
# Bundle network, loss, optimizer and loss scaling into a trainable model.
model = ms.Model(net, loss_fn=loss, optimizer=opt, loss_scale_manager=loss_scale)
# The monitor receives the full learning-rate schedule as a numpy array.
cb = [Monitor(lr_init=lr.asnumpy())]
# NOTE: no path separator is inserted between the two parts, so with
# save_checkpoint_path './checkpoint' the directory becomes './checkpointckpt_/'.
ckpt_save_dir = config['save_checkpoint_path'] + "ckpt_" + "/"
if config['save_checkpoint']:
    # The checkpoint interval is configured in epochs and converted to steps here.
    config_ck = CheckpointConfig(save_checkpoint_steps=config['save_checkpoint_epochs'] * step_size, keep_checkpoint_max=config['keep_checkpoint_max'])
    ckpt_cb = ModelCheckpoint(prefix="FaceAlignment_2D", directory=ckpt_save_dir, config=config_ck)
    cb += [ckpt_cb]

## 4 开始训练

使用包装好的model进行训练，会以参数'save_checkpoint_epochs'为间隔保存ckpt文件于./checkpointckpt_目录下，第一次训练的命名规则为 'FaceAlignment_2D-{epoch}_{step}.ckpt'

In [None]:
model.train(epoch_size, dataset, callbacks=cb, dataset_sink_mode=False)

## 5 评估

这一部分主要对训练出来的模型效果进行评估，主要的参考标准有：
对于每一张图片，输出：
    ION：非精度指标：双眼外侧眼角横向坐标差值，单位为像素
    MNE：精度指标：所有预测关键点与真实关键点的距离误差，单位为ION。
    ERR：精度指标：所有输出通道（388通道）误差之和，可以理解为所有输出点位与真实点位的曼哈顿距离总和，单位为像素。
在所有照片评估完后，输出：
    AUC 0.1 precision：所有图片的全部输出通道中，经ION归一化后的误差低于0.1的通道所占比例
    AUC 0.2 precision：所有图片的全部输出通道中，经ION归一化后的误差低于0.2的通道所占比例
    Mean Normalized Error：所有图片的预测关键点与真实关键点的距离误差经过ION归一化后的平均值。

### 5.1 数据读取与增强

In [4]:
def dataload(mindrecord_path):
    """
    Open a MindRecord file and report how many samples it holds.

    Args:
        mindrecord_path(string): mindrecord path

    Returns:
        MindDataset exposing the columns 'image' and 'label'

    Examples:
        >>> dataload('/mnt/Generated.mindrecord')
    """
    dataset = ds.MindDataset(mindrecord_path, columns_list=["image", "label"])
    # One full pass over the data is the price for the sample count.
    count = sum(1 for _ in dataset.create_dict_iterator(output_numpy=True))
    print("Got {} samples in Total, Load Successful".format(count))
    return dataset

In [None]:
def eval_data_preprocess(dataset):
    """
    Apply the evaluation-time image pipeline to a dataset.

    The 'image' column is normalized, converted from HWC to CHW and cast to float32;
    the result is batched with batch size 1.

    Args:
        dataset(mindrecord dataset): Loaded Dataset

    Returns:
        data_set(mindrecord dataset): Preprocessed Dataset
    """
    image_ops = [
        ds.vision.c_transforms.Normalize(mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
                                         std=[0.229 * 255, 0.224 * 255, 0.225 * 255]),
        ds.vision.c_transforms.HWC2CHW(),
        ds.transforms.c_transforms.TypeCast(ms.float32),
    ]
    mapped = dataset.map(operations=image_ops, input_columns="image", num_parallel_workers=1)
    return mapped.batch(batch_size=1, drop_remainder=True)

### 5.2 正式评估

正式评估需要用到权重文件，请在pre_trained_path中填入ckpt文件路径。
定义网络、读入数据集、加载权重文件、打包模型，对数据集中的每一张图片进行预处理以及预测并记录上述标准下的误差。
最后整合所有误差信息并输出到屏幕。

In [None]:
# Evaluate a trained checkpoint on the MindRecord dataset: predict every image,
# record the normalized errors and save a visualisation of the predicted points.
pre_trained_path = './checkpointckpt_/FaceAlignment_2D-1000_2000.ckpt'
channel = 388
predict_dir = './predict/'
# NOTE(review): cv2.imwrite is not expected to create missing directories, so make sure it exists.
os.makedirs(predict_dir, exist_ok=True)
net = Facealignment2d(output_channel=channel)
dataset_raw = dataload('Helen_192_no_enhance_do_clip')
param_dict = load_checkpoint(pre_trained_path)
load_param_into_net(net, param_dict)
model = ms.Model(net)
i = 0
mnes = []
errs = []
for item in dataset_raw.create_dict_iterator(output_numpy=True):
    # Wrap the single image in a dataset so it runs through the evaluation preprocessing.
    img = [item['image'].copy()]
    dataset_one = ds.GeneratorDataset(source=img, column_names=["image"])
    dataset_ready = eval_data_preprocess(dataset_one)
    output_one = []
    for item_one in dataset_ready.create_dict_iterator(output_numpy=True):
        output_one = model.predict(Tensor(item_one['image']))
    target_output = item['label'].copy().reshape((channel, 1))
    output_np = output_one.asnumpy().reshape((channel, 1))
    # ION: absolute difference between label channels 250 and 290, used to normalize the errors.
    ion = np.abs(target_output[250] - target_output[290])
    err = np.abs(target_output - output_np)
    errs.append(np.true_divide(err, ion))
    tmp = np.sum(err)
    mne = np.true_divide(tmp, ion * channel)
    mnes.append(mne)
    print("Cur Img Index : " + str(i))
    print("ION : " + str(ion))
    print("MNE : " + str(mne))
    print("ERR : " + str(tmp))
    # Stored images were divided by 255 during dataset creation, so 255 restores the pixel range.
    img[0] = img[0] * 255
    for j in range(channel // 2):
        # Index down to scalars explicitly instead of calling int() on 1-element arrays.
        cv2.circle(img[0], (int(output_np[j * 2, 0]), int(output_np[j * 2 + 1, 0])), 2, (0, 0, 255), 1)
    cv2.imwrite(predict_dir + str(i) + '.jpg', img[0])
    i += 1
total_count = i * channel
print(len(errs))
# Count, over all images and channels, the normalized errors below each threshold.
all_errs = np.concatenate(errs) if errs else np.empty((0, 1))
positive_1 = int(np.count_nonzero(all_errs < 0.1))
positive_2 = int(np.count_nonzero(all_errs < 0.2))
meannormerror = np.array(mnes).sum() / i
print("AUC 0.1 precision : " + str(positive_1 / total_count))
print("AUC 0.2 precision : " + str(positive_2 / total_count))
print("Mean Normalized Error : " + str(meannormerror))

## 6 推理

推理基于Retinaface的输出结果进行，也可以独立进行。
若进行独立推理，则要求每张输入照片尽可能只包含一个人脸，指定参数的时候指定一个文件夹就好。
若基于retinaface的数据结果进行推理，则需要指定参数：retinaface预测产生的json文件路径以及原始图片所在文件夹

### 6.1 推理准备

定义文件夹读取函数、数据预处理函数如下：
    文件夹读取函数用于从文件夹中读取所有图片，指定的路径参数不要以'/'结尾。使用的时候请确保该文件夹下无非图片文件！
    数据预处理函数则用于对输入图片做基本的归一化。

In [8]:
def read_dir(dir_path):
    """
    Recursively collect the image files below a directory.

    Files are recognised by their extension (.jpg, .png, .bmp, .jpeg, compared
    case-insensitively). Files found in sub-directories are placed in front of the
    files collected so far.

    Args:
        dir_path(str): Directory to scan, must not end with '/'.

    Returns:
        list of str, paths built as dir_path + '/' + name.

    Raises:
        ValueError: If dir_path ends with '/'.
        NotADirectoryError: If dir_path is not an existing directory.
    """
    # Raising a plain string is itself a TypeError in Python 3, so real exception
    # classes carry the original messages.
    if dir_path.endswith('/'):
        raise ValueError("Do not tail with /")
    if not os.path.isdir(dir_path):
        raise NotADirectoryError("Error,not a dir")

    image_suffixes = ('.jpg', '.png', '.bmp', '.jpeg')
    all_files = []
    for name in os.listdir(dir_path):
        full_path = dir_path + '/' + name
        if os.path.isdir(full_path):
            # Load File Inside Child Folder
            all_files = read_dir(full_path) + all_files
        elif os.path.splitext(full_path)[1].lower() in image_suffixes:
            all_files.append(full_path)
    return all_files

def data_preprocess(data_set, batch_size=1):
    """
    Apply the inference-time image pipeline to a dataset.

    The 'image' column is normalized, converted from HWC to CHW and cast to float32;
    the result is batched and incomplete batches are dropped.

    Args:
        data_set: Dataset with an 'image' column.
        batch_size(int): Batch size. Default: 1.

    Returns:
        The preprocessed and batched dataset.
    """
    image_ops = [
        ds.vision.c_transforms.Normalize(mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
                                         std=[0.229 * 255, 0.224 * 255, 0.225 * 255]),
        ds.vision.c_transforms.HWC2CHW(),
        ds.transforms.c_transforms.TypeCast(ms.float32),
    ]
    mapped = data_set.map(operations=image_ops, input_columns="image", num_parallel_workers=1)
    return mapped.batch(batch_size, drop_remainder=True)

### 6.2 独立推理

独立推理只是将图片打包使用模型预测，不涉及准备阶段提到的json和裁剪问题。
这里指定好目标文件夹以及预训练模型的路径，执行后会将照片和标记输出到源文件夹下。

In [None]:
image_dir = './images/facealignment/infer'
pre_trained = './checkpointckpt_/FaceAlignment_2D-2150_2000.ckpt'

In [None]:
def infer(image_dir, pre_trained):
    """
    Predict the landmarks of every image below a directory.

    For each image two files are written next to it: '<file>_predict' with the
    predicted integer coordinates as comma-separated text and '<file>_predict.jpg'
    with the points drawn on the resized image. Images that cannot be read are skipped.

    Args:
        image_dir(str): Directory containing the images, must not end with '/'.
        pre_trained(str): Path of the checkpoint file to load.

    Returns:
        None
    """
    image_paths = read_dir(image_dir)
    net = Facealignment2d(output_channel=388)
    param_dict = load_checkpoint(pre_trained)
    load_param_into_net(net, param_dict)
    model = ms.Model(net)
    for file in image_paths:
        image = cv2.imread(file)
        if image is None:
            # cv2.imread returns None for unreadable files; resizing None would crash.
            print("Skip unreadable image: " + file)
            continue
        image = cv2.resize(image, (192, 192))
        raw_image = image.copy()
        image = image / 255
        # Wrap the single image in a dataset so it runs through the shared preprocessing.
        dataset_one = ms.dataset.GeneratorDataset(source=[image.copy()], column_names=["image"])
        dataset = data_preprocess(dataset_one, batch_size=1)

        for item_one in dataset.create_dict_iterator(output_numpy=True):
            output_one = model.predict(ms.Tensor(item_one['image']))
            # asnumpy() is the same tensor-to-numpy conversion the evaluation cell uses.
            result = output_one.asnumpy().astype(int).reshape((194, 2))
            np.savetxt(file+"_predict", result, delimiter=",")

            for i in range(194):
                raw_image = cv2.circle(raw_image, (int(result[i, 0]), int(result[i, 1])), 2, (0, 0, 255), 1)
            cv2.imwrite(file+"_predict.jpg", raw_image)

infer(image_dir, pre_trained)

### 6.3 联合推理

这一步将会对图片（包含多张人脸的图片）使用retinaface识别输出的json文件进行裁剪。会先输出裁剪结果，再进行常规的推理。
需要定义的有：json文件解析函数、图片裁剪函数。
    1.json文件解析函数用于解析retinaface输出的json文件，包含对图片中各个人脸位置的表达。在解析后会输出裁剪后的人脸到文件夹
    2.图片裁剪函数用于从原图中裁剪出各个人脸。该函数会处理超过边界的人脸框。
在这一步处理的时候请确保目录结构如下

```text
├── （输入的路径）/
    ├── infer（原图路径）/
        ├── 1.jpg
        ├── 2.jpg
        ├── 3.jpg
        ├── ...
    ├── infer.json
    ├── single（如果没有，请新建这个文件夹）/
```


In [2]:
def resolve_json(json_path):
    """
    Crop the detected faces described in '<json_path>/infer.json'.

    Every bounding box whose fifth element (the score) is above 0.95 is cropped from
    its image, resized to 192x192 and written to '<json_path>/single/<counter>.jpg'.
    Images that cannot be read and crops without pixels are skipped.

    Args:
        json_path(str): Directory containing 'infer.json', the images referenced by it
            and the output folder 'single'.

    Returns:
        None
    """
    # The context manager closes the file, which the plain open() call never did.
    with open(json_path + '/infer.json', 'r', encoding='utf-8') as json_file:
        description = json.load(json_file)

    counter = 0
    # For each Picture
    for img in description.values():
        img_path = img['img_path']
        read_img = cv2.imread(json_path+"/"+img_path)
        bboxes = img['bboxes']
        if read_img is None:
            # cv2.imread returns None for unreadable files.
            print("Skip unreadable image: " + json_path + "/" + img_path)
            continue

        # For Each Face
        for bbox in bboxes:
            if bbox[4] > 0.95:
                img_clipped = pic_clip(read_img, bbox[0], bbox[1], bbox[2], bbox[3])
                if img_clipped.size == 0:
                    # A box completely outside the image leaves nothing to resize.
                    continue
                img_resized = cv2.resize(img_clipped, (192, 192))
                cv2.imwrite(json_path+'/single/' + str(counter) + ".jpg", img_resized)
                counter += 1

def pic_clip(img, x, y, width, height):
    """
    Crop a box given as origin plus size, limited to the image borders.

    Args:
        img(ndarray): Image indexed as [row, column, ...].
        x: Left edge of the box.
        y: Upper edge of the box.
        width: Box width.
        height: Box height.

    Returns:
        The part of the box that lies inside the image, as a slice of img.
    """
    row_limit, col_limit = img.shape[0], img.shape[1]
    left = 0 if x < 0 else x
    top = 0 if y < 0 else y
    right = x + width if x + width < col_limit else col_limit
    bottom = y + height if y + height < row_limit else row_limit
    return img[int(top):int(bottom), int(left):int(right)]

In [5]:
path = '/mnt/c/Users/27976/Documents/WeChat Files/wxid_kzy8nz8xw3ug22/FileStorage/MsgAttach/4a643c27dd06f319f9721630b8d045e7/File/2022-07/infer'
resolve_json(path)

之后的内容相当于正常的独立推理

In [6]:
infer(path+'/single', pre_trained)

## 7 推理效果

![0.jpeg_predict.jpg](./images/facealignment/infer/0.jpeg_predict.jpg)
![1.jpeg_predict.jpg](./images/facealignment/infer/1.jpeg_predict.jpg)
![2.jpeg_predict.jpg](./images/facealignment/infer/2.jpeg_predict.jpg)
![3.jpeg_predict.jpg](./images/facealignment/infer/3.jpeg_predict.jpg)
![4.jpeg_predict.jpg](./images/facealignment/infer/4.jpeg_predict.jpg)
![5.jpeg_predict.jpg](./images/facealignment/infer/5.jpeg_predict.jpg)