# 锚框

先解释两个概念：宽高比率（aspect ratio）和缩放比（scale ratio）。

假设原图的尺寸为宽 $w$，高 $h$，缩放后的尺寸为宽 $w_1$，高 $h_1$，则有：

$$
\begin{cases}
\frac{w_1 h_1}{wh} = s^2\\
\frac{w_1}{h_1} = \frac{w}{h} r
\end{cases}
$$

这里 $r$ 被称为宽高比率（aspect ratio），$s$ 被称为缩放比（scale ratio）。

可推导出，缩放后的图片尺寸为：

$$
\begin{cases}
w_1 = ws \sqrt{r}\\
h_1 = \frac{hs}{\sqrt{r}}
\end{cases}
$$

代码实现：

In [None]:
#@save
def multibox_prior(origin_size, sizes, ratios, **kwargs):
    """Generate anchor boxes of several shapes centred on every pixel.

    Args:
        origin_size: ``(height, width)`` of the pixel grid.
        sizes: Scale values. Every size is paired with ``ratios[0]``.
        ratios: Aspect ratios. ``ratios[1:]`` are paired with ``sizes[0]``.
        **kwargs: Forwarded unchanged to ``np.array`` and ``np.arange``.

    Returns:
        Array of shape ``(1, height * width * boxes_per_pixel, 4)`` where
        ``boxes_per_pixel = len(sizes) + len(ratios) - 1``. Each row is
        ``(xmin, ymin, xmax, ymax)``; centres are fractions of the grid
        width and height because the step is ``1 / width`` and ``1 / height``.
    """
    rows, cols = origin_size
    scale_arr = np.array(sizes, **kwargs)
    ratio_arr = np.array(ratios, **kwargs)
    anchors_per_cell = len(sizes) + len(ratios) - 1

    # A pixel is 1 x 1, so adding 0.5 moves the anchor point to its centre;
    # multiplying by 1 / rows (or 1 / cols) rescales the index to a fraction.
    ys = (np.arange(rows, **kwargs) + 0.5) * (1.0 / rows)
    xs = (np.arange(cols, **kwargs) + 0.5) * (1.0 / cols)
    grid_x, grid_y = np.meshgrid(xs, ys)
    flat_x = grid_x.reshape(-1)
    flat_y = grid_y.reshape(-1)

    # Widths and heights of the `anchors_per_cell` shapes.
    first_root = np.sqrt(ratio_arr[0])
    other_roots = np.sqrt(ratio_arr[1:])
    widths = np.concatenate((scale_arr * first_root, sizes[0] * other_roots))
    widths = widths * rows / cols  # compensate for a non-square grid
    heights = np.concatenate((scale_arr / first_root, sizes[0] / other_roots))

    # Half extents as (-w/2, -h/2, +w/2, +h/2), one row per shape, then the
    # whole set of shapes is repeated once for every pixel.
    half_extents = np.stack((-widths, -heights, widths, heights)).T / 2
    offsets = np.tile(half_extents, (rows * cols, 1))

    # Each centre is repeated `anchors_per_cell` times in a row so that it
    # lines up with the tiled offsets above.
    centres = np.stack([flat_x, flat_y, flat_x, flat_y], axis=1)
    centres = np.repeat(centres, anchors_per_cell, axis=0)

    return (centres + offsets)[np.newaxis, ...]

In [None]:
import numpy as np


class AnchorBase:
    """Base window from which anchor boxes are derived.

    The current reference box is stored as ``[x_min, y_min, x_max, y_max]``
    with inclusive pixel coordinates, so a window of width ``w`` spans
    ``0 .. w - 1`` and its width is recovered as ``x_max - x_min + 1``.

    Args:
        base_size: Size of the base (sliding) window. Either a scalar (square
            window), a one-element sequence (square window) or a
            two-element ``(width, height)`` sequence.
        scales: Scale values, stored as a NumPy array in ``self.scales``.
        ratios: Ratio values, stored as a NumPy array in ``self.ratios``.

    Raises:
        ValueError: If ``base_size`` is neither a scalar nor a sequence of
            one or two elements.
    """

    def __init__(self, base_size, scales, ratios):
        self.scales = np.array(scales)
        self.ratios = np.array(ratios)
        # Number of anchors: one per (ratio, scale) combination.
        self.num_anchors = len(self.ratios) * len(self.scales)
        self.base_size = base_size  # size of the sliding window, as passed in
        self._w, self._h = self._parse_base_size(base_size)

        # Build [1, 1, w, h] and shift to 0-based inclusive coordinates.
        self._anchor = np.array([1, 1, self._w, self._h]) - 1

    @staticmethod
    def _parse_base_size(base_size):
        """Normalise ``base_size`` to a ``(width, height)`` pair."""
        # np.ndim is 0 for Python and NumPy scalars alike, so floats and
        # np.integer values are treated the same way as a plain int.
        if np.ndim(base_size) == 0:
            return base_size, base_size
        if np.ndim(base_size) == 1:
            dims = tuple(base_size)
            if len(dims) == 1:
                return dims[0], dims[0]
            if len(dims) == 2:
                return dims
        raise ValueError(
            "base_size must be a scalar or a sequence of one or two "
            "elements, got {!r}".format(base_size)
        )

    @property
    def anchor(self):
        """Current reference box ``[x_min, y_min, x_max, y_max]``."""
        return self._anchor

    @anchor.setter
    def anchor(self, new_anchor):
        self._anchor = new_anchor

    @property
    def w(self):
        """Width of the current reference box."""
        return self.anchor[2] - self.anchor[0] + 1

    @property
    def h(self):
        """Height of the current reference box."""
        return self.anchor[3] - self.anchor[1] + 1

    @property
    def size(self):
        """Area (``w * h``) of the current reference box."""
        return self.w * self.h

    @property
    def _whctrs(self):
        """Centre ``[x_ctr, y_ctr]`` of the current reference box."""
        x_ctr = self.anchor[0] + 0.5 * (self.w - 1)
        y_ctr = self.anchor[1] + 0.5 * (self.h - 1)
        return np.array([x_ctr, y_ctr])

    @staticmethod
    def _coordinate(aspect, ctr):
        """Convert ``(width, height)`` rows into boxes centred on ``ctr``.

        Args:
            aspect: 2-D array whose rows are ``(width, height)``.
            ctr: Centre ``[x_ctr, y_ctr]``, broadcast against every row.

        Returns:
            2-D array whose rows are ``[x_min, y_min, x_max, y_max]``.
        """
        # Inclusive coordinates: a box of width w extends (w - 1) / 2 to
        # each side of its centre.
        k = (aspect - 1) / 2
        return np.concatenate([ctr - k, ctr + k], axis=1)


class Anchor(AnchorBase):
    """Anchor generator that applies ratios first and scales second.

    For every ratio a box with roughly the base window's area is built
    around the base centre; each of those boxes is then enlarged by every
    scale. The result is stored in ``self.anchors``.
    """

    def __init__(self, base_size, scales, ratios):
        super().__init__(base_size, scales, ratios)
        self.anchors = self.gen_anchors()

    @property
    def ratio_aspects(self):
        """``(width, height)`` rows for every ratio, based on the current box.

        Each ratio is used as ``height / width``: the width is
        ``round(sqrt(size / ratio))`` and the height is
        ``round(width * ratio)``.
        """
        size_ratios = self.size / self.ratios
        ws = np.round(np.sqrt(size_ratios))
        hs = np.round(ws * self.ratios)
        return np.stack([ws, hs], axis=1)

    @property
    def ratio_anchors(self):
        """Boxes for every ratio, centred on the current reference box."""
        return self._coordinate(self.ratio_aspects, self._whctrs)

    @property
    def scale_aspects(self):
        """``(width, height)`` rows of the current box multiplied by each scale."""
        ws = self.w * self.scales
        hs = self.h * self.scales
        return np.stack([ws, hs], axis=1)

    @property
    def scale_anchors(self):
        """Boxes for every scale, centred on the current reference box."""
        return self._coordinate(self.scale_aspects, self._whctrs)

    def gen_anchors(self):
        """Return all anchors, ordered ratio-major then scale.

        ``scale_anchors`` reads ``self.anchor``, so the reference box is
        temporarily replaced by each ratio anchor in turn. The original
        reference box is always restored afterwards, which keeps ``w``,
        ``h``, ``size`` and repeated calls consistent.

        Returns:
            Array of shape ``(len(ratios) * len(scales), 4)`` whose rows are
            ``[x_min, y_min, x_max, y_max]``.
        """
        base_anchor = self.anchor
        per_ratio = []
        try:
            # ratio_anchors is evaluated once, before the reference box is
            # swapped, so every ratio is derived from the base window.
            for ratio_anchor in self.ratio_anchors:
                self.anchor = ratio_anchor
                per_ratio.append(self.scale_anchors)
        finally:
            self.anchor = base_anchor
        if not per_ratio:
            # No ratios: np.concatenate would reject an empty list.
            return np.empty((0, 4))
        return np.concatenate(per_ratio)

In [None]:
scales = [8, 16, 32]  # multipliers applied to both width and height of each ratio anchor (see Anchor.scale_aspects)
ratios = [0.5, 1, 2]  # aspect ratios; Anchor.ratio_aspects uses them as height / width
base_size = 16  # side length of the square base (sliding) window

# NOTE(review): `self` is an ordinary module-level variable here, not a method argument.
self = Anchor(base_size, scales, ratios)

self.anchors

In [None]:
class Anchor(AnchorBase):
    """Anchor generator that combines every scale with every ratio at once.

    Width and height multipliers are computed for the full
    ``len(scales) x len(ratios)`` grid by broadcasting, then converted to
    boxes centred on the base window. The constructor is inherited from
    ``AnchorBase`` unchanged.
    """

    @property
    def W(self):
        """Rounded width multipliers ``w_1 / w``.

        Shape is ``(len(scales), len(ratios))``; entry ``[i, j]`` is
        ``round(scales[i] / sqrt(ratios[j]))``.
        """
        return np.round(self.scales[:, None] / np.sqrt(self.ratios))

    @property
    def H(self):
        """Rounded height multipliers ``h_1 / h``.

        Computed from the already rounded ``W`` as ``round(W * ratios)``,
        so each ratio acts as ``height / width``.
        """
        return np.round(self.W * self.ratios)

    @property
    def aspect(self):
        """All ``(width, height)`` multiplier pairs, one row per combination.

        Rows are ordered scale-major (all ratios of the first scale first).
        """
        return np.stack([self.W.flatten(), self.H.flatten()], axis=1)

    @property
    def base_anchors(self):
        """Boxes built directly from the multipliers around the base centre.

        NOTE(review): the multipliers are treated as pixel sizes here, so
        these boxes are only in pixel units when the base window is 1 x 1.
        """
        return self._coordinate(self.aspect, self._whctrs)

    @property
    def anchors(self):
        """Final anchors in pixel coordinates, centred on the base window.

        The multipliers are converted to pixel sizes *before* the boxes are
        centred. Scaling the corner coordinates instead would also scale the
        centre and yield widths of ``w * (W - 1) + 1`` rather than ``w * W``.

        Returns:
            Array of shape ``(len(scales) * len(ratios), 4)`` whose rows are
            ``[x_min, y_min, x_max, y_max]``.
        """
        pixel_sizes = self.aspect * np.array([self.w, self.h])
        return self._coordinate(pixel_sizes, self._whctrs)