# Generate anchor boxes

> Methods to generate anchor boxes of different aspect ratios. 

In [None]:
#| default_exp anchor

In [None]:
#| export
import inspect

import math
import numpy as np
from fastcore.foundation import L, mask2idxs
from fastcore.utils import gt
from numpy.typing import ArrayLike 
from typing import Union
import json
from collections import defaultdict 
import warnings

from pybx.ops import named_idx
from pybx.basics import get_bx, stack_bxs_inplace, BX_TYPE
from pybx.utils import get_edges, validate_boxes, as_tuple, reassign_label
from pybx.excepts import NoGroundTruthBxs

To generate anchor boxes, we need three basic pieces of information:

- Input image size, `image_sz`: To position our anchor boxes within the maximum 
coordinates (`width`, `height`) of the image.
- Feature map size, `feature_sz`: Feature map is the size (`width`, `height`) 
of the output of a convolutional operation. A $10\times10$ feature map would mean 
$10\times10$ local receptive field locations can be traced back into the 
input image. These 100 receptive field locations ($10\times10=100$) in the input image 
would act as our initial anchor box candidates.

![](https://ars.els-cdn.com/content/image/1-s2.0-S0925231217314169-gr4.jpg)

- Aspect ratio of anchor boxes, `asp_ratio`: To generate anchor boxes with 
different `width` to `height` ratio (default `asp_ratio=1`).

In [None]:
# | export
def bx(
    image_sz: (int, tuple),
    feature_sz: (int, tuple),
    asp_ratio: float = None,
    clip: bool = True,
    named: bool = True,
    anchor_sfx: str = "a",
    min_visibility: float = 0.25,
) -> ArrayLike:
    """Build the anchor boxes of one feature map for one aspect ratio.

    Every cell of the feature map yields one candidate box. The box keeps the
    cell center, while its width is multiplied and its height divided by
    ``sqrt(asp_ratio)``.

    Parameters
    ----------
    image_sz : (int,tuple)
        image size (width, height); normalised with `as_tuple`
    feature_sz : (int,tuple)
        feature map size (width, height); normalised with `as_tuple`
    asp_ratio : float, optional
        aspect ratio (width:height); ``None`` is treated as 1.0
    clip : bool, optional
        forwarded to `validate_boxes` (whether to clip boxes), by default True
    named : bool, optional
        whether to return (coords, labels), by default True
    anchor_sfx : str, optional
        leading part of every generated anchor label, by default "a"
    min_visibility : float, optional
        forwarded to `validate_boxes`; threshold on how much of a box must
        remain after clipping for the box to be kept, by default 0.25

    Returns
    -------
    ArrayLike
        anchor box coordinates as (x_min, y_min, x_max, y_max);
        if named=True, a tuple (coords, labels) is returned instead.
    """
    image_sz, feature_sz = as_tuple(image_sz), as_tuple(feature_sz)
    if asp_ratio is None:
        asp_ratio = 1.0

    # corner coordinates of every feature map cell projected onto the image
    cell_min = get_edges(image_sz, feature_sz, op="noop")
    cell_max = get_edges(image_sz, feature_sz, op="add")
    cells = np.hstack([cell_min, cell_max])
    lower, upper = cells[:, :2], cells[:, 2:]

    cell_wh = upper - lower  # (w, h) of each cell
    centers = upper - cell_wh / 2  # (cx, cy) of each cell

    # stretch width / shrink height around the unchanged center
    ratio_root = math.sqrt(asp_ratio)
    anchor_wh = np.stack(
        [cell_wh[:, 0] * ratio_root, cell_wh[:, 1] / ratio_root], axis=-1
    )
    anchors = np.hstack([centers - anchor_wh / 2, centers + anchor_wh / 2])

    # drop / clip boxes that do not satisfy the visibility requirement
    valid = validate_boxes(
        anchors, image_sz, feature_sz, clip=clip, min_visibility=min_visibility
    )

    if not named:
        return get_bx(valid, None).coords

    prefix = f"{anchor_sfx}_{feature_sz[0]}x{feature_sz[1]}_{asp_ratio:.1f}_"
    boxes = get_bx(valid, named_idx(len(valid), prefix))
    return boxes.coords, boxes.label


In [None]:
coords_1, labels_1 = bx(100, 10, 0.5)

In [None]:
coords_1

[[1, 0, 8, 12],
 [11, 0, 18, 12],
 [21, 0, 28, 12],
 [31, 0, 38, 12],
 [41, 0, 48, 12],
 [51, 0, 58, 12],
 [61, 0, 68, 12],
 [71, 0, 78, 12],
 [81, 0, 88, 12],
 [91, 0, 98, 12],
 [1, 7, 8, 22],
 [11, 7, 18, 22],
 [21, 7, 28, 22],
 [31, 7, 38, 22],
 [41, 7, 48, 22],
 [51, 7, 58, 22],
 [61, 7, 68, 22],
 [71, 7, 78, 22],
 [81, 7, 88, 22],
 [91, 7, 98, 22],
 [1, 17, 8, 32],
 [11, 17, 18, 32],
 [21, 17, 28, 32],
 [31, 17, 38, 32],
 [41, 17, 48, 32],
 [51, 17, 58, 32],
 [61, 17, 68, 32],
 [71, 17, 78, 32],
 [81, 17, 88, 32],
 [91, 17, 98, 32],
 [1, 27, 8, 42],
 [11, 27, 18, 42],
 [21, 27, 28, 42],
 [31, 27, 38, 42],
 [41, 27, 48, 42],
 [51, 27, 58, 42],
 [61, 27, 68, 42],
 [71, 27, 78, 42],
 [81, 27, 88, 42],
 [91, 27, 98, 42],
 [1, 37, 8, 52],
 [11, 37, 18, 52],
 [21, 37, 28, 52],
 [31, 37, 38, 52],
 [41, 37, 48, 52],
 [51, 37, 58, 52],
 [61, 37, 68, 52],
 [71, 37, 78, 52],
 [81, 37, 88, 52],
 [91, 37, 98, 52],
 [1, 47, 8, 62],
 [11, 47, 18, 62],
 [21, 47, 28, 62],
 [31, 47, 38, 62],
 [41, 

Usually multiple anchor boxes with different `feature_sz` and `asp_ratio` are 
needed. This requirement arises in the case of multiscale object detection.

For multiscale object detection, feature maps from different convolution 
operations of the network are used to trace back into the input image, to 
generate anchor boxes. 
The `bxs` method of `pybx` provides this possibility.

In [None]:
# | export


def bxs(
    image_sz: (int, tuple),
    feature_szs: list = None,
    asp_ratios: list = None,
    named: bool = True,
    **kwargs,
) -> ArrayLike:
    """Calculate anchor box coords given an image size and multiple
    feature sizes for multiple aspect ratios.

    `bx` is called once for every (feature size, aspect ratio) combination and
    the results are stacked in that order (feature sizes outermost).

    Parameters
    ----------
    image_sz : (int,tuple)
        image size (width, height)
    feature_szs : list, optional
        list of feature map sizes, each feature map size being an int or tuple, by default [8, 2]
    asp_ratios : list, optional
        list of aspect ratios for anchor boxes, each aspect ratio being a float calculated by (width:height), by default [1 / 2.0, 1.0, 2.0]
    named : bool, optional
        whether to return (coords, labels), by default True
    **kwargs
        forwarded unchanged to every `bx` call (e.g. `clip`, `anchor_sfx`,
        `min_visibility`)

    Returns
    -------
    ArrayLike
        stacked anchor box coordinates, one row of four values per box;
        if named=True, a tuple (coords, labels) is returned instead.

    Raises
    ------
    ValueError
        if `feature_szs` or `asp_ratios` is empty, so that no box can be generated.
    """
    image_sz = as_tuple(image_sz)
    feature_szs = [8, 2] if feature_szs is None else feature_szs
    feature_szs = [as_tuple(fsz) for fsz in feature_szs]
    asp_ratios = [1 / 2.0, 1.0, 2.0] if asp_ratios is None else asp_ratios
    # bx() is always called with named=True so that labels are available;
    # the `named` flag of bxs() only decides what is returned to the caller.
    per_combination = [
        bx(image_sz, f, ar, named=True, **kwargs)
        for f in feature_szs
        for ar in asp_ratios
    ]
    if not per_combination:
        # previously surfaced as an opaque "not enough values to unpack" error
        raise ValueError(
            "feature_szs and asp_ratios must each contain at least one item."
        )
    coords_, labels_ = zip(*per_combination)
    coords_ = np.vstack(coords_)
    if not named:
        # already stacked; no second np.vstack needed
        return coords_
    labels_ = L([l_ for lab_ in labels_ for l_ in lab_])
    return coords_, labels_


In [None]:
coords, labels = bxs(100, [10, 8, 5, 2], [1, 0.5, 0.3])

In [None]:
coords.shape, len(labels)

((587, 4), 587)

All methods work with asymmetric `image_sz` (and/or asymmetric `feature_szs`):

In [None]:
coords, labels = bxs((100, 200), [10, 8, 5, 2], [1, 0.5, 0.3])

In [None]:
coords.shape, len(labels)

((654, 4), 654)

# Ground truth anchor boxes
Ground truth boxes are anchor boxes with maximum IOU with the true annotations.

Load actual annotations.

In [None]:
# Read the annotations inside a context manager so the file handle is closed
# as soon as parsing is done instead of being left to the garbage collector.
with open('../data/annots.json') as f:
    true_annots = json.load(f)
true_annots

[{'x_min': 130, 'y_min': 63, 'x_max': 225, 'y_max': 180, 'label': 'clock'},
 {'x_min': 13, 'y_min': 158, 'x_max': 90, 'y_max': 213, 'label': 'frame'}]

Convert to MultiBx for convenience:

In [None]:
true_annots_as_bx = get_bx(true_annots)
true_annots_as_bx

MultiBx(coords=[[130, 63, 225, 180], [13, 158, 90, 213]], label=['clock', 'frame'])

Generate anchor boxes for object detection task, given that we know: 

```py
image_sz = (256, 256)  # to know the upper bounds of candidate bounding boxes
feature_sz = [20, 10, 8, 3]
asp_ratio = [1, 0.5, 0.3]
```

In [None]:
coords, labels = bxs((256, 256), [20, 10, 8, 3], [1, 0.5, 0.3])

In [None]:
coords.shape, len(labels)

((1741, 4), 1741)

In [None]:
n_boxes = coords.shape[0]
n_boxes

1741

Store coords as multibx for convenience.

In [None]:
coords_as_bx = get_bx(coords=coords, label=labels)

We can use the true annotations and anchor boxes to calculate the IOU.

In [None]:
true_annots_as_bx

MultiBx(coords=[[130, 63, 225, 180], [13, 158, 90, 213]], label=['clock', 'frame'])

In [None]:
len(true_annots_as_bx), len(coords_as_bx)

(2, 1741)

The question: for each true label in the provided true annotations, what are the possible ground truth anchor boxes?

In [None]:
for annots in true_annots_as_bx:
    print(annots)

BaseBx(coords=[[130, 63, 225, 180]], label=['clock'])
BaseBx(coords=[[13, 158, 90, 213]], label=['frame'])


In [None]:
# Collect, for every true annotation, its IOU with each of the `n_boxes` anchor
# boxes, grouped by the annotation's label.
# `gt_anchors_per_class` is only initialised here; this cell does not fill it.
gt_anchors_per_class = defaultdict(lambda: L())
iou_per_box = defaultdict(lambda: L())
for annots in true_annots_as_bx:
    label = annots.label[0]  # is a list of len 1
    # one IOU value per anchor box, in anchor order
    ious = [annots.iou(coords_as_bx[i]) for i in range(n_boxes)]
    iou_per_box[label].extend(ious)

In [None]:
iou_per_box.keys()

dict_keys(['clock', 'frame'])

In [None]:
max(iou_per_box['clock'])

0.34822804314329736

In [None]:
iou_per_box['clock'].argwhere(gt(0.3))  # indices of boxes with iou > 0.3

(#2) [1719,1728]

In [None]:
iou_per_box['frame'].argwhere(gt(0.3))  # indices of boxes with iou > 0.3

(#3) [1720,1729,1738]

In [None]:
(max(iou_per_box['clock'])), max(iou_per_box['frame'])  # add more anchor boxes to get better ground truth IOUs

(0.34822804314329736, 0.4488243430152144)

Improve the loop to return only those with good IOU:

In [None]:
# Same loop as before, but keep only the anchor boxes whose IOU with the true
# annotation is strictly greater than `iou_thresh`.
iou_thresh = 0.3
gt_anchors_per_class = defaultdict(lambda: L())
iou_per_box = defaultdict(lambda: L())
for annots in true_annots_as_bx:
    label = annots.label[0]  # is a list of len 1
    ious = L([annots.iou(coords_as_bx[i]) for i in range(n_boxes)])
    # indices of the anchor boxes that pass the threshold
    ious_filter = ious.argwhere(gt(iou_thresh))
    # report filtered box IOUs
    iou_per_box[label].extend(ious[ious_filter])
    # report selected boxes
    # NOTE(review): assigned (not extended), so a repeated label would replace
    # the boxes stored for the earlier annotation with the same label.
    gt_anchors_per_class[label] = stack_bxs_inplace(
        *[coords_as_bx[i] for i in ious_filter]
    )

In [None]:
iou_per_box['clock'], iou_per_box['frame']

((#2) [0.34,0.34822804314329736],
 (#3) [0.36643389750266303,0.4488243430152144,0.3523238380809595])

In [None]:
gt_anchors_per_class['clock']

MultiBx(coords=[[170, 85, 256, 170], [183, 67, 243, 188]], label=['a_3x3_1.0_5', 'a_3x3_0.5_5'])

Not very far from the original annotations. These scales and aspect ratios can also be read from the label. 
The finer the anchor boxes, the better the starting positions (ground truth anchor boxes).

In [None]:
true_annots[0]

{'x_min': 130, 'y_min': 63, 'x_max': 225, 'y_max': 180, 'label': 'clock'}

In [None]:
tmp = L([1, 2, 3])
msk = tmp.map(lambda x: x>1)

In [None]:
tmp[msk]

(#2) [2,3]

In [None]:
mask2idxs(msk)

[1, 2]

In [None]:
# | export


def get_gt_thresh_iou(
    true_annots,
    anchor_boxes,
    anchor_labels=None,
    iou_thresh=0.3,
    return_ious=False,
    return_masks=False,
    update_labels=True,
):
    """Calculate positive ground truth and extra positive ground truth bounding boxes based on iou threshold.

    An anchor box is positive for an annotation when its IOU with that
    annotation, rounded to 4 decimals, is greater than or equal to `iou_thresh`.
    Can result in uneven number of positive ground truth boxes per class.

    Args:
        true_annots (Any): True annotations, typically in `pascal_voc` format. Converted with `get_bx` unless already a `BX_TYPE`.
        anchor_boxes (Any): Candidate anchor boxes, typically calculated with `pybx.bxs`. Converted with `get_bx` unless already a `BX_TYPE`.
        anchor_labels (List, optional): Anchor box labels, will be overwritten with true labels if `update_labels=True`. If None, labels `bx_0`, `bx_1`, ... are generated. Defaults to None.
        iou_thresh (float, optional): IOU threshold to filter out negative ground truth anchor boxes. Defaults to 0.3.
        return_ious (bool, optional): Return IOU values for selected positive ground truth anchor boxes. Defaults to False.
        return_masks (bool, optional): Return boolean masks for all anchor boxes indicating if a box is positive (`True`) or negative (`False`) ground truth box. Defaults to False.
        update_labels (bool, optional): Overwrite with true annotations. Defaults to True.

    Returns:
        dict: positive ground truth anchor boxes per class; the value is `None` (and a `NoGroundTruthBxs` warning is issued) for a class without any positive anchor box
        dict: IOU of positive ground truth anchor boxes per class (empty unless `return_ious=True`)
        dict: boolean list indicating positive ground truth anchor boxes per class (empty unless `return_masks=True`)
    """
    gt_anchors_per_class = defaultdict(lambda: L())
    iou_per_class = defaultdict(lambda: L())
    mask_per_class = defaultdict(lambda: L())
    true_annots_as_bx = (
        get_bx(true_annots) if not isinstance(true_annots, BX_TYPE) else true_annots
    )
    anchor_labels = (
        [f"bx_{i}" for i in range(len(anchor_boxes))]
        if anchor_labels is None
        else anchor_labels
    )
    coords_as_bx = (
        get_bx(coords=anchor_boxes, label=anchor_labels)
        if not isinstance(anchor_boxes, BX_TYPE)
        else anchor_boxes
    )
    n_boxes = len(coords_as_bx)

    for annots in true_annots_as_bx:
        label = annots.label[0]  # is a list of len 1
        ious = L([round(annots.iou(coords_as_bx[i]), 4) for i in range(n_boxes)])
        mask = ious.map(lambda x: x >= iou_thresh)
        ious_filter = mask2idxs(mask=mask)

        if return_ious:
            # report filtered box IOUs
            iou_per_class[label].extend(ious[ious_filter])
        if return_masks:
            mask_per_class[label].extend(mask)

        # NOTE(review): anchors are assigned per label while IOUs and masks are
        # extended, so several annotations sharing one label keep only the
        # anchors of the last one - confirm whether labels are expected to be unique.
        if mask.sum() < 1:
            warnings.warn(
                NoGroundTruthBxs(
                    f"No good ground truth anchors found for label={label}, try lowering threshold (iou_thresh={iou_thresh}) or increasing candidates."
                )
            )
            gt_anchors_per_class[label] = None
        else:
            # report selected boxes, relabelled with the true label if requested
            gt_anchors_per_class[label] = stack_bxs_inplace(
                *[
                    reassign_label(coords_as_bx[i], label=[label])
                    if update_labels
                    else coords_as_bx[i]
                    for i in ious_filter
                ]
            )

    return dict(gt_anchors_per_class), dict(iou_per_class), dict(mask_per_class)

In [None]:
coords

array([[  0,   0,  12,  12],
       [ 12,   0,  25,  12],
       [ 25,   0,  38,  12],
       ...,
       [ 19, 135,  66, 256],
       [104, 135, 151, 256],
       [189, 135, 236, 256]])

In [None]:
gt_anchors_per_class, ious_per_class, mask_per_class = get_gt_thresh_iou(true_annots, coords, iou_thresh=0.3, return_ious=True)
gt_anchors_per_class

{'clock': MultiBx(coords=[[170, 85, 256, 170], [183, 67, 243, 188]], label=['clock', 'clock']),
 'frame': MultiBx(coords=[[0, 170, 85, 256], [12, 152, 72, 256], [19, 135, 66, 256]], label=['frame', 'frame', 'frame'])}

In [None]:
mask_per_class

{}

In [None]:
true_annots

[{'x_min': 130, 'y_min': 63, 'x_max': 225, 'y_max': 180, 'label': 'clock'},
 {'x_min': 13, 'y_min': 158, 'x_max': 90, 'y_max': 213, 'label': 'frame'}]

In [None]:
ious_per_class['clock']

(#2) [0.34,0.3482]

In [None]:
gt_anchors_per_class['clock'].coords

[[170, 85, 256, 170], [183, 67, 243, 188]]

If anchor box labels are passed, they can be preserved instead of overwriting with ground truth labels.

In [None]:
get_gt_thresh_iou([100, 150, 180, 300, 'hat'], coords, iou_thresh=0.3, anchor_labels=labels, update_labels=False, return_ious=True)

({'hat': MultiBx(coords=[[85, 170, 170, 256], [97, 152, 158, 256], [104, 135, 151, 256]], label=['a_3x3_1.0_7', 'a_3x3_0.5_7', 'a_3x3_0.3_7'])},
 {'hat': (#3) [0.453,0.4899,0.3921]},
 {})

True annots can also be a list containing the label as the last item.

In [None]:
get_gt_thresh_iou([100, 150, 180, 300, 'hat'], coords, iou_thresh=0.3, return_ious=True)

({'hat': MultiBx(coords=[[85, 170, 170, 256], [97, 152, 158, 256], [104, 135, 151, 256]], label=['hat', 'hat', 'hat'])},
 {'hat': (#3) [0.453,0.4899,0.3921]},
 {})

In [None]:
get_gt_thresh_iou([[100, 150, 180, 300, 'hat'], [100, 120, 280, 200, 'shirt']], coords, iou_thresh=0.3, return_ious=True)

/mnt/data/projects/pybx/.venv/lib/python3.7/site-packages/ipykernel_launcher.py:59: NoGroundTruthBxs: No good ground truth anchors found for label=shirt, try lowering threshold (iou_thresh=0.3 or increasing candidates.


({'hat': MultiBx(coords=[[85, 170, 170, 256], [97, 152, 158, 256], [104, 135, 151, 256]], label=['hat', 'hat', 'hat']),
  'shirt': None},
 {'hat': (#3) [0.453,0.4899,0.3921], 'shirt': (#0) []},
 {})

Method to also return just the max IOU ground truth boxes.

In [None]:
# | export


def get_gt_max_iou(
    true_annots,
    anchor_boxes,
    anchor_labels=None,
    return_ious=False,
    return_masks=False,
    positive_boxes=1, 
    update_labels=True
):
    """Calculate positive ground truth and extra positive ground truth bounding boxes based on maximum IOU condition.

    For every annotation the `positive_boxes` anchor boxes with the highest IOU
    (rounded to 4 decimals) are selected, best first. Anchor boxes with equal
    IOU are ranked by their position, so ties always resolve to distinct boxes.
    Will always provide a box as long as anchor boxes exist, therefore a constant
    number `positive_boxes` of positive ground truth boxes per class.

    Args:
        true_annots (Any): True annotations, typically in `pascal_voc` format. Converted with `get_bx` unless already a `BX_TYPE`.
        anchor_boxes (Any): Candidate anchor boxes, typically calculated with `pybx.bxs`. Converted with `get_bx` unless already a `BX_TYPE`.
        anchor_labels (List, optional): Anchor box labels, will be overwritten with true labels if `update_labels=True`. If None, labels `bx_0`, `bx_1`, ... are generated. Defaults to None.
        return_ious (bool, optional): Return IOU values for selected positive ground truth anchor boxes. Defaults to False.
        return_masks (bool, optional): Return boolean masks for all anchor boxes indicating if a box is positive (`True`) or negative (`False`) ground truth box. Defaults to False.
        positive_boxes (int, optional): Number of extra/positive ground truth boxes to return. Defaults to 1.
        update_labels (bool, optional): Overwrite with true annotations. Defaults to True.

    Returns:
        dict: positive ground truth anchor boxes per class; the value is `None` (and a `NoGroundTruthBxs` warning is issued) when no box could be selected
        dict: IOU of positive ground truth anchor boxes per class (empty unless `return_ious=True`)
        dict: boolean list indicating positive ground truth anchor boxes per class (empty unless `return_masks=True`)
    """
    gt_anchors_per_class = defaultdict(lambda: L())
    iou_per_class = defaultdict(lambda: L())
    mask_per_class = defaultdict(lambda: L())
    true_annots_as_bx = (
        get_bx(true_annots) if not isinstance(true_annots, BX_TYPE) else true_annots
    )
    anchor_labels = (
        [f"bx_{i}" for i in range(len(anchor_boxes))]
        if anchor_labels is None
        else anchor_labels
    )
    coords_as_bx = (
        get_bx(coords=anchor_boxes, label=anchor_labels)
        if not isinstance(anchor_boxes, BX_TYPE)
        else anchor_boxes
    )
    n_boxes = len(coords_as_bx)

    for annots in true_annots_as_bx:
        label = annots.label[0]  # is a list of len 1
        iou_values = [round(annots.iou(coords_as_bx[i]), 4) for i in range(n_boxes)]
        ious = L(iou_values)
        # Rank anchor indices by descending IOU. `sorted` is stable even with
        # reverse=True, so equal IOUs keep ascending index order. Looking values
        # up with `.index()` instead would return the first occurrence for every
        # tied value and select the same box several times.
        ranked = sorted(range(n_boxes), key=iou_values.__getitem__, reverse=True)
        ious_filter = ranked[:positive_boxes]
        selected = set(ious_filter)  # O(1) membership while building the mask
        mask = L([idx in selected for idx in range(n_boxes)])

        if return_ious:
            # report filtered box IOUs
            iou_per_class[label].extend(ious[ious_filter])
        if return_masks:
            mask_per_class[label].extend(mask)

        # NOTE(review): anchors are assigned per label while IOUs and masks are
        # extended, so several annotations sharing one label keep only the
        # anchors of the last one - confirm whether labels are expected to be unique.
        if mask.sum() < 1:
            warnings.warn(
                NoGroundTruthBxs(
                    f"No good ground truth anchors found for label={label}, try increasing candidates."
                )
            )
            gt_anchors_per_class[label] = None
        else:
            # report selected boxes, relabelled with the true label if requested
            gt_anchors_per_class[label] = stack_bxs_inplace(
                *[
                    reassign_label(coords_as_bx[i], label=[label])
                    if update_labels
                    else coords_as_bx[i]
                    for i in ious_filter
                ]
            )

    return dict(gt_anchors_per_class), dict(iou_per_class), dict(mask_per_class)

In [None]:
get_gt_max_iou(true_annots, coords)

({'clock': MultiBx(coords=[[183, 67, 243, 188]], label=['clock']),
  'frame': MultiBx(coords=[[12, 152, 72, 256]], label=['frame'])},
 {},
 {})

Can also use methods for the box to calculate properties or convert to different box formats.

In [None]:
tmp_bx = get_gt_max_iou(true_annots, coords)[0]['clock']
tmp_bx[0]

BaseBx(coords=[[183, 67, 243, 188]], label=['clock'])

In [None]:
tmp_bx[0].xywh

(#1) [[183, 67, 60, 121]]

In [None]:
tmp_bx[0].yolo(w=300, h=300, normalize=False)  # cx, cy, bw, bh

(#1) [[0.71, 0.425, 0.2, 0.4033]]

In [None]:
# here w h is the image w and h
tmp_bx[0].yolo(w=300, h=300, normalize=True)  # cx/w, cy/h, bw/w, bh/h

(#1) [[0.71, 0.425, 0.2, 0.4033]]

In [None]:
np.round([0.4046875, 0.840625, 0.503125, 0.24375], 4)

array([0.4047, 0.8406, 0.5031, 0.2438])

In [None]:
true_annots

[{'x_min': 130, 'y_min': 63, 'x_max': 225, 'y_max': 180, 'label': 'clock'},
 {'x_min': 13, 'y_min': 158, 'x_max': 90, 'y_max': 213, 'label': 'frame'}]

In [None]:
get_gt_max_iou(
    true_annots,
    coords,
    return_ious=True,
    return_masks=True,
    positive_boxes=2,  # number of positive bounding boxes to allow
)

({'clock': MultiBx(coords=[[183, 67, 243, 188], [170, 85, 256, 170]], label=['clock', 'clock']),
  'frame': MultiBx(coords=[[12, 152, 72, 256], [0, 170, 85, 256]], label=['frame', 'frame'])},
 {'clock': (#2) [0.3482,0.34], 'frame': (#2) [0.4488,0.3664]},
 {'clock': (#1741) [False,False,False,False,False,False,False,False,False,False...],
  'frame': (#1741) [False,False,False,False,False,False,False,False,False,False...]})

Vocabulary:
- Ground truth bounding box
- Ground truth anchor box or positive anchor box
- Negative anchor box
- Offset is calculated as ground truth bounding box minus positive anchor box coordinates (not for all anchor boxes, use mask)
- Normalized offsets

# Calculate offsets for ground truth anchor boxes

`BaseBx` also supports calculation of bounding box offset by calling the `get_offset()` method.

In [None]:
# first calculate the ground truth anchor boxes
gt_anchors_per_class, ious_per_class, mask_per_class = get_gt_max_iou(
    true_annots,
    coords,
    return_ious=True,
    return_masks=True,
    positive_boxes=1,  # number of positive bounding boxes to allow
)

In [None]:
mask_per_class

{'clock': (#1741) [False,False,False,False,False,False,False,False,False,False...],
 'frame': (#1741) [False,False,False,False,False,False,False,False,False,False...]}

In [None]:
gt_anchors_per_class

{'clock': MultiBx(coords=[[183, 67, 243, 188]], label=['clock']),
 'frame': MultiBx(coords=[[12, 152, 72, 256]], label=['frame'])}

In [None]:
true_annots_bx = get_bx(true_annots)
true_annots_bx

MultiBx(coords=[[130, 63, 225, 180], [13, 158, 90, 213]], label=['clock', 'frame'])

In [None]:
gt_anchors_per_class['clock']  # this is still a MultiBx

MultiBx(coords=[[183, 67, 243, 188]], label=['clock'])

In [None]:
len(gt_anchors_per_class['clock'])

1

In [None]:
gt_anchors_per_class['clock'][0]  # this is a BaseBx, which wont raise a warning

BaseBx(coords=[[183, 67, 243, 188]], label=['clock'])

In [None]:
true_annots_bx[0].get_offset(gt_anchors_per_class['clock'])  

/mnt/data/projects/pybx/pybx/basics.py:577: BxViolation: Other should be BaseBx, got MultiBx


(#4) [-5.9167,-0.4959,2.2977,-0.1681]

In [None]:
true_annots_bx[0].get_offset(gt_anchors_per_class['clock'][0])  # by default normalize=True

(#4) [-5.9167,-0.4959,2.2977,-0.1681]

With `normalize=False`, it calculates the simple difference between centers `(dcx, dcy)` and the ratio of widths and heights `(w'/w, h'/h)` of the two boxes.

In [None]:
true_annots_bx[0].get_offset(gt_anchors_per_class['clock'][0], normalize=False)  # 

(#4) [-35.5,-6.0,95.0,117.0]

The following function repeats the same operation for multiple boxes, given the masks (so that offsets are calculated only for ground truth anchor boxes).

In [None]:
# | export

def get_gt_offsets(
    true_annots, 
    anchor_boxes,
    anchor_labels=None,
    masks=None, 
    sigma=(0.1, 0.2), 
    normalize=True,
    log_func=np.log,
):
    """Compute the offsets of one true box against the selected anchor boxes.

    Args:
        true_annots (Any): Object exposing `get_offset(box, normalize=..., sigma=..., log_func=...)`, e.g. a single true annotation box.
        anchor_boxes (Any): Sized iterable of anchor boxes (positive and negative alike).
        anchor_labels (List, optional): Accepted for signature compatibility; not used by this function. Defaults to None.
        masks (Iterable, optional): One truthy/falsy flag per anchor box; offsets are computed only where the flag is truthy. If None, every anchor box is used. Defaults to None.
        sigma (tuple, optional): Forwarded to `get_offset`. Defaults to (0.1, 0.2).
        normalize (bool, optional): Forwarded to `get_offset`. Defaults to True.
        log_func (Callable, optional): Forwarded to `get_offset`. Defaults to `np.log`.

    Returns:
        np.ndarray: array of shape `(len(anchor_boxes), 4)`; rows of masked-out anchor boxes stay zero.
    """
    # total anchor count, positive and negative anchor boxes together
    n_anchors = len(anchor_boxes)
    if masks is None:
        masks = L([True] * n_anchors)

    offsets = np.zeros((n_anchors, 4))
    # zip stops at the shorter of the two inputs; rows beyond that stay zero
    for row, (anchor, is_positive) in enumerate(zip(anchor_boxes, masks)):
        if not is_positive:
            continue
        offsets[row, :] = true_annots.get_offset(
            anchor, normalize=normalize, sigma=sigma, log_func=log_func
        )
    return offsets

In [None]:
#get_gt_offsets(true_annots_bx[0], gt_anchors_per_class['clock'])  # not usually done, typically all anchors are passed
gt_offsets_clock = get_gt_offsets(true_annots_bx[0], coords, masks=mask_per_class['clock']) 

In [None]:
gt_offsets_clock[:, 0]

array([0., 0., 0., ..., 0., 0., 0.])

In [None]:
gt_offsets_clock[mask_per_class['clock']]  # can use the mask to index the only valid offset

array([[-5.9167, -0.4959,  2.2977, -0.1681]])

Repeating the same operations for all classes.

In [None]:
# Keep the offsets of every class, keyed by the true label, instead of
# computing them and throwing the result away.
gt_offsets_per_class = {}
for true_bx in true_annots_bx:
    cls_label = true_bx.label[0]  # label is a list of len 1
    gt_offsets_per_class[cls_label] = get_gt_offsets(
        true_bx, coords, masks=mask_per_class[cls_label]
    )

In [None]:
#|hide
from nbdev import nbdev_export
nbdev_export()