# Developing the IoU Filtering Procedure Using Torch Tensors

In [2]:
import torch 
from torchvision import ops 

Helper function for creating some random boxes:

In [3]:
def random_boxes(num_boxes, format='xywh'):
    """Create ``num_boxes`` random integer boxes for experimentation.

    Boxes are built in 'xyxy' form: the top-left corner is drawn from
    [0, 100) and a strictly positive width/height from [1, 200) is added to
    obtain the bottom-right corner. The result is then converted to the
    requested format.

    Args:
        num_boxes: number of boxes to generate.
        format: output box format passed to ``ops.box_convert`` as ``out_fmt``.

    Returns:
        The boxes as returned by ``ops.box_convert``, one row per box.
    """
    top_left = torch.randint(0, 100, (num_boxes, 2))

    # Sizes start at 1 so that no box collapses to zero width or height;
    # two zero-area boxes would give a 0/0 IoU.
    sizes = torch.randint_like(top_left, 1, 200)
    bottom_right = top_left + sizes

    boxes = torch.concat([top_left, bottom_right], dim=-1)
    return ops.box_convert(boxes, in_fmt='xyxy', out_fmt=format)


In [4]:

# Sizes of the toy problem: ground-truth boxes first, proposals second.
num_true_boxes, num_proposed_boxes = 4, 10

In [5]:
# Draw the ground-truth boxes first and the proposals second, both in
# corner ('xyxy') format.
true_boxes, proposed_boxes = (
    random_boxes(count, format='xyxy')
    for count in (num_true_boxes, num_proposed_boxes)
)

In [6]:
true_boxes

tensor([[ 91,  63,  97, 122],
        [ 28,  35,  86, 204],
        [ 95,   7, 126, 150],
        [ 63,  37, 213, 122]])

In [7]:
proposed_boxes

tensor([[ 47,   8, 114, 114],
        [ 47,  77, 123, 238],
        [ 30,  89,  51, 263],
        [ 95,  54, 188,  57],
        [ 66,  72, 254, 139],
        [ 57,  31, 226, 160],
        [ 17,  61, 178, 149],
        [ 22,  19,  74, 127],
        [ 65,  30,  99, 200],
        [ 12,  28,  50, 217]])

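Calculate the IoU between boxes. For batched inputs we would have to unpack the batch dimension, run the method separately on each element, and then pack the results back together; in this toy example the boxes are unbatched, so a single call suffices.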

In [8]:
# Pairwise IoU matrix: one row per proposed box, one column per true box.
ious = ops.box_iou(proposed_boxes, true_boxes)
ious

tensor([[0.0428, 0.2229, 0.2115, 0.2466],
        [0.0219, 0.2899, 0.1398, 0.1212],
        [0.0000, 0.2187, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0201, 0.0219],
        [0.0237, 0.0636, 0.1389, 0.4084],
        [0.0162, 0.1296, 0.1636, 0.5848],
        [0.0250, 0.2705, 0.1719, 0.3525],
        [0.0000, 0.3783, 0.0000, 0.0536],
        [0.0612, 0.2860, 0.0493, 0.1848],
        [0.0000, 0.2803, 0.0000, 0.0000]])

Get the indices corresponding to the maximum IoU:

In [9]:
# Reduce over the true-box axis: best IoU (and its column index) per proposal.
max_ious = ious.max(dim=-1)

In [10]:
max_ious

torch.return_types.max(
values=tensor([0.2466, 0.2899, 0.2187, 0.0219, 0.4084, 0.5848, 0.3525, 0.3783, 0.2860,
        0.2803]),
indices=tensor([3, 1, 1, 3, 3, 3, 3, 1, 1, 1]))

Grab the corresponding true boxes. This creates a tensor of the same shape as the proposed boxes, where each proposed box is matched with the true box of maximum overlap.

In [11]:
# Row i is the true box that has the highest IoU with proposed box i.
matching_true_boxes = true_boxes[max_ious.indices]
matching_true_boxes

tensor([[ 63,  37, 213, 122],
        [ 28,  35,  86, 204],
        [ 28,  35,  86, 204],
        [ 63,  37, 213, 122],
        [ 63,  37, 213, 122],
        [ 63,  37, 213, 122],
        [ 63,  37, 213, 122],
        [ 28,  35,  86, 204],
        [ 28,  35,  86, 204],
        [ 28,  35,  86, 204]])

We also have the value of the maximum:

In [12]:
max_ious.values

tensor([0.2466, 0.2899, 0.2187, 0.0219, 0.4084, 0.5848, 0.3525, 0.3783, 0.2860,
        0.2803])

Now, we assign labels to each of the proposed boxes based on their maximum IoU with the true boxes. We intend to find a specified minimum number of positives; the remaining boxes are labelled as negatives or left unlabelled. 
For this toy example, we will require at least 3 positives (`min_num_positives = 3` below). There are 2 cases for positive examples:

- Positive examples correspond to an iou of 0.7 or above, or:
- Positive examples are chosen based on which has the highest iou score. 

We will actually allow the threshold of 0.7 to be flexible, that is, we can make the threshold for a positive example lower. 

For negative (background) examples, the IoU must be lower than 0.3


In [13]:
# At least this many proposals must end up labelled positive.
min_num_positives = 3
# IoU cut-offs: >= the first means positive, < the second means background.
positivity_threshold, negativity_threshold = 0.5, 0.3

Initialize a labels tensor with -1 (which indicates no label):

In [14]:
# One label per proposal, all starting out as -1 ("no label").
labels = -torch.ones_like(max_ious.values)

Find the boxes whose maximum IoU is at least the positivity threshold (0.5 in this example) and set their labels to 1:

In [15]:

# In-place fill: proposals whose best IoU reaches the positivity threshold get
# label 1. The call returns `labels`, which the notebook displays.
labels.masked_fill_( max_ious.values >= positivity_threshold, 1)

tensor([-1., -1., -1., -1., -1.,  1., -1., -1., -1., -1.])

If there are not enough positives obtained from thresholding, we just label the ones with the highest iou as positive. 

In [16]:
# If thresholding produced too few positives, promote the proposals with the
# highest best-IoU until `min_num_positives` of them are labelled 1.
num_thresholded = (max_ious.values >= positivity_threshold).sum().item()
if num_thresholded < min_num_positives:
    indices = max_ious.values.sort(dim=-1, descending=True).indices
    labels[indices[:min_num_positives]] = 1

In [17]:
labels

tensor([-1., -1., -1., -1.,  1.,  1., -1.,  1., -1., -1.])

Mark all background examples (iou < 0.3) as background:

In [18]:
# In-place fill: every proposal whose best IoU is below the negativity
# threshold gets label 0, overwriting whatever label it had before.
labels.masked_fill_( max_ious.values < negativity_threshold, 0)

tensor([ 0.,  0.,  0.,  0.,  1.,  1., -1.,  1.,  0.,  0.])

In reality, we will always have enough low iou examples to have plenty of background examples.

## The Algorithm:

INPUT: 
- A tensor of true boxes. This tensor will have dimensionality (num_true_boxes, 4).
- A tensor of proposed boxes. This tensor will have dimensionality (num_proposed_boxes, 4).
  
OUTPUT:
- The same tensor of proposed boxes
- A tensor of matching boxes which has the same dimensionality as the proposed boxes, which matches them in the sense that matching_boxes[i, :] contains the true box with the highest overlap with the proposed box at position proposed_boxes[i, :]
- A tensor of labels describing the match as a positive example (1), negative example (0), or no example (-1). This tensor will have dimensionality (num_proposed_boxes,).


In [231]:
def match_boxes(true_boxes: torch.Tensor, proposed_boxes: torch.Tensor, 
                min_num_positives: int, in_format: str = 'xywh', 
                positivity_threshold: float = 0.7, 
                negativity_threshold: float = 0.3):
    """Match every proposed box to its best-overlapping true box and label it.

    Labels are 1 (positive), 0 (background) or -1 (no label). A proposal is
    background when its best IoU is below ``negativity_threshold`` and
    positive when its best IoU is at least ``positivity_threshold``. If
    thresholding yields fewer than ``min_num_positives`` positives, the
    ``min_num_positives`` proposals with the highest best-IoU are labelled
    positive instead, even if their IoU is below ``negativity_threshold``.

    Args:
        true_boxes: 2-D tensor of ground-truth boxes, one box per row.
        proposed_boxes: 2-D tensor of proposed boxes, one box per row.
        min_num_positives: minimum number of proposals labelled positive
            (capped by the number of proposals).
        in_format: format of both box tensors, passed to
            ``ops.box_convert`` as ``in_fmt``.
        positivity_threshold: best IoU at or above which a proposal is
            positive.
        negativity_threshold: best IoU below which a proposal is background.

    Returns:
        Tuple ``(matching_true_boxes, proposed_boxes, labels)``:
        ``matching_true_boxes[i]`` is the row of ``true_boxes`` (in the input
        format) with the highest IoU against ``proposed_boxes[i]``;
        ``proposed_boxes`` is returned unchanged; ``labels`` has one entry
        per proposal and the dtype of the IoU values.
    """
    assert len(true_boxes.shape) == 2
    assert len(proposed_boxes.shape) == 2

    # Rows index proposals, columns index true boxes.
    ious = ops.box_iou(
        ops.box_convert(proposed_boxes, in_fmt=in_format, out_fmt='xyxy'),
        ops.box_convert(true_boxes, in_fmt=in_format, out_fmt='xyxy')
    )

    max_ious = torch.max(ious, dim=-1)
    matching_true_boxes = true_boxes[max_ious.indices]

    labels = torch.ones_like(max_ious.values) * -1

    # Negatives are written first so that the positives selected below are
    # never overwritten; otherwise a forced positive with a low IoU would be
    # turned back into background and the minimum count would not hold.
    labels.masked_fill_(max_ious.values < negativity_threshold, 0)

    # select positives
    is_positive = max_ious.values >= positivity_threshold
    labels.masked_fill_(is_positive, 1)

    # add more positives if not enough
    if torch.sum(is_positive).item() < min_num_positives:
        indices = torch.sort(max_ious.values, dim=-1, descending=True).indices
        labels[indices[:min_num_positives]] = 1

    return matching_true_boxes, proposed_boxes, labels
    

## The algorithm when there is more than one object class

So far, our algorithm works when we have only 2 classes: 0 (background) and 1 (object). We would like it to work when there is more than one foreground class as well. In this case, we will have a tensor of true box labels together with the true boxes:

In [92]:
# `true_boxes` and `proposed_boxes` are reused from the cells above.
true_boxes = true_boxes
proposed_boxes = proposed_boxes
# One foreground class id (1 or 2) per true box; 0 is reserved for background.
true_box_labels = torch.randint(1, 3, (len(true_boxes),)).long()
# The reused boxes were generated with format='xyxy', so that is the format
# that must be declared for the conversion before the IoU computation.
in_format = 'xyxy'
positivity_threshold = 0.5
negativity_threshold = 0.3
min_num_positives = 3

We go through the first few steps of our algorithm:

In [93]:
# Both inputs must be 2-D: one box per row.
assert true_boxes.ndim == 2
assert proposed_boxes.ndim == 2

num_true_boxes, _ = true_boxes.shape
num_proposed_boxes, _ = proposed_boxes.shape

# Pairwise IoU on corner-format boxes: proposals along rows, true boxes
# along columns.
ious = ops.box_iou(
    boxes1=ops.box_convert(proposed_boxes, in_fmt=in_format, out_fmt='xyxy'),
    boxes2=ops.box_convert(true_boxes, in_fmt=in_format, out_fmt='xyxy'),
)

Now, we will match the proposed boxes with their ground truth boxes and their labels:

In [94]:
# Best IoU per proposal, plus the index of the true box that achieves it.
max_ious = ious.max(dim=-1)
matching_true_boxes = true_boxes[max_ious.indices]

# Carry the class id of the matched true box over to each proposal.
if true_box_labels is not None:
    matching_true_box_labels = true_box_labels[max_ious.indices]

When we select the labels, we will now use the labels from the matching labels for the positives:

In [95]:
# Start with -1 ("no label") for every proposal.
labels = (torch.ones_like(max_ious.values) * -1).long()

indices = torch.arange(len(labels), device=labels.device)

# Background is written first so that the positives chosen below are never
# overwritten, even when a forced positive has an IoU below the negativity
# threshold.
negative_indices = indices[max_ious.values < negativity_threshold]

labels[negative_indices] = 0

positive_indices = indices[max_ious.values >= positivity_threshold]
if len(positive_indices) < min_num_positives:
    # Not enough positives from thresholding: take the best-overlapping ones.
    positive_indices = torch.sort(max_ious.values, dim=-1, descending=True).indices[:min_num_positives]

# Positives receive the class id of their matched true box when class labels
# are available, and the generic label 1 otherwise.
if true_box_labels is not None: 
    labels[positive_indices] = matching_true_box_labels[positive_indices] 
else:
    labels[positive_indices] = 1

Here is the revised algorithm:

In [None]:
def match_proposed_boxes_to_true(
        true_boxes: torch.Tensor, 
        proposed_boxes: torch.Tensor, 
        min_num_positives: int, 
        in_format: str = 'xywh', 
        true_box_labels: torch.Tensor = None, 
        positivity_threshold: float = 0.7, 
        negativity_threshold: float = 0.3
    ):
    """Match proposed boxes to ground-truth boxes and label each match.

    Every proposed box is paired with the ground-truth box it overlaps most
    (highest IoU). It then receives a label: 0 (background) if that IoU is
    below ``negativity_threshold``, a positive label if the IoU is at least
    ``positivity_threshold``, and -1 (inconclusive) otherwise. If the
    positivity threshold does not yield ``min_num_positives`` positives, the
    ``min_num_positives`` proposals with the best overlap are made positive
    instead. Positives are assigned after negatives, so a proposal selected
    as positive is never left as background.

    Args:
        true_boxes (torch.Tensor): A tensor of boxes of shape (N, 4)

        proposed_boxes (torch.Tensor): A tensor of boxes of shape (M, 4)

        min_num_positives (int): minimum number of positives generated by the
        matching (capped by M).

        in_format (str, optional): string specifying the box format of both
        box tensors - see torchvision ops documentation. Defaults to 'xywh'.

        true_box_labels (torch.Tensor, optional): tensor of shape (N) giving
        the class labels corresponding with the ground truth boxes. When
        given, positives receive the class label of their matched box instead
        of 1; labels are cast to int64. Defaults to None.

        positivity_threshold (float, optional): At or above this threshold a
        proposed box will be considered to match with the ground truth.
        Defaults to 0.7.

        negativity_threshold (float, optional): below this threshold a box
        will be considered to be background. Defaults to 0.3.

    Returns:
        dict: with keys
            'matching_true_boxes': (M, 4) tensor whose row i is the row of
            ``true_boxes`` (in the input format) best overlapping
            ``proposed_boxes[i]``;
            'proposed_boxes': the ``proposed_boxes`` argument, unchanged;
            'labels': int64 tensor of shape (M) with the label of each match.
    """
    assert len(true_boxes.shape) == 2
    assert len(proposed_boxes.shape) == 2

    # Rows index proposals, columns index true boxes.
    ious = ops.box_iou(
        ops.box_convert(proposed_boxes, in_fmt=in_format, out_fmt='xyxy'),
        ops.box_convert(true_boxes, in_fmt=in_format, out_fmt='xyxy')
    )

    max_ious = torch.max(ious, dim=-1)
    matching_true_boxes = true_boxes[max_ious.indices]

    labels = (torch.ones_like(max_ious.values) * -1).long()
    # Build the index tensor on the same device as the labels so that it can
    # be masked with the IoU comparisons wherever the inputs live.
    indices = torch.arange(len(labels), device=labels.device)

    # Background first; positives below take precedence over it.
    negative_indices = indices[max_ious.values < negativity_threshold]
    labels[negative_indices] = 0

    positive_indices = indices[max_ious.values >= positivity_threshold]
    if len(positive_indices) < min_num_positives:
        # Too few positives from thresholding: take the best-overlapping ones.
        positive_indices = torch.sort(
            max_ious.values, dim=-1, descending=True
        ).indices[:min_num_positives]

    if true_box_labels is not None:
        matching_true_box_labels = true_box_labels[max_ious.indices]
        # Cast so that class labels stored in another dtype can be written
        # into the int64 label tensor.
        labels[positive_indices] = (
            matching_true_box_labels[positive_indices].to(labels.dtype)
        )
    else:
        labels[positive_indices] = 1

    return {
        'matching_true_boxes': matching_true_boxes, 
        'proposed_boxes': proposed_boxes,
        'labels': labels
    }