In [1]:
import os
import numpy as np
import cv2

In [2]:
os.environ['CUDA_VISIBLE_DEVICES']='7'

In [3]:
from model_zoo.get_models import get_detection_model,get_landmark_model, get_ageGender_model,get_recognition_model
from face.get_result import *

In [4]:
from utils.util_common import draw_result,draw_result_sim
from data.image import read_image,resize_image

In [5]:
from model_zoo.model_common import load_tensorRT,load_onnx, load_openvino

In [6]:
import matplotlib.pyplot as plt

In [7]:
import torch

In [8]:
model_dict={
            'onnx':"/data/notebook/yoonms/vedadet/tinaface_r50_fpn_bn_sim.onnx",
            'trt':"/data/notebook/yoonms/vedadet/tinaface_r50_fpn_bn_sim.v8.trt",
            'openvino':["/data/notebook/yoonms/vedadet/tinaface_r50_fpn_bn_sim.xml",
               "/data/notebook/yoonms/vedadet/tinaface_r50_fpn_bn_sim.bin"]
           }

In [8]:
img = read_image("/data/notebook/yoonms/GenderAge_inference2/p3.jpg")
img = resize_image(img,(640,640))

In [9]:
model_onnx = load_onnx.Onnx_cv(model_dict['onnx'],output_sort=True,input_mean=0.0, input_std=1.0)
model_trt = load_tensorRT.TrtModel(model_dict['trt'],not_norm=True,output_sort=True,torch_image=True)
model_vino = load_openvino.Openvino(model_dict['openvino'],not_norm=True,torch_image=True)

[04/27/2022-15:44:50] [TRT] [W] TensorRT was linked against cuBLAS/cuBLAS LT 11.6.5 but loaded cuBLAS/cuBLAS LT 11.6.1
[04/27/2022-15:44:50] [TRT] [W] TensorRT was linked against cuBLAS/cuBLAS LT 11.6.5 but loaded cuBLAS/cuBLAS LT 11.6.1


In [379]:
outs_onnx = model_onnx(img)
outs_trt = model_trt(img)
outs_vino = model_vino(img)

[ WARN:0@12929.299] global /io/opencv/modules/dnn/src/dnn.cpp (1483) setUpNet DNN module was not built with CUDA backend; switching to CPU


In [389]:
# if trt model

strides=[4,8,16,32,64,128]
b=[3,12,3]
h=640
w=640

for oi,o in enumerate(outs_trt):
    if w>=h:
        outs_trt[oi] = np.reshape(outs_trt[oi],(o.shape[0],b[oi//6],-1,round(w/strides[oi%6])))
    else:
        outs_trt[oi] = np.reshape(outs_trt[oi],(o.shape[0],b[oi//6],round(h/strides[oi%6],-1)))

In [390]:
featmap_sizes = [feat.shape[-2:] for feat in outs_onnx]
dtype = outs_onnx[0].dtype
device = 'cuda'

In [391]:
strides = [4, 8, 16, 32, 64, 128]
use_sigmoid = True
scales_per_octave = 3
ratios = [1.3]
num_anchors = scales_per_octave * len(ratios)

In [392]:
from bbox_anchor import BBoxBaseAnchor

In [393]:
base_anchors = BBoxBaseAnchor(ratios=ratios,base_sizes=strides, \
                             octave_base_scale=2**(4 / 3),scales_per_octave=3).generate()

In [10]:
img_path = "/data/notebook/yoonms/GenderAge_inference2/p3.jpg"
img = cv2.imread(img_path)

In [11]:
results = [dict(img=img)]

In [12]:
result = results[0]

In [13]:
# 1. Resize
dst_shape = [640,640]
interp = cv2.INTER_LINEAR
keep_ratio = False

img = result['img']
h, w = img.shape[:2]
new_h, new_w = dst_shape
if keep_ratio:
    scale_factor = min(max(new_h, new_w) / max(h, w), min(new_h, new_w) / min(h, w))
    new_h = int(h * float(scale_factor) + 0.5)
    new_w = int(w * float(scale_factor) + 0.5)

img = cv2.resize(img, (new_w, new_h), interpolation=interp)  # TODO time consuming
w_scale = new_w / w
h_scale = new_h / h

scale_factor = np.array([w_scale, h_scale, w_scale, h_scale],
                        dtype=np.float32)

result['img'] = img
result['img_shape'] = img.shape
result['pad_shape'] = img.shape
result['scale_factor'] = scale_factor

In [14]:
# 2. ToFloat
keys=['img']
for key in keys:
    result[key] = result[key].astype(np.float32)

In [15]:
# PadIfNeeded

size = None
size_divisor = 32
mode = cv2.BORDER_CONSTANT
value = np.array([123.675, 116.280, 103.530])

img = result['img']
h, w = img.shape[:2]

if size is not None:
    pad_h, pad_w = size
    assert h <= pad_h and w <= pad_w
else:
    pad_h = int(np.ceil(h / size_divisor)) * size_divisor
    pad_w = int(np.ceil(w / size_divisor)) * size_divisor
padded_img = cv2.copyMakeBorder(img, 0, pad_h - h, 0, pad_w - w,
                                mode, value=value)

result['img'] = padded_img
result['img_shape'] = img.shape
result['pad_shape'] = padded_img.shape

In [16]:
import torch

In [17]:
# ImageToTensor

use_gpu=False

img = result['img']
if img.ndim == 2:
    img = img[:, :, None]
img = torch.from_numpy(img)
if use_gpu:
    img = img.cuda()
img = img.permute(2, 0, 1)  # h*w*c -> c*h*w
img = img.float()
img = img.unsqueeze(0)  # c*h*w -> 1*c*h*w

result['img'] = img

In [18]:
# Normalize

mean=[123.675, 116.28, 103.53]
std=[1,1,1]
use_gpu=False
gray=False
to_rgb=True

shape = (1, 1, 1, 1) if gray else (1, 3, 1, 1)

mean = torch.tensor(mean, dtype=torch.float32).view(*shape)
std = torch.tensor(std, dtype=torch.float32).view(*shape)

if use_gpu:
    mean = mean.cuda()
    std = std.cuda()
    
img = result['img']
if not gray and to_rgb:
    # n*c*h*w
    print("bgr to rgb")
    img = img[:, [2, 1, 0], :, :]  # bgr to rgb
img = (img - mean) / std  # time consuming on cpu

result['img'] = img

bgr to rgb


In [19]:
DEFAULT = dict(
    scale_factor=1.0,
    flip=False,
    flip_direction=None,
)


In [20]:
# Collect

keys = ['img']
meta_keys = ['img_shape', 'pad_shape', 'scale_factor', 'flip', 'flip_direction']

data = {}
img_meta = {}

for key in meta_keys:
    if key in result:
        img_meta[key] = result[key]
    else:
        img_meta[key] = DEFAULT[key]

for key in keys:
    data[key] = result[key]
data['img_metas'] = img_meta

In [172]:
#img = data['img']

In [173]:
# model

In [176]:
img.shape

torch.Size([1, 3, 640, 640])

In [178]:
outs_onnx = model_onnx(img)

In [21]:
outs_trt = model_trt(data['img'])
outs_vino = model_vino(data['img'])

In [23]:
outs_trt

[array([[[[-4.2460938, -5.2070312, -5.1367188, ..., -4.8554688,
           -4.9179688, -4.171875 ],
          [-5.5234375, -6.3085938, -6.0742188, ..., -5.6015625,
           -5.6835938, -4.9335938],
          [-5.4257812, -6.078125 , -5.78125  , ..., -5.453125 ,
           -5.4726562, -4.8867188],
          ...,
          [-4.7929688, -5.1835938, -4.9960938, ..., -4.6484375,
           -4.953125 , -4.3359375],
          [-4.7617188, -5.2226562, -5.0546875, ..., -4.3984375,
           -4.8945312, -4.375    ],
          [-3.8945312, -4.4609375, -4.6992188, ..., -3.9882812,
           -4.4804688, -3.859375 ]],
 
         [[-4.2070312, -5.171875 , -4.96875  , ..., -4.8242188,
           -4.7578125, -4.171875 ],
          [-5.5039062, -6.3398438, -5.8710938, ..., -5.4804688,
           -5.4179688, -4.8515625],
          [-5.4179688, -5.9960938, -5.5078125, ..., -5.1835938,
           -5.0898438, -4.6601562],
          ...,
          [-4.671875 , -5.1679688, -4.9023438, ..., -4.3515625,
   

In [22]:
# if trt model

strides=[4,8,16,32,64,128]
b=[3,12,3]
h=640
w=640

for oi,o in enumerate(outs_trt):
    if w>=h:
        outs_trt[oi] = np.reshape(outs_trt[oi],(o.shape[0],b[oi//6],-1,round(w/strides[oi%6])))
    else:
        outs_trt[oi] = np.reshape(outs_trt[oi],(o.shape[0],b[oi//6],round(h/strides[oi%6],-1)))

In [63]:
outs = outs_trt

In [186]:
# postprocess

In [187]:
#featmap_sizes = [feat.shape[-2:] for feat in outs_onnx]
#dtype = outs_onnx[0].dtype
#device = 'cuda'

In [97]:
strides = [4, 8, 16, 32, 64, 128]
use_sigmoid = True
scales_per_octave = 3
ratios = [1.3]
num_anchors = scales_per_octave * len(ratios)

In [65]:
from bbox_anchor import BBoxBaseAnchor

In [66]:
base_anchors = BBoxBaseAnchor(ratios=ratios,base_sizes=strides, \
                             octave_base_scale=2**(4 / 3),scales_per_octave=3).generate()

In [95]:
def _num_base_anchors(base_anchors):
    """list[int]: total number of base anchors in a feature grid"""
    return [base_anchor.size(0) for base_anchor in base_anchors]

In [98]:
def num_levels(strides):
        """int: number of feature levels that the generator will be applied"""
        return len(strides)

In [96]:
num_base_anchors = _num_base_anchors(base_anchors)

In [99]:
num_levels = num_levels(strides)

In [100]:
def _meshgrid(x, y, row_major=True):
        """Generate mesh grid of x and y.

        Args:
            x (torch.Tensor): Grids of x dimension.
            y (torch.Tensor): Grids of y dimension.
            row_major (bool, optional): Whether to return y grids first.
                Defaults to True.

        Returns:
            tuple[torch.Tensor]: The mesh grids of x and y.
        """
        xx = x.repeat(len(y))
        yy = y.view(-1, 1).repeat(1, len(x)).view(-1)
        if row_major:
            return xx, yy
        else:
            return yy, xx

In [101]:
def _single_level_anchor_mesh(base_anchors, featmap_size, stride,
                                  device):
        """Generate grid anchors of a single level.

        Note:
            This function is usually called by method ``self.grid_anchors``.

        Args:
            base_anchors (torch.Tensor): The base anchors of a feature grid.
            featmap_size (tuple[int]): Size of the feature maps.
            stride (tuple[int], optional): Stride of the feature map.
                Defaults to (16, 16).
            device (str, optional): Device the tensor will be put on.
                Defaults to 'cuda'.

        Returns:
            torch.Tensor: Anchors in the overall feature maps.
        """
        feat_h, feat_w = featmap_size
        # shift_x = torch.arange(0, feat_w, device=device) * stride[0]
        # shift_y = torch.arange(0, feat_h, device=device) * stride[1]
        shift_x = torch.arange(0, feat_w, device=device) * stride
        shift_y = torch.arange(0, feat_h, device=device) * stride
        shift_xx, shift_yy = _meshgrid(shift_x, shift_y)
        shifts = torch.stack([shift_xx, shift_yy, shift_xx, shift_yy], dim=-1)
        shifts = shifts.type_as(base_anchors)
        # first feat_w elements correspond to the first row of shifts
        # add A anchors (1, A, 4) to K shifts (K, 1, 4) to get
        # shifted anchors (K, A, 4), reshape to (K*A, 4)

        all_anchors = base_anchors[None, :, :] + shifts[:, None, :]
        all_anchors = all_anchors.view(-1, 4)
        # first A rows correspond to A anchors of (0, 0) in feature map,
        # then (0, 1), (0, 2), ...
        return all_anchors

In [102]:
def _single_level_valid_flags(featmap_size,
                                  valid_size,
                                  num_base_anchors,
                                  device='cuda'):
        """Generate the valid flags of anchor in a single feature map.
        Args:
            featmap_size (tuple[int]): The size of feature maps.
            valid_size (tuple[int]): The valid size of the feature maps.
            num_base_anchors (int): The number of base anchors.
            device (str, optional): Device where the flags will be put on.
                Defaults to 'cuda'.
        Returns:
            torch.Tensor: The valid flags of each anchor in a single level
                feature map.
        """
        feat_h, feat_w = featmap_size
        valid_h, valid_w = valid_size
        assert valid_h <= feat_h and valid_w <= feat_w
        valid_x = torch.zeros(feat_w, dtype=torch.bool, device=device)
        valid_y = torch.zeros(feat_h, dtype=torch.bool, device=device)
        valid_x[:valid_w] = 1
        valid_y[:valid_h] = 1
        valid_xx, valid_yy = _meshgrid(valid_x, valid_y)
        valid = valid_xx & valid_yy
        valid = valid[:, None].expand(valid.size(0),
                                      num_base_anchors).contiguous().view(-1)
        return valid

In [103]:
def valid_flags(featmap_sizes, pad_shape, device='cuda'):
        """Generate valid flags of anchors in multiple feature levels.
        Args:
            featmap_sizes (list(tuple)): List of feature map sizes in
                multiple feature levels.
            pad_shape (tuple): The padded shape of the image.
            device (str): Device where the anchors will be put on.
        Return:
            list(torch.Tensor): Valid flags of anchors in multiple levels.
        """
        assert num_levels == len(featmap_sizes)
        multi_level_flags = []
        for i in range(num_levels):
            anchor_stride = strides[i]
            feat_h, feat_w = featmap_sizes[i]
            h, w = pad_shape[:2]
            # valid_feat_h = min(int(np.ceil(h / anchor_stride[0])), feat_h)
            # valid_feat_w = min(int(np.ceil(w / anchor_stride[1])), feat_w)
            valid_feat_h = min(int(np.ceil(h / anchor_stride)), feat_h)
            valid_feat_w = min(int(np.ceil(w / anchor_stride)), feat_w)
            flags = _single_level_valid_flags(
                (feat_h, feat_w), (valid_feat_h, valid_feat_w),
                num_base_anchors[i],
                device=device)
            multi_level_flags.append(flags)
        return multi_level_flags

In [104]:
def _gen_anchor_mesh(featmap_sizes, dtype, device):
        """Get points according to feature map sizes.
        Args:
            featmap_sizes (list[tuple]): Multi-level feature map sizes.
            dtype (torch.dtype): Type of points.
            device (torch.device): Device of points.
        Returns:
            tuple: points of each image.
        """
        assert num_levels == len(featmap_sizes)
        multi_level_anchors = []
        for i in range(num_levels):
            anchors = _single_level_anchor_mesh(
                base_anchors[i].to(device).to(dtype),
                featmap_sizes[i],
                strides[i],
                device=device)
            multi_level_anchors.append(anchors)
        return multi_level_anchors

In [105]:
def gen_anchor_mesh(featmap_sizes,
                        img_metas,
                        dtype=torch.float,
                        device='cuda'):
    num_imgs = len(img_metas)

    # since feature map sizes of all images are the same, we only compute
    # anchors for one time
    multi_level_anchors = _gen_anchor_mesh(featmap_sizes, dtype,
                                                    'cpu')
    anchor_list = [multi_level_anchors for _ in range(num_imgs)]

    # for each image, we compute valid flags of multi level anchors
    valid_flag_list = []
    for img_id, img_meta in enumerate(img_metas):
        multi_level_flags = valid_flags(featmap_sizes,
                                                 img_meta['pad_shape'], device)
        valid_flag_list.append(multi_level_flags)

    return anchor_list, valid_flag_list

In [106]:
num_fpn_lvls = len(strides)
feats_len = len(outs)

In [107]:
print(num_fpn_lvls, feats_len)

6 18


In [108]:
len(outs)

18

In [109]:
feats = [outs[i:i + num_fpn_lvls] for i in range(0, feats_len, num_fpn_lvls)]

featmap_sizes = [feat.shape[-2:] for feat in feats[0]]
dtype = feats[0][0].dtype
device='cpu'

In [110]:
featmap_sizes

[(160, 160), (80, 80), (40, 40), (20, 20), (10, 10), (5, 5)]

In [92]:
img_metas = []
for i,result in enumerate(datas):
    img_metas.append(result['img_metas'])

In [93]:
len(img_metas)

1

In [111]:
anchor_mesh = gen_anchor_mesh(featmap_sizes,img_metas,device=device)

In [113]:
def delta2bbox(rois,
               deltas,
               means=(0., 0., 0., 0.),
               stds=(1., 1., 1., 1.),
               max_shape=None,
               wh_ratio_clip=16 / 1000):
  
    means = deltas.new_tensor(means).repeat(1, deltas.size(1) // 4)
    stds = deltas.new_tensor(stds).repeat(1, deltas.size(1) // 4)
    denorm_deltas = deltas * stds + means
    dx = denorm_deltas[:, 0::4]
    dy = denorm_deltas[:, 1::4]
    dw = denorm_deltas[:, 2::4]
    dh = denorm_deltas[:, 3::4]
    max_ratio = np.abs(np.log(wh_ratio_clip))
    dw = dw.clamp(min=-max_ratio, max=max_ratio)
    dh = dh.clamp(min=-max_ratio, max=max_ratio)
    # Compute center of each roi
    px = ((rois[:, 0] + rois[:, 2]) * 0.5).unsqueeze(1).expand_as(dx)
    py = ((rois[:, 1] + rois[:, 3]) * 0.5).unsqueeze(1).expand_as(dy)
    # Compute width/height of each roi
    pw = (rois[:, 2] - rois[:, 0]).unsqueeze(1).expand_as(dw)
    ph = (rois[:, 3] - rois[:, 1]).unsqueeze(1).expand_as(dh)
    # Use exp(network energy) to enlarge/shrink each roi
    gw = pw * dw.exp()
    gh = ph * dh.exp()
    # Use network energy to shift the center of each roi
    gx = px + pw * dx
    gy = py + ph * dy
    # Convert center-xy/width/height to top-left, bottom-right
    x1 = gx - gw * 0.5
    y1 = gy - gh * 0.5
    x2 = gx + gw * 0.5
    y2 = gy + gh * 0.5
    if max_shape is not None:
        x1 = x1.clamp(min=0, max=max_shape[1])
        y1 = y1.clamp(min=0, max=max_shape[0])
        x2 = x2.clamp(min=0, max=max_shape[1])
        y2 = y2.clamp(min=0, max=max_shape[0])
    bboxes = torch.stack([x1, y1, x2, y2], dim=-1).view_as(deltas)
    return bboxes

In [114]:
def bbox_coder_decode(bboxes,
               pred_bboxes,
                      means,
                      stds,
               max_shape=None,
               wh_ratio_clip=16 / 1000):

        assert pred_bboxes.size(0) == bboxes.size(0)
        decoded_bboxes = delta2bbox(bboxes, pred_bboxes, means, stds,
                                    max_shape, wh_ratio_clip)

        return decoded_bboxes

In [115]:
def bbox_revert(bboxes,
                img_shape,
                scale_factor,
                flip,
                flip_direction='horizontal'):
    """Map bboxes from testing scale to original image scale."""
    new_bboxes = bbox_flip(bboxes, img_shape,
                           flip_direction) if flip else bboxes
    new_bboxes = new_bboxes.view(-1, 4) / new_bboxes.new_tensor(scale_factor)
    return new_bboxes.view(bboxes.shape)

In [116]:
def _get_bboxes_single(cls_score_list,
                           bbox_pred_list,
                           iou_pred_list,
                           mlvl_anchors,
                           img_metas,
                           alpha=0.4,
                       height_th=9,
                           nms_pre=10000,
                           revert=True,
                      use_sigmoid_cls=True,
                      cls_out_channels=1):

        # if len(mlvl_anchors) > 1:
        #     mlvl_anchors = mlvl_anchors[0]

        def filter_boxes(boxes, min_scale, max_scale):
            ws = boxes[:, 2] - boxes[:, 0]
            hs = boxes[:, 3] - boxes[:, 1]
            scales = torch.sqrt(ws * hs)

            return (scales >= max(1, min_scale)) & (scales <= max_scale)

        img_shape = img_metas['img_shape']
        scale_factor = img_metas['scale_factor']
        assert len(cls_score_list) == len(bbox_pred_list) == len(
            iou_pred_list) == len(mlvl_anchors)
        mlvl_bboxes = []
        mlvl_scores = []
        for cls_score, bbox_pred, iou_pred, anchors in zip(
                cls_score_list, bbox_pred_list, iou_pred_list, mlvl_anchors):
            cls_score = torch.from_numpy(cls_score)
            bbox_pred = torch.from_numpy(bbox_pred)
            iou_pred = torch.from_numpy(iou_pred)
            assert cls_score.size()[-2:] == bbox_pred.size()[-2:]
            cls_score = cls_score.permute(1, 2,
                                          0).reshape(-1, cls_out_channels)
            iou_pred = iou_pred.permute(1, 2, 0).reshape(-1, 1).sigmoid()
            if use_sigmoid_cls:
                scores = cls_score.sigmoid()
            else:
                scores = cls_score.softmax(-1)

            if alpha is None:
                scores *= iou_pred
            elif isinstance(alpha, float):
                scores = torch.pow(scores, 2 * alpha) * torch.pow(
                    iou_pred, 2 * (1 - alpha))
            else:
                raise ValueError("alpha must be float or None")

            bbox_pred = bbox_pred.permute(1, 2, 0).reshape(-1, 4)
            if nms_pre > 0 and scores.shape[0] > nms_pre:
                # Get maximum scores for foreground classes.
                if use_sigmoid_cls:
                    max_scores, _ = scores.max(dim=1)
                else:
                    # remind that we set FG labels to [0, num_class-1]
                    # since  v2.0
                    # BG cat_id: num_class
                    max_scores, _ = scores[:, :-1].max(dim=1)
                _, topk_inds = max_scores.topk(nms_pre)
                anchors = anchors[topk_inds, :]
                bbox_pred = bbox_pred[topk_inds, :]
                scores = scores[topk_inds, :]
            bboxes = bbox_coder_decode(
                anchors, bbox_pred,target_means,target_stds ,max_shape=img_shape)
            mlvl_bboxes.append(bboxes)
            mlvl_scores.append(scores)
        mlvl_bboxes = torch.cat(mlvl_bboxes)
        mlvl_scores = torch.cat(mlvl_scores)

        keeps = filter_boxes(mlvl_bboxes, 1, 10000)
        mlvl_bboxes = mlvl_bboxes[keeps]
        mlvl_scores = mlvl_scores[keeps]

        if use_sigmoid_cls:
            # Add a dummy background class to the backend when using sigmoid
            # remind that we set FG labels to [0, num_class-1] since  v2.0
            # BG cat_id: num_class
            padding = mlvl_scores.new_zeros(mlvl_scores.shape[0], 1)
            mlvl_scores = torch.cat([mlvl_scores, padding], dim=1)
        mlvl_centerness = mlvl_scores.new_ones(mlvl_scores.shape[0]).detach()
        if revert:
            flip = img_metas['flip']
            flip_direction = img_metas['flip_direction']
            mlvl_bboxes = bbox_revert(mlvl_bboxes, img_shape, scale_factor,
                                      flip, flip_direction)

        if height_th is not None:
            hs = mlvl_bboxes[:, 3] - mlvl_bboxes[:, 1]
            valid = (hs >= height_th)
            mlvl_bboxes, mlvl_scores, mlvl_centerness = (
                mlvl_bboxes[valid], mlvl_scores[valid], mlvl_centerness[valid])

        return mlvl_bboxes, mlvl_scores, mlvl_centerness

In [117]:
def get_bboxes(mlvl_anchors, img_metas, cls_scores, bbox_preds,
                   iou_preds):

        assert len(cls_scores) == len(bbox_preds) == len(iou_preds)
        num_levels = len(cls_scores)

        result_list = []
        for img_id in range(len(img_metas)):
            cls_score_list = [
                cls_scores[i][img_id] for i in range(num_levels)
            ]
            bbox_pred_list = [
                bbox_preds[i][img_id] for i in range(num_levels)
            ]
            iou_pred_list = [
                iou_preds[i][img_id] for i in range(num_levels)
            ]

            # TODO: hard code. 0 for anchor_list, 1 for valid_flag_list
            anchors = mlvl_anchors[0][img_id]
            proposals = _get_bboxes_single(cls_score_list, bbox_pred_list,
                                                iou_pred_list, anchors,
                                                img_metas[img_id])
            result_list.append(proposals)
        return result_list

In [118]:
target_means=[.0, .0, .0, .0]
target_stds=[0.1, 0.1, 0.2, 0.2]

In [119]:
datas = [data]

In [120]:
dets = get_bboxes(anchor_mesh, img_metas, *feats)

In [121]:
dets

[(tensor([[ 974.0639,  590.5913, 1000.5275,  616.8153],
          [ 974.4045,  596.3999, 1002.2957,  622.6239],
          [ 977.3308,  584.8152, 1003.4970,  611.0392],
          ...,
          [ 741.8045,  510.0506, 1228.0000,  920.0000],
          [ 558.7751,  493.8341, 1228.0000,  920.0000],
          [ 572.1704,  325.1985, 1228.0000,  920.0000]]),
  tensor([[0.0755, 0.0000],
          [0.0725, 0.0000],
          [0.0666, 0.0000],
          ...,
          [0.4180, 0.0000],
          [0.3935, 0.0000],
          [0.4885, 0.0000]]),
  tensor([1., 1., 1.,  ..., 1., 1., 1.]))]

In [125]:
dets[0][1][]

tensor([[0.0755, 0.0000],
        [0.0725, 0.0000],
        [0.0666, 0.0000],
        ...,
        [0.5177, 0.0000],
        [0.4180, 0.0000],
        [0.3935, 0.0000]])

In [53]:
def bbox2result(bboxes, labels, num_classes):
    if bboxes.shape[0] == 0:
        return [np.zeros((0, 5), dtype=np.float32) for i in range(num_classes)]
    else:
        bboxes = bboxes.cpu().numpy()
        labels = labels.cpu().numpy()
        return [bboxes[labels == i, :] for i in range(num_classes)]

In [54]:
def nms_eachScore(dets, scores,thresh):
    x1 = dets[:, 0]
    y1 = dets[:, 1]
    x2 = dets[:, 2]
    y2 = dets[:, 3]

    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()

    keep = []

    while order.size()[0] > 0:
        i = order[0]
        keep.append(i)
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])

        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        ovr = inter / (areas[i] + areas[order[1:]] - inter)

        inds = np.where(ovr <= thresh)[0]
        order = order[inds + 1]

    return keep

In [55]:
def multiclass_nms(multi_bboxes,
                   multi_scores,
                   score_thr,
                   iou_thr,
                   max_num=-1,
                   score_factors=None):

    num_classes = multi_scores.size(1) - 1
    # exclude background category
    if multi_bboxes.shape[1] > 4:
        bboxes = multi_bboxes.view(multi_scores.size(0), -1, 4)
    else:
        bboxes = multi_bboxes[:, None].expand(-1, num_classes, 4)
    scores = multi_scores[:, :-1]
    
    # filter out boxes with low scores
    if score_factors is not None:
        scores = scores * score_factors[:, None]
    valid_mask = scores > score_thr
    bboxes = bboxes[valid_mask]
    scores = scores[valid_mask]
    labels = valid_mask.nonzero(as_tuple=False)[:, 1]

    if bboxes.numel() == 0:
        bboxes = multi_bboxes.new_zeros((0, 5))
        labels = multi_bboxes.new_zeros((0, ), dtype=torch.long)
        return bboxes, labels, scores
    
    keep = nms_eachScore(bboxes,scores,iou_thr)
    
    dets = bboxes[keep, :]
    scores = scores[keep]
    
    
    

    if max_num > 0:
        dets = dets[:max_num]
        keep = keep[:max_num]
        scores = scores[:max_num]

    return dets, labels[keep],scores
    

In [56]:
batch_size = len(dets)

In [57]:
cls_out_channels=1

min_bbox_size=0
score_thr=0.02
iou_thr=0.45
max_per_img=300

In [58]:
result_list=[]
for ii in range(batch_size):
    bboxes, scores, centerness = dets[ii]
    det_bboxes, det_labels, det_scores = multiclass_nms(
        bboxes,
        scores,
        score_thr,
        iou_thr,
        max_per_img,
        score_factors=centerness)
    
    det_bboxes = torch.cat([det_bboxes, det_scores[:, None]], -1)

    bbox_result = bbox2result(det_bboxes, det_labels,
                                cls_out_channels)
    result_list.append(bbox_result)

result['out'] = result_list


In [59]:
result['out'][0][0].shape

(300, 5)

In [60]:
max_score=0
for o in result['out'][0][0]:
    score = o[-1]
    if max_score<score:
        max_score=score

In [61]:
max_score

0.14552505