In [1]:
import torch
import numpy as np
from utility import *
from box_utils import *
from torch.autograd import Function

Using TensorFlow backend.


In [2]:
def nms_torch(boxes, scores, overlap=0.5, top_k=200):
    """Apply non-maximum suppression at test time to avoid detecting too many
    overlapping bounding boxes for a given object.
    Args:
        boxes: (tensor) The location preds for the img, Shape: [num_priors,4].
        scores: (tensor) The class predscores for the img, Shape:[num_priors].
        overlap: (float) The overlap thresh for suppressing unnecessary boxes.
        top_k: (int) The Maximum number of box preds to consider.
    Return:
        The indices of the kept boxes with respect to num_priors.
    """

    keep = scores.new(scores.size(0)).zero_().long()
    if boxes.numel() == 0:
        return keep
    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]
    area = torch.mul(x2 - x1, y2 - y1)
    v, idx = scores.sort(0)  # sort in ascending order
    # I = I[v >= 0.01]
    idx = idx[-top_k:]  # indices of the top-k largest vals
    xx1 = boxes.new()
    yy1 = boxes.new()
    xx2 = boxes.new()
    yy2 = boxes.new()
    w = boxes.new()
    h = boxes.new()

    # keep = torch.Tensor()
    count = 0
    while idx.numel() > 0:
        i = idx[-1]  # index of current largest val
        # keep.append(i)
        keep[count] = i
        count += 1
        if idx.size(0) == 1:
            break
        idx = idx[:-1]  # remove kept element from view
        # load bboxes of next highest vals
        torch.index_select(x1, 0, idx, out=xx1)
        torch.index_select(y1, 0, idx, out=yy1)
        torch.index_select(x2, 0, idx, out=xx2)
        torch.index_select(y2, 0, idx, out=yy2)
        # store element-wise max with next highest score
        xx1 = torch.clamp(xx1, min=x1[i])
        yy1 = torch.clamp(yy1, min=y1[i])
        xx2 = torch.clamp(xx2, max=x2[i])
        yy2 = torch.clamp(yy2, max=y2[i])
        w.resize_as_(xx2)
        h.resize_as_(yy2)
        w = xx2 - xx1
        h = yy2 - yy1
        # check sizes of xx1 and xx2.. after each iteration
        w = torch.clamp(w, min=0.0)
        h = torch.clamp(h, min=0.0)
        inter = w*h
        # IoU = i / (area(a) + area(b) - i)
        rem_areas = torch.index_select(area, 0, idx)  # load remaining areas)
        union = (rem_areas - inter) + area[i]
        IoU = inter/union  # store result in iou
        # keep only elements with an IoU <= overlap
        idx = idx[IoU.le(overlap)]

    return keep, count

In [3]:
class Detect(Function):
    """At test time, Detect is the final layer of SSD.  Decode location preds,
    apply non-maximum suppression to location predictions based on conf
    scores and threshold to a top_k number of output predictions for both
    confidence score and locations.
    """
    def __init__(self, num_classes, bkg_label, top_k, conf_thresh, nms_thresh):
        self.num_classes = num_classes
        self.background_label = bkg_label
        self.top_k = top_k
        # Parameters used in nms.
        self.nms_thresh = nms_thresh
        if nms_thresh <= 0:
            raise ValueError('nms_threshold must be non negative.')
        self.conf_thresh = conf_thresh
        self.variance = [0.1, 0.2]

    def forward(self, loc_data, conf_data, prior_data):
        """
        Args:
            loc_data: (tensor) Loc preds from loc layers
                Shape: [batch,num_priors*4]
            conf_data: (tensor) Shape: Conf preds from conf layers
                Shape: [batch*num_priors,num_classes]
            prior_data: (tensor) Prior boxes and variances from priorbox layers
                Shape: [1,num_priors,4]
        """
        num = loc_data.size(0)  # batch size
        num_priors = prior_data.size(0)
        
        output = torch.zeros(num, self.num_classes, self.top_k, 5)
        
        conf_preds = conf_data.view(num, num_priors,
                                    self.num_classes).transpose(2, 1)

        # Decode predictions into bboxes.
        for i in range(num):
            decoded_boxes = decode(loc_data[i], prior_data, self.variance)
            print(decoded_boxes.size())
            # For each class, perform nms
            conf_scores = conf_preds[i].clone()

            for cl in range(1, self.num_classes):
                c_mask = conf_scores[cl].gt(self.conf_thresh)
#                 print(c_mask.size())
                scores = conf_scores[cl][c_mask]
                if scores.size(0) == 0:
                    continue
                l_mask = c_mask.unsqueeze(1).expand_as(decoded_boxes)
#                 print(l_mask.size())
                boxes = decoded_boxes[l_mask].view(-1, 4)
                # idx of highest scoring and non-overlapping boxes per class
                ids, count = nms(boxes, scores, self.nms_thresh, self.top_k)
#                 output[i, cl, :count] = \
#                     torch.cat((scores[ids[:count]].unsqueeze(1),
#                                boxes[ids[:count]]), 1)
#         flt = output.contiguous().view(num, -1, 5)
#         _, idx = flt[:, :, 0].sort(1, descending=True)
#         _, rank = idx.sort(1)
#         flt[(rank < self.top_k).unsqueeze(-1).expand_as(flt)].fill_(0)
        return ids, count

In [4]:
def nms_np(boxes, scores, overlap = 0.5, top_k= 200):
    

    keep = np.zeros(shape = scores.shape[0], dtype = np.float32)
    
    if len(boxes) == 0:
        return keep
    
    x1 = boxes[:,0]
    y1 = boxes[:,1]
    
    x2 = boxes[:,2]
    y2 = boxes[:,3]
    
    area = (x2 - x1) * (y2 - y1)
    
    idx = np.argsort(scores)
    
    idx = idx[-top_k:]
    

    
    count = 0 
    
    while len(idx) > 0:
        i = idx[-1]  # index of current largest val
        
        keep[count] = i
        count += 1
        
        if idx.shape[0] == 1:
            break
            
        idx = idx[:-1]
        
        xx1 = np.take(x1, indices=idx, axis=0)
        yy1 = np.take(y1, indices=idx, axis=0)
        xx2 = np.take(x2, indices=idx, axis=0)
        yy2 = np.take(y2, indices=idx, axis=0)
        
        xx1 = np.clip(xx1, a_min = x1[i],  a_max=None)
        yy1 = np.clip(yy1, a_min = y1[i],  a_max=None)
        xx2 = np.clip(xx2, a_min = None,   a_max=x2[i])
        yy2 = np.clip(yy2, a_min = None,   a_max=x2[i])
        
        w = xx2 - xx1
        h = yy2 - yy1

        
        w = np.clip(w, a_min = 0., a_max = None)
        h = np.clip(h, a_min = 0., a_max = None)
        
        inter = w * h
        rem_areas = np.take(area, indices = idx, axis = 0) # load remaining areas
        union     = (rem_areas - inter) + area[i]
        IOU       = inter/union
        idx       = idx[np.less_equal(IOU, overlap)]
#         print(idx.shape)
    return keep, count

In [5]:
score = np.random.randn(8732)
boxes = np.random.randn(8732, 4)
scores = torch.Tensor(score)
boxes_ = torch.Tensor(boxes)

In [6]:
nms_np(boxes=boxes, scores=score, overlap=0.5)

(array([5819., 5454., 4754., ...,    0.,    0.,    0.], dtype=float32), 182)

In [7]:
nms_torch(boxes=boxes_, scores=scores, overlap=0.5)

(tensor([5819, 5454, 4754,  ...,    0,    0,    0]), 198)

In [8]:
class Detect_np(object):
    def __init__(self, 
                 num_classes   = 21, 
                 bkg_label   = None, 
                 conf_thresh = 0.6, 
                 nms_thresh  = 0.6, 
                 top_k       = 200,
                 variances   = [0.1, 0.2]):
        
        self.num_classes = num_classes
        self.bkg_label   = bkg_label
        self.conf_thresh = conf_thresh
        self.nms_thresh  = nms_thresh
        self.top_k       = top_k
        self.variances    = variances
    
    def forward(self, loc_data, conf_data, priors):
        
#         loc_data   = prediction[:,:,:4]
#         conf_data  = prediction[:,:,4:]
        
        num_priors = priors.shape[0]
        batch_size = loc_data.shape[0]
        
        output  = np.zeros(shape=(batch_size, self.num_classes, self.top_k, 5), dtype= np.float32)
        
        conf_preds = conf_data.swapaxes(2,1)
        
        for i in range(batch_size):
            decoded_boxes = decode_np(loc    = loc_data[i], 
                                      priors = priors,
                                      variances=self.variances)
            
            conf_scores = conf_pred[i].copy()
            
            for cl in range(1, self.num_classes):
                c_mask = np.greater(conf_scores[cl], self.conf_thresh)
                scores = conf_scores[cl][c_mask]
                
                if scores.shape[0] == 0:
                    continue
                
                l_mask =  c_mask.reshape(-1,1).repeat(4, axis= -1)   
                boxes  =  decoded_boxes[l_mask].reshape(-1,4) 
#                 print(boxes.shape)
                
                ids, count = non_maximum_supression(boxes    =  boxes,
                                                    scores   = scores, 
                                                    overlap  =  self.nms_thresh,
                                                    top_k    = self.top_k)
                
#                 print(ids.shape)
#                 print(count)
                
                
#                 output[i, cl, :count] = np.concatenate((temp.reshape(-1,1), 
#                                                         boxes[ids[:count]]), axis=-1)
                
#         flt = output.ascontiguousarray().reshape(batch_size, -1, 5)
#         idx  = np.argsort(flt[:,:,0], axis=-1)
#         rank = np.argsort(idx, axis=-1)
        
#         flt[rank < self.top_k].ex
        return ids, count

In [9]:
loc_data = np.random.randn(4, 8732, 4)
loc_data_th = torch.Tensor(loc_data).view(4, -1)

In [10]:
priors    = np.random.randn(8732, 4)
priors_th = torch.Tensor(priors)

In [11]:
conf_data = np.random.randn(4, 8732, 21)
conf_data_th = torch.Tensor(conf_data).view(-1, 21)
conf_data.shape

(4, 8732, 21)

In [12]:
conf_pred = conf_data.swapaxes(2,1)
conf_pred.shape

(4, 21, 8732)

In [13]:
a = np.array([1,2,3,4])
aa = torch.Tensor(a)

In [14]:
ax = aa.clone()

In [15]:

hello = Detect(num_classes=21, bkg_label=None,conf_thresh=0.5, nms_thresh=0.6, top_k=200)

test = hello.forward(loc_data=loc_data_th, conf_data=conf_data_th, prior_data=priors_th)

IndexError: too many indices for tensor of dimension 1

In [None]:
ids, count = test

In [None]:
hello2 = Detect_np(num_classes   = 21, 
                 bkg_label   = None, 
                 conf_thresh = 0.6, 
                 nms_thresh  = 0.6, 
                 top_k       = 200,
                 variances   = [0.1, 0.2])

test2 = hello2.forward(loc_data= loc_data, conf_data = conf_data, priors= priors)

In [None]:
ids_, count_ = test2

In [None]:
ids_[:count_]

In [None]:
ids[:count]