# Table of Contents
 <p><div class="lev2 toc-item"><a href="#Select-Mask" data-toc-modified-id="Select-Mask-01"><span class="toc-item-num">0.1&nbsp;&nbsp;</span>Select Mask</a></div><div class="lev2 toc-item"><a href="#Classification-loss" data-toc-modified-id="Classification-loss-02"><span class="toc-item-num">0.2&nbsp;&nbsp;</span>Classification loss</a></div><div class="lev2 toc-item"><a href="#Ground-truth-encoding" data-toc-modified-id="Ground-truth-encoding-03"><span class="toc-item-num">0.3&nbsp;&nbsp;</span>Ground truth encoding</a></div>

In [41]:
import chainer 
import chainer.functions as F 
import numpy as np 

In [42]:
# Ground truth for the toy example: a zero array of shape (2,1,3,3) with one
# 2x2 patch of ones per sample, wrapped in a chainer Variable.
ystar_data = np.zeros(shape=(2, 1, 3, 3))
ystar_data[0, :, :2, 1:] = 1   # sample 0: first two rows, last two columns
ystar_data[1, :, 1:, :2] = 1   # sample 1: last two rows, first two columns

y_star = chainer.Variable(ystar_data)
print(y_star)

variable([[[[ 0.  1.  1.]
            [ 0.  1.  1.]
            [ 0.  0.  0.]]]


          [[[ 0.  0.  0.]
            [ 1.  1.  0.]
            [ 1.  1.  0.]]]])


In [43]:
# Network output for the toy example: one probability per pixel, reshaped to
# (2,1,3,3). Both samples use the same 3x3 grid of values.
probs = np.array([
    [[0.3, 0.1, 0.8],
     [0.4, 0.1, 0.9],
     [0.6, 0.7, 0.5]],
    [[0.3, 0.1, 0.8],
     [0.4, 0.1, 0.9],
     [0.6, 0.7, 0.5]],
]).reshape(2, 1, 3, 3)
y_hat = chainer.Variable(probs)
print(y_hat)

variable([[[[ 0.3  0.1  0.8]
            [ 0.4  0.1  0.9]
            [ 0.6  0.7  0.5]]]


          [[[ 0.3  0.1  0.8]
            [ 0.4  0.1  0.9]
            [ 0.6  0.7  0.5]]]])


In [44]:
# Per-pixel squared error between prediction and ground truth.
# Note: despite the `_abs` suffix this is the squared difference, not |y_hat - y_star|.
l_conf_abs = (y_hat - y_star)**2

In [45]:
# Pull the underlying array out of the per-pixel loss via `.data`.
# TODO: the selection-mask computation on this array should live in a function of its own.
# NOTE(review): `l_conf_abs_data` is not read by any of the cells shown below.
l_conf_abs_data = l_conf_abs.data


## Select Mask 

In [46]:
def select_mask_fn(l_conf_abs, y_star):
    """
    Returns a mask for both positive and negative pixels to be selected or not

    Every positive pixel (``y_star == 1``) is selected. Among the negative
    pixels of each sample only the ``k`` with the largest loss are selected,
    where ``k`` is the number of positive pixels of that sample, capped at the
    number of negative pixels available.

    Args:
        l_conf_abs (b,1,h,w): Per-pixel loss. Numpy array, or any object that
            exposes its array through ``.data`` (e.g. a chainer Variable).
        y_star (b,1,h,w): Binary ground truth (1 = positive, 0 = negative),
            same accepted types and same shape as ``l_conf_abs``.

    Returns:
        select_mask (b,1,h,w): float numpy array, 1.0 where the pixel is
            selected and 0.0 elsewhere.

    """
    def _to_array(value):
        # np.ndarray has a `.data` attribute too (a raw buffer), so only
        # unwrap objects that are not already arrays.
        if isinstance(value, np.ndarray):
            return value
        return np.asarray(getattr(value, 'data', value))

    loss = _to_array(l_conf_abs)
    y_star = _to_array(y_star)

    positives = y_star > 0
    # Shape follows the input instead of a hard-coded (2,1,3,3).
    select_mask = np.zeros(y_star.shape)

    for num in range(y_star.shape[0]):
        pos = positives[num]

        # Positives are ranked at -inf so they can never take one of the
        # "hard negative" slots.
        neg_loss = np.where(pos, -np.inf, loss[num]).ravel()

        # One negative per positive, but never more negatives than exist;
        # otherwise the selection would spill over onto positive pixels.
        num_pos = int(pos.sum())
        k_value = min(num_pos, pos.size - num_pos)

        # Flat indices of the k negatives with the largest loss.
        hardest = np.argsort(neg_loss)[::-1][:k_value]

        flat_mask = np.zeros(pos.size)
        flat_mask[hardest] = 1
        # Assigning (rather than adding) keeps the mask strictly binary.
        flat_mask[pos.ravel()] = 1

        select_mask[num] = flat_mask.reshape(pos.shape)

    return select_mask

# Sanity check on the toy tensors: print which pixels the mask keeps.
print(select_mask_fn(l_conf_abs, y_star.data))
        
    

[[[[ 0.  1.  1.]
   [ 1.  1.  1.]
   [ 1.  1.  1.]]]


 [[[ 1.  0.  1.]
   [ 1.  1.  1.]
   [ 1.  1.  1.]]]]


## Classification loss


In [47]:
def classification_loss(y_hat, y_gt):
    """
    Calculate the classification loss

    The loss is the squared error between ``y_hat`` and ``y_gt``, summed over
    the pixels kept by ``select_mask_fn`` and divided by the batch size.

    Args:
        y_hat (b,1,h,w): chainer Variable holding the predictions
        y_gt (b,1,h,w): chainer Variable holding the ground truth

    Returns:
        scalar loss value (0-dimensional chainer Variable)

    """
    # Use the ground truth that was passed in; the previous version silently
    # replaced it with the notebook-level `y_star`.
    loss_per_pixel = (y_hat - y_gt) ** 2

    # The mask comes back as a plain numpy array, so it acts as a constant
    # per-pixel weight on the loss.
    select_mask = select_mask_fn(loss_per_pixel, y_gt.data)
    selected_loss = loss_per_pixel * select_mask

    # Reduce with chainer's own sum rather than np.sum on a Variable.
    return F.sum(selected_loss) / selected_loss.shape[0]
    
    
# Evaluate the loss on the toy tensors and display the raw value held by the result.
loss_value  = classification_loss(y_hat,y_star)
loss_value.data

array(3.0700000000000003)

## Ground truth encoding 

In [59]:
# All-zero array of shape (2,1,4,4); presumably laid out as (b,1,h,w) like the
# tensors used for the loss above. Later cells fill it in place.
gt_data = np.zeros((2,1,4,4))
gt_data


array([[[[ 0.,  0.,  0.,  0.],
         [ 0.,  0.,  0.,  0.],
         [ 0.,  0.,  0.,  0.],
         [ 0.,  0.,  0.,  0.]]],


       [[[ 0.,  0.,  0.,  0.],
         [ 0.,  0.,  0.,  0.],
         [ 0.,  0.,  0.,  0.],
         [ 0.,  0.,  0.,  0.]]]])

In [92]:
# box dimensions 
# NOTE(review): box_ch1 is defined but not used in this cell; the region
# below is written with hard-coded slices instead.
box_ch1 = np.array([1,1,2,2])

# Set indices 1-2 along both of the last two axes of entry [0,0] to 1.
# This modifies gt_data in place, so the displayed value depends on which
# cells were executed before this one.
gt_data[0,0,1:3,1:3] = 1 
gt_data


array([[[[ 0.,  0.,  0.,  0.],
         [ 0.,  1.,  1.,  0.],
         [ 0.,  1.,  1.,  0.],
         [ 0.,  0.,  0.,  0.]]],


       [[[ 0.,  0.,  1.,  1.],
         [ 0.,  0.,  1.,  1.],
         [ 1.,  1.,  0.,  0.],
         [ 1.,  1.,  0.,  0.]]]])

In [93]:
# box dimensions 
# Two boxes, stored as a (2,4) array. NOTE(review): box_ch2 is not used to
# fill gt_data here; the regions below are written with hard-coded slices.
box_ch2 = np.array(([0,2,2,3],[0,2,3,1]) )

# Set two 2x2 patches of entry [1,0] to 1 (modifies gt_data in place).
gt_data[1,0,0:2,2:4] = 1
gt_data[1,0,2:4,0:2] = 1

gt_data

array([[[[ 0.,  0.,  0.,  0.],
         [ 0.,  1.,  1.,  0.],
         [ 0.,  1.,  1.,  0.],
         [ 0.,  0.,  0.,  0.]]],


       [[[ 0.,  0.,  1.,  1.],
         [ 0.,  0.,  1.,  1.],
         [ 1.,  1.,  0.,  0.],
         [ 1.,  1.,  0.,  0.]]]])

In [103]:
def box2center(val):
    """
    Convert boxes given by two corners into center/size form.

    Args:
        val: a single box ``[x0, y0, x1, y1]`` or an array of such boxes with
            shape (..., 4).

    Returns:
        Float numpy array ``[center_x, center_y, width, height]`` with the
        same leading shape as ``val``; a single box yields shape (4,).

    """
    # The explicit float cast makes the halving below a true division
    # whatever integer type the caller passes in.
    boxes = np.asarray(val, dtype=float)

    mins = boxes[..., 0:2]    # (x0, y0)
    maxs = boxes[..., 2:4]    # (x1, y1)

    sizes = maxs - mins       # (width, height); negative if the corners are swapped
    centers = mins + sizes / 2

    return np.concatenate([centers, sizes], axis=-1)
    
    
# NOTE(review): all_centers is allocated here but never written in this cell.
all_centers=  np.zeros((2,1,4))  

# Print the center of every box in box_ch2. The loop index, width and height
# are unpacked but not used.
for i, val in enumerate(box_ch2): 
    center_x,center_y, width, height = box2center(val)
  
    print('center_x ',center_x,'center_y ',center_y)
        

center_x  1.0 center_y  2.5
center_x  1.5 center_y  1.5


In [100]:
## First object  
# Zero array of shape (2,4,4,4).
box_shifts = np.zeros((2,4,4,4))


# For each entry of gt_data, print the indices where it equals 1.
for i,val in enumerate(gt_data): 
    # NOTE(review): shift_channel is sliced out of box_shifts but not used yet.
    shift_channel =  box_shifts[i,0,:,:] 
    indices = np.where(val==1)
    
    print(indices)
    

(array([0, 0, 0, 0]), array([1, 1, 2, 2]), array([1, 2, 1, 2]))
(array([0, 0, 0, 0, 0, 0, 0, 0]), array([0, 0, 1, 1, 2, 2, 3, 3]), array([2, 3, 2, 3, 0, 1, 0, 1]))


1.0 2.5
1.5 1.5
