## Plan
* Sit for 8 hours and finish it. Rest will happen automatically. 
* Function call flow:
* x -> x_down -> y_loc, y_class -> pred_loc, pred_class -> loss_class, loss_reg -> loss
* This completes the feedback loop.

## Classes
The following classes/methods exist in the code:
* downsample
* encode
* Network
* Loss

In [None]:
def encode_y(x_down, labels):
    """
    x_downsampled tensor -> y_tensor: Numpy

    Builds the ground-truth classification mask and the regression targets
    for a batch of downsampled images.

    Args:
        x_down (b, 3, 60, 60): Downsampled batch of images. Only channel 0 is
            inspected: every pixel where it is > 0 counts as a positive.
        labels (b, v, 4): List of lists of downsampled boxes, one list per
            image, each box in (tx, ty, bx, by) notation as read by match_boxes.

    Returns:
        y_class (b, 1, 60, 60): Binary mask of positive pixels for each image.
        y_loc (b, 4, 60, 60): For each positive pixel, the offsets returned by
            match_boxes for that pixel; zero everywhere else.
    """
    batch, _, height, width = x_down.shape

    y_class = np.zeros((batch, height, width))
    y_loc = np.zeros((batch, 4, height, width))

    y_class[x_down[:, 0, :, :] > 0] = 1  # Can also choose a smaller neighbourhood here
    pos_inds = np.argwhere(y_class)  # y_class is also the positive examples mask

    # argwhere yields (batch index, row, column); match_boxes takes x (column) first.
    for b, y, x in pos_inds:
        y_loc[b, :, y, x] = match_boxes(x, y, labels[b])

    # Add the singleton channel axis only after the 3-d mask was used for indexing.
    y_class = y_class.reshape(batch, 1, height, width)

    return y_class, y_loc

def match_boxes(x, y, boxes):
    """ Numpy
    Matches a point (x,y) to a bunch of boxes. Returns offset of the box with the nearest centre.

    Args:
        x (scalar): X coordinate of point being matched
        y (scalar): Y coordinate of point being matched
        boxes: List of list of downsampled boxes being matched, in (tx, ty, bx, by) notation

    Returns:
        offset (4,): Numpy array (tx - x, ty - y, bx - x, by - y) for the box whose
            centre is nearest to the point. On a tie the earliest box in the list wins.

    Raises:
        ValueError: If no box could be matched (e.g. boxes is empty).
    """
    nearest = None
    best_dist = float('inf')  # No finite distance can exceed the initial value

    for box in boxes:
        cx = (box[0] + box[2]) / 2
        cy = (box[1] + box[3]) / 2

        # Squared distance is enough for picking the minimum.
        box_dist = (cx - x) ** 2 + (cy - y) ** 2

        if box_dist < best_dist:
            nearest = box
            best_dist = box_dist

    if nearest is None:
        # Fail with a clear message instead of an unbound-variable error.
        raise ValueError("match_boxes needs at least one box to match against")

    return np.array([nearest[0] - x, nearest[1] - y, nearest[2] - x, nearest[3] - y])
    

def downsample(x):
    """
    x -> x/4

    Placeholder: the body is not implemented yet, so calling this currently
    returns None. The shapes below describe the intended contract.

    Args:
        x (b, 3, 240, 240): Batch of 3 channel 240 x 240 images

    Returns:
        x_down (b, 3, 60, 60): Batch of 3 channel 60x60 images
    """
    # TODO: implement; the downsampling method (striding, pooling, ...) is not decided here.
    pass

def loss(pred_class, pred_loc, gt_class, gt_loc, lambd=0.4):
    """
    Total loss: classification loss plus lambd times the regression loss.

    The two terms are computed by classification_loss and regression_loss;
    gt_class is handed to both of them.

    Args:
        pred_class (b, 1, 60, 60): Network confidence probs for images
        pred_loc (b, 4, 60, 60): Network offsets for each location
        gt_class (b, 1, 60, 60): Gt class scores from encode
        gt_loc (b, 4, 60, 60): Gt regression offsets from encode
        lambd (scalar): Weighting factor applied to the regression loss

    Returns:
        loss: classification_loss(...) + lambd * regression_loss(...)
    """
    class_term = classification_loss(pred_class, gt_class)
    reg_term = regression_loss(pred_loc, gt_loc, gt_class)

    return class_term + lambd * reg_term

def classification_loss(pred_class, gt_class):
    """
    Classification loss from squared diff between probabilities. Should probably use cross entropy instead but using this now for simplicity.

    Also does hard negative mining, so the per-pixel loss is multiplied by the
    mask returned by selection_mask before it is summed.

    Args:
        pred_class (b, 1, 60, 60): Network confidence probs
        gt_class (b, 1, 60, 60): Binary gt confidence probs

    Returns:
        class_loss: Scalar, sum of the selected per-pixel losses divided by the batch size
    """
    abs_loss = (pred_class - gt_class) ** 2
    mask = selection_mask(abs_loss, gt_class)
    selected_loss = abs_loss * mask

    # Sum the masked loss (not the mask function itself) and average over the batch.
    return selected_loss.sum() / pred_class.shape[0]

def regression_loss(pred_loc, gt_loc, gt_class=None):
    """
    Regression loss from vanilla squared diff between shifts, restricted to positive pixels.

    Args:
        pred_loc (b, 4, 60, 60): Network offsets for top left and bottom right of box.
        gt_loc (b, 4, 60, 60): Ground truth offsets for top left and bottom right of box.
        gt_class (b, 1, 60, 60): Binary mask of positive examples; (b, 60, 60) is
            accepted as well. If None, no masking is applied and every pixel counts.

    Returns:
        reg_loss: Scalar, sum of the selected per-pixel losses divided by the batch size
    """
    # Summing over the 4 offset channels leaves one value per pixel: (b, h, w).
    abs_loss = ((pred_loc - gt_loc) ** 2).sum(axis=1)

    if gt_class is None:
        selected_loss = abs_loss
    else:
        # Drop the mask's channel axis first; multiplying (b, h, w) by (b, 1, h, w)
        # would broadcast to (b, b, h, w) and mix images of the batch.
        selected_loss = abs_loss * gt_class.reshape(abs_loss.shape)

    return selected_loss.sum() / pred_loc.shape[0]

def _as_ndarray(value):
    """Return value as a numpy array, unwrapping objects that keep their array in `.data`."""
    if isinstance(value, np.ndarray):
        return value
    # Presumably a Variable-like wrapper exposing the raw array as `.data` -- TODO confirm.
    return np.asarray(getattr(value, 'data', value))


def selection_mask(abs_loss, gt_class):
    """
    Returns a binary mask from absolute mean square classification loss and the ground truth mask

    For every image the mask selects all positive pixels plus the k negative
    pixels with the largest loss (hard negative mining), where k is the number
    of positive pixels in that image, capped at the number of negatives.

    Args:
        abs_loss (b, 1, 60, 60): Absolute probability loss value over each pixel
        gt_class (b, 1, 60, 60): Binary gt probs to set the positive pixels to one

    Returns:
        select_mask (b, 1, 60, 60): Selection mask for both positive and negative pixels,
            as a float numpy array of zeros and ones with the shape of gt_class
    """
    loss_values = _as_ndarray(abs_loss)
    positives = _as_ndarray(gt_class) > 0

    select_mask = np.zeros(positives.shape)

    for num in range(positives.shape[0]):
        pos_flat = positives[num].ravel()
        neg_idx = np.flatnonzero(~pos_flat)

        # As many hard negatives as positives, but never more than exist.
        k_value = min(int(pos_flat.sum()), neg_idx.size)

        picked = pos_flat.astype(select_mask.dtype)
        if k_value > 0:
            # Rank only the negative pixels, largest loss first; the stable sort
            # keeps the result deterministic when losses tie.
            neg_loss = loss_values[num].ravel()[neg_idx]
            hardest = neg_idx[np.argsort(-neg_loss, kind='stable')[:k_value]]
            picked[hardest] = 1

        select_mask[num] = picked.reshape(positives.shape[1:])

    return select_mask

class DenseBoxes(Chain):
    """Skeleton of the DenseBoxes network; no layers are registered yet.

    NOTE(review): `Chain` is not defined in the visible code -- presumably
    chainer.Chain; confirm against the notebook's imports.
    """

    def __init__(self):
        # The base-class constructor is called with no arguments for now.
        super(DenseBoxes, self).__init__(
        # List of all the layers in denseboxes here.
    )

## Tests
Writing tests for all of these methods. Checking all of them should be sufficient for this code.

In [15]:
import unittest
from unittest import TestSuite

class TestCode(unittest.TestCase):
    """Placeholder test cases for the pipeline; most bodies are still to be written."""

    def test_dataset(self):
        """ Test that dataset works """
        print('lola')

    def test_encode_y(self):
        """ LOGIC: Manually worked out example with batch size of two. Trivial category. """
        # A visual check would also do: the encoded boxes should resemble
        # a distance transform around each box.
        print('lola')

    def test_downsample(self):
        """ LOGIC: Test that shape matches. """
        # A visual check would also do.

    def test_predict(self):
        """ LOGIC: What does this method do? """
        # A visual check would also do.

    def test_network(self):
        """ LOGIC: Test that the shape is correct? """

    def test_loss(self):
        """ LOGIC: Calculate sample loss for the network. Trivial category. """

    def test_reg_loss(self):
        """ LOGIC: Calculate regression loss for the network with batch size of two. Trivial category. """

    def test_class_loss(self):
        """ LOGIC: Calculate classification loss for the network with batch size of two. Trivial category. """

    def test_convergence(self):
        """ Check that for the one and two squares datasets, the network loss converges. Plot the loss and show that it converges. """
        
# Run only the encode_y test for now instead of the whole TestCode class.
ts = TestSuite()
ts.addTest(TestCode('test_encode_y'))
# Kept as the last expression so the notebook cell still displays the result object.
unittest.TextTestRunner().run(ts)

.

lola



----------------------------------------------------------------------
Ran 1 test in 0.003s

OK


<unittest.runner.TextTestResult run=1 errors=0 failures=0>