# Some experiments regarding the loss function for the line finder framework

In [16]:
import os, sys
sys.path.append('C:\\Users\\matthias\\Documents\\myProjects\\TU_Bibliothek\\code\\baseline-extract')
import torch
import torch.nn as nn
from scipy.optimize import linear_sum_assignment

In [141]:
pred = torch.tensor([[56,124,0.1, 55, 0.8], [2,54,0.12, 52, 0.3], [102, 95, 0.05, 59, 0.99], [193,234, -0.2, 51, 0.9], [654,234, 0.25, 52, 0.6], [4,3450, 0.8, 32, 0.1], [345,6, 0.12, 57, 0.6], [23,634, 0.18, 45, 0.3], [345,64,0.1, 55, 0.8]])
label = torch.tensor([[100,100, 0, 64], [200,200, 0.1, 64], [300,300, 0.05, 64]])

In [157]:
pred = torch.tensor([[100, 100,0.1, 55, 0.99], [200,200,0.12, 52, 0.99], [300, 300, 0.05, 59, 0.99], [193,234, -0.2, 51, 0.009], [654,234, 0.25, 52, 0.006], [4,3450, 0.8, 32, 0.001], [345,6, 0.12, 57, 0.006], [23,634, 0.18, 45, 0.003], [345,64,0.1, 55, 0.008]])
label = torch.tensor([[100,100, 0, 64], [200,200, 0.1, 64], [300,300, 0.05, 64]])

In [26]:
pred.shape

torch.Size([9, 5])

In [27]:
label.shape

torch.Size([3, 4])

In [28]:
crit = nn.MSELoss()

In [29]:
pred = pred.float()
label = label.float()

In [33]:
#crit(label[0], pred[0,0:2])

In [31]:
N = pred.shape[0]
M = label.shape[0]

In [32]:
cost = torch.zeros(N, M)

In [11]:
for n in range(N):
    for m in range(M):
        cost[n, m] = crit(pred[n,0:2], label[m])

In [12]:
X = linear_sum_assignment(cost)

In [13]:
cost

tensor([[1.2560e+03, 1.3256e+04, 4.5256e+04],
        [5.8600e+03, 3.0260e+04, 7.4660e+04],
        [1.4500e+01, 1.0314e+04, 4.0614e+04],
        [1.3262e+04, 2.5625e+03, 1.1862e+04],
        [1.6244e+05, 1.0364e+05, 6.4836e+04],
        [5.6159e+06, 5.3005e+06, 5.0051e+06],
        [3.4430e+04, 2.9330e+04, 4.4230e+04],
        [1.4554e+05, 1.0984e+05, 9.4142e+04],
        [3.0660e+04, 1.9760e+04, 2.8860e+04]])

In [14]:
X

(array([0, 2, 3], dtype=int64), array([0, 1, 2], dtype=int64))

In [198]:
import torch
import torch.nn as nn
from scipy.optimize import linear_sum_assignment
import time
from src.utils.distances import point_line_distance


class LineFinderLoss(nn.Module):
    def __init__(self, alpha=0.01):
        super(LineFinderLoss, self).__init__()
        self.mse = nn.MSELoss()
        self.alpha = alpha

    def forward(self, pred, label):
        batch_size = pred.shape[0]
        n_tot = pred.shape[1]
        m_tot = label.shape[1]

        loss = 0

        for b in range(batch_size):
            # I get P predictions and T true labels.
            inp = pred[b, :, 0:4]
            targ = label[b, :, :]

            conf_scores = pred[b, :, 4]
            print(conf_scores)

            # Compute the confidence for all P predictions.
            log_c = torch.log(conf_scores + 0.00001)
            log_c_anti = torch.log(1 - conf_scores + 0.00001)

            # Expand such that for all T true lables I have a row of all predicted confidence logs.
            # The result is a P x T matrix.
            log_c_exp = log_c[:, None].expand(-1, targ.shape[0])
            log_c_anti_exp = log_c_anti[:, None].expand(-1, targ.shape[0])

            # Expand such that I get P x T x 4 matrices.
            inp_exp = inp[:, None, :].expand(-1, targ.shape[0], -1)
            targ_exp = targ[None, :, :].expand(inp.shape[0], -1, -1)

            # Compute the difference between every pair of prediction and true label locations.
            diff = (inp_exp[:, :, 0:4] - targ_exp[:, :, 0:4])
            normed_diff = torch.norm(diff, 2, 2) ** 2

            # Loss = Sum_{n=0}^N Sum_{m=0}^M    X_nm [alpha*MSE(l_n, p_m) - Log(c_m)] - (1- X_nm) Log(1-c_m)
            # where:
            #   N:      prediction dimension
            #   M:      label dimension
            #   X_mn:   linear assignement matrix
            #   l_n:    label coordinates
            #   p_m:    prediction coordinates
            #   c_m:    confidence scores

            # Compute the cost matrix. This is a P x T matrix.
            C = self.alpha*normed_diff - log_c_exp + log_c_anti_exp

            X = torch.zeros(C.shape)
            x_c = torch.ones(C.shape[0])

            # For every column index (true), compute the row index (pred) where the cost is minimal.
            inp_idx, targ_idx = linear_sum_assignment(C)
            X[(inp_idx, targ_idx)] = 1.0
            x_c[inp_idx] = 0.0
            
            location_loss = (self.alpha*normed_diff*X).sum()
            confidence_loss = -(log_c_exp*X).sum() - (log_c_anti*x_c).sum()

            loss = location_loss + confidence_loss

        return loss

In [199]:
lfl = LineFinderLoss()

In [200]:
p = pred.unsqueeze(0)
l = label.unsqueeze(0)

In [201]:
d = lfl(p,l)

tensor([0.9900, 0.9900, 0.9900, 0.0090, 0.0060, 0.0010, 0.0060, 0.0030, 0.0080])


In [202]:
lfl(p,l)

tensor([0.9900, 0.9900, 0.9900, 0.0090, 0.0060, 0.0010, 0.0060, 0.0030, 0.0080])


tensor(2.5633)

In [203]:
d

tensor(2.5633)

In [208]:
X.data

tensor([[1., 0., 0.],
        [0., 0., 0.],
        [0., 1., 0.],
        [0., 0., 1.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]])

In [115]:
X_c

tensor([[0., 1., 1.],
        [1., 1., 1.],
        [1., 0., 1.],
        [1., 1., 0.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]])

In [210]:
X.device

device(type='cpu')