In [None]:
import sys
import os

current_dir = os.getcwd()
parent_dir = os.path.dirname(current_dir)
sys.path.append(parent_dir)

In [43]:
import torch
import torch.nn as nn
import torch.nn.functional as F

In [44]:
a = torch.tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
mask = torch.tensor([[True, True, True], [False, False, True], [False, True, True]])
a[mask]

tensor([1, 2, 3, 6, 8, 9])

In [181]:
def weighted_cross_entropy(pred_conf, target_conf, neg_weights=1e-3):
    """
    Args:
        pred_conf: (tensor) shape [batch_size, num_priors, num_classes]
        target_conf: (tensor) shape [batch_size, num_priors]
        neg_weights: (float)
    Returns:
        loss: (tensor) shape [1]
    """

    # reshape them to [batch_size * num_priors, num_classes], [batch_Size * num_priors, -1]
    pred_conf = pred_conf.view(-1, pred_conf.size(-1))
    target_conf = target_conf.view(-1)
    pos_mask = target_conf == 0
    pred_conf = F.softmax(pred_conf, dim=-1)
    target_conf = target_conf.long().unsqueeze(1)  # [N, 1]
    nll = -torch.log(
      pred_conf.gather(dim=1, index=target_conf)
    ).squeeze(1)   # [N]
    nll[pos_mask] = nll[pos_mask] * neg_weights
    return nll.sum()

In [None]:
from src.utils import matches
class CustomMultiBoxLoss(nn.Module):
    def __init__(self, threshold, priors, conf_weight = 1, neg_weight=1e-3):
        super().__init__()
        self.threshold = threshold
        self.priors = priors
        self.conf_weight = conf_weight
        self.neg_weight = neg_weight

    def encode(self, gt_list, matched_priors_boxes, variances=[0.1, 0.1, 0.2, 0.2]):
        """
        Args:
            gt_list: List of 1D tensors (absolute GT coords)
            matched_priors_boxes: 2D tensor (corresponding anchor priors)
        Returns:
            List of 1D tensors (encoded offsets)
        """
        if len(gt_list) == 0:
            return []

        matched_gt = torch.stack(gt_list)

        g_cx = (matched_gt[:, 0] + matched_gt[:, 2]) / 2
        g_cy = (matched_gt[:, 1] + matched_gt[:, 3]) / 2
        g_w  = matched_gt[:, 2] - matched_gt[:, 0]
        g_h  = matched_gt[:, 3] - matched_gt[:, 1]

        p_cx = matched_priors_boxes[:, 0]
        p_cy = matched_priors_boxes[:, 1]
        p_w  = matched_priors_boxes[:, 2]
        p_h  = matched_priors_boxes[:, 3]

        # SSD Encoding Math
        enc_cx = (g_cx - p_cx) / (p_w * variances[0])
        enc_cy = (g_cy - p_cy) / (p_h * variances[1])
        enc_w  = torch.log(g_w / p_w + 1e-5) / variances[2]
        enc_h  = torch.log(g_h / p_h + 1e-5) / variances[3]

        return torch.stack([enc_cx, enc_cy, enc_w, enc_h], dim=1)

    def forward(self, preds, targets):
        """
            Args:
                preds: (tuple) includes loc, conf
                    loc: (tensor) shape [batch_size, num_priors, 4]
                    conf: (tensor) shape [batch_size, num_priors, num_classes] (num_classes include background as 0)

                targets: (list) shape [batch_size, num_object_i, 5]
        """
        device = preds[0].device
        pred_loc, pred_conf = preds

        batch_size = pred_loc.size(0)
        num_priors = pred_loc.size(1)

        # loss matching
        matched_priors, matched_gt_boxes = [], []
        # create a tensor that has shape [batch_size, num_priors]
        target_conf = torch.zeros((batch_size, num_priors))
        for i in range(batch_size):
            matched_gt_boxes_one = []
            # calculate jaccard scores
            truth_indexes = matches(self.threshold, torch.as_tensor(targets[i], device=device), self.priors)
            match_priors_one = pred_loc[i][truth_indexes != -1] # match those doesn't predict bg
            for truth_id in truth_indexes:
                if truth_id != -1:
                    matched_gt_boxes_one.append(torch.as_tensor(targets[i][truth_id], device=device)[:-1])
            matched_priors.extend(match_priors_one)
            matched_gt_boxes.extend(self.encode(matched_gt_boxes_one, self.priors[truth_indexes != -1]))

            for j in range(num_priors):
                target_conf[i, j] = targets[i][truth_indexes[j]][-1]
        matched_priors = torch.cat(matched_priors, dim=0).to(device)
        matched_gt_boxes = torch.cat(matched_gt_boxes, dim=0).to(device)

        loc_loss = F.smooth_l1_loss(matched_priors, matched_gt_boxes, reduction='sum') / batch_size

        # conf loss
        target_conf = target_conf.to(device)
        conf_loss = weighted_cross_entropy(pred_conf, target_conf, neg_weights=self.neg_weight) / batch_size * self.conf_weight


        return loc_loss, conf_loss



In [None]:
from src.prior_box import PriorBox, voc
prior_gen = PriorBox(voc)
prior_boxes = prior_gen.forward()
prior_boxes = prior_boxes

In [184]:
loss_fn = CustomMultiBoxLoss(0.5, prior_boxes, conf_weight=10, neg_weight=1e-4)

# AI TEST

In [185]:
import torch.optim as optim
def test_convergence():
    print("--- Starting Convergence Test ---")

    # 1. Setup Data
    # Fixed priors: (cx, cy, w, h)
    priors = torch.tensor([
        [0.5, 0.5, 0.2, 0.2], # Center anchor
        [0.2, 0.2, 0.1, 0.1], # Top-left anchor
        [0.8, 0.8, 0.1, 0.1]  # Bottom-right anchor
    ], dtype=torch.float32)

    # Ground Truth: One object in the middle (class 1), one top left (class 2)
    # Format: [x1, y1, x2, y2, label]
    target_data = [[
        [0.4, 0.4, 0.6, 0.6, 1.0], # Matches prior 0 perfectly
        [0.15, 0.15, 0.25, 0.25, 2.0] # Matches prior 1
    ]]

    # 2. Setup Model (Simulated)
    # Pred Loc: [1 batch, 3 priors, 4 coords]
    # Pred Conf: [1 batch, 3 priors, 3 classes (0=bg, 1, 2)]

    # Initialize with random noise
    pred_loc = torch.randn(1, 3, 4, requires_grad=True)
    pred_conf = torch.randn(1, 3, 3, requires_grad=True)

    optimizer = optim.SGD([pred_loc, pred_conf], lr=1e-2, momentum=0.9)
    criterion = CustomMultiBoxLoss(threshold=0.5, priors=priors, conf_weight=100)

    # 3. Training Loop
    print(f"{'Iter':<10} | {'Loc Loss':<10} | {'Conf Loss':<10} | {'Total':<10}")
    print("-" * 46)

    for i in range(100):
        optimizer.zero_grad()

        preds = (pred_loc, pred_conf)
        loss_l, loss_c = criterion(preds, target_data)
        total_loss = loss_l + loss_c

        total_loss.backward()
        optimizer.step()

        if i % 10 == 0:
            print(f"{i:<10} | {loss_l.item():<10.4f} | {loss_c.item():<10.4f} | {total_loss.item():<10.4f}")

    # 4. Final Validation
    print("-" * 46)
    print("Final Predictions Check:")

    # Check Confidence (Should classify Prior 0 as Class 1, Prior 1 as Class 2)
    probs = F.softmax(pred_conf, dim=2)
    print("Prior 0 (Center) Class Probs:", probs[0, 0].detach().numpy().round(2))
    print("Prior 1 (Top-L)  Class Probs:", probs[0, 1].detach().numpy().round(2))
    print("Prior 2 (Bot-R)  Class Probs:", probs[0, 2].detach().numpy().round(2))

    # Check Localization (Should be close to 0 offsets for perfect matches)
    # Since GT matches Priors almost perfectly in this data, offsets should converge to 0
    print("\nPrior 0 Loc Offsets (Target ~0):", pred_loc[0, 0].detach().numpy().round(2))

if __name__ == "__main__":
    test_convergence()

--- Starting Convergence Test ---
Iter       | Loc Loss   | Conf Loss  | Total     
----------------------------------------------
0          | 3.1875     | 250.8980   | 254.0855  
10         | 1.7884     | 0.0425     | 1.8309    
20         | 0.3953     | 0.0019     | 0.3973    
30         | 0.0319     | 0.0007     | 0.0326    
40         | 0.0549     | 0.0005     | 0.0554    
50         | 0.0262     | 0.0004     | 0.0266    
60         | 0.0029     | 0.0004     | 0.0033    
70         | 0.0010     | 0.0004     | 0.0014    
80         | 0.0011     | 0.0004     | 0.0015    
90         | 0.0003     | 0.0004     | 0.0007    
----------------------------------------------
Final Predictions Check:
Prior 0 (Center) Class Probs: [0. 1. 0.]
Prior 1 (Top-L)  Class Probs: [0. 0. 1.]
Prior 2 (Bot-R)  Class Probs: [0. 0. 1.]

Prior 0 Loc Offsets (Target ~0): [ 0.  0. -0. -0.]
