In [1]:
import random
import os
import time
import gc 
import math
from collections import OrderedDict
from typing import Callable, Dict, List, Optional, Tuple

from matplotlib import pyplot as plt
from IPython import display
import torch
import torch.nn as nn
from torch import Tensor
from torch.utils.data import DataLoader, Subset, RandomSampler
import numpy as np
from scipy.optimize import linear_sum_assignment
import torchvision
from torchvision.ops import box_convert, generalized_box_iou
from torchvision.io import read_image
from torchvision.ops.boxes import masks_to_boxes
from torchvision import tv_tensors
from torchvision.transforms import v2 as T
from torchvision.models.feature_extraction import create_feature_extractor
from torchvision.ops.feature_pyramid_network import FeaturePyramidNetwork, LastLevelP6P7, ExtraFPNBlock
from torchvision.ops.misc import Conv2dNormActivation
import timm
from timm.layers import resample_abs_pos_embed 
from tqdm.auto import tqdm
from pprint import pformat
from torchmetrics.detection.mean_ap import MeanAveragePrecision

from dl_toolbox.transforms import NormalizeBB
from dl_toolbox.utils import list_of_dicts_to_dict_of_lists

  from .autonotebook import tqdm as notebook_tqdm


### Model

In [2]:
class Scale(nn.Module):
    """Learnable scalar multiplier applied to its input."""

    def __init__(self, init_value=1.0):
        super().__init__()
        # One-element float32 parameter holding the current scale factor.
        initial = torch.FloatTensor([init_value])
        self.scale = nn.Parameter(initial)

    def forward(self, x):
        """Return ``x`` multiplied by the learned scale (broadcast over ``x``)."""
        scaled = x * self.scale
        return scaled


class Head(nn.Module):
    """
    Prediction head applied identically to every pyramid level.

    A tower of ``n_share_convs`` stages (3x3 conv -> GroupNorm(32) -> ReLU) feeds
    three 3x3 conv branches producing class logits (``n_classes`` channels),
    box regressions (4 channels) and a centerness logit (1 channel).
    Box regressions are multiplied by a per-level learnable ``Scale`` and then
    passed through ReLU so they are non-negative.

    Args:
        in_channels: number of channels of every incoming feature map
        n_classes: number of channels of the classification branch
        n_share_convs: number of conv stages in the tower
        n_feat_levels: number of per-level ``Scale`` modules created; ``forward``
            indexes them by level position, so it must be >= the number of levels passed in
    """

    def __init__(self, in_channels, n_classes, n_share_convs=4, n_feat_levels=5):
        super().__init__()

        def conv3x3(out_channels):
            # Shape-preserving 3x3 convolution (stride 1, padding 1, with bias).
            return nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=True)

        stages = []
        for _ in range(n_share_convs):
            stages += [conv3x3(in_channels), nn.GroupNorm(32, in_channels), nn.ReLU()]
        self.shared_layers = nn.Sequential(*stages)

        self.cls_logits = conv3x3(n_classes)
        self.bbox_pred = conv3x3(4)
        self.ctrness = conv3x3(1)

        self.scales = nn.ModuleList([Scale(init_value=1.0) for _ in range(n_feat_levels)])

    def forward(self, x):
        """
        Args:
            x: iterable of feature maps, one per level (each is flattened over its last two dims)
        Returns:
            (class logits, box regressions, centerness logits), each concatenated over all
            levels along the location axis and permuted so that locations come second:
            B x NumAnchors x C, B x NumAnchors x 4, B x NumAnchors x 1
        """
        per_level_cls, per_level_box, per_level_ctr = [], [], []
        for level, fmap in enumerate(x):
            shared = self.shared_layers(fmap)
            per_level_cls.append(self.cls_logits(shared).flatten(-2))
            per_level_ctr.append(self.ctrness(shared).flatten(-2))
            scaled_reg = self.scales[level](self.bbox_pred(shared))
            per_level_box.append(nn.functional.relu(scaled_reg).flatten(-2))

        def merge(chunks):
            # Concatenate levels along the location axis, then move locations before channels.
            return torch.cat(chunks, dim=-1).permute(0, 2, 1)

        return merge(per_level_cls), merge(per_level_box), merge(per_level_ctr)

class LayerNorm2d(nn.LayerNorm):
    """LayerNorm over the channel axis of '2D' spatial NCHW tensors."""

    def __init__(self, num_channels, eps=1e-6, affine=True):
        super().__init__(num_channels, eps=eps, elementwise_affine=affine)

    def forward(self, x):
        # layer_norm normalises trailing dims: go NCHW -> NHWC, normalise over C, go back to NCHW.
        channels_last = x.permute(0, 2, 3, 1)
        normed = nn.functional.layer_norm(
            channels_last, self.normalized_shape, self.weight, self.bias, self.eps)
        return normed.permute(0, 3, 1, 2)

class SimpleFeaturePyramidNetwork(nn.Module):
    """
    Simple FPN built on top of a single feature map, based on
    `"Exploring Plain Vision Transformer Backbones for Object Detection" <https://arxiv.org/abs/2203.16527>`_.

    Unlike a regular FPN, the Simple FPN expects a single feature map. Four
    parallel branches rescale that map by x4 (two stride-2 transposed convs),
    x2 (one stride-2 transposed conv), x1 (identity) and x1/2 (2x2 max-pool).
    Every branch ends with a 1x1 then a 3x3 ``Conv2dNormActivation`` (normalised
    with ``norm_layer``, no activation) projecting to ``out_channels``.

    Args:
        in_channels (int): number of channels for the input feature map that
            is passed to the module
        out_channels (int): number of channels of the Simple FPN representation
        extra_blocks (ExtraFPNBlock or None): if provided, extra operations will
            be performed. It is expected to take the fpn features, the original
            features and the names of the original features as input, and returns
            a new list of feature maps and their corresponding names
        norm_layer (callable, optional): Module specifying the normalization layer to use.
            Called with a channel count. Default: ``LayerNorm2d``

    Raises:
        TypeError: if ``extra_blocks`` is given and is not an ``ExtraFPNBlock``.

    Examples::

        >>> vitdet = ViTDet(256, 10)
        >>> x = torch.rand(2, 3, 224, 224)
        >>> feat_dict = vitdet.forward_feat(x)
        >>> features = list(feat_dict.values())
        >>> print(f'{[f.shape for f in features] = }')
        >>> box_cls, box_regression, centerness = vitdet.head(features)
        >>> print(f'{box_cls.shape = }')
        >>> assert sum([f.shape[2]*f.shape[3] for f in features])==box_cls.shape[1]

        >>> m = SimpleFeaturePyramidNetwork(16, 8)
        >>> # get some dummy data
        >>> x = torch.rand(1, 16, 8, 8)
        >>> # compute the Simple FPN on top of x
        >>> output = m(x)
        >>> print([(k, tuple(v.shape)) for k, v in output.items()])
        >>> # returns
        >>>   [('0', (1, 8, 32, 32)),
        >>>    ('1', (1, 8, 16, 16)),
        >>>    ('2', (1, 8, 8, 8)),
        >>>    ('3', (1, 8, 4, 4))]

    """

    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        extra_blocks: Optional[ExtraFPNBlock] = None,
        norm_layer: Optional[Callable[..., nn.Module]] = None,
    ):
        super().__init__()
        # The x4 branch calls norm_layer directly, so a None default would crash:
        # fall back to the channel-wise LayerNorm the docstring promises.
        if norm_layer is None:
            norm_layer = LayerNorm2d

        self.blocks = nn.ModuleList()
        for block_index in range(0, 4):
            layers = []
            # Channel count entering the final 1x1 projection of this branch.
            current_in_channels = in_channels
            if block_index == 0:
                # x4 upsampling: two stride-2 transposed convs, norm + GELU in between.
                layers.extend([
                    nn.ConvTranspose2d(
                        in_channels,
                        in_channels // 2,
                        kernel_size=2,
                        stride=2,
                    ),
                    norm_layer(in_channels // 2),
                    nn.GELU(),
                    nn.ConvTranspose2d(
                        in_channels // 2,
                        in_channels // 4,
                        kernel_size=2,
                        stride=2,
                    ),
                ])
                current_in_channels = in_channels // 4
            elif block_index == 1:
                # x2 upsampling: a single stride-2 transposed conv.
                layers.append(
                    nn.ConvTranspose2d(
                        in_channels,
                        in_channels // 2,
                        kernel_size=2,
                        stride=2,
                    ),
                )
                current_in_channels = in_channels // 2
            elif block_index == 2:
                # x1: nothing to do for this scale
                pass
            elif block_index == 3:
                # x1/2 downsampling.
                layers.append(nn.MaxPool2d(kernel_size=2, stride=2))

            # Shared tail: 1x1 projection to out_channels, then a 3x3 conv, both normalised.
            layers.extend([
                Conv2dNormActivation(
                    current_in_channels,
                    out_channels,
                    kernel_size=1,
                    padding=0,
                    norm_layer=norm_layer,
                    activation_layer=None
                ),
                Conv2dNormActivation(
                    out_channels,
                    out_channels,
                    kernel_size=3,
                    norm_layer=norm_layer,
                    activation_layer=None
                )
            ])
            self.blocks.append(nn.Sequential(*layers))

        if extra_blocks is not None:
            if not isinstance(extra_blocks, ExtraFPNBlock):
                raise TypeError(f"extra_blocks should be of type ExtraFPNBlock not {type(extra_blocks)}")
        self.extra_blocks = extra_blocks

    def forward(self, x: Tensor) -> Dict[str, Tensor]:
        """
        Computes the Simple FPN for a feature map.

        Args:
            x (Tensor): input feature map.

        Returns:
            results (OrderedDict[str, Tensor]): feature maps after FPN layers, keyed
                "0", "1", ... (plus whatever names ``extra_blocks`` returns).
                They are ordered from highest resolution first.
        """
        results = [block(x) for block in self.blocks]
        names = [f"{i}" for i in range(len(self.blocks))]

        if self.extra_blocks is not None:
            results, names = self.extra_blocks(results, [x], names)

        # make it back an OrderedDict
        out = OrderedDict(zip(names, results))

        return out
    
class ViTDet(nn.Module):
    """
    Detector made of a timm ViT backbone, a Simple FPN and a shared prediction ``Head``.

    Args:
        out_channels: number of channels of the pyramid features fed to the head
        num_classes: number of object classes predicted by the head
    """

    def __init__(self, out_channels, num_classes):
        super().__init__()
        # Alternative backbone: timm.create_model('samvit_base_patch16.sa1b', pretrained=True)
        backbone_options = dict(
            pretrained=True,
            dynamic_img_size=True,  # Deals with inputs of other size than pretraining
        )
        self.backbone = timm.create_model('vit_tiny_patch16_224', **backbone_options)
        self.sfpn = SimpleFeaturePyramidNetwork(
            in_channels=192,  # presumably the embedding width of vit_tiny - TODO confirm
            out_channels=out_channels,
            # extra_blocks=LastLevelP6P7(out_channels,out_channels),
            norm_layer=LayerNorm2d,
        )
        # 6=4+2extrablocks; with extra_blocks disabled above only the first 4 scales are used.
        self.head = Head(out_channels, num_classes, n_feat_levels=6)

    def forward_feat(self, x):
        """Return the ordered dict of pyramid features computed from images ``x``."""
        # NOTE(review): indices=1 presumably selects the last backbone block - confirm against timm docs.
        backbone_maps = self.backbone.forward_intermediates(
            x, indices=1, norm=False, intermediates_only=True)
        return self.sfpn(backbone_maps[0])

    def forward(self, x):
        """Return ``(box_cls, box_regression, centerness)`` as produced by the head."""
        pyramid = list(self.forward_feat(x).values())
        box_cls, box_regression, centerness = self.head(pyramid)
        return box_cls, box_regression, centerness

### Dataset

In [3]:
class PennFudanDataset(torch.utils.data.Dataset):
    """
    Dataset reading images from ``<root>/PNGImages`` and instance masks from ``<root>/PedMasks``.

    Each item is a dict ``{'image': img, 'target': target}`` where ``target`` holds
    ``boxes`` (XYXY), ``masks``, ``labels``, ``image_id``, ``area`` and ``iscrowd``.
    ``transforms`` (if not None) is called as ``transforms(img, target)``.
    """

    def __init__(self, root, transforms):
        self.root = root
        self.transforms = transforms
        # Both listings are sorted so that the i-th image matches the i-th mask.
        self.imgs = sorted(os.listdir(os.path.join(root, "PNGImages")))
        self.masks = sorted(os.listdir(os.path.join(root, "PedMasks")))

    def __getitem__(self, idx):
        img = read_image(os.path.join(self.root, "PNGImages", self.imgs[idx]))
        mask = read_image(os.path.join(self.root, "PedMasks", self.masks[idx]))

        # Instances are encoded as different values; the first (smallest) id is
        # treated as background and dropped.
        obj_ids = torch.unique(mask)[1:]
        num_objs = len(obj_ids)

        # One binary mask per instance id, then one box per mask.
        masks = (mask == obj_ids[:, None, None]).to(dtype=torch.uint8)
        boxes = masks_to_boxes(masks)

        heights = boxes[:, 3] - boxes[:, 1]
        widths = boxes[:, 2] - boxes[:, 0]

        # Wrap sample and targets into torchvision tv_tensors.
        img = tv_tensors.Image(img)
        h, w = T.functional.get_size(img)
        target = {
            "boxes": tv_tensors.BoundingBoxes(boxes, format="XYXY", canvas_size=(h, w)),
            "masks": tv_tensors.Mask(masks),
            # there is only one class
            "labels": torch.ones((num_objs,), dtype=torch.int64),
            "image_id": idx,
            "area": heights * widths,
            # suppose all instances are not crowd
            "iscrowd": torch.zeros((num_objs,), dtype=torch.int64),
        }

        if self.transforms is not None:
            img, target = self.transforms(img, target)

        return {'image': img, 'target': target}

    def __len__(self):
        return len(self.imgs)
    
def collate(batch):
    """
    Turn a list of per-sample dicts into one dict of lists, stacking the images.

    The 'image' entry becomes a single tensor via ``torch.stack`` (so all images
    must share one shape); every other entry stays a list with one element per sample.
    """
    collated = list_of_dicts_to_dict_of_lists(batch)
    collated['image'] = torch.stack(collated['image'])
    return collated

### Loss

In [4]:
class LossEvaluator(nn.Module):
    """
    Loss made of three terms: sigmoid focal loss on class logits, centerness-weighted
    distance-IoU loss on box regressions and BCE-with-logits loss on centerness.

    Regression and centerness terms are computed on positive anchors only, i.e.
    anchors whose class target is > 0.

    Args:
        num_classes: number of object classes (class target 0 means "no object")
    """

    def __init__(self, num_classes):
        super().__init__()
        self.centerness_loss_func = nn.BCEWithLogitsLoss(reduction="sum")
        self.num_classes = num_classes

    def forward(self, cls_logits, reg_preds, cness_preds, cls_tgts, reg_tgts):
        """
        Args:
            cls_logits: B x L x C class logits
            reg_preds: B x L x 4 predicted (l, t, r, b)
            cness_preds: B x L x 1 centerness logits
            cls_tgts: B x L class targets in 0...C, 0 meaning no object
            reg_tgts: B x L x 4 target (l, t, r, b)
        Returns:
            dict with scalar tensors "cls_loss", "reg_loss", "centerness_loss" and
            their sum "combined_loss". All values are tensors even when the batch
            contains no positive anchor, so callers can always use ``.detach().item()``.
        """
        pos_inds_b, pos_inds_loc = torch.nonzero(cls_tgts > 0, as_tuple=True)
        num_pos = len(pos_inds_b)
        # Normalise by at least 1 so a batch without positives does not divide by zero.
        cls_loss = self._get_cls_loss(cls_logits, cls_tgts, max(num_pos, 1.))
        if num_pos > 0:
            pos_reg_preds = reg_preds[pos_inds_b, pos_inds_loc, :]
            pos_reg_tgts = reg_tgts[pos_inds_b, pos_inds_loc, :]
            pos_cness_preds = cness_preds[pos_inds_b, pos_inds_loc, :].squeeze(-1)
            cness_tgts = self._compute_centerness_targets(pos_reg_tgts)
            reg_loss = self._get_reg_loss(pos_reg_preds, pos_reg_tgts, cness_tgts)
            centerness_loss = self._get_centerness_loss(
                pos_cness_preds, cness_tgts, num_pos)
        else:
            # Zero tensors (same dtype/device as cls_loss) rather than Python ints.
            reg_loss = cls_loss.new_zeros(())
            centerness_loss = cls_loss.new_zeros(())
        losses = {}
        losses["cls_loss"] = cls_loss
        losses["reg_loss"] = reg_loss
        losses["centerness_loss"] = centerness_loss
        losses["combined_loss"] = cls_loss + reg_loss + centerness_loss
        return losses

    def _compute_centerness_targets(self, reg_tgts):
        """
        Args:
            reg_tgts: l, t, r, b values to regress, shape ... x 4
                (P x 4 when called on the P positive anchors)
        Returns:
            A tensor of shape ``reg_tgts.shape[:-1]`` giving how centered each anchor is
            in the bbox it must regress:
            sqrt(min(l, r) / max(l, r) * min(t, b) / max(t, b))
        """
        if len(reg_tgts) == 0:
            return reg_tgts.new_zeros(len(reg_tgts))
        left_right = reg_tgts[..., [0, 2]]
        top_bottom = reg_tgts[..., [1, 3]]
        centerness = (left_right.min(dim=-1)[0] / left_right.max(dim=-1)[0]) * \
                    (top_bottom.min(dim=-1)[0] / top_bottom.max(dim=-1)[0])
        return torch.sqrt(centerness)

    def _get_cls_loss(self, cls_preds, cls_targets, num_pos_samples):
        """
        Sum of the sigmoid focal loss over all anchors and classes, divided by ``num_pos_samples``.

        cls_targets takes values in 0...C, 0 only when there is no obj to be detected for the anchor
        """
        # One-hot over C+1 categories, then drop column 0 so "no object" is an all-zero row.
        onehot = nn.functional.one_hot(cls_targets.long(), self.num_classes+1)[...,1:].float()
        cls_loss = torchvision.ops.sigmoid_focal_loss(cls_preds, onehot)
        return cls_loss.sum() / num_pos_samples

    def _get_reg_loss(self, reg_preds, reg_targets, centerness_targets):
        """Distance-IoU loss between predicted and target boxes, weighted by centerness targets."""
        ltrb_preds = reg_preds.reshape(-1, 4)
        ltrb_tgts = reg_targets.reshape(-1, 4)
        # Express (l, t, r, b) as an xyxy box around an anchor placed at the origin.
        xyxy_preds = torch.cat([-ltrb_preds[:,:2], ltrb_preds[:,2:]], dim=1)
        xyxy_tgts = torch.cat([-ltrb_tgts[:,:2], ltrb_tgts[:,2:]], dim=1)
        reg_losses = torchvision.ops.distance_box_iou_loss(xyxy_preds, xyxy_tgts, reduction='none')
        # Guard the normaliser so an all-zero centerness vector cannot yield NaN.
        sum_centerness_targets = centerness_targets.sum().clamp(
            min=torch.finfo(centerness_targets.dtype).eps)
        reg_loss = (reg_losses * centerness_targets).sum() / sum_centerness_targets
        return reg_loss

    def _get_centerness_loss(self, centerness_preds, centerness_targets,
                             num_pos_samples):
        """Summed BCE-with-logits between centerness logits and targets, divided by ``num_pos_samples``."""
        centerness_loss = self.centerness_loss_func(centerness_preds,
                                                    centerness_targets)
        return centerness_loss / num_pos_samples

### Matching

In [5]:
# Sentinel "infinite" value: used as the open upper bound on box sizes for the last
# feature level and as the area assigned to invalid (anchor, box) pairs.
INF = 100000000

def get_fm_anchors(h, w, s):
    """
    Image-space centre of every cell of a feature map.

    Args:
        h, w: height, width of the feat map
        s: stride of the featmap = size reduction factor relative to image
    Returns:
        Tensor NumAnchorsInFeatMap x 2 of (x, y) centres, in row-major order:
        y is fixed while x moves, which matches how a flattened H x W feature map
        is traversed. Same convention as
        https://github.com/tianzhi0549/FCOS/blob/master/fcos_core/modeling/rpn/fcos/fcos.py
    """
    half_stride = s / 2
    centers = []
    for row in range(h):
        center_y = half_stride + row * s
        for col in range(w):
            # x varies fastest: order matters !
            centers.append((half_stride + col * s, center_y))
    return torch.tensor(centers)

def get_all_anchors_bb_sizes(fm_sizes, fm_strides, bb_sizes):
    """
    Build the anchors of every feature level together with the box-size range each may detect.

    Args:
        fm_sizes: seq of feature map sizes (h, w)
        fm_strides: seq of corresponding strides
        bb_sizes: seq (list, tuple, ...) of increasing box-size thresholds separating
            consecutive levels, len = len(fm_sizes) - 1. Level i is in charge of sizes in
            [bounds[i], bounds[i+1]] where bounds = [-1, *bb_sizes, INF]
    Returns:
        anchors: N x 2 tensor with the image-space (x, y) anchors of all levels concatenated
            in level order
        anchors_bb_sizes: N x 2 float32 tensor giving, for each anchor, the (min, max) size of
            the bbox it is authorized/supposed to detect
    """
    # Unpacking accepts any sequence, where `[-1] + bb_sizes` only accepted lists.
    bounds = [-1, *bb_sizes, INF]
    anchors, anchors_bb_sizes = [], []
    for level, ((h, w), stride) in enumerate(zip(fm_sizes, fm_strides)):
        fm_anchors = get_fm_anchors(h, w, stride)
        size_range = torch.tensor([bounds[level], bounds[level + 1]], dtype=torch.float32)
        anchors.append(fm_anchors)
        # Same (min, max) range repeated for every anchor of the level.
        anchors_bb_sizes.append(size_range.repeat(len(fm_anchors), 1))
    return torch.cat(anchors, 0), torch.cat(anchors_bb_sizes, 0)

In [6]:
def calculate_reg_targets(anchors, bbox):
    """
    Distances from every anchor to the four sides of every box.

    Args:
        anchors: Lx2, anchors (x, y) coordinates
        bbox: tensor of bbox Tx4, format should be xywh
    Returns:
        LxTx4 tensor of the l, t, r, b values to regress for each pair (anchor, bbox).
        A value is negative when the anchor lies outside the box on that side.
    """
    anchor_x = anchors[:, 0].unsqueeze(1)  # Lx1
    anchor_y = anchors[:, 1].unsqueeze(1)  # Lx1
    box_x0 = bbox[:, 0].unsqueeze(0)  # 1xT
    box_y0 = bbox[:, 1].unsqueeze(0)  # 1xT
    box_w = bbox[:, 2].unsqueeze(0)  # 1xT
    box_h = bbox[:, 3].unsqueeze(0)  # 1xT

    # Lx1 against 1xT broadcasts to LxT.
    left = anchor_x - box_x0
    top = anchor_y - box_y0
    right = box_w + box_x0 - anchor_x
    bottom = box_h + box_y0 - anchor_y
    return torch.stack((left, top, right, bottom), dim=2)  # LxTx4

def apply_distance_constraints(reg_targets, anchor_sizes):
    """
    Decide which (anchor, bbox) pairs are valid assignments.

    Args:
        reg_targets: LxTx4 l, t, r, b values for each pair (anchor, bbox)
        anchor_sizes: Lx2 (min, max) regression range associated to each anchor
    Returns:
        A LxT bool tensor where value at (anchor, bbox) is true if the smallest value to
        regress is > 0 (the anchor is strictly inside the bbox) and the largest value to
        regress is inside the bounds associated to this anchor.
    """
    largest = reg_targets.max(dim=2).values  # LxT
    smallest = reg_targets.min(dim=2).values  # LxT
    lower_bound = anchor_sizes[:, None, 0]  # Lx1
    upper_bound = anchor_sizes[:, None, 1]  # Lx1
    inside_box = smallest > 0
    within_range = (largest >= lower_bound) & (largest <= upper_bound)
    return inside_box & within_range

def anchor_bbox_area(bbox, anchors, fits_to_feature_level):
    """
    Area of each bbox, seen from each anchor, with invalid pairs set to INF.

    Args:
        bbox: Tx4 boxes in XYWH format
        anchors: anchors, only their number L is used
        fits_to_feature_level: LxT bool tensor, true where the pair (anchor, bbox) is valid
    Returns:
        Tensor LxT where value at (anchor, bbox) is the area of bbox (w * h) if the pair
        is valid according to fits_to_feature_level, else INF.
    """
    areas = bbox[:, 2] * bbox[:, 3]  # T
    # One copy of the T areas per anchor.
    areas_per_anchor = areas.unsqueeze(0).repeat(len(anchors), 1)  # LxT
    return areas_per_anchor.masked_fill(~fits_to_feature_level, INF)

def associate_targets_to_anchors(targets_batch, anchors, anchors_bb_sizes):
    """
    Give each anchor at most one target: among the bboxes that contain the anchor and
    have a size handled by its level, pick the one of min area.
    If no such bbox exists for an anchor, its target class is 0.

    inputs:
        targets_batch: list of dict {'labels': T, 'boxes': Tx4}, one per image;
            boxes should be in XYWH format
        anchors: Lx2 anchor coordinates
        anchors_bb_sizes: Lx2 (min, max) regression range of each anchor
    outputs:
        all class targets: BxL
        all bbox targets: BxLx4 (l, t, r, b); rows of anchors with class 0 are not meaningful
    """
    num_anchors = len(anchors)
    cls_per_image, reg_per_image = [], []
    for targets in targets_batch:
        boxes = targets['boxes']  # Tx4, format XYWH
        labels = targets['labels']  # T
        ltrb = calculate_reg_targets(anchors, boxes)  # LxTx4
        valid_pairs = apply_distance_constraints(ltrb, anchors_bb_sizes)  # LxT
        pair_areas = anchor_bbox_area(boxes, anchors, valid_pairs)  # LxT
        if labels.shape[0] > 0:
            # Core of the association: smallest valid box per anchor (needs T > 0).
            best_area, best_box = pair_areas.min(dim=1)  # L, idx in [0, T-1]
            anchor_reg = ltrb[range(num_anchors), best_box]  # Lx4
            anchor_cls = labels[best_box]  # L
            # An INF minimum means no valid box: 0 is the no-obj category.
            anchor_cls[best_area == INF] = 0
        else:
            anchor_cls = labels.new_zeros((num_anchors,))
            anchor_reg = ltrb.new_zeros((num_anchors, 4))
        cls_per_image.append(anchor_cls)
        reg_per_image.append(anchor_reg)
    # BxL & BxLx4
    return torch.stack(cls_per_image), torch.stack(reg_per_image)

### Post-processing predictions to boxes

In [7]:
# Module-level settings read by post_process.
# Minimum sigmoid class probability for an (anchor, class) pair to become a candidate.
pre_nms_thresh=0.3
# Maximum number of candidates kept (best scores first) before NMS.
pre_nms_top_n=100000
# IoU threshold passed to NMS.
nms_thresh=0.45
# Cap on the number of detections kept per image after NMS.
fpn_post_nms_top_n=50
# Minimum box side passed to torchvision.ops.remove_small_boxes.
min_size=0

def post_process(logits, ltrb, cness, input_size):
    """
    Turn the raw predictions of ONE image into final detections.

    Reads the module-level ``anchors`` tensor (Lx2) and the module-level settings
    ``pre_nms_thresh``, ``pre_nms_top_n``, ``nms_thresh``, ``fpn_post_nms_top_n``
    and ``min_size``.

    Args:
        logits: LxC class logits
        ltrb: Lx4 predicted distances (l, t, r, b) from each anchor to the box sides
        cness: Lx1 centerness logits
        input_size: (height, width) of the network input, used to clip boxes
    Returns:
        boxes: Nx4 boxes in XYWH format
        scores: N scores = class probability * centerness probability
        classes: N labels in 1...C (0 is reserved for "no object")
        Detections are sorted by decreasing score and N <= fpn_post_nms_top_n.
    """
    probas = logits.sigmoid() # LxC
    high_probas = probas > pre_nms_thresh # LxC
    # Indices on L and C axis of high prob pairs anchor/class
    high_prob_anchors_idx, high_prob_cls = high_probas.nonzero(as_tuple=True) # dim l <= L*C
    high_prob_cls = high_prob_cls + 1 # 0 is for no object
    high_prob_ltrb = ltrb[high_prob_anchors_idx] # lx4
    high_prob_anchors = anchors[high_prob_anchors_idx] # lx2
    # Scores are proba * centerness, but candidates are selected on proba alone.
    # Boolean-mask indexing walks the tensor in the same row-major order as nonzero().
    cness_modulated_probas = probas * cness.sigmoid() # LxC
    high_prob_scores = cness_modulated_probas[high_probas] # l
    # If there are too many candidates, keep the pre_nms_top_n best scoring pairs
    if high_prob_scores.numel() > pre_nms_top_n:
        high_prob_scores, top_k_indices = high_prob_scores.topk(
            pre_nms_top_n, sorted=False)
        high_prob_cls = high_prob_cls[top_k_indices]
        high_prob_ltrb = high_prob_ltrb[top_k_indices]
        high_prob_anchors = high_prob_anchors[top_k_indices]

    # Rewrites bbox (x0,y0,x1,y1) from reg targets (l,t,r,b) following eq (1) in paper
    high_prob_boxes = torch.stack([
        high_prob_anchors[:, 0] - high_prob_ltrb[:, 0],
        high_prob_anchors[:, 1] - high_prob_ltrb[:, 1],
        high_prob_anchors[:, 0] + high_prob_ltrb[:, 2],
        high_prob_anchors[:, 1] + high_prob_ltrb[:, 3],
    ], dim=1)

    high_prob_boxes = torchvision.ops.clip_boxes_to_image(high_prob_boxes, input_size)
    # Boxes, classes and scores are filtered together so they stay aligned.
    big_enough_box_idxs = torchvision.ops.remove_small_boxes(high_prob_boxes, min_size)
    boxes = high_prob_boxes[big_enough_box_idxs]
    classes = high_prob_cls[big_enough_box_idxs]
    scores = high_prob_scores[big_enough_box_idxs]

    # NMS expects xyxy boxes. It is done per class: a box must only be suppressed by a
    # better-scored box of the SAME class, otherwise overlapping objects of different
    # classes would eliminate each other.
    nms_idxs = torchvision.ops.batched_nms(boxes, scores, classes, nms_thresh)
    # Kept indices come sorted by decreasing score, so slicing keeps exactly the
    # fpn_post_nms_top_n best detections (a score threshold could keep more on ties).
    nms_idxs = nms_idxs[:fpn_post_nms_top_n]
    boxes = boxes[nms_idxs]
    scores = scores[nms_idxs]
    classes = classes[nms_idxs]

    # Then back to xywh boxes for preds and metric computation
    boxes = torch.cat([boxes[:, :2], boxes[:, 2:] - boxes[:, :2]], dim=1)
    return boxes, scores, classes

def post_process_batch(
    cls_preds, # B x L x C 
    reg_preds, # B x L x 4
    cness_preds, # B x L x 1
    input_size
): 
    """
    Run ``post_process`` on every image of a batch.

    Returns:
        list of B dicts {'boxes', 'scores', 'labels'}, one per image, in batch order.
    """
    per_image_inputs = zip(cls_preds, reg_preds, cness_preds)
    per_image_outputs = (
        post_process(logits, ltrb, cness, input_size)
        for logits, ltrb, cness in per_image_inputs
    )
    return [
        {'boxes': boxes, 'scores': scores, 'labels': classes}
        for boxes, scores, classes in per_image_outputs
    ]

### Seeds

In [8]:
# Seed every random number generator used by the notebook (Python, NumPy,
# torch CPU and all CUDA devices) with the same value so that runs are repeatable.
seed = 0
for _seed_rng in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed_all):
    _seed_rng(seed)

### Instantiations

In [9]:
# Transform pipeline applied jointly to image and target; it is shared by the
# training and validation datasets below.
tf = T.Compose(
    [
        # Convert to float (scale=True).
        T.ToDtype(torch.float, scale=True),
        T.Resize(size=480, max_size=640),
        # Every sample comes out 640x640 (zero padding when the image is smaller),
        # which is what lets `collate` stack images into a single tensor.
        T.RandomCrop(size=(640,640), pad_if_needed=True, fill=0),
        # The matching functions and the mAP metric below work with XYWH boxes.
        T.ConvertBoundingBoxFormat(format='XYWH'),
        T.SanitizeBoundingBoxes(),
        #T.ToPureTensor(),
        # presumably the ImageNet channel statistics - TODO confirm
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ]
)

# Two dataset objects over the same folder, both using `tf`.
dataset = PennFudanDataset('/data/PennFudanPed', tf)
dataset_test = PennFudanDataset('/data/PennFudanPed', tf)
# split the dataset in train and test set: the last 50 indices of a random
# permutation go to validation, the rest to training
indices = torch.randperm(len(dataset)).tolist()
train_set = torch.utils.data.Subset(dataset, indices[:-50])
val_set = torch.utils.data.Subset(dataset_test, indices[-50:])

# One pass over this loader yields 200 batches of one image, drawn with
# replacement from the training subset.
train_dataloader = DataLoader(
    batch_size=1,
    num_workers=0,
    pin_memory=True,
    dataset=train_set,
    sampler=RandomSampler(
        train_set,
        replacement=True,
        num_samples=100*2
    ),
    drop_last=True,
    collate_fn=collate
)

# Validation visits every validation image once, in a fixed order.
val_dataloader = DataLoader(
    batch_size=1,
    num_workers=0,
    pin_memory=True,
    dataset=val_set,
    shuffle=False,
    drop_last=False,
    collate_fn=collate
)

In [10]:
# Freeze params here if needed
    
#for param in model.feature_extractor.parameters():
#    param.requires_grad = False

# Module-level anchors (also read as a global by post_process) and, for each anchor,
# the (min, max) regression range it is responsible for. With bb_sizes=[128, 256, 512]
# the four levels get the ranges (-1, 128), (128, 256), (256, 512) and (512, INF).
# The feature map sizes are hard-coded for 640x640 inputs.
anchors, anchor_sizes = get_all_anchors_bb_sizes(
    fm_sizes=[(160,160),(80,80),(40,40),(20,20)], # 640/16 * [4,2,1,0.5]
    fm_strides=[4, 8, 16, 32],
    bb_sizes=[128, 256, 512]
)

model = ViTDet(num_classes=1, out_channels=256)
dev = torch.device("cuda" if torch.cuda.is_available() else "cpu")
#dev = torch.device("cpu")
model.to(dev)
eval_losses = LossEvaluator(
    num_classes=1
)

# train_params is only used to report the number of trainable parameters;
# the optimizer below receives model.parameters() directly.
train_params = list(filter(lambda p: p[1].requires_grad, model.named_parameters()))
nb_train = sum([int(torch.numel(p[1])) for p in train_params])
nb_tot = sum([int(torch.numel(p)) for p in model.parameters()])
print(f"Training {nb_train} params out of {nb_tot}")

#optimizer = torch.optim.SGD(
#    params=[p[1] for p in train_params],
#    lr=0.005,
#    momentum=0.9,
#    weight_decay=0.0005
#)

optimizer = torch.optim.AdamW(
    params=model.parameters(),
    lr=1e-3,
    betas=(0.9,0.999),
    weight_decay=5e-2,
    eps=1e-8,
)

# The schedule is sized as len(train_dataloader) steps for each of 100 epochs, so it is
# presumably meant to be stepped once per training batch - confirm in the training loop.
lr_scheduler = torch.optim.lr_scheduler.OneCycleLR(
    optimizer,
    max_lr=1e-3,
    steps_per_epoch=len(train_dataloader),
    epochs=100)
#lr_scheduler = torch.optim.lr_scheduler.LinearLR(
#    optimizer=optimizer,
#    start_factor=1.,
#    end_factor=0.1,
#    total_iters=20
#)

Training 10758500 params out of 10758500


### Training

In [11]:
# Uncomment to release cached memory before re-running this cell in a live kernel.
#gc.collect()
#torch.cuda.empty_cache()
#gc.collect()

start_epoch = 0
# The upper bound must match the `epochs` value given to OneCycleLR, which is
# stepped once per training batch below.
for epoch in range(start_epoch, 100):
    time_ep = time.time()

    # ---- Validation pass. It runs before the training pass, so the metrics printed
    # for epoch N describe the model as it was at the end of epoch N-1.
    valid_loss = 0
    valid_cls_loss = 0
    valid_reg_loss = 0
    valid_cen_loss = 0
    model.eval()
    with torch.no_grad():
        # A fresh metric object per epoch, so no state carries over between epochs.
        map_metric = MeanAveragePrecision(
            box_format='xywh', # make sure your dataset outputs target in xywh format
            backend='faster_coco_eval'
        )
        for batch in tqdm(val_dataloader, total=len(val_dataloader)):
            # Targets are matched to anchors first, then moved to the model's device.
            cls_tgts, reg_tgts = associate_targets_to_anchors(
                batch['target'],
                anchors,
                anchor_sizes
            )
            image = batch["image"].to(dev)
            cls_logits, bbox_reg, centerness = model(image)
            losses = eval_losses(
                cls_logits,
                bbox_reg,
                centerness,
                cls_tgts.to(dev),
                reg_tgts.to(dev)
            )
            valid_loss += losses['combined_loss'].item()
            valid_cls_loss += losses["cls_loss"].item()
            valid_reg_loss += losses["reg_loss"].item()
            valid_cen_loss += losses["centerness_loss"].item()
            # Only the spatial size of the input is needed for post-processing.
            h, w = image.shape[-2:]
            preds = post_process_batch(
                cls_logits.to("cpu"),
                bbox_reg.to("cpu"),
                centerness.to("cpu"),
                (h,w)
            )
            map_metric.update(preds, batch['target'])
        # Report per-batch averages.
        n_val_batches = len(val_dataloader)
        valid_loss /= n_val_batches
        valid_cls_loss /= n_val_batches
        valid_reg_loss /= n_val_batches
        valid_cen_loss /= n_val_batches
        mapmetrics = map_metric.compute()
        print(f"{epoch = }")
        print(f"{valid_loss = }")
        print(f"{valid_cls_loss = }")
        print(f"{valid_reg_loss = }")
        print(f"{valid_cen_loss = }")
        print(pformat(mapmetrics))
        map_metric.reset()

    # ---- Training pass.
    train_loss = 0
    train_cls_loss = 0
    train_reg_loss = 0
    train_cen_loss = 0
    model.train()
    for batch in tqdm(train_dataloader, total=len(train_dataloader)):
        image = batch["image"].to(dev)
        optimizer.zero_grad()
        cls_logits, bbox_reg, centerness = model(image)
        cls_tgts, reg_tgts = associate_targets_to_anchors(
            batch['target'],
            anchors,
            anchor_sizes
        ) # BxNumAnchors, BxNumAnchorsx4
        losses = eval_losses(
            cls_logits,
            bbox_reg,
            centerness,
            cls_tgts.to(dev),
            reg_tgts.to(dev)
        )
        loss = losses['combined_loss']
        loss.backward()
        optimizer.step()
        # The scheduler is stepped per batch, not per epoch.
        lr_scheduler.step()
        train_loss += loss.item()
        train_cls_loss += losses["cls_loss"].item()
        train_reg_loss += losses["reg_loss"].item()
        train_cen_loss += losses["centerness_loss"].item()
    n_train_batches = len(train_dataloader)
    train_loss /= n_train_batches
    train_cls_loss /= n_train_batches
    train_reg_loss /= n_train_batches
    train_cen_loss /= n_train_batches
    print(f"{epoch = }")
    print(f"lr = {lr_scheduler.get_last_lr()[0]}")
    print(f"{train_loss = }")
    print(f"{train_cls_loss = }")
    print(f"{train_reg_loss = }")
    print(f"{train_cen_loss = }")

    # Wall-clock duration of this epoch (validation + training).
    time_ep = time.time() - time_ep
    print(f"epoch time = {time_ep:.1f}s")

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [02:07<00:00,  2.56s/it]


epoch = 0
valid_loss = 4.570200605392456
valid_cls_loss = 2.8458824336528776
valid_reg_loss = 1.0354415011405944
valid_cen_loss = 0.6888766968250275
{'classes': tensor(1, dtype=torch.int32),
 'map': tensor(0.),
 'map_50': tensor(0.),
 'map_75': tensor(0.),
 'map_large': tensor(0.),
 'map_medium': tensor(0.),
 'map_per_class': tensor(-1.),
 'map_small': tensor(0.),
 'mar_1': tensor(0.),
 'mar_10': tensor(0.),
 'mar_100': tensor(0.),
 'mar_100_per_class': tensor(-1.),
 'mar_large': tensor(0.),
 'mar_medium': tensor(0.),
 'mar_small': tensor(0.)}


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [00:22<00:00,  8.81it/s]


epoch = 0
lr = 4.2630366137540415e-05
train_loss = 1.9044084018468856
train_cls_loss = 0.24045929174870254
train_reg_loss = 1.0354000735282898
train_cen_loss = 0.6285490369796753


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [00:02<00:00, 18.54it/s]


epoch = 1
valid_loss = 1.854781937599182
valid_cls_loss = 0.20799286738038064
valid_reg_loss = 1.0343887543678283
valid_cen_loss = 0.6124003183841705
{'classes': tensor(1, dtype=torch.int32),
 'map': tensor(0.),
 'map_50': tensor(0.),
 'map_75': tensor(0.),
 'map_large': tensor(0.),
 'map_medium': tensor(0.),
 'map_per_class': tensor(-1.),
 'map_small': tensor(0.),
 'mar_1': tensor(0.),
 'mar_10': tensor(0.),
 'mar_100': tensor(0.),
 'mar_100_per_class': tensor(-1.),
 'mar_large': tensor(0.),
 'mar_medium': tensor(0.),
 'mar_small': tensor(0.)}


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [00:22<00:00,  8.81it/s]


epoch = 1
lr = 5.0492636108421976e-05
train_loss = 1.7847205948829652
train_cls_loss = 0.14654838796705008
train_reg_loss = 1.0321072179079056
train_cen_loss = 0.6060649824142456


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [00:02<00:00, 18.44it/s]


epoch = 2
valid_loss = 1.8423284029960632
valid_cls_loss = 0.2214021673798561
valid_reg_loss = 1.0153265726566314
valid_cen_loss = 0.6055996692180634
{'classes': tensor(1, dtype=torch.int32),
 'map': tensor(0.),
 'map_50': tensor(0.),
 'map_75': tensor(0.),
 'map_large': tensor(0.),
 'map_medium': tensor(0.),
 'map_per_class': tensor(-1.),
 'map_small': tensor(0.),
 'mar_1': tensor(0.),
 'mar_10': tensor(0.),
 'mar_100': tensor(0.),
 'mar_100_per_class': tensor(-1.),
 'mar_large': tensor(0.),
 'mar_medium': tensor(0.),
 'mar_small': tensor(0.)}


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [00:22<00:00,  8.74it/s]


epoch = 2
lr = 6.350064054307949e-05
train_loss = 1.5335031539201736
train_cls_loss = 0.18311305411159992
train_reg_loss = 0.7330014505982398
train_cen_loss = 0.6173886474967003


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [00:02<00:00, 18.93it/s]


epoch = 3
valid_loss = 1.4369921970367432
valid_cls_loss = 0.18674474343657493
valid_reg_loss = 0.629165325164795
valid_cen_loss = 0.6210821223258972
{'classes': tensor(1, dtype=torch.int32),
 'map': tensor(0.1111),
 'map_50': tensor(0.4222),
 'map_75': tensor(0.0170),
 'map_large': tensor(0.1290),
 'map_medium': tensor(0.0069),
 'map_per_class': tensor(-1.),
 'map_small': tensor(0.),
 'mar_1': tensor(0.0880),
 'mar_10': tensor(0.1895),
 'mar_100': tensor(0.2045),
 'mar_100_per_class': tensor(-1.),
 'mar_large': tensor(0.2398),
 'mar_medium': tensor(0.0067),
 'mar_small': tensor(0.)}


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [00:22<00:00,  8.74it/s]


epoch = 3
lr = 8.151181354825528e-05
train_loss = 1.3073997706174851
train_cls_loss = 0.13184211790561676
train_reg_loss = 0.5682296781241893
train_cen_loss = 0.6073279771208763


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [00:02<00:00, 18.87it/s]


epoch = 4
valid_loss = 1.3980400013923644
valid_cls_loss = 0.19601939246058464
valid_reg_loss = 0.599433256983757
valid_cen_loss = 0.6025873494148254
{'classes': tensor(1, dtype=torch.int32),
 'map': tensor(0.0908),
 'map_50': tensor(0.3412),
 'map_75': tensor(0.0137),
 'map_large': tensor(0.1057),
 'map_medium': tensor(0.0014),
 'map_per_class': tensor(-1.),
 'map_small': tensor(0.),
 'mar_1': tensor(0.0842),
 'mar_10': tensor(0.2000),
 'mar_100': tensor(0.2218),
 'mar_100_per_class': tensor(-1.),
 'mar_large': tensor(0.2602),
 'mar_medium': tensor(0.0067),
 'mar_small': tensor(0.)}


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [00:22<00:00,  8.73it/s]


epoch = 4
lr = 0.00010432875520907355
train_loss = 1.2790304714441298
train_cls_loss = 0.14032711043953897
train_reg_loss = 0.5337618283927441
train_cen_loss = 0.6049415370821953


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [00:02<00:00, 18.31it/s]


epoch = 5
valid_loss = 1.371737916469574
valid_cls_loss = 0.1915132987499237
valid_reg_loss = 0.5705960464477539
valid_cen_loss = 0.6096285724639893
{'classes': tensor(1, dtype=torch.int32),
 'map': tensor(0.1339),
 'map_50': tensor(0.4680),
 'map_75': tensor(0.0382),
 'map_large': tensor(0.1567),
 'map_medium': tensor(0.0035),
 'map_per_class': tensor(-1.),
 'map_small': tensor(0.),
 'mar_1': tensor(0.1143),
 'mar_10': tensor(0.2632),
 'mar_100': tensor(0.2805),
 'mar_100_per_class': tensor(-1.),
 'mar_large': tensor(0.3292),
 'mar_medium': tensor(0.0067),
 'mar_small': tensor(0.)}


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [00:22<00:00,  8.76it/s]


epoch = 5
lr = 0.00013170139506425542
train_loss = 1.2585374504327773
train_cls_loss = 0.1443712931498885
train_reg_loss = 0.5098312239348889
train_cen_loss = 0.6043349322676659


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [00:02<00:00, 18.81it/s]


epoch = 6
valid_loss = 1.4220083141326905
valid_cls_loss = 0.2524347487092018
valid_reg_loss = 0.557370069026947
valid_cen_loss = 0.612203494310379
{'classes': tensor(1, dtype=torch.int32),
 'map': tensor(0.1097),
 'map_50': tensor(0.3778),
 'map_75': tensor(0.0264),
 'map_large': tensor(0.1271),
 'map_medium': tensor(0.0208),
 'map_per_class': tensor(-1.),
 'map_small': tensor(0.),
 'mar_1': tensor(0.0962),
 'mar_10': tensor(0.2323),
 'mar_100': tensor(0.2526),
 'mar_100_per_class': tensor(-1.),
 'mar_large': tensor(0.2947),
 'mar_medium': tensor(0.0200),
 'mar_small': tensor(0.)}


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [00:22<00:00,  8.74it/s]


epoch = 6
lr = 0.00016332973284309213
train_loss = 1.2193678605556488
train_cls_loss = 0.14599513560533522
train_reg_loss = 0.4681146916747093
train_cen_loss = 0.6052580305933952


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [00:02<00:00, 18.99it/s]


epoch = 7
valid_loss = 1.357437915802002
valid_cls_loss = 0.21538873702287675
valid_reg_loss = 0.5026441860198975
valid_cen_loss = 0.6394049954414368
{'classes': tensor(1, dtype=torch.int32),
 'map': tensor(0.1743),
 'map_50': tensor(0.5805),
 'map_75': tensor(0.0420),
 'map_large': tensor(0.2045),
 'map_medium': tensor(0.),
 'map_per_class': tensor(-1.),
 'map_small': tensor(0.),
 'mar_1': tensor(0.1301),
 'mar_10': tensor(0.2707),
 'mar_100': tensor(0.2842),
 'mar_100_per_class': tensor(-1.),
 'mar_large': tensor(0.3345),
 'mar_medium': tensor(0.),
 'mar_small': tensor(0.)}


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [00:22<00:00,  8.76it/s]


epoch = 7
lr = 0.00019886712642608328
train_loss = 1.2042622303962707
train_cls_loss = 0.14624780662357806
train_reg_loss = 0.45364793211221693
train_cen_loss = 0.6043664935231209


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [00:02<00:00, 19.00it/s]


epoch = 8
valid_loss = 1.2647386813163757
valid_cls_loss = 0.17915082812309266
valid_reg_loss = 0.4746346354484558
valid_cen_loss = 0.6109532189369201
{'classes': tensor(1, dtype=torch.int32),
 'map': tensor(0.2202),
 'map_50': tensor(0.6670),
 'map_75': tensor(0.0494),
 'map_large': tensor(0.2582),
 'map_medium': tensor(0.),
 'map_per_class': tensor(-1.),
 'map_small': tensor(0.),
 'mar_1': tensor(0.1376),
 'mar_10': tensor(0.2962),
 'mar_100': tensor(0.3098),
 'mar_100_per_class': tensor(-1.),
 'mar_large': tensor(0.3646),
 'mar_medium': tensor(0.),
 'mar_small': tensor(0.)}


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [00:23<00:00,  8.70it/s]


epoch = 8
lr = 0.0002379240909936571
train_loss = 1.1725226759910583
train_cls_loss = 0.15809993527829647
train_reg_loss = 0.406094693094492
train_cen_loss = 0.6083280456066131


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [00:02<00:00, 18.46it/s]


epoch = 9
valid_loss = 1.4392381954193114
valid_cls_loss = 0.3346230810880661
valid_reg_loss = 0.4794835954904556
valid_cen_loss = 0.6251315200328826
{'classes': tensor(1, dtype=torch.int32),
 'map': tensor(0.1100),
 'map_50': tensor(0.4318),
 'map_75': tensor(0.0082),
 'map_large': tensor(0.1287),
 'map_medium': tensor(0.0139),
 'map_per_class': tensor(-1.),
 'map_small': tensor(0.),
 'mar_1': tensor(0.0744),
 'mar_10': tensor(0.2466),
 'mar_100': tensor(0.2820),
 'mar_100_per_class': tensor(-1.),
 'mar_large': tensor(0.3301),
 'mar_medium': tensor(0.0133),
 'mar_small': tensor(0.)}


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [00:22<00:00,  8.73it/s]


epoch = 9
lr = 0.00028007256772484015
train_loss = 1.164393764436245
train_cls_loss = 0.16415862292051314
train_reg_loss = 0.3898171383887529
train_cen_loss = 0.6104180061817169


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [00:02<00:00, 18.67it/s]


epoch = 10
valid_loss = 1.2388693177700043
valid_cls_loss = 0.20933797508478164
valid_reg_loss = 0.41216541051864625
valid_cen_loss = 0.6173659265041351
{'classes': tensor(1, dtype=torch.int32),
 'map': tensor(0.2050),
 'map_50': tensor(0.6110),
 'map_75': tensor(0.0411),
 'map_large': tensor(0.2383),
 'map_medium': tensor(0.0075),
 'map_per_class': tensor(-1.),
 'map_small': tensor(0.),
 'mar_1': tensor(0.1256),
 'mar_10': tensor(0.3105),
 'mar_100': tensor(0.3429),
 'mar_100_per_class': tensor(-1.),
 'mar_large': tensor(0.3894),
 'mar_medium': tensor(0.1067),
 'mar_small': tensor(0.)}


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [00:22<00:00,  8.78it/s]


epoch = 10
lr = 0.0003248506152615381
train_loss = 1.12825496673584
train_cls_loss = 0.15638189639896155
train_reg_loss = 0.36502049267292025
train_cen_loss = 0.6068525767326355


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [00:02<00:00, 18.95it/s]


epoch = 11
valid_loss = 1.2656659615039825
valid_cls_loss = 0.20387523338198663
valid_reg_loss = 0.43049712300300597
valid_cen_loss = 0.6312936055660248
{'classes': tensor(1, dtype=torch.int32),
 'map': tensor(0.1794),
 'map_50': tensor(0.5961),
 'map_75': tensor(0.0219),
 'map_large': tensor(0.2101),
 'map_medium': tensor(0.),
 'map_per_class': tensor(-1.),
 'map_small': tensor(0.),
 'mar_1': tensor(0.1188),
 'mar_10': tensor(0.2669),
 'mar_100': tensor(0.2759),
 'mar_100_per_class': tensor(-1.),
 'mar_large': tensor(0.3248),
 'mar_medium': tensor(0.),
 'mar_small': tensor(0.)}


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [00:22<00:00,  8.77it/s]


epoch = 11
lr = 0.000371767472520644
train_loss = 1.154264419078827
train_cls_loss = 0.16648796536028385
train_reg_loss = 0.3791844702512026
train_cen_loss = 0.6085919818282127


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [00:02<00:00, 18.95it/s]


epoch = 12
valid_loss = 1.222894834280014
valid_cls_loss = 0.21986298963427545
valid_reg_loss = 0.39270633697509766
valid_cen_loss = 0.6103255105018616
{'classes': tensor(1, dtype=torch.int32),
 'map': tensor(0.1784),
 'map_50': tensor(0.5769),
 'map_75': tensor(0.0354),
 'map_large': tensor(0.2080),
 'map_medium': tensor(0.0030),
 'map_per_class': tensor(-1.),
 'map_small': tensor(0.),
 'mar_1': tensor(0.1429),
 'mar_10': tensor(0.2782),
 'mar_100': tensor(0.2842),
 'mar_100_per_class': tensor(-1.),
 'mar_large': tensor(0.3319),
 'mar_medium': tensor(0.0200),
 'mar_small': tensor(0.)}


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [00:22<00:00,  8.78it/s]


epoch = 12
lr = 0.0004203089373662653
train_loss = 1.1203000739216804
train_cls_loss = 0.16403166510164738
train_reg_loss = 0.3535011406987906
train_cen_loss = 0.6027672651410103


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [00:02<00:00, 18.96it/s]


epoch = 13
valid_loss = 1.2773105907440185
valid_cls_loss = 0.23733104169368743
valid_reg_loss = 0.41834857285022736
valid_cen_loss = 0.6216309845447541
{'classes': tensor(1, dtype=torch.int32),
 'map': tensor(0.1762),
 'map_50': tensor(0.4917),
 'map_75': tensor(0.0694),
 'map_large': tensor(0.2068),
 'map_medium': tensor(0.),
 'map_per_class': tensor(-1.),
 'map_small': tensor(0.),
 'mar_1': tensor(0.1293),
 'mar_10': tensor(0.2684),
 'mar_100': tensor(0.2684),
 'mar_100_per_class': tensor(-1.),
 'mar_large': tensor(0.3159),
 'mar_medium': tensor(0.),
 'mar_small': tensor(0.)}


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [00:22<00:00,  8.70it/s]


epoch = 13
lr = 0.00046994300219258336
train_loss = 1.1018867698311805
train_cls_loss = 0.15381716903299092
train_reg_loss = 0.3465454034507275
train_cen_loss = 0.6015241986513138


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [00:02<00:00, 18.35it/s]


epoch = 14
valid_loss = 1.203785685300827
valid_cls_loss = 0.22158458828926086
valid_reg_loss = 0.37382513731718064
valid_cen_loss = 0.6083759546279908
{'classes': tensor(1, dtype=torch.int32),
 'map': tensor(0.2309),
 'map_50': tensor(0.6970),
 'map_75': tensor(0.0948),
 'map_large': tensor(0.2692),
 'map_medium': tensor(0.0056),
 'map_per_class': tensor(-1.),
 'map_small': tensor(0.),
 'mar_1': tensor(0.1556),
 'mar_10': tensor(0.3338),
 'mar_100': tensor(0.3361),
 'mar_100_per_class': tensor(-1.),
 'mar_large': tensor(0.3903),
 'mar_medium': tensor(0.0400),
 'mar_small': tensor(0.)}


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [00:22<00:00,  8.77it/s]


epoch = 14
lr = 0.0005201256846521496
train_loss = 1.1687019044160842
train_cls_loss = 0.18597741633653642
train_reg_loss = 0.37066698968410494
train_cen_loss = 0.6120574977993966


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [00:02<00:00, 18.96it/s]


epoch = 15
valid_loss = 1.3016180539131164
valid_cls_loss = 0.23655320584774017
valid_reg_loss = 0.44011237561702726
valid_cen_loss = 0.6249524736404419
{'classes': tensor(1, dtype=torch.int32),
 'map': tensor(0.1275),
 'map_50': tensor(0.5329),
 'map_75': tensor(0.0186),
 'map_large': tensor(0.1489),
 'map_medium': tensor(0.),
 'map_per_class': tensor(-1.),
 'map_small': tensor(0.),
 'mar_1': tensor(0.0977),
 'mar_10': tensor(0.2368),
 'mar_100': tensor(0.2459),
 'mar_100_per_class': tensor(-1.),
 'mar_large': tensor(0.2894),
 'mar_medium': tensor(0.),
 'mar_small': tensor(0.)}


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [00:22<00:00,  8.78it/s]


epoch = 15
lr = 0.0005703069896256616
train_loss = 1.175437308549881
train_cls_loss = 0.19342098891735077
train_reg_loss = 0.3704371852427721
train_cen_loss = 0.6115791317820549


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [00:02<00:00, 18.68it/s]


epoch = 16
valid_loss = 1.2631787300109862
valid_cls_loss = 0.2617907884716988
valid_reg_loss = 0.387419151365757
valid_cen_loss = 0.6139687895774841
{'classes': tensor(1, dtype=torch.int32),
 'map': tensor(0.2115),
 'map_50': tensor(0.6076),
 'map_75': tensor(0.0678),
 'map_large': tensor(0.2477),
 'map_medium': tensor(0.0035),
 'map_per_class': tensor(-1.),
 'map_small': tensor(0.),
 'mar_1': tensor(0.1421),
 'mar_10': tensor(0.3188),
 'mar_100': tensor(0.3368),
 'mar_100_per_class': tensor(-1.),
 'mar_large': tensor(0.3956),
 'mar_medium': tensor(0.0067),
 'mar_small': tensor(0.)}


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [00:22<00:00,  8.75it/s]


epoch = 16
lr = 0.0006199369370908696
train_loss = 1.1323073941469193
train_cls_loss = 0.17337786361575128
train_reg_loss = 0.35285217367112637
train_cen_loss = 0.6060773584246636


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [00:02<00:00, 18.99it/s]


epoch = 17
valid_loss = 1.4026880550384522
valid_cls_loss = 0.3599850100278854
valid_reg_loss = 0.41493745923042297
valid_cen_loss = 0.6277655935287476
{'classes': tensor(1, dtype=torch.int32),
 'map': tensor(0.1205),
 'map_50': tensor(0.3519),
 'map_75': tensor(0.0265),
 'map_large': tensor(0.1409),
 'map_medium': tensor(0.),
 'map_per_class': tensor(-1.),
 'map_small': tensor(0.),
 'mar_1': tensor(0.1173),
 'mar_10': tensor(0.1865),
 'mar_100': tensor(0.1865),
 'mar_100_per_class': tensor(-1.),
 'mar_large': tensor(0.2195),
 'mar_medium': tensor(0.),
 'mar_small': tensor(0.)}


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [00:22<00:00,  8.75it/s]


epoch = 17
lr = 0.0006684715898260222
train_loss = 1.1739817944169044
train_cls_loss = 0.1875375123694539
train_reg_loss = 0.3708539319038391
train_cen_loss = 0.6155903500318527


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [00:02<00:00, 18.85it/s]


epoch = 18
valid_loss = 1.3662177777290345
valid_cls_loss = 0.26877386778593065
valid_reg_loss = 0.4843842768669128
valid_cen_loss = 0.6130596363544464
{'classes': tensor(1, dtype=torch.int32),
 'map': tensor(0.0420),
 'map_50': tensor(0.1906),
 'map_75': tensor(0.0048),
 'map_large': tensor(0.0487),
 'map_medium': tensor(0.0031),
 'map_per_class': tensor(-1.),
 'map_small': tensor(0.),
 'mar_1': tensor(0.0368),
 'mar_10': tensor(0.1549),
 'mar_100': tensor(0.2639),
 'mar_100_per_class': tensor(-1.),
 'mar_large': tensor(0.3088),
 'mar_medium': tensor(0.0133),
 'mar_small': tensor(0.)}


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [00:22<00:00,  8.72it/s]


epoch = 18
lr = 0.0007153790148850699
train_loss = 1.1864407977461815
train_cls_loss = 0.19927356019616127
train_reg_loss = 0.37217360705137253
train_cen_loss = 0.6149936318397522


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [00:02<00:00, 18.92it/s]


epoch = 19
valid_loss = 1.322247052192688
valid_cls_loss = 0.2395534636080265
valid_reg_loss = 0.4614692097902298
valid_cen_loss = 0.6212243723869324
{'classes': tensor(1, dtype=torch.int32),
 'map': tensor(0.1016),
 'map_50': tensor(0.3990),
 'map_75': tensor(0.0251),
 'map_large': tensor(0.1187),
 'map_medium': tensor(0.),
 'map_per_class': tensor(-1.),
 'map_small': tensor(0.),
 'mar_1': tensor(0.0684),
 'mar_10': tensor(0.2143),
 'mar_100': tensor(0.2263),
 'mar_100_per_class': tensor(-1.),
 'mar_large': tensor(0.2664),
 'mar_medium': tensor(0.),
 'mar_small': tensor(0.)}


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [00:23<00:00,  8.69it/s]


epoch = 19
lr = 0.0007601451135077022
train_loss = 1.1900277319550514
train_cls_loss = 0.1989364355430007
train_reg_loss = 0.3751520444452763
train_cen_loss = 0.6159392476081849


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [00:02<00:00, 18.99it/s]


epoch = 20
valid_loss = 1.2473041605949402
valid_cls_loss = 0.2229857811331749
valid_reg_loss = 0.4003553596138954
valid_cen_loss = 0.6239630174636841
{'classes': tensor(1, dtype=torch.int32),
 'map': tensor(0.1346),
 'map_50': tensor(0.4757),
 'map_75': tensor(0.0344),
 'map_large': tensor(0.1580),
 'map_medium': tensor(0.),
 'map_per_class': tensor(-1.),
 'map_small': tensor(0.),
 'mar_1': tensor(0.1105),
 'mar_10': tensor(0.2639),
 'mar_100': tensor(0.2669),
 'mar_100_per_class': tensor(-1.),
 'mar_large': tensor(0.3142),
 'mar_medium': tensor(0.),
 'mar_small': tensor(0.)}


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [00:22<00:00,  8.79it/s]


epoch = 20
lr = 0.0008022792555692243
train_loss = 1.1851224845647812
train_cls_loss = 0.2029639868065715
train_reg_loss = 0.3662799184024334
train_cen_loss = 0.6158785739541054


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [00:02<00:00, 18.70it/s]


epoch = 21
valid_loss = 1.2465760231018066
valid_cls_loss = 0.22509042471647261
valid_reg_loss = 0.3987590569257736
valid_cen_loss = 0.622726548910141
{'classes': tensor(1, dtype=torch.int32),
 'map': tensor(0.1802),
 'map_50': tensor(0.5688),
 'map_75': tensor(0.0178),
 'map_large': tensor(0.2114),
 'map_medium': tensor(0.),
 'map_per_class': tensor(-1.),
 'map_small': tensor(0.),
 'mar_1': tensor(0.1301),
 'mar_10': tensor(0.2857),
 'mar_100': tensor(0.2857),
 'mar_100_per_class': tensor(-1.),
 'mar_large': tensor(0.3363),
 'mar_medium': tensor(0.),
 'mar_small': tensor(0.)}


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [00:23<00:00,  8.69it/s]


epoch = 21
lr = 0.0008413196568174989
train_loss = 1.146890150308609
train_cls_loss = 0.18357272535562516
train_reg_loss = 0.3543390186131001
train_cen_loss = 0.608978407382965


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [00:02<00:00, 18.90it/s]


epoch = 22
valid_loss = 1.2484378838539123
valid_cls_loss = 0.22434668600559235
valid_reg_loss = 0.40831359326839445
valid_cen_loss = 0.6157776093482972
{'classes': tensor(1, dtype=torch.int32),
 'map': tensor(0.1725),
 'map_50': tensor(0.5799),
 'map_75': tensor(0.0570),
 'map_large': tensor(0.1999),
 'map_medium': tensor(0.0289),
 'map_per_class': tensor(-1.),
 'map_small': tensor(0.),
 'mar_1': tensor(0.1278),
 'mar_10': tensor(0.2955),
 'mar_100': tensor(0.3301),
 'mar_100_per_class': tensor(-1.),
 'mar_large': tensor(0.3788),
 'mar_medium': tensor(0.0733),
 'mar_small': tensor(0.)}


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [00:22<00:00,  8.77it/s]


epoch = 22
lr = 0.000876838439963195
train_loss = 1.1490809699892999
train_cls_loss = 0.19133191250264645
train_reg_loss = 0.3488973243534565
train_cen_loss = 0.6088517278432846


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [00:02<00:00, 19.03it/s]


epoch = 23
valid_loss = 1.3054478943347931
valid_cls_loss = 0.28085193783044815
valid_reg_loss = 0.405687637925148
valid_cen_loss = 0.618908314704895
{'classes': tensor(1, dtype=torch.int32),
 'map': tensor(0.1814),
 'map_50': tensor(0.5689),
 'map_75': tensor(0.0438),
 'map_large': tensor(0.2129),
 'map_medium': tensor(0.),
 'map_per_class': tensor(-1.),
 'map_small': tensor(0.),
 'mar_1': tensor(0.1383),
 'mar_10': tensor(0.2880),
 'mar_100': tensor(0.2880),
 'mar_100_per_class': tensor(-1.),
 'mar_large': tensor(0.3389),
 'mar_medium': tensor(0.),
 'mar_small': tensor(0.)}


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [00:22<00:00,  8.78it/s]


epoch = 23
lr = 0.0009084463241545082
train_loss = 1.1713784298300742
train_cls_loss = 0.19602549709379674
train_reg_loss = 0.36224113203585145
train_cen_loss = 0.6131118005514145


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [00:02<00:00, 18.65it/s]


epoch = 24
valid_loss = 1.2728550934791565
valid_cls_loss = 0.23452383518218994
valid_reg_loss = 0.4199833446741104
valid_cen_loss = 0.6183479130268097
{'classes': tensor(1, dtype=torch.int32),
 'map': tensor(0.1452),
 'map_50': tensor(0.5166),
 'map_75': tensor(0.0295),
 'map_large': tensor(0.1692),
 'map_medium': tensor(0.0347),
 'map_per_class': tensor(-1.),
 'map_small': tensor(0.),
 'mar_1': tensor(0.0925),
 'mar_10': tensor(0.2617),
 'mar_100': tensor(0.2662),
 'mar_100_per_class': tensor(-1.),
 'mar_large': tensor(0.3088),
 'mar_medium': tensor(0.0333),
 'mar_small': tensor(0.)}


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [00:22<00:00,  8.74it/s]


epoch = 24
lr = 0.000935796891440371
train_loss = 1.1316157013177872
train_cls_loss = 0.17992167301476003
train_reg_loss = 0.34536085702478886
train_cen_loss = 0.6063331723213196


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [00:02<00:00, 19.05it/s]


epoch = 25
valid_loss = 1.2676978123188019
valid_cls_loss = 0.25144035190343855
valid_reg_loss = 0.3998811602592468
valid_cen_loss = 0.6163762927055358
{'classes': tensor(1, dtype=torch.int32),
 'map': tensor(0.1698),
 'map_50': tensor(0.4989),
 'map_75': tensor(0.0502),
 'map_large': tensor(0.1984),
 'map_medium': tensor(0.),
 'map_per_class': tensor(-1.),
 'map_small': tensor(0.),
 'mar_1': tensor(0.1226),
 'mar_10': tensor(0.2586),
 'mar_100': tensor(0.2586),
 'mar_100_per_class': tensor(-1.),
 'mar_large': tensor(0.3044),
 'mar_medium': tensor(0.),
 'mar_small': tensor(0.)}


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [00:22<00:00,  8.77it/s]


epoch = 25
lr = 0.00095859038346232
train_loss = 1.160029198229313
train_cls_loss = 0.20018100436776876
train_reg_loss = 0.35150728046894075
train_cen_loss = 0.6083409079909324


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [00:02<00:00, 18.60it/s]


epoch = 26
valid_loss = 1.3124344229698182
valid_cls_loss = 0.2520655199885368
valid_reg_loss = 0.43549617767333987
valid_cen_loss = 0.6248727262020111
{'classes': tensor(1, dtype=torch.int32),
 'map': tensor(0.1281),
 'map_50': tensor(0.4953),
 'map_75': tensor(0.0195),
 'map_large': tensor(0.1486),
 'map_medium': tensor(0.0347),
 'map_per_class': tensor(-1.),
 'map_small': tensor(0.),
 'mar_1': tensor(0.0955),
 'mar_10': tensor(0.2346),
 'mar_100': tensor(0.2827),
 'mar_100_per_class': tensor(-1.),
 'mar_large': tensor(0.3283),
 'mar_medium': tensor(0.0333),
 'mar_small': tensor(0.)}


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [00:22<00:00,  8.78it/s]


epoch = 26
lr = 0.00097657698676381
train_loss = 1.1583123609423638
train_cls_loss = 0.1928509099036455
train_reg_loss = 0.3528684414923191
train_cen_loss = 0.6125930088758469


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [00:02<00:00, 18.72it/s]


epoch = 27
valid_loss = 1.2545703196525573
valid_cls_loss = 0.23381512701511384
valid_reg_loss = 0.40385332524776457
valid_cen_loss = 0.6169018650054932
{'classes': tensor(1, dtype=torch.int32),
 'map': tensor(0.1398),
 'map_50': tensor(0.4076),
 'map_75': tensor(0.0451),
 'map_large': tensor(0.1635),
 'map_medium': tensor(0.),
 'map_per_class': tensor(-1.),
 'map_small': tensor(0.),
 'mar_1': tensor(0.0940),
 'mar_10': tensor(0.2707),
 'mar_100': tensor(0.2805),
 'mar_100_per_class': tensor(-1.),
 'mar_large': tensor(0.3301),
 'mar_medium': tensor(0.),
 'mar_small': tensor(0.)}


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [00:22<00:00,  8.75it/s]


epoch = 27
lr = 0.0009895595707104512
train_loss = 1.1896401980519296
train_cls_loss = 0.20388034153729678
train_reg_loss = 0.3693798165768385
train_cen_loss = 0.6163800397515297


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [00:02<00:00, 18.43it/s]


epoch = 28
valid_loss = 1.259595640897751
valid_cls_loss = 0.264798903465271
valid_reg_loss = 0.38172967225313187
valid_cen_loss = 0.6130670654773712
{'classes': tensor(1, dtype=torch.int32),
 'map': tensor(0.1538),
 'map_50': tensor(0.4571),
 'map_75': tensor(0.0560),
 'map_large': tensor(0.1799),
 'map_medium': tensor(0.),
 'map_per_class': tensor(-1.),
 'map_small': tensor(0.),
 'mar_1': tensor(0.1383),
 'mar_10': tensor(0.2744),
 'mar_100': tensor(0.2744),
 'mar_100_per_class': tensor(-1.),
 'mar_large': tensor(0.3230),
 'mar_medium': tensor(0.),
 'mar_small': tensor(0.)}


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [00:22<00:00,  8.70it/s]


epoch = 28
lr = 0.0009973958480139476
train_loss = 1.1495850956439972
train_cls_loss = 0.1901218181848526
train_reg_loss = 0.3533931948244572
train_cen_loss = 0.6060700818896294


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [00:02<00:00, 18.99it/s]


epoch = 29
valid_loss = 1.2970997309684753
valid_cls_loss = 0.2869164535403252
valid_reg_loss = 0.39221232026815417
valid_cen_loss = 0.6179709577560425
{'classes': tensor(1, dtype=torch.int32),
 'map': tensor(0.1393),
 'map_50': tensor(0.5015),
 'map_75': tensor(0.0237),
 'map_large': tensor(0.1634),
 'map_medium': tensor(0.),
 'map_per_class': tensor(-1.),
 'map_small': tensor(0.),
 'mar_1': tensor(0.1293),
 'mar_10': tensor(0.2602),
 'mar_100': tensor(0.2602),
 'mar_100_per_class': tensor(-1.),
 'mar_large': tensor(0.3062),
 'mar_medium': tensor(0.),
 'mar_small': tensor(0.)}


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [00:22<00:00,  8.77it/s]


epoch = 29
lr = 0.0009999999874112692
train_loss = 1.170163624584675
train_cls_loss = 0.19934747703373432
train_reg_loss = 0.3611428602039814
train_cen_loss = 0.6096732807159424


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [00:02<00:00, 18.97it/s]


epoch = 30
valid_loss = 1.2712799942493438
valid_cls_loss = 0.26139652222394943
valid_reg_loss = 0.39491682916879656
valid_cen_loss = 0.6149666488170624
{'classes': tensor(1, dtype=torch.int32),
 'map': tensor(0.1320),
 'map_50': tensor(0.4253),
 'map_75': tensor(0.0582),
 'map_large': tensor(0.1537),
 'map_medium': tensor(0.),
 'map_per_class': tensor(-1.),
 'map_small': tensor(0.),
 'mar_1': tensor(0.1241),
 'mar_10': tensor(0.2414),
 'mar_100': tensor(0.2429),
 'mar_100_per_class': tensor(-1.),
 'mar_large': tensor(0.2858),
 'mar_medium': tensor(0.),
 'mar_small': tensor(0.)}


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [00:22<00:00,  8.74it/s]


epoch = 30
lr = 0.000999491488906453
train_loss = 1.1497522240877152
train_cls_loss = 0.18749628886580466
train_reg_loss = 0.353060025498271
train_cen_loss = 0.609195912182331


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [00:02<00:00, 18.76it/s]


epoch = 31
valid_loss = 1.2801474118232727
valid_cls_loss = 0.245744781345129
valid_reg_loss = 0.3960122525691986
valid_cen_loss = 0.6383903765678406
{'classes': tensor(1, dtype=torch.int32),
 'map': tensor(0.1471),
 'map_50': tensor(0.4885),
 'map_75': tensor(0.0377),
 'map_large': tensor(0.1727),
 'map_medium': tensor(0.),
 'map_per_class': tensor(-1.),
 'map_small': tensor(0.),
 'mar_1': tensor(0.1083),
 'mar_10': tensor(0.2752),
 'mar_100': tensor(0.2835),
 'mar_100_per_class': tensor(-1.),
 'mar_large': tensor(0.3336),
 'mar_medium': tensor(0.),
 'mar_small': tensor(0.)}


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [00:22<00:00,  8.77it/s]


epoch = 31
lr = 0.0009979770850443537
train_loss = 1.1344149175286293
train_cls_loss = 0.18629400011152028
train_reg_loss = 0.34183319501578807
train_cen_loss = 0.6062877237796783


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [00:02<00:00, 18.93it/s]


epoch = 32
valid_loss = 1.2583451437950135
valid_cls_loss = 0.24717615216970443
valid_reg_loss = 0.3909767085313797
valid_cen_loss = 0.6201922905445099
{'classes': tensor(1, dtype=torch.int32),
 'map': tensor(0.1807),
 'map_50': tensor(0.5523),
 'map_75': tensor(0.0764),
 'map_large': tensor(0.2120),
 'map_medium': tensor(0.),
 'map_per_class': tensor(-1.),
 'map_small': tensor(0.),
 'mar_1': tensor(0.1406),
 'mar_10': tensor(0.2962),
 'mar_100': tensor(0.2962),
 'mar_100_per_class': tensor(-1.),
 'mar_large': tensor(0.3487),
 'mar_medium': tensor(0.),
 'mar_small': tensor(0.)}


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [00:22<00:00,  8.79it/s]


epoch = 32
lr = 0.0009954598256328086
train_loss = 1.1419506496191025
train_cls_loss = 0.1887270049750805
train_reg_loss = 0.3428398948162794
train_cen_loss = 0.6103837513923644


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [00:02<00:00, 18.99it/s]


epoch = 33
valid_loss = 1.2270871078968049
valid_cls_loss = 0.23295256227254868
valid_reg_loss = 0.3850962468981743
valid_cen_loss = 0.6090383040904999
{'classes': tensor(1, dtype=torch.int32),
 'map': tensor(0.2072),
 'map_50': tensor(0.6407),
 'map_75': tensor(0.0360),
 'map_large': tensor(0.2398),
 'map_medium': tensor(0.0373),
 'map_per_class': tensor(-1.),
 'map_small': tensor(0.),
 'mar_1': tensor(0.1165),
 'mar_10': tensor(0.3233),
 'mar_100': tensor(0.3398),
 'mar_100_per_class': tensor(-1.),
 'mar_large': tensor(0.3867),
 'mar_medium': tensor(0.1000),
 'mar_small': tensor(0.)}


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [00:22<00:00,  8.75it/s]


epoch = 33
lr = 0.0009919447800972691
train_loss = 1.0950710347294808
train_cls_loss = 0.17060224540531635
train_reg_loss = 0.3234100418537855
train_cen_loss = 0.601058748960495


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [00:02<00:00, 18.95it/s]


epoch = 34
valid_loss = 1.2320858108997346
valid_cls_loss = 0.22694634988904
valid_reg_loss = 0.37842759877443316
valid_cen_loss = 0.6267118585109711
{'classes': tensor(1, dtype=torch.int32),
 'map': tensor(0.1837),
 'map_50': tensor(0.5857),
 'map_75': tensor(0.0685),
 'map_large': tensor(0.2136),
 'map_medium': tensor(0.0139),
 'map_per_class': tensor(-1.),
 'map_small': tensor(0.),
 'mar_1': tensor(0.1383),
 'mar_10': tensor(0.2797),
 'mar_100': tensor(0.2820),
 'mar_100_per_class': tensor(-1.),
 'mar_large': tensor(0.3301),
 'mar_medium': tensor(0.0133),
 'mar_small': tensor(0.)}


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [00:22<00:00,  8.71it/s]


epoch = 34
lr = 0.0009874390272716525
train_loss = 1.0930761283636092
train_cls_loss = 0.16212396752089261
train_reg_loss = 0.3274197454750538
train_cen_loss = 0.6035324138402939


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [00:02<00:00, 19.00it/s]


epoch = 35
valid_loss = 1.2879620277881623
valid_cls_loss = 0.2688609017431736
valid_reg_loss = 0.3969916480779648
valid_cen_loss = 0.622109466791153
{'classes': tensor(1, dtype=torch.int32),
 'map': tensor(0.1535),
 'map_50': tensor(0.5250),
 'map_75': tensor(0.0382),
 'map_large': tensor(0.1800),
 'map_medium': tensor(0.),
 'map_per_class': tensor(-1.),
 'map_small': tensor(0.),
 'mar_1': tensor(0.1278),
 'mar_10': tensor(0.2602),
 'mar_100': tensor(0.2677),
 'mar_100_per_class': tensor(-1.),
 'mar_large': tensor(0.3150),
 'mar_medium': tensor(0.),
 'mar_small': tensor(0.)}


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [00:22<00:00,  8.73it/s]


epoch = 35
lr = 0.000981951641142512
train_loss = 1.1063109067082406
train_cls_loss = 0.16918087657541037
train_reg_loss = 0.3349624191969633
train_cen_loss = 0.6021676057577133


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [00:02<00:00, 18.93it/s]


epoch = 36
valid_loss = 1.242058016061783
valid_cls_loss = 0.23331218600273132
valid_reg_loss = 0.3901923236250877
valid_cen_loss = 0.6185535085201264
{'classes': tensor(1, dtype=torch.int32),
 'map': tensor(0.1565),
 'map_50': tensor(0.5379),
 'map_75': tensor(0.0473),
 'map_large': tensor(0.1832),
 'map_medium': tensor(0.0069),
 'map_per_class': tensor(-1.),
 'map_small': tensor(0.),
 'mar_1': tensor(0.1165),
 'mar_10': tensor(0.2504),
 'mar_100': tensor(0.2519),
 'mar_100_per_class': tensor(-1.),
 'mar_large': tensor(0.2956),
 'mar_medium': tensor(0.0067),
 'mar_small': tensor(0.)}


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [00:22<00:00,  8.73it/s]


epoch = 36
lr = 0.0009754936725752357
train_loss = 1.0996855786442756
train_cls_loss = 0.17079241905361414
train_reg_loss = 0.32783258937299253
train_cen_loss = 0.6010605725646019


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [00:02<00:00, 18.66it/s]


epoch = 37
valid_loss = 1.279657461643219
valid_cls_loss = 0.2522018751502037
valid_reg_loss = 0.41402914732694623
valid_cen_loss = 0.6134264373779297
{'classes': tensor(1, dtype=torch.int32),
 'map': tensor(0.1716),
 'map_50': tensor(0.5474),
 'map_75': tensor(0.0313),
 'map_large': tensor(0.2011),
 'map_medium': tensor(0.),
 'map_per_class': tensor(-1.),
 'map_small': tensor(0.),
 'mar_1': tensor(0.1068),
 'mar_10': tensor(0.2880),
 'mar_100': tensor(0.2932),
 'mar_100_per_class': tensor(-1.),
 'mar_large': tensor(0.3451),
 'mar_medium': tensor(0.),
 'mar_small': tensor(0.)}


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [00:22<00:00,  8.74it/s]


epoch = 37
lr = 0.0009680781270590751
train_loss = 1.0870029148459435
train_cls_loss = 0.16827053312212228
train_reg_loss = 0.3173671278357506
train_cen_loss = 0.6013652580976486


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [00:02<00:00, 18.94it/s]


epoch = 38
valid_loss = 1.2208687269687652
valid_cls_loss = 0.22705426931381226
valid_reg_loss = 0.3767026153206825
valid_cen_loss = 0.6171118378639221
{'classes': tensor(1, dtype=torch.int32),
 'map': tensor(0.1946),
 'map_50': tensor(0.6034),
 'map_75': tensor(0.0779),
 'map_large': tensor(0.2272),
 'map_medium': tensor(0.0074),
 'map_per_class': tensor(-1.),
 'map_small': tensor(0.),
 'mar_1': tensor(0.1233),
 'mar_10': tensor(0.3188),
 'mar_100': tensor(0.3346),
 'mar_100_per_class': tensor(-1.),
 'mar_large': tensor(0.3885),
 'mar_medium': tensor(0.0400),
 'mar_small': tensor(0.)}


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [00:22<00:00,  8.75it/s]


epoch = 38
lr = 0.0009597199385158189
train_loss = 1.102399517893791
train_cls_loss = 0.1726223338767886
train_reg_loss = 0.3266926968097687
train_cen_loss = 0.6030844885110855


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [00:02<00:00, 18.66it/s]


epoch = 39
valid_loss = 1.268483978509903
valid_cls_loss = 0.24402828261256218
valid_reg_loss = 0.3930867999792099
valid_cen_loss = 0.6313689064979553
{'classes': tensor(1, dtype=torch.int32),
 'map': tensor(0.1473),
 'map_50': tensor(0.4450),
 'map_75': tensor(0.0312),
 'map_large': tensor(0.1722),
 'map_medium': tensor(0.0017),
 'map_per_class': tensor(-1.),
 'map_small': tensor(0.),
 'mar_1': tensor(0.1165),
 'mar_10': tensor(0.2594),
 'mar_100': tensor(0.2594),
 'mar_100_per_class': tensor(-1.),
 'mar_large': tensor(0.3044),
 'mar_medium': tensor(0.0067),
 'mar_small': tensor(0.)}


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [00:22<00:00,  8.75it/s]


epoch = 39
lr = 0.0009504359392248641
train_loss = 1.0832774192094803
train_cls_loss = 0.16231244567781686
train_reg_loss = 0.3205079750716686
train_cen_loss = 0.6004569986462593


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [00:02<00:00, 18.27it/s]


epoch = 40
valid_loss = 1.194223676919937
valid_cls_loss = 0.2058141940832138
valid_reg_loss = 0.3695949837565422
valid_cen_loss = 0.6188145053386688
{'classes': tensor(1, dtype=torch.int32),
 'map': tensor(0.1822),
 'map_50': tensor(0.5508),
 'map_75': tensor(0.0620),
 'map_large': tensor(0.2126),
 'map_medium': tensor(0.0173),
 'map_per_class': tensor(-1.),
 'map_small': tensor(0.),
 'mar_1': tensor(0.1361),
 'mar_10': tensor(0.3030),
 'mar_100': tensor(0.3090),
 'mar_100_per_class': tensor(-1.),
 'mar_large': tensor(0.3584),
 'mar_medium': tensor(0.0400),
 'mar_small': tensor(0.)}


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [00:22<00:00,  8.76it/s]


epoch = 40
lr = 0.0009402448259252435
train_loss = 1.0719247230887412
train_cls_loss = 0.16138931274414062
train_reg_loss = 0.3104084661602974
train_cen_loss = 0.6001269400119782


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [00:02<00:00, 18.86it/s]


epoch = 41
valid_loss = 1.2620433783531189
valid_cls_loss = 0.2425130194425583
valid_reg_loss = 0.39901400834321976
valid_cen_loss = 0.6205163431167603
{'classes': tensor(1, dtype=torch.int32),
 'map': tensor(0.1376),
 'map_50': tensor(0.4608),
 'map_75': tensor(0.0175),
 'map_large': tensor(0.1614),
 'map_medium': tensor(0.0069),
 'map_per_class': tensor(-1.),
 'map_small': tensor(0.),
 'mar_1': tensor(0.1023),
 'mar_10': tensor(0.2571),
 'mar_100': tensor(0.2617),
 'mar_100_per_class': tensor(-1.),
 'mar_large': tensor(0.3071),
 'mar_medium': tensor(0.0067),
 'mar_small': tensor(0.)}


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [00:22<00:00,  8.78it/s]


epoch = 41
lr = 0.000929167122162882
train_loss = 1.067229661345482
train_cls_loss = 0.16379123136401177
train_reg_loss = 0.303094819560647
train_cen_loss = 0.6003436121344566


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [00:02<00:00, 18.60it/s]


epoch = 42
valid_loss = 1.2318152678012848
valid_cls_loss = 0.23209682375192642
valid_reg_loss = 0.38211481004953385
valid_cen_loss = 0.6176036381721497
{'classes': tensor(1, dtype=torch.int32),
 'map': tensor(0.2068),
 'map_50': tensor(0.6101),
 'map_75': tensor(0.0767),
 'map_large': tensor(0.2430),
 'map_medium': tensor(0.),
 'map_per_class': tensor(-1.),
 'map_small': tensor(0.),
 'mar_1': tensor(0.1353),
 'mar_10': tensor(0.3128),
 'mar_100': tensor(0.3135),
 'mar_100_per_class': tensor(-1.),
 'mar_large': tensor(0.3690),
 'mar_medium': tensor(0.),
 'mar_small': tensor(0.)}


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [00:22<00:00,  8.75it/s]


epoch = 42
lr = 0.0009172251369589075
train_loss = 1.0382198038697243
train_cls_loss = 0.14650294199585914
train_reg_loss = 0.299331029728055
train_cen_loss = 0.5923858338594437


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [00:02<00:00, 18.75it/s]


epoch = 43
valid_loss = 1.2795903539657594
valid_cls_loss = 0.2664282463490963
valid_reg_loss = 0.396060619354248
valid_cen_loss = 0.617101491689682
{'classes': tensor(1, dtype=torch.int32),
 'map': tensor(0.1695),
 'map_50': tensor(0.5480),
 'map_75': tensor(0.0377),
 'map_large': tensor(0.1991),
 'map_medium': tensor(0.),
 'map_per_class': tensor(-1.),
 'map_small': tensor(0.),
 'mar_1': tensor(0.1218),
 'mar_10': tensor(0.2752),
 'mar_100': tensor(0.2850),
 'mar_100_per_class': tensor(-1.),
 'mar_large': tensor(0.3354),
 'mar_medium': tensor(0.),
 'mar_small': tensor(0.)}


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [00:22<00:00,  8.73it/s]


epoch = 43
lr = 0.0009044429198822497
train_loss = 1.0181884226202964
train_cls_loss = 0.13752731174230576
train_reg_loss = 0.2871160626411438
train_cen_loss = 0.5935450506210327


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [00:02<00:00, 19.00it/s]


epoch = 44
valid_loss = 1.2096754479408265
valid_cls_loss = 0.21106788352131844
valid_reg_loss = 0.3876765584945679
valid_cen_loss = 0.6109310114383697
{'classes': tensor(1, dtype=torch.int32),
 'map': tensor(0.1379),
 'map_50': tensor(0.4305),
 'map_75': tensor(0.0417),
 'map_large': tensor(0.1607),
 'map_medium': tensor(0.0004),
 'map_per_class': tensor(-1.),
 'map_small': tensor(0.),
 'mar_1': tensor(0.1083),
 'mar_10': tensor(0.2662),
 'mar_100': tensor(0.2850),
 'mar_100_per_class': tensor(-1.),
 'mar_large': tensor(0.3345),
 'mar_medium': tensor(0.0067),
 'mar_small': tensor(0.)}


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [00:22<00:00,  8.78it/s]


epoch = 44
lr = 0.0008908462126170102
train_loss = 1.0038299307227134
train_cls_loss = 0.13381182530894875
train_reg_loss = 0.27873248398303985
train_cen_loss = 0.5912856194376945


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [00:02<00:00, 19.01it/s]


epoch = 45
valid_loss = 1.2544016063213348
valid_cls_loss = 0.2562096467614174
valid_reg_loss = 0.3842126655578613
valid_cen_loss = 0.6139792966842651
{'classes': tensor(1, dtype=torch.int32),
 'map': tensor(0.1828),
 'map_50': tensor(0.5879),
 'map_75': tensor(0.0588),
 'map_large': tensor(0.2140),
 'map_medium': tensor(0.0054),
 'map_per_class': tensor(-1.),
 'map_small': tensor(0.),
 'mar_1': tensor(0.1248),
 'mar_10': tensor(0.2962),
 'mar_100': tensor(0.2970),
 'mar_100_per_class': tensor(-1.),
 'mar_large': tensor(0.3460),
 'mar_medium': tensor(0.0267),
 'mar_small': tensor(0.)}


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [00:22<00:00,  8.76it/s]


epoch = 45
lr = 0.0008764623971221354
train_loss = 1.0204938662052154
train_cls_loss = 0.14265036525204777
train_reg_loss = 0.2872580546885729
train_cen_loss = 0.5905854451656342


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [00:02<00:00, 18.80it/s]


epoch = 46
valid_loss = 1.2473805522918702
valid_cls_loss = 0.24451598301529884
valid_reg_loss = 0.3854181391000748
valid_cen_loss = 0.61744642496109
{'classes': tensor(1, dtype=torch.int32),
 'map': tensor(0.2011),
 'map_50': tensor(0.6432),
 'map_75': tensor(0.0555),
 'map_large': tensor(0.2350),
 'map_medium': tensor(0.0035),
 'map_per_class': tensor(-1.),
 'map_small': tensor(0.),
 'mar_1': tensor(0.1278),
 'mar_10': tensor(0.3180),
 'mar_100': tensor(0.3353),
 'mar_100_per_class': tensor(-1.),
 'mar_large': tensor(0.3938),
 'mar_medium': tensor(0.0067),
 'mar_small': tensor(0.)}


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [00:22<00:00,  8.71it/s]


epoch = 46
lr = 0.000861320440487796
train_loss = 0.9749607568979264
train_cls_loss = 0.1207557594962418
train_reg_loss = 0.2670153622329235
train_cen_loss = 0.5871896344423294


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [00:02<00:00, 18.45it/s]


epoch = 47
valid_loss = 1.3762449383735658
valid_cls_loss = 0.3703518095612526
valid_reg_loss = 0.3888108482956886
valid_cen_loss = 0.6170822763442994
{'classes': tensor(1, dtype=torch.int32),
 'map': tensor(0.1437),
 'map_50': tensor(0.4296),
 'map_75': tensor(0.0274),
 'map_large': tensor(0.1684),
 'map_medium': tensor(0.),
 'map_per_class': tensor(-1.),
 'map_small': tensor(0.),
 'mar_1': tensor(0.1248),
 'mar_10': tensor(0.2278),
 'mar_100': tensor(0.2278),
 'mar_100_per_class': tensor(-1.),
 'mar_large': tensor(0.2681),
 'mar_medium': tensor(0.),
 'mar_small': tensor(0.)}


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [00:22<00:00,  8.76it/s]


epoch = 47
lr = 0.0008454508365995225
train_loss = 0.9805250644683838
train_cls_loss = 0.1303615831024945
train_reg_loss = 0.26209300860762597
train_cen_loss = 0.5880704766511917


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [00:02<00:00, 18.87it/s]


epoch = 48
valid_loss = 1.258397102355957
valid_cls_loss = 0.24214180037379265
valid_reg_loss = 0.3992984837293625
valid_cen_loss = 0.6169568228721619
{'classes': tensor(1, dtype=torch.int32),
 'map': tensor(0.1619),
 'map_50': tensor(0.5524),
 'map_75': tensor(0.0365),
 'map_large': tensor(0.1901),
 'map_medium': tensor(0.),
 'map_per_class': tensor(-1.),
 'map_small': tensor(0.),
 'mar_1': tensor(0.1038),
 'mar_10': tensor(0.2857),
 'mar_100': tensor(0.2932),
 'mar_100_per_class': tensor(-1.),
 'mar_large': tensor(0.3451),
 'mar_medium': tensor(0.),
 'mar_small': tensor(0.)}


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [00:22<00:00,  8.78it/s]


epoch = 48
lr = 0.0008288855447275769
train_loss = 0.9598958098888397
train_cls_loss = 0.11956895435228944
train_reg_loss = 0.25585189159959554
train_cen_loss = 0.5844749611616135


 66%|███████████████████████████████████████████████████████████████████████████████▏                                        | 33/50 [00:01<00:00, 18.74it/s]


KeyboardInterrupt: 