https://tryolabs.com/blog/2018/01/18/faster-r-cnn-down-the-rabbit-hole-of-modern-object-detection

https://www.kaggle.com/code/artgor/object-detection-with-pytorch-lightning

https://gitee.com/wgs-gill/a-PyTorch-Tutorial-to-Object-Detection

https://github.com/hse-ds/iad-deep-learning/blob/874790d122adce8f01fa207de8b36c37fbf56f53/2021/seminars/sem06/sem_06.ipynb#L742

https://github.com/open-mmlab/mmdetection/blob/main/demo/MMDet_Tutorial.ipynb


In [1]:
import sys
sys.path.append("..")
import math
import torch
import torch as th
from PIL import Image
import numpy as np

import torchvision as thv
# import torchmetrics as thm
from torchmetrics.detection.mean_ap import MeanAveragePrecision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

import pytorch_lightning as pl

from src.datamodule import BarcodeDM
from src.config import Config
import albumentations as albu


In [2]:
cfg = Config.from_yaml("../config/baseline_detect.yml")
data = BarcodeDM(cfg.data_config, task=cfg.task, dry_run=True)

data.prepare_data()
data.setup()


In [97]:
class DetectModel(pl.LightningModule):
    def __init__(self, cfg: Config):
        super().__init__()
        self.model = thv.models.detection.fasterrcnn_resnet50_fpn(weights="COCO_V1")
        in_features = self.model.roi_heads.box_predictor.cls_score.in_features
        self.model.roi_heads.box_predictor = FastRCNNPredictor(in_features, 2)
        self.val_map = MeanAveragePrecision()
        self.test_map = MeanAveragePrecision()
    
    def forward(self, x: torch.Tensor):
        return self.model(x)
    
    def configure_optimizers(self):
        params = [p for p in model.parameters() if p.requires_grad]
        optimizer = torch.optim.SGD(
            params, lr=0.005, momentum=0.9, weight_decay=0.0005
        )
        scheduler = torch.optim.lr_scheduler.StepLR(
            optimizer, step_size=3, gamma=0.1
        )
        return [optimizer], [{"scheduler": scheduler, "interval": "epoch"}]
            
    def training_step(self, batch, batch_idx):
        """
        """
        images, targets = batch
        loss_dict = self.model(images, targets)
        loss = sum(loss for loss in loss_dict.values())
        self.log("train_loss", loss, on_step=True, on_epoch=True)
        # return loss
        print(f"training_step:end:{loss=} {loss_dict=}")
        return {
            'loss': loss, 
            'log': loss_dict, 
            'progress_bar': loss_dict
        }

    def validation_step(self, batch, batch_idx):
        images, targets = batch
        with th.no_grad():
            pred = self.model(images)
        self.val_map.update(
            preds=pred, target=targets
        )
        self.log_dict(self.val_map.compute(), on_step=False, on_epoch=True,prog_bar=False)

    def test_step(self, batch, batch_idx):
        images, targets = batch
        with th.no_grad():
            pred = self.model(images)
        # self.log(
        #     "test_iou", 
        #     thv.ops.box_iou(
        #         th.stack([t["boxes"] for t in pred ]).squeeze(), 
        #         th.stack([t["boxes"] for t in targets ]).squeeze()
        #     )
        # )
        self.test_map.update(preds=pred, target=targets)
        self.log_dict(self.test_map.compute(), on_step=True, on_epoch=True,prog_bar=False)

    # def on_validation_epoch_start(self) -> None:
    #     pass

    def on_validation_epoch_end(self) -> None:
         # self.log_dict(self._val_cls_metrics.compute(), on_epoch=True, on_step=False)
         # self.log_dict(self._val_seg_metrics.compute(), on_epoch=True, on_step=False)
         print(self.val_map.compute())

    def on_test_epoch_end(self) -> None:
        # self.log_dict(self._test_cls_metrics.compute(), on_epoch=True, on_step=False)
        # self.log_dict(self._test_seg_metrics.compute(), on_epoch=True, on_step=False)
        print(self.test_map.compute())

    # def optimizer_step(self, *args, **kwargs):
    #     super().optimizer_step(*args, **kwargs)
    #     optimizer.step()
    #     # self.lr_scheduler.step()  # Step per iteration


In [98]:
model = DetectModel(cfg)

In [4]:
train = data.train_dataloader()

for b in train:
    break

b[0][0].min(), b[0][0].max()

(tensor(0.), tensor(1.))

In [32]:
images, targets = b

In [35]:
images2, targets2 = model.model.transform(images, targets)

In [34]:
# for target_idx, target in enumerate(targets):
#     boxes = target["boxes"]
#     degenerate_boxes = boxes[:, 2:] <= boxes[:, :2]
#     if degenerate_boxes.any():
#         # print the first degenerate box
#         bb_idx = torch.where(degenerate_boxes.any(dim=1))[0][0]
#         degen_bb: List[float] = boxes[bb_idx].tolist()
#         torch._assert(
#             False,
#             "All bounding boxes should have positive height and width."
#             f" Found invalid box {degen_bb} for target at index {target_idx}.",
#         )

In [38]:
features = model.model.backbone(images2.tensors)

In [68]:
model.model.backbone.body

IntermediateLayerGetter(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): FrozenBatchNorm2d(64, eps=1e-05)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): FrozenBatchNorm2d(64, eps=1e-05)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): FrozenBatchNorm2d(64, eps=1e-05)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): FrozenBatchNorm2d(256, eps=1e-05)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (1): FrozenBatchNorm2d(256, eps=1e-05)
      )
    )
    (1): Bottleneck(
      (conv1): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=Fa

In [70]:
th.isfinite(images2.tensors).all()

tensor(True)

In [72]:
model.model.backbone.body.keys()

odict_keys(['conv1', 'bn1', 'relu', 'maxpool', 'layer1', 'layer2', 'layer3', 'layer4'])

In [99]:
x = model.model.backbone.body["conv1"](images2.tensors)
x = model.model.backbone.body["bn1"](x)
x = model.model.backbone.body["relu"](x)
x = model.model.backbone.body["maxpool"](x)

x = model.model.backbone.body["layer1"](x)

In [90]:
x.isfinite().all()

tensor(True)

In [100]:
dict(model.model.backbone.body["layer2"].named_parameters())

{'0.conv1.weight': Parameter containing:
 tensor([[[[ 0.0199]],
 
          [[ 0.0048]],
 
          [[-0.0053]],
 
          ...,
 
          [[ 0.0168]],
 
          [[-0.0298]],
 
          [[ 0.0058]]],
 
 
         [[[ 0.0026]],
 
          [[-0.0012]],
 
          [[-0.0586]],
 
          ...,
 
          [[ 0.0154]],
 
          [[ 0.0011]],
 
          [[-0.0478]]],
 
 
         [[[ 0.0179]],
 
          [[ 0.0079]],
 
          [[-0.0157]],
 
          ...,
 
          [[ 0.0143]],
 
          [[-0.0092]],
 
          [[ 0.0053]]],
 
 
         ...,
 
 
         [[[ 0.0077]],
 
          [[ 0.0091]],
 
          [[ 0.0028]],
 
          ...,
 
          [[-0.0019]],
 
          [[ 0.0785]],
 
          [[-0.0014]]],
 
 
         [[[-0.0018]],
 
          [[ 0.0049]],
 
          [[ 0.1432]],
 
          ...,
 
          [[-0.0012]],
 
          [[-0.0377]],
 
          [[ 0.0537]]],
 
 
         [[[-0.0126]],
 
          [[-0.0048]],
 
          [[-0.0034]],
 
          ...,
 

In [60]:
body_out = model.model.backbone.body(images2.tensors)

In [64]:
body_out["0"].shape

torch.Size([2, 256, 256, 200])

In [66]:
body_out["0"]

tensor([[[[1.1416e-01, 9.5460e-02, 9.0232e-02,  ..., 5.2702e-03,
           5.4701e-03, 2.8912e-02],
          [1.1626e-01, 8.5904e-02, 8.3676e-02,  ..., 5.1171e-03,
           5.1165e-03, 1.3138e-02],
          [1.1154e-01, 7.8069e-02, 8.0612e-02,  ..., 5.3065e-03,
           5.3234e-03, 1.2774e-02],
          ...,
          [2.7443e-02, 4.9617e-03, 5.4207e-03,  ..., 5.5040e-03,
           4.7913e-03, 1.9315e-02],
          [2.7736e-02, 4.6395e-03, 4.7594e-03,  ..., 4.8442e-03,
           4.3887e-03, 1.9372e-02],
          [3.6103e-02, 1.8216e-02, 1.8277e-02,  ..., 1.8328e-02,
           1.9690e-02, 2.8663e-02]],

         [[1.8153e-01, 1.8294e-01, 1.8229e-01,  ..., 1.1059e-01,
           1.1363e-01, 1.3227e-01],
          [1.7869e-01, 1.7694e-01, 1.7818e-01,  ..., 1.0054e-01,
           1.0509e-01, 1.1290e-01],
          [1.7884e-01, 1.7596e-01, 1.7922e-01,  ..., 1.0089e-01,
           1.0639e-01, 1.1001e-01],
          ...,
          [1.4532e-01, 1.4596e-01, 1.4629e-01,  ..., 1.4575

In [44]:
proposals, proposal_losses = model.model.rpn(images2, features, targets2)


In [46]:
proposals

[tensor([], size=(0, 4)), tensor([], size=(0, 4))]

In [48]:
targets2

[{'boxes': tensor([[ 925.0000,  420.3125, 1310.9375, 1120.3125]]),
  'labels': tensor([1])},
 {'boxes': tensor([[1026.5625,  270.3125, 1484.3750, 1190.6250]]),
  'labels': tensor([1])}]

In [49]:
objectness, pred_bbox_deltas = model.model.rpn.head(list(features.values()))


In [56]:
# features

In [16]:
# b[0][0]

In [6]:
b[0][0].shape

torch.Size([3, 640, 512])

In [7]:
model = DetectModel(cfg)

In [9]:
model

DetectModel(
  (model): FasterRCNN(
    (transform): GeneralizedRCNNTransform(
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        Resize(min_size=(800,), max_size=1333, mode='bilinear')
    )
    (backbone): BackboneWithFPN(
      (body): IntermediateLayerGetter(
        (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
        (bn1): FrozenBatchNorm2d(64, eps=1e-05)
        (relu): ReLU(inplace=True)
        (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
        (layer1): Sequential(
          (0): Bottleneck(
            (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
            (bn1): FrozenBatchNorm2d(64, eps=1e-05)
            (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
            (bn2): FrozenBatchNorm2d(64, eps=1e-05)
            (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        

In [101]:


trainer = pl.Trainer(
    max_epochs=20,
    # accelerator=config.accelerator,
    # devices=[config.device],
    # callbacks=[
    #     checkpoint_callback,
    #     EarlyStopping(monitor=config.monitor_metric, patience=4, mode=config.monitor_mode),
    #     LearningRateMonitor(logging_interval='epoch'),
    # ],
)
trainer.fit(model=model, datamodule=data)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name     | Type                 | Params
--------------------------------------------------
0 | model    | FasterRCNN           | 41.3 M
1 | val_map  | MeanAveragePrecision | 0     
2 | test_map | MeanAveragePrecision | 0     
--------------------------------------------------
41.1 M    Trainable params
222 K     Non-trainable params
41.3 M    Total params
165.197   Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

{'map': tensor(0.), 'map_50': tensor(0.), 'map_75': tensor(0.), 'map_small': tensor(-1.), 'map_medium': tensor(-1.), 'map_large': tensor(0.), 'mar_1': tensor(0.), 'mar_10': tensor(0.), 'mar_100': tensor(0.), 'mar_small': tensor(-1.), 'mar_medium': tensor(-1.), 'mar_large': tensor(0.), 'map_per_class': tensor(-1.), 'mar_100_per_class': tensor(-1.)}


  rank_zero_warn(


Training: 0it [00:00, ?it/s]

training_step:end:loss=tensor(21.7736, grad_fn=<AddBackward0>) loss_dict={'loss_classifier': tensor(0.9261, grad_fn=<NllLossBackward0>), 'loss_box_reg': tensor(3.9872e-05, grad_fn=<DivBackward0>), 'loss_objectness': tensor(6.8150, grad_fn=<BinaryCrossEntropyWithLogitsBackward0>), 'loss_rpn_box_reg': tensor(14.0325, grad_fn=<DivBackward0>)}
training_step:end:loss=tensor(0.4948, grad_fn=<AddBackward0>) loss_dict={'loss_classifier': tensor(0.2115, grad_fn=<NllLossBackward0>), 'loss_box_reg': tensor(3.9383e-05, grad_fn=<DivBackward0>), 'loss_objectness': tensor(0.2220, grad_fn=<BinaryCrossEntropyWithLogitsBackward0>), 'loss_rpn_box_reg': tensor(0.0612, grad_fn=<DivBackward0>)}
training_step:end:loss=tensor(0.2839, grad_fn=<AddBackward0>) loss_dict={'loss_classifier': tensor(0.0449, grad_fn=<NllLossBackward0>), 'loss_box_reg': tensor(9.0866e-05, grad_fn=<DivBackward0>), 'loss_objectness': tensor(0.2223, grad_fn=<BinaryCrossEntropyWithLogitsBackward0>), 'loss_rpn_box_reg': tensor(0.0166, gra

Validation: 0it [00:00, ?it/s]

{'map': tensor(0.), 'map_50': tensor(0.), 'map_75': tensor(0.), 'map_small': tensor(-1.), 'map_medium': tensor(-1.), 'map_large': tensor(0.), 'mar_1': tensor(0.), 'mar_10': tensor(0.), 'mar_100': tensor(0.), 'mar_small': tensor(-1.), 'mar_medium': tensor(-1.), 'mar_large': tensor(0.), 'map_per_class': tensor(-1.), 'mar_100_per_class': tensor(-1.)}
training_step:end:loss=tensor(15.0481, grad_fn=<AddBackward0>) loss_dict={'loss_classifier': tensor(0.0146, grad_fn=<NllLossBackward0>), 'loss_box_reg': tensor(3.8331e-05, grad_fn=<DivBackward0>), 'loss_objectness': tensor(0.7127, grad_fn=<BinaryCrossEntropyWithLogitsBackward0>), 'loss_rpn_box_reg': tensor(14.3208, grad_fn=<DivBackward0>)}
training_step:end:loss=tensor(0.7478, grad_fn=<AddBackward0>) loss_dict={'loss_classifier': tensor(0.0048, grad_fn=<NllLossBackward0>), 'loss_box_reg': tensor(3.7840e-05, grad_fn=<DivBackward0>), 'loss_objectness': tensor(0.6894, grad_fn=<BinaryCrossEntropyWithLogitsBackward0>), 'loss_rpn_box_reg': tensor(0

Validation: 0it [00:00, ?it/s]

{'map': tensor(0.), 'map_50': tensor(0.), 'map_75': tensor(0.), 'map_small': tensor(-1.), 'map_medium': tensor(-1.), 'map_large': tensor(0.), 'mar_1': tensor(0.), 'mar_10': tensor(0.), 'mar_100': tensor(0.), 'mar_small': tensor(-1.), 'mar_medium': tensor(-1.), 'mar_large': tensor(0.), 'map_per_class': tensor(-1.), 'mar_100_per_class': tensor(-1.)}
training_step:end:loss=tensor(0.2365, grad_fn=<AddBackward0>) loss_dict={'loss_classifier': tensor(0.0025, grad_fn=<NllLossBackward0>), 'loss_box_reg': tensor(3.6948e-05, grad_fn=<DivBackward0>), 'loss_objectness': tensor(0.1759, grad_fn=<BinaryCrossEntropyWithLogitsBackward0>), 'loss_rpn_box_reg': tensor(0.0581, grad_fn=<DivBackward0>)}
training_step:end:loss=tensor(0.1395, grad_fn=<AddBackward0>) loss_dict={'loss_classifier': tensor(0.0034, grad_fn=<NllLossBackward0>), 'loss_box_reg': tensor(5.0341e-05, grad_fn=<DivBackward0>), 'loss_objectness': tensor(0.0923, grad_fn=<BinaryCrossEntropyWithLogitsBackward0>), 'loss_rpn_box_reg': tensor(0.0

Validation: 0it [00:00, ?it/s]

  rank_zero_warn("Detected KeyboardInterrupt, attempting graceful shutdown...")


In [15]:
model.model.transform

GeneralizedRCNNTransform(
    Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    Resize(min_size=(800,), max_size=1333, mode='bilinear')
)

In [None]:
for images, targets in train_loader:
    break

In [None]:
thv.ops.box_iou(
    th.LongTensor([[1,2,3,4],[1,2,3,4],[1,2,3,4]]),
    th.LongTensor([[1,2,3,4],[1,2,3,4]])
)

In [None]:
def is_dist_avail_and_initialized():
    if not th.distributed.is_available():
        return False
    if not th.distributed.is_initialized():
        return False
    return True

def get_world_size():
    if not is_dist_avail_and_initialized():
        return 1
    return dist.get_world_size()

def reduce_dict(input_dict, average=True):
    """
    Args:
        input_dict (dict): all the values will be reduced
        average (bool): whether to do average or sum
    Reduce the values in the dictionary from all processes so that all processes
    have the averaged results. Returns a dict with the same fields as
    input_dict, after reduction.
    """
    world_size = get_world_size()
    if world_size < 2:
        return input_dict
    with torch.inference_mode():
        names = []
        values = []
        # sort the keys so that they are consistent across processes
        for k in sorted(input_dict.keys()):
            names.append(k)
            values.append(input_dict[k])
        values = torch.stack(values, dim=0)
        dist.all_reduce(values)
        if average:
            values /= world_size
        reduced_dict = {k: v for k, v in zip(names, values)}
    return reduced_dict

def train_one_epoch(model, optimizer, data_loader, device, epoch, scaler=None):
    model.train()
    loss_values = []
    lr_scheduler = None
    if epoch == 0:
        warmup_factor = 1.0 / 1000
        warmup_iters = min(1000, len(data_loader) - 1)
        lr_scheduler = torch.optim.lr_scheduler.LinearLR(
            optimizer, start_factor=warmup_factor, total_iters=warmup_iters
        )
    for i_batch, (images, targets) in enumerate(data_loader):
        # print(f"{i_batch=}")
        images = list(image.to(device) for image in images)
        targets = [
            {k: v.to(device) for k, v in t.items() if k!="ocr"} 
            for t in targets
        ]
        # with torch.cuda.amp.autocast(enabled=scaler is not None):
        loss_dict = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())
        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = reduce_dict(loss_dict)
        losses_reduced = sum(loss for loss in loss_dict_reduced.values())
        loss_value = losses_reduced.item()
        if not math.isfinite(loss_value):
            print(f"Loss is {loss_value}, stopping training")
            print(loss_dict_reduced)
            sys.exit(1)
        optimizer.zero_grad()
        if scaler is not None:
            scaler.scale(losses).backward()
            scaler.step(optimizer)
            scaler.update()
        else:
            losses.backward()
            optimizer.step()
        if lr_scheduler is not None:
            lr_scheduler.step()
        loss_values.append(loss_value)
    return loss_values

model = thv.models.detection.fasterrcnn_resnet50_fpn(
    # pretrained=True
    # weights="DEFAULT"
    
)
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, 2)

# construct an optimizer
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(
    params, lr=0.05, momentum=0.9, weight_decay=0.0005
)
lr_scheduler = torch.optim.lr_scheduler.StepLR(
    optimizer, step_size=3, gamma=0.1
)
train_loader = data.train_dataloader()
for epoch in range(15):
    losses = train_one_epoch(
        model, 
        optimizer, 
        train_loader, 
        device=th.device('cpu'), 
        epoch=0
    )
    lr_scheduler.step()
    print(th.mean(th.FloatTensor(losses)),":", losses)
    # def eval_step(model, data_loader):
    data_loader = data.train_dataloader()
    metric = MeanAveragePrecision()
    model.eval()
    pred_shapes = []
    for i_batch, (images, targets) in enumerate(data_loader):
        with th.no_grad():
            pred = model(images)
            pred_shapes.append([p["boxes"].shape for p in pred])
            losses = model(images, targets)
        # print(f"losses={losses}")
        metric.update(
            preds=pred,target=targets
        )
    print(f"{pred_shapes=}")
    print(f"metrics={metric.compute()}")
    print()

In [None]:
to_pil = thv.transforms.ToPILImage()

In [None]:
from matplotlib import pyplot as plt

In [None]:
import PIL.ImageDraw as ImageDraw
from matplotlib.patches import Polygon



In [None]:
img = to_pil(images[0])

In [None]:
(xmin,ymin),(xmin,ymax),(xmax,ymax),(xmax,ymin)

In [None]:
# draw = ImageDraw.Draw(img)
# draw.polygon(([(xmin,ymin),(xmin,ymax),(xmax,ymax),(xmax,ymin)],), fill=200)

In [None]:
# idx=16
# idx = 72
idx = 78
xmin,xmax=[int(x) for x in sorted(pred[0]["boxes"][idx,[0,2]].tolist())]
ymin,ymax=[int(x) for x in sorted(pred[0]["boxes"][idx,[1,3]].tolist())]

pol = Polygon(
    [
        (xmin,ymin),
        (xmin,ymax),
        (xmax,ymax),
        (xmax,ymin),
        (xmin,ymin)
    ],
    ec='orangered',
    fc='none'
)
plt.imshow(img)
plt.gca().add_patch(pol)
plt.show()

In [None]:
idx

In [None]:
metric.compute()

In [None]:
targets

In [None]:
[p["boxes"].shape for p in pred]

In [None]:
type(model)
#(images,targets)

In [None]:
[x for x in pred if x["scores"].nelement()]

In [None]:
thv.models.detection.faster_rcnn.__file__

In [None]:
metric.compute()

In [None]:
thv.ops.box_iou?

In [None]:
targets

In [None]:
model(images, targets)

In [None]:
_=model.eval()

In [None]:
model(images)

In [None]:
thv.ops.box_iou(
    [x for x in pred if x["scores"].nelement()],
    targets
)

In [None]:
b.keys()

In [None]:
b["label"]

In [None]:
b["boxes"] = b["label"]

In [None]:
import torch as th

In [None]:
th.stack(b["boxes"], axis=1)

In [None]:
b["boxes"]

In [None]:
targets = []
for bbox in b["label"]:
    targets.append(
        {
            "boxes": 
        }
    )

In [None]:
th2pil = thv.transforms.ToPILImage()

In [None]:
th2pil(b["image"][0].tra)

In [None]:
modelb["image"][:1], [{"boxes":th.stack(b["boxes"], axis=1)}])