In [None]:
# Install the pinned PyYAML release before setting up detectron2.
!python -m pip install pyyaml==5.1
import sys, os, distutils.core
# Note: This is a faster way to install detectron2 in Colab, but it does not include all functionalities (e.g. compiled operators).
# See https://detectron2.readthedocs.io/tutorials/install.html for full installation instructions
# NOTE(review): distutils was removed from the standard library in Python 3.12 — confirm the runtime still provides it.
!git clone 'https://github.com/facebookresearch/detectron2'
# Evaluate detectron2's setup.py only to read its declared requirements.
dist = distutils.core.run_setup("./detectron2/setup.py")
# Install each requirement listed in install_requires (quoted individually for the shell).
!python -m pip install {' '.join([f"'{x}'" for x in dist.install_requires])}
# Make the cloned checkout importable by putting it first on sys.path.
sys.path.insert(0, os.path.abspath('./detectron2'))

# Properly install detectron2. (Please do not install twice in both ways)
# !python -m pip install 'git+https://github.com/facebookresearch/detectron2.git'

In [None]:
# Report the toolchain versions in use (nvcc, torch, detectron2).
import torch, detectron2
!nvcc --version
# Keep only the "major.minor" part of the installed torch version.
TORCH_VERSION = ".".join(torch.__version__.split(".")[:2])
# Text after the last "+" of the torch version string (e.g. "cu124");
# if the version string has no "+", this is the whole version string.
CUDA_VERSION = torch.__version__.split("+")[-1]
print("torch: ", TORCH_VERSION, "; cuda: ", CUDA_VERSION)
print("detectron2:", detectron2.__version__)

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2024 NVIDIA Corporation
Built on Thu_Jun__6_02:18:23_PDT_2024
Cuda compilation tools, release 12.5, V12.5.82
Build cuda_12.5.r12.5/compiler.34385749_0
torch:  2.5 ; cuda:  cu124
detectron2: 0.6


In [None]:
# Mount Google Drive at /content/drive; later cells read the dataset archives
# from paths under this mount point.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Some basic setup:
# Setup detectron2 logger
import detectron2
from detectron2.utils.logger import setup_logger
setup_logger()

# import some common libraries
import numpy as np
import os, json, cv2, random
import torch
import shutil
import zipfile
import matplotlib.pyplot as plt

from google.colab import files
from google.colab.patches import cv2_imshow

# import some common detectron2 utilities
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor, DefaultTrainer
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer, ColorMode
from detectron2.data import MetadataCatalog, DatasetCatalog, build_detection_test_loader

from detectron2.modeling import build_model
from detectron2.checkpoint import DetectionCheckpointer
from detectron2.evaluation import COCOEvaluator, inference_on_dataset, DatasetEvaluator
from detectron2.data.datasets import register_coco_instances

### Auxiliary functions

In [None]:
def zip_and_download_folder(folder_path, folder_name='output'):
  """Zip a folder and hand the archive to the Colab download helper.

  Args:
    folder_path: Directory whose contents are archived.
    folder_name: Base name of the archive; ``<folder_name>.zip`` is created
      relative to the current working directory.

  Returns:
    None.
  """
  archive_base = f"{folder_name}"
  shutil.make_archive(archive_base, "zip", folder_path)
  # make_archive appends the ".zip" suffix itself, so rebuild the same name here.
  files.download(archive_base + ".zip")

def unzip_folder(zip_folder_path, unzipped_folder_path):
  """Extract all members of a zip archive into a destination directory.

  Args:
    zip_folder_path: Path of the ``.zip`` file to read.
    unzipped_folder_path: Directory to extract into; it is created
      (including parents) when it does not exist yet.

  Returns:
    None.
  """
  destination = unzipped_folder_path
  os.makedirs(destination, exist_ok=True)
  with zipfile.ZipFile(zip_folder_path, mode='r') as archive:
    archive.extractall(path=destination)

In [None]:
# Function to convert YOLO labels to COCO format
def yolo_to_coco(yolo_dir, image_dir, output_json, categories=None):
    """Convert per-image YOLO label files into a single COCO detection JSON.

    Every ``.jpg``/``.png`` file in ``image_dir`` becomes one COCO image entry.
    If ``yolo_dir`` holds a ``.txt`` file with the same stem, each non-blank
    line (``class x_center y_center width height``, coordinates normalized to
    the image size) becomes one annotation with a pixel ``[x, y, w, h]`` box.
    Images that OpenCV cannot read are skipped; images without a label file
    are listed with no annotations.

    Args:
        yolo_dir: Directory containing the YOLO ``.txt`` label files.
        image_dir: Directory containing the images.
        output_json: Path of the COCO JSON file to write.
        categories: Optional COCO ``categories`` list. Defaults to the single
            category ``{"id": 2, "name": "car"}``.

    Raises:
        ValueError: If a non-blank label line does not hold an integer class
            followed by exactly four floats.
    """
    if categories is None:
        categories = [{"id": 2, "name": "car"}]

    images = []
    annotations = []
    annotation_id = 1

    # os.listdir returns entries in arbitrary order; sort so that image ids
    # (and therefore the generated JSON) are reproducible between runs.
    image_files = sorted(
        f for f in os.listdir(image_dir) if f.endswith((".jpg", ".png"))
    )

    for image_id, image_file in enumerate(image_files, start=1):
        img_path = os.path.join(image_dir, image_file)
        label_path = os.path.join(yolo_dir, os.path.splitext(image_file)[0] + ".txt")

        # Read image dimensions
        img = cv2.imread(img_path)
        if img is None:
            continue

        # Only the first two axes are needed; do not depend on a channel axis.
        height, width = img.shape[:2]

        # Add image to COCO structure
        images.append({
            "id": image_id,
            "file_name": image_file,
            "height": height,
            "width": width
        })

        # Images without a label file simply get no annotations.
        if not os.path.exists(label_path):
            continue

        with open(label_path, "r") as f:
            for line in f:
                parts = line.split()
                if not parts:
                    # Blank lines (e.g. a trailing newline) carry no box.
                    continue

                category_id = int(parts[0])
                x_center, y_center, bbox_width, bbox_height = map(float, parts[1:])

                # Convert normalized center/size to a pixel top-left/size box
                x_min = (x_center - bbox_width / 2) * width
                y_min = (y_center - bbox_height / 2) * height
                bbox_width *= width
                bbox_height *= height

                # Add annotation to COCO structure
                annotations.append({
                    "id": annotation_id,
                    "image_id": image_id,
                    "category_id": category_id,
                    "bbox": [x_min, y_min, bbox_width, bbox_height],
                    "area": bbox_width * bbox_height,
                    "iscrowd": 0
                })
                annotation_id += 1

    # Create final JSON structure
    coco_format = {
        "info": {
            "description": "Dataset converted from YOLO to COCO",
            "version": "1.0",
            "year": 2025,
            "contributor": "Conversión Automática",
            "date_created": "2025-03-03"
        },
        "licenses": [{"id": 1, "name": "CC-BY", "url": "http://creativecommons.org/licenses/by/4.0/"}],
        "images": images,
        "annotations": annotations,
        "categories": categories
    }

    # Save as JSON
    with open(output_json, "w") as f:
        json.dump(coco_format, f, indent=4)

    print(f"Saved file: {output_json}")

# **1.1 Off-the-shelf**

### Prepare the dataset

We adapt the dataset from YOLO format. The following steps expect this example structure:

```
dataset_all
└───images
|   ├── frame_0001.jpg
|   ├── frame_0002.jpg
|   ├── ...
└───labels
    ├── frame_0001.txt
    ├── frame_0002.txt
    ├── ...
```

In [None]:
# Modify this path to your custom directory.
# The next cell reads dataset_all/labels.zip and dataset_all/images.zip from
# here and writes annotations.json into it.
DATASET_DIR = "/content/drive/MyDrive/"

In [None]:
# Local working copy of the full dataset, extracted from the archives in DATASET_DIR.
TEST_DIR = "/content/yolov3/datasets/dataset_all"
IMAGE_DIR_TEST = os.path.join(TEST_DIR, "images")
LABEL_DIR_TEST = os.path.join(TEST_DIR, "labels")

# os.path.join avoids the doubled "/" that string formatting produces when
# DATASET_DIR already ends with a slash.
ANNOTATIONS_ALL_JSON = os.path.join(DATASET_DIR, "annotations.json")

unzip_folder(os.path.join(DATASET_DIR, "dataset_all", "labels.zip"), LABEL_DIR_TEST)
unzip_folder(os.path.join(DATASET_DIR, "dataset_all", "images.zip"), IMAGE_DIR_TEST)

# Convert to COCO
yolo_to_coco(LABEL_DIR_TEST, IMAGE_DIR_TEST, ANNOTATIONS_ALL_JSON)
dataset_all = 'dataset_all'

# Registering a name that already exists raises, so guard the call to keep
# this cell re-runnable. The image root is the same directory the converter
# read the images from.
if dataset_all not in DatasetCatalog.list():
    register_coco_instances(dataset_all, {}, ANNOTATIONS_ALL_JSON, IMAGE_DIR_TEST)

Saved file: /content/drive/MyDrive/annotations.json


### Inference - Model selection: Faster R-CNN

Creates output video with predictions

In [None]:
# Config model Faster R-CNN
cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml"))
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml")
cfg.MODEL.DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# DefaultPredictor builds the model and loads cfg.MODEL.WEIGHTS itself, so a
# separate DetectionCheckpointer.load() would only load the same weights twice.
predictor = DefaultPredictor(cfg)

# Set input and output video paths
input_video_path = "vdo.avi"  # Path to input video
output_video_path = "vdo_out.avi" # Path to save output video
output_txt_folder = "bbox_output"  # Path to save bbox txt's
os.makedirs(output_txt_folder, exist_ok=True)

# 'car' is contiguous class index 2 in the COCO-pretrained detectron2 models
# (it is category id 3 in the original COCO annotations).
car_class_index = 2
# Metadata (class names/colors) used by the Visualizer; constant for all frames.
metadata = MetadataCatalog.get(cfg.DATASETS.TRAIN[0])

cap = cv2.VideoCapture(input_video_path)
if not cap.isOpened():
    # Raise instead of exit(): the cell stops with a clear error.
    raise IOError("Error: Cannot open input file.")

# Keep the frame rate as a float so fractional rates (e.g. 29.97) are not truncated.
fps = cap.get(cv2.CAP_PROP_FPS)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

print(f"Frame width: {width}, height: {height}")

fourcc = cv2.VideoWriter_fourcc(*"XVID")  # Codec
out = cv2.VideoWriter(output_video_path, fourcc, fps, (width, height))

if not out.isOpened():
    cap.release()
    raise IOError("Error: Cannot open output file.")

print(f"Output video dimensions: {width}, {height}")

frame_number = 0

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        outputs = predictor(frame)

        # Keep only the detections predicted as 'car'
        instances = outputs["instances"]
        car_instances = instances[instances.pred_classes == car_class_index]

        bboxes = car_instances.pred_boxes.tensor.cpu().numpy()  # Bounding boxes
        scores = car_instances.scores.cpu().numpy()  # Confidence
        labels = car_instances.pred_classes.cpu().numpy()  # Detected classes

        # One text file per frame: "label x1 y1 x2 y2 score" per detection
        txt_filename = os.path.join(output_txt_folder, f"frame_{frame_number:06d}.txt")
        with open(txt_filename, "w") as f:
            for bbox, score, label in zip(bboxes, scores, labels):
                x1, y1, x2, y2 = bbox  # Bounding box corner coordinates
                f.write(f"{label} {x1} {y1} {x2} {y2} {score}\n")

        # Visualizer expects RGB, OpenCV frames are BGR: flip the channel axis
        # on the way in and again on the way out.
        v = Visualizer(frame[:, :, ::-1], metadata, scale=1.0, instance_mode=ColorMode.SEGMENTATION)
        v = v.draw_instance_predictions(car_instances.to("cpu"))  # Draw the car detections

        processed_frame = v.get_image()[:, :, ::-1]

        out.write(processed_frame)

        frame_number += 1
finally:
    # Always release the capture and finalize the output file, even on error.
    cap.release()
    out.release()

cv2.destroyAllWindows()
print("Inference complete! The output video is saved as:", output_video_path)
print(f"Bounding boxes saved in folder: {output_txt_folder}")

zip_and_download_folder(f"{output_txt_folder}", f"bbox_output_{input_video_path}_off-the-shelf_confidence{cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST}")

[03/04 14:01:35 d2.checkpoint.detection_checkpoint]: [DetectionCheckpointer] Loading from model_final.pth ...
[03/04 14:01:35 d2.checkpoint.detection_checkpoint]: [DetectionCheckpointer] Loading from model_final.pth ...


  return torch.load(f, map_location=torch.device("cpu"))


Frame width: 1920, height: 1080
Output video dimensions: 1920, 1080
Inference complete! The output video is saved as: vdo_out_finetune.avi
Bounding boxes saved in folder: bbox_output_total


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

# **1.2 Fine-tuning**

## **1.2.1 Prepare the datasets**
Convert datasets to COCO JSON format and load the JSON datasets. We adapt the dataset from YOLO format. The following steps expect this example structure:

```
dataset
└───images
|   └───train
|   |   ├── frame_0001.jpg
|   |   ├── frame_0002.jpg
|   |   ├── ...
|   └───val
|       ├── frame_0536.jpg
|       ├── frame_0537.jpg
|       ├── ...
└───labels
    └───train
    |   ├── frame_0001.txt
    |   ├── frame_0002.txt
    |   ├── ...
    └───val
        ├── frame_0536.txt
        ├── frame_0537.txt
        ├── ...
```

In [None]:
YOLO_DIR = "/content/yolov3/datasets"
# Drive folder holding the dataset archives; the COCO JSON files are written here too.
DRIVE_DATASETS_DIR = "/content/drive/MyDrive/MCV/datasets"

unzip_folder(os.path.join(DRIVE_DATASETS_DIR, "dataset_train.zip"), os.path.join(YOLO_DIR, "dataset_train"))

IMAGE_DIR_TRAIN = os.path.join(YOLO_DIR, "dataset_train/images/train")
IMAGE_DIR_VAL = os.path.join(YOLO_DIR, "dataset_train/images/val")
LABEL_DIR_TRAIN = os.path.join(YOLO_DIR, "dataset_train/labels/train")
LABEL_DIR_VAL = os.path.join(YOLO_DIR, "dataset_train/labels/val")

IMAGE_DIR_TEST = os.path.join(YOLO_DIR, "dataset_test/images")
LABEL_DIR_TEST = os.path.join(YOLO_DIR, "dataset_test/labels")

unzip_folder(os.path.join(DRIVE_DATASETS_DIR, "dataset_test/images.zip"), IMAGE_DIR_TEST)
unzip_folder(os.path.join(DRIVE_DATASETS_DIR, "dataset_test/labels.zip"), LABEL_DIR_TEST)

ANNOTATIONS_TRAIN_JSON = os.path.join(DRIVE_DATASETS_DIR, "annotations_train.json")
ANNOTATIONS_VAL_JSON = os.path.join(DRIVE_DATASETS_DIR, "annotations_val.json")
ANNOTATIONS_TEST_JSON = os.path.join(DRIVE_DATASETS_DIR, "annotations_test.json")

# Run the conversion for the train, val and test splits
yolo_to_coco(LABEL_DIR_TRAIN, IMAGE_DIR_TRAIN, ANNOTATIONS_TRAIN_JSON)
yolo_to_coco(LABEL_DIR_VAL, IMAGE_DIR_VAL, ANNOTATIONS_VAL_JSON)
yolo_to_coco(LABEL_DIR_TEST, IMAGE_DIR_TEST, ANNOTATIONS_TEST_JSON)

dataset_train = 'dataset_train'
dataset_val = 'dataset_val'
dataset_test = 'dataset_test'

# Each split is registered with the same image directory its JSON was
# generated from, because the "file_name" entries are relative to it. The test
# split was previously registered with ".../dataset_test/images/test", which
# is not the directory the images were extracted to and converted from.
# The membership check keeps the cell re-runnable: registering an existing
# name raises.
for _name, _json_path, _image_root in (
    (dataset_train, ANNOTATIONS_TRAIN_JSON, IMAGE_DIR_TRAIN),
    (dataset_val, ANNOTATIONS_VAL_JSON, IMAGE_DIR_VAL),
    (dataset_test, ANNOTATIONS_TEST_JSON, IMAGE_DIR_TEST),
):
    if _name not in DatasetCatalog.list():
        register_coco_instances(_name, {}, _json_path, _image_root)

Saved file: /content/drive/MyDrive/MCV/datasets/annotations_train.json
Saved file: /content/drive/MyDrive/MCV/datasets/annotations_val.json
Saved file: /content/drive/MyDrive/MCV/datasets/annotations_test.json


## **1.2.2 Config Fine-Tuning**
Adjust the model config to train with our dataset.

In [None]:
# Config model and training
cfg = get_cfg()
model_name = "faster_rcnn_R_50_FPN_3x"
cfg.merge_from_file(model_zoo.get_config_file(f"COCO-Detection/{model_name}.yaml"))

cfg.DATASETS.TRAIN = (dataset_train,)
cfg.DATASETS.TEST = (dataset_val,)
cfg.MODEL.DEVICE = "cuda"
cfg.DATALOADER.NUM_WORKERS = 2
# Start from the COCO-pretrained checkpoint of the same architecture
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(f"COCO-Detection/{model_name}.yaml")
cfg.SOLVER.IMS_PER_BATCH = 2
cfg.SOLVER.BASE_LR = 0.00025
cfg.SOLVER.MAX_ITER = 2000
cfg.SOLVER.STEPS = []  # Empty: no learning-rate decay steps
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 32
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1  # Num classes of our dataset
cfg.MODEL.BACKBONE.FREEZE_AT = 2  # Freeze initial layers
# cfg.TEST.EVAL_PERIOD = 50  # Eval every 50 iters

cfg.OUTPUT_DIR = f"./output/fine-tuning/{model_name}"
# The trainer writes checkpoints and metrics into OUTPUT_DIR; make sure it exists.
os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)

# Train
trainer = DefaultTrainer(cfg)
trainer.resume_or_load(resume=False)
trainer.train()

# Evaluate once at the end of training. COCOEvaluator is given explicit
# arguments; passing the cfg as its second argument is deprecated.
final_evaluator = COCOEvaluator(dataset_val, distributed=True, output_dir=cfg.OUTPUT_DIR)
trainer.test(cfg, trainer.model, evaluators=[final_evaluator])

zip_and_download_folder(cfg.OUTPUT_DIR, model_name)

[03/03 22:12:32 d2.engine.defaults]: Model:
GeneralizedRCNN(
  (backbone): FPN(
    (fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (top_block): LastLevelMaxPool()
    (bottom_up): ResNet(
      (stem): BasicStem(
        (conv1): Conv2d(
          3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False
          (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
        )
      )
      (res

roi_heads.box_predictor.bbox_pred.{bias, weight}
roi_heads.box_predictor.cls_score.{bias, weight}


[03/03 22:12:32 d2.engine.train_loop]: Starting training from iteration 0
[03/03 22:12:42 d2.utils.events]:  eta: 0:15:49  iter: 19  total_loss: 1.611  loss_cls: 0.6981  loss_box_reg: 0.8371  loss_rpn_cls: 0.02506  loss_rpn_loc: 0.03936    time: 0.4843  last_time: 0.5628  data_time: 0.0286  last_data_time: 0.0393   lr: 4.9953e-06  max_mem: 2550M
[03/03 22:12:52 d2.utils.events]:  eta: 0:15:43  iter: 39  total_loss: 1.62  loss_cls: 0.6682  loss_box_reg: 0.8839  loss_rpn_cls: 0.02943  loss_rpn_loc: 0.04096    time: 0.4797  last_time: 0.4281  data_time: 0.0100  last_data_time: 0.0068   lr: 9.9902e-06  max_mem: 2550M
[03/03 22:13:02 d2.utils.events]:  eta: 0:15:49  iter: 59  total_loss: 1.613  loss_cls: 0.6125  loss_box_reg: 0.93  loss_rpn_cls: 0.02827  loss_rpn_loc: 0.03677    time: 0.4894  last_time: 0.5059  data_time: 0.0125  last_data_time: 0.0057   lr: 1.4985e-05  max_mem: 2550M
[03/03 22:13:12 d2.utils.events]:  eta: 0:15:40  iter: 79  total_loss: 1.483  loss_cls: 0.5529  loss_box_re

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

## **1.2.3 Evaluate the model**
After training, evaluate over the validation set

In [None]:
# Evaluate on validation set
# COCOEvaluator takes explicit arguments; passing the cfg as its second
# (tasks) argument is deprecated. With tasks left unset, the evaluated tasks
# are inferred from the predictions.
eval_output_dir = f"./output/fine-tuning/eval/{model_name}"
evaluator = COCOEvaluator(dataset_val, distributed=False, output_dir=eval_output_dir)
val_loader = build_detection_test_loader(cfg, dataset_val)
print(inference_on_dataset(trainer.model, val_loader, evaluator))

[03/03 22:32:03 d2.evaluation.coco_evaluation]: Fast COCO eval is not built. Falling back to official COCO eval.
Category ids in annotations are not in [1, #categories]! We'll apply a mapping for you.

[03/03 22:32:03 d2.data.datasets.coco]: Loaded 133 images in COCO format from annotations_val.json
[03/03 22:32:03 d2.data.dataset_mapper]: [DatasetMapper] Augmentations used in inference: [ResizeShortestEdge(short_edge_length=(800, 800), max_size=1333, sample_style='choice')]
[03/03 22:32:03 d2.data.common]: Serializing the dataset using: <class 'detectron2.data.common._TorchSerializedList'>
[03/03 22:32:03 d2.data.common]: Serializing 133 elements to byte tensors and concatenating them all ...
[03/03 22:32:03 d2.data.common]: Serialized dataset takes 0.10 MiB
[03/03 22:32:03 d2.evaluation.evaluator]: Start inference on 133 batches
[03/03 22:32:05 d2.evaluation.evaluator]: Inference done 11/133. Dataloading: 0.0211 s/iter. Inference: 0.1389 s/iter. Eval: 0.0013 s/iter. Total: 0.1613 s/i

## **1.2.4 Use the trained model**

In [None]:
# Load trained weights. The trainer saves model_final.pth inside
# cfg.OUTPUT_DIR, so build the path from it instead of hard-coding "./output".
cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5  # Minimum score for a detection to be kept
predictor = DefaultPredictor(cfg)

[03/03 22:34:57 d2.checkpoint.detection_checkpoint]: [DetectionCheckpointer] Loading from ./output/model_final.pth ...


  return torch.load(f, map_location=torch.device("cpu"))


In [None]:
# Path to input video and output files
input_video_path = "vdo.avi"
output_video_path = "vdo_out.avi"
output_txt_folder = "bbox_output"
os.makedirs(output_txt_folder, exist_ok=True)

# Metadata (class names/colors) used by the Visualizer; constant for all frames.
metadata = MetadataCatalog.get(cfg.DATASETS.TRAIN[0])

cap = cv2.VideoCapture(input_video_path)
if not cap.isOpened():
    # Raise instead of exit(): the cell stops with a clear error.
    raise IOError("Error: Cannot open input file.")

# Keep the frame rate as a float so fractional rates (e.g. 29.97) are not truncated.
fps = cap.get(cv2.CAP_PROP_FPS)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

print(f"Frame width: {width}, height: {height}")

fourcc = cv2.VideoWriter_fourcc(*"XVID")  # Codec
out = cv2.VideoWriter(output_video_path, fourcc, fps, (width, height))

if not out.isOpened():
    cap.release()
    raise IOError("Error: Cannot open output file.")

print(f"Output video dimensions: {width}, {height}")

frame_number = 0

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        outputs = predictor(frame)

        instances = outputs["instances"]
        bboxes = instances.pred_boxes.tensor.cpu().numpy()  # Bounding boxes
        scores = instances.scores.cpu().numpy()  # Confidence
        labels = instances.pred_classes.cpu().numpy()  # Detected classes

        # One text file per frame: "label x1 y1 x2 y2 score" per detection
        txt_filename = os.path.join(output_txt_folder, f"frame_{frame_number:06d}.txt")
        with open(txt_filename, "w") as f:
            for bbox, score, label in zip(bboxes, scores, labels):
                x1, y1, x2, y2 = bbox
                f.write(f"{label} {x1} {y1} {x2} {y2} {score}\n")

        # Visualizer expects RGB, OpenCV frames are BGR: flip the channel axis
        # on the way in and again on the way out.
        v = Visualizer(frame[:, :, ::-1], metadata, scale=1.0)
        # Draw the detections of THIS frame. The previous code drew
        # `car_instances`, a variable this cell never assigns (it is left over
        # from the off-the-shelf cell), so the drawn boxes did not match the frame.
        v = v.draw_instance_predictions(instances.to("cpu"))

        processed_frame = v.get_image()[:, :, ::-1]

        out.write(processed_frame)

        frame_number += 1
finally:
    # Always release the capture and finalize the output file, even on error.
    cap.release()
    out.release()

cv2.destroyAllWindows()
print("Inference complete! The output video is saved as:", output_video_path)
print(f"Bounding boxes saved in folder: {output_txt_folder}")

In [None]:
# Evaluate the model over the test set
# COCOEvaluator takes explicit arguments; passing the cfg as its second
# (tasks) argument is deprecated.
evaluator = COCOEvaluator("dataset_test", distributed=False, output_dir="./output/")
test_loader = build_detection_test_loader(cfg, "dataset_test")
print(inference_on_dataset(trainer.model, test_loader, evaluator))

In [None]:
# Rebuild the fine-tuned model from a saved checkpoint and evaluate it on the
# test set without relying on the `trainer` object from the training cell.
cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml"))

cfg.MODEL.DEVICE = "cuda"
cfg.DATALOADER.NUM_WORKERS = 2
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1  # Adjust to the number of classes of our dataset
cfg.MODEL.WEIGHTS = "/content/drive/MyDrive/MCV/model_final.pth"
# NOTE(review): a 0.5 score threshold drops low-confidence detections before
# the COCO metrics are computed — confirm this is intended for evaluation.
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5

# Construct the model manually instead of using 'trainer'
model = build_model(cfg)
DetectionCheckpointer(model).load(cfg.MODEL.WEIGHTS)
model.eval()

# Eval over the test set. COCOEvaluator takes explicit arguments; passing the
# cfg as its second (tasks) argument is deprecated.
evaluator = COCOEvaluator("dataset_test", distributed=False, output_dir="./output/")
test_loader = build_detection_test_loader(cfg, "dataset_test")

results = inference_on_dataset(model, test_loader, evaluator)

# Persist the metrics next to the evaluator's own output files.
output_path = "output/results.json"
os.makedirs(os.path.dirname(output_path), exist_ok=True)
with open(output_path, "w") as f:
    json.dump(results, f, indent=4)

print(f"results saved at: {output_path}")

[03/04 00:13:42 d2.checkpoint.detection_checkpoint]: [DetectionCheckpointer] Loading from /content/drive/MyDrive/MCV/model_final.pth ...


  return torch.load(f, map_location=torch.device("cpu"))


[03/04 00:13:42 d2.evaluation.coco_evaluation]: Fast COCO eval is not built. Falling back to official COCO eval.
Category ids in annotations are not in [1, #categories]! We'll apply a mapping for you.

[03/04 00:13:43 d2.data.datasets.coco]: Loaded 1606 images in COCO format from /content/drive/MyDrive/MCV/annotations_test.json
[03/04 00:13:43 d2.data.dataset_mapper]: [DatasetMapper] Augmentations used in inference: [ResizeShortestEdge(short_edge_length=(800, 800), max_size=1333, sample_style='choice')]
[03/04 00:13:43 d2.data.common]: Serializing the dataset using: <class 'detectron2.data.common._TorchSerializedList'>
[03/04 00:13:43 d2.data.common]: Serializing 1606 elements to byte tensors and concatenating them all ...
[03/04 00:13:43 d2.data.common]: Serialized dataset takes 1.24 MiB
[03/04 00:13:43 d2.evaluation.evaluator]: Start inference on 1606 batches
[03/04 00:13:45 d2.evaluation.evaluator]: Inference done 11/1606. Dataloading: 0.0175 s/iter. Inference: 0.1360 s/iter. Eval: 