#  *VinBigData Detectron2*
![](https://images.medicinenet.com/images/article/main_image/chest-x-ray.jpg)

* This competition is an object detection task: automatically localize and classify thoracic abnormalities in chest X-ray images (radiographs), i.e. predict the class and the location of each abnormality.

* The model I used in this notebook is Detectron2.
  Detectron2 is Facebook AI Research's next-generation PyTorch library for object detection.
  It is a ground-up rewrite of the previous version, Detectron, and it originates from maskrcnn-benchmark.
  The link to their GitHub repository: [https://github.com/facebookresearch/detectron2](https://github.com/facebookresearch/detectron2)

* Detectron2 gave me better results than YOLOv5; you can check my other notebook, which uses YOLOv5:
[https://www.kaggle.com/khaledmgamal/vinbigdata-yolov5](https://www.kaggle.com/khaledmgamal/vinbigdata-yolov5)
* I learned a lot about Detectron2 from this kernel: [https://www.kaggle.com/corochann/vinbigdata-detectron2-train](https://www.kaggle.com/corochann/vinbigdata-detectron2-train)

In [None]:
!nvidia-smi

In [None]:
!nvcc --version

In [None]:
import torch

torch.__version__

# Installing Detectron2 
Refer to this link to learn more: [https://detectron2.readthedocs.io/en/latest/tutorials/install.html](https://detectron2.readthedocs.io/en/latest/tutorials/install.html)

In [None]:
!pip install detectron2 -f \
  https://dl.fbaipublicfiles.com/detectron2/wheels/cu102/torch1.7/index.html

# Train Detectron2 on custom dataset
you can follow this tutorial to learn more about that :
https://colab.research.google.com/drive/16jcaJoc6bCFAQ96jDe2HwtXj7BMD_-m5

In [None]:
import pickle
from pathlib import Path
from typing import Optional
import cv2
import numpy as np
import pandas as pd
from detectron2.structures import BoxMode
from tqdm import tqdm


def get_vinbigdata_dicts(
    imgdir: Path,
    train_df: pd.DataFrame,
    train_meta:pd.DataFrame,
    train_data_type: str = "original",
    use_cache: bool = True,
    debug: bool = True,
    target_indices: Optional[np.ndarray] = None,
    use_class14: bool = False,
):
    """Build (or load from a pickle cache) the Detectron2 dataset dicts for training images.

    Parameters:
        imgdir: directory that contains the ``train`` image folder
            (a ``Path`` or a plain string path are both accepted).
        train_df: dataframe with one row per annotation; the columns read here are
            ``image_id``, ``class_id``, ``x_min``, ``y_min``, ``x_max``, ``y_max``.
        train_meta: dataframe with exactly three columns per image, unpacked
            positionally as (image_id, original height, original width).
        train_data_type: free-form tag that only affects the cache file name.
        use_cache: when True and the cache file exists, skip creation and load it.
        debug: when True only the first 500 rows of ``train_meta`` are processed.
        target_indices: optional positions to select from the final list.
        use_class14: when True, class 14 ("No finding") rows are kept as a box
            covering the whole resized image; otherwise they are skipped.

    Returns:
        list of dict (dataset_dicts) where each dict contains:

        - file_name: path of the image file.
        - image_id: the image id taken from ``train_meta``.
        - height: height of the (resized) image on disk.
        - width: width of the (resized) image on disk.
        - annotations: list with one dict per bounding box, each holding
            - bbox: [xmin, ymin, xmax, ymax] rescaled to the resized image,
            - bbox_mode: BoxMode.XYXY_ABS,
            - category_id: class label id of the box.

    Raises:
        ValueError: if ``train_meta`` has no rows when the data must be created.
        FileNotFoundError: if the first image cannot be read from disk.

    NOTE: the cache file name depends only on ``train_data_type``, ``use_class14``
    and ``debug``; a cache built from different dataframes or a different
    ``imgdir`` is reused silently. Pass ``use_cache=False`` to rebuild.
    """
    cache_name = (
        f"dataset_dicts_cache_{train_data_type}"
        f"_14class{int(use_class14)}_debug{int(debug)}.pkl"
    )
    cache_path = Path(".") / cache_name
    if not use_cache or not cache_path.exists():
        print("Creating data...")
        if debug:
            train_meta = train_meta.iloc[:500]  # For debug....
        if len(train_meta) == 0:
            raise ValueError("train_meta is empty; there are no images to build dataset dicts from")

        # Path() makes both str and Path inputs work (string `+` fails on a Path).
        train_dir = Path(imgdir) / "train"

        # Load 1 image to get image size. Every image is assumed to share this
        # size, so it is recorded for all records below.
        # Positional access: does not require the index to contain the label 0.
        first_image_id = train_meta["image_id"].iloc[0]
        first_image_path = str(train_dir / f"{first_image_id}.png")
        image = cv2.imread(first_image_path)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(f"Could not read image: {first_image_path}")
        resized_height, resized_width = image.shape[:2]
        print(f"image shape: {image.shape}")

        # Group the annotation rows once instead of scanning train_df per image.
        # Row order inside each group is the original train_df order.
        boxes_by_image = {
            key: rows for key, rows in train_df.groupby("image_id", sort=False)
        }

        dataset_dicts = []
        for _, train_meta_row in tqdm(train_meta.iterrows(), total=len(train_meta)):
            image_id, height, width = train_meta_row.values
            record = {
                "file_name": str(train_dir / f"{image_id}.png"),
                "image_id": image_id,
                "height": resized_height,
                "width": resized_width,
            }
            objs = []
            image_rows = boxes_by_image.get(image_id)
            if image_rows is not None:
                for _, row in image_rows.iterrows():
                    class_id = int(row["class_id"])
                    if class_id == 14:
                        # It is "No finding"
                        if not use_class14:
                            # This annotator does not find anything, skip.
                            continue
                        # Use this No finding class with the bbox covering all image area.
                        bbox_resized = [0, 0, resized_width, resized_height]
                    else:
                        # Boxes are given in original-image pixels; rescale
                        # them to the resized image that is actually loaded.
                        h_ratio = resized_height / height
                        w_ratio = resized_width / width
                        bbox_resized = [
                            float(row["x_min"]) * w_ratio,
                            float(row["y_min"]) * h_ratio,
                            float(row["x_max"]) * w_ratio,
                            float(row["y_max"]) * h_ratio,
                        ]
                    objs.append(
                        {
                            "bbox": bbox_resized,
                            "bbox_mode": BoxMode.XYXY_ABS,
                            "category_id": class_id,
                        }
                    )
            record["annotations"] = objs
            dataset_dicts.append(record)
        with open(cache_path, mode="wb") as f:
            pickle.dump(dataset_dicts, f)

    print(f"Load from cache {cache_path}")
    # NOTE: pickle must only be used on trusted files; this one is written by
    # this function into the working directory.
    with open(cache_path, mode="rb") as f:
        dataset_dicts = pickle.load(f)
    if target_indices is not None:
        dataset_dicts = [dataset_dicts[i] for i in target_indices]
    return dataset_dicts

##############################################################################################################

def get_vinbigdata_dicts_test(
    imgdir: Path, test_meta: pd.DataFrame, use_cache: bool = True, debug: bool = True,
):
    """Build (or load from a pickle cache) the Detectron2 dataset dicts for test images.

    Parameters:
        imgdir: directory that contains the ``test`` image folder
            (a ``Path`` or a plain string path are both accepted).
        test_meta: dataframe with exactly three columns per image, unpacked
            positionally as (image_id, original height, original width).
        use_cache: when True and the cache file exists, skip creation and load it.
        debug: when True only the first 500 rows of ``test_meta`` are processed.

    Returns:
        list of dict (dataset_dicts) where each dict contains:

        - file_name: path of the image file.
        - image_id: the image id taken from ``test_meta``.
        - height: height of the (resized) image on disk.
        - width: width of the (resized) image on disk.

    Raises:
        ValueError: if ``test_meta`` has no rows when the data must be created.
        FileNotFoundError: if the first image cannot be read from disk.

    NOTE: the cache file name depends only on ``debug``; a cache built from a
    different ``test_meta`` or ``imgdir`` is reused silently. Pass
    ``use_cache=False`` to rebuild.
    """
    cache_path = Path(".") / f"dataset_dicts_cache_test_debug{int(debug)}.pkl"
    if not use_cache or not cache_path.exists():
        print("Creating data...")
        if debug:
            test_meta = test_meta.iloc[:500]  # For debug....
        if len(test_meta) == 0:
            raise ValueError("test_meta is empty; there are no images to build dataset dicts from")

        # Path() makes both str and Path inputs work (string `+` fails on a Path).
        test_dir = Path(imgdir) / "test"

        # Load 1 image to get image size. Every image is assumed to share this
        # size, so it is recorded for all records below.
        # Positional access: does not require the index to contain the label 0.
        first_image_id = test_meta["image_id"].iloc[0]
        image_path = str(test_dir / f"{first_image_id}.png")
        print(image_path)
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(f"Could not read image: {image_path}")
        resized_height, resized_width = image.shape[:2]
        print(f"image shape: {image.shape}")

        dataset_dicts = []
        for _, test_meta_row in tqdm(test_meta.iterrows(), total=len(test_meta)):
            # The original height/width are not stored in the record; only the
            # three-column unpacking is relied on here.
            image_id, _height, _width = test_meta_row.values
            dataset_dicts.append(
                {
                    "file_name": str(test_dir / f"{image_id}.png"),
                    "image_id": image_id,
                    "height": resized_height,
                    "width": resized_width,
                }
            )
        with open(cache_path, mode="wb") as f:
            pickle.dump(dataset_dicts, f)

    print(f"Load from cache {cache_path}")
    # NOTE: pickle must only be used on trusted files; this one is written by
    # this function into the working directory.
    with open(cache_path, mode="rb") as f:
        dataset_dicts = pickle.load(f)
    return dataset_dicts


* When training this model I did not use the original images provided by the competition; I used a resized copy of the dataset in which every image is 256x256, to speed up training.
* Link to the dataset: [https://www.kaggle.com/xhlulu/vinbigdata-chest-xray-resized-png-256x256](https://www.kaggle.com/xhlulu/vinbigdata-chest-xray-resized-png-256x256)

In [None]:
# Read train.csv, which contains the train image ids and the bounding boxes of the annotated thoracic abnormalities.
train_df=pd.read_csv('/kaggle/input/vinbigdata-chest-xray-abnormalities-detection/train.csv')
# Read train_meta.csv, which contains the train image ids and the original size of each resized image.
train_meta_df_ = pd.read_csv('../input/vinbigdata-chest-xray-resized-png-256x256/train_meta.csv')
# Read test_meta.csv, which contains the test image ids and the original size of each resized image.
test_meta_df=pd.read_csv('../input/vinbigdata-testmeta/test_meta.csv')

# Train and test images live in the same resized dataset ("train" / "test" sub-folders).
imgdir='../input/vinbigdata-chest-xray-resized-png-256x256'
test_imgdir='../input/vinbigdata-chest-xray-resized-png-256x256'

# Split the train metadata into train (80%) and validation (20%) sets; fixed seed for reproducibility.
from sklearn.model_selection import train_test_split

train_meta_df,val_meta_df=train_test_split(train_meta_df_, test_size=0.2, random_state=42)

# Re-number the rows from 0 after the split so label-based and positional indexing agree.
train_meta_df.reset_index(drop=True, inplace=True)
val_meta_df.reset_index(drop=True, inplace=True)
# Create the training, validation and test lists of dataset dicts.
# Distinct train_data_type values keep the train and val pickle caches in separate files.
train_dict_list=get_vinbigdata_dicts(imgdir,train_df,train_meta_df,train_data_type='train',debug=False)
val_dict_list=get_vinbigdata_dicts(imgdir,train_df,val_meta_df,train_data_type='val',debug=False)

test_dict_list=get_vinbigdata_dicts_test(test_imgdir,test_meta_df,debug=False)

In [None]:
# Names of the abnormality classes; a name's position in the list is its class id (0-13).
# Class id 14 ("No finding" in train.csv) has no entry here.
thing_classes = [
    "Aortic enlargement",
    "Atelectasis",
    "Calcification",
    "Cardiomegaly",
    "Consolidation",
    "ILD",
    "Infiltration",
    "Lung Opacity",
    "Nodule/Mass",
    "Other lesion",
    "Pleural effusion",
    "Pleural thickening",
    "Pneumothorax",
    "Pulmonary fibrosis"
]
# Reverse lookup: class name -> class id.
category_name_to_id = {class_name: index for index, class_name in enumerate(thing_classes)}



# Register a Dataset
Refer to this tutorial: [https://detectron2.readthedocs.io/en/latest/tutorials/datasets.html](https://detectron2.readthedocs.io/en/latest/tutorials/datasets.html)

In [None]:
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog, DatasetCatalog

# Register each split under a name; the lambda is the loader that returns the list of dataset dicts.
# The metadata attached to each name carries the class names.
# NOTE(review): re-running this cell in the same session presumably fails because the names are already registered — confirm.
DatasetCatalog.register("vinbigdata_train",lambda: get_vinbigdata_dicts(imgdir,train_df,train_meta_df,train_data_type='train',debug=False),)
train_metadata=MetadataCatalog.get("vinbigdata_train").set(thing_classes=thing_classes)

DatasetCatalog.register("vinbigdata_val",lambda: get_vinbigdata_dicts(imgdir,train_df,val_meta_df,train_data_type='val',debug=False),)
val_metadata=MetadataCatalog.get("vinbigdata_val").set(thing_classes=thing_classes)

# The test split has no annotations; its dicts come from get_vinbigdata_dicts_test.
DatasetCatalog.register("vinbigdata_test",lambda: get_vinbigdata_dicts_test(test_imgdir,test_meta_df,debug=False),)
test_metadata=MetadataCatalog.get("vinbigdata_test").set(thing_classes=thing_classes)

In [None]:
import random
import matplotlib.pyplot as plt

# Ids of images that have at least one annotation other than class 14 ("No finding").
anomaly_image_ids = train_df.query("class_id != 14")["image_id"].unique()#[0:500]
# Row positions in train_meta_df of those images; train_dict_list was built from train_meta_df,
# so the same positions are used to index it below.
anomaly_inds = np.argwhere(train_meta_df[:]["image_id"].isin(anomaly_image_ids).values)[:, 0]

# Show 3 random annotated images: the raw image (left) and the ground-truth boxes drawn on it (right).
for index in random.sample(anomaly_inds.tolist(), 3):
    d=train_dict_list[index]
    img = cv2.imread(d["file_name"])
    # cv2 loads BGR; the channel order is reversed before the image is handed to the Visualizer.
    visualizer = Visualizer(img[:, :, ::-1], metadata=train_metadata, scale=1.0)
    out = visualizer.draw_dataset_dict(d)
    fig, axes = plt.subplots(1, 2, figsize=(20,20))
    axes[0].imshow(img)    
    # NOTE(review): the channels are reversed again before plotting; if get_image() already returns RGB,
    # the overlay colours are shown with red/blue swapped — confirm against the Visualizer docs.
    axes[1].imshow(out.get_image()[:, :, ::-1])


# Adding image augmentation to the training pipline
Refer to this link for more info: [https://detectron2.readthedocs.io/en/latest/tutorials/augmentation.html](https://detectron2.readthedocs.io/en/latest/tutorials/augmentation.html)

A great kernel about adding image augmentation in Detectron2: [https://www.kaggle.com/dhiiyaur/detectron-2-compare-models-augmentation](https://www.kaggle.com/dhiiyaur/detectron-2-compare-models-augmentation)

In [None]:
import albumentations as A
import copy
import numpy as np

import torch
from detectron2.data import detection_utils as utils


class AlbumentationsMapper:
    """Dataset mapper that augments images and boxes with `albumentations`.

    An instance is a callable that turns one dataset dict into the dict handed
    to the model: it reads the image, applies the composed transform to the
    image and its bounding boxes, and stores the results under ``"image"``
    and ``"instances"``.
    """

    def __init__(self, cfg, is_train: bool = True):
        """Compose the augmentation pipeline.

        ``cfg.aug_kwargs`` maps albumentations class names to their keyword
        arguments. The augmentations are only instantiated when ``is_train``
        is True; otherwise the composed transform is empty.
        """
        aug_kwargs = cfg.aug_kwargs
        augmentations = []
        if is_train:
            for aug_name, aug_params in aug_kwargs.items():
                aug_class = getattr(A, aug_name)
                augmentations.append(aug_class(**aug_params))

        # Boxes are passed as (xmin, ymin, xmax, ymax) in pixels ("pascal_voc").
        box_params = A.BboxParams(format="pascal_voc", label_fields=["category_ids"])
        self.transform = A.Compose(augmentations, bbox_params=box_params)
        self.is_train = is_train

        mode = "training" if is_train else "inference"
        print(f"[AlbumentationsMapper] Augmentations used in {mode}: {self.transform}")

    def __call__(self, dataset_dict):
        """Return an augmented copy of ``dataset_dict`` with "image" and "instances" set."""
        # Work on a copy: the record is modified below.
        record = copy.deepcopy(dataset_dict)
        image = utils.read_image(record["file_name"], format="BGR")

        source_annos = record["annotations"]
        boxes = np.array([anno["bbox"] for anno in source_annos], dtype=np.float32)
        # The "label" given to albumentations is each box's position in
        # source_annos, so every returned box can be matched back to its
        # annotation even if the transform drops or reorders boxes.
        anno_positions = np.arange(len(source_annos))

        augmented = self.transform(image=image, bboxes=boxes, category_ids=anno_positions)
        image = augmented["image"]

        kept_annos = []
        for new_box, position in zip(augmented["bboxes"], augmented["category_ids"]):
            anno = source_annos[position]
            anno["bbox"] = new_box
            kept_annos.append(anno)
        # The raw annotations are replaced by "instances" below.
        record.pop("annotations", None)

        height_width = image.shape[:2]  # h, w
        # HWC array -> CHW float32 tensor.
        record["image"] = torch.as_tensor(image.transpose(2, 0, 1).astype("float32"))
        instances = utils.annotations_to_instances(kept_annos, height_width)
        record["instances"] = utils.filter_empty_instances(instances)
        return record

In [None]:
import detectron2.data.transforms as T
from detectron2.engine import DefaultTrainer
from detectron2.data import DatasetCatalog, MetadataCatalog, build_detection_test_loader, build_detection_train_loader
from detectron2.data import detection_utils as utils
import torch

def custom_mapper(dataset_dict):
    """Map one dataset dict to model input using Detectron2's own transforms.

    Similar in spirit to the default DatasetMapper, but with a customizable
    transform list (currently empty). Returns a copy of ``dataset_dict`` with
    the image tensor under "image" and the transformed annotations under
    "instances"; the "annotations" key is removed.
    """
    # Work on a copy: the record is modified below.
    record = copy.deepcopy(dataset_dict)
    image = utils.read_image(record["file_name"], format="BGR")

    # Add detectron2 transforms here, e.g. T.Resize((800, 800)) or T.RandomFlip(...).
    transform_list = []
    image, transforms = T.apply_transform_gens(transform_list, image)
    height_width = image.shape[:2]

    # HWC array -> CHW float32 tensor.
    record["image"] = torch.as_tensor(image.transpose(2, 0, 1).astype("float32"))

    kept_annos = []
    for obj in record.pop("annotations"):
        # Annotations flagged as crowd regions are left out.
        if obj.get("iscrowd", 0) != 0:
            continue
        kept_annos.append(utils.transform_instance_annotations(obj, transforms, height_width))

    instances = utils.annotations_to_instances(kept_annos, height_width)
    record["instances"] = utils.filter_empty_instances(instances)
    return record

class Trainer_(DefaultTrainer):
    """DefaultTrainer whose training data loader uses AlbumentationsMapper."""

    @classmethod
    def build_train_loader(cls, cfg):
        """Build the training loader with the albumentations mapper in training mode."""
        # To use the detectron2-transform mapper instead, pass mapper=custom_mapper.
        train_mapper = AlbumentationsMapper(cfg, is_train=True)
        return build_detection_train_loader(cfg, mapper=train_mapper)
    


In [None]:
from dataclasses import dataclass, field
from typing import Dict


@dataclass
class Flags:
    """Run configuration for this notebook, with defaults that `update` can override."""

    # General
    debug: bool = True
    outdir: str = "results/det"

    # Data config
    imgdir_name: str = "vinbigdata-chest-xray-resized-png-256x256"
    split_mode: str = "all_train"  # all_train or valid20
    seed: int = 111
    train_data_type: str = "original"  # original or wbf
    use_class14: bool = False
    # Training config
    iter: int = 10000
    ims_per_batch: int = 2  # images per batch, this corresponds to "total batch size"
    num_workers: int = 4
    lr_scheduler_name: str = "WarmupMultiStepLR"  # WarmupMultiStepLR (default) or WarmupCosineLR
    base_lr: float = 0.00025
    roi_batch_size_per_image: int = 512
    #eval_period: int = 10000
    # Maps an augmentation name to its keyword arguments; a fresh dict per instance.
    aug_kwargs: Dict = field(default_factory=dict)

    def update(self, param_dict: Dict) -> "Flags":
        """Overwrite attributes with the entries of ``param_dict`` and return ``self``.

        All keys are validated before anything is assigned, so a rejected
        ``param_dict`` leaves the instance unchanged.

        Raises:
            ValueError: if a key is not an existing attribute; the message
                names the first such key in iteration order.
        """
        for key in param_dict:
            if not hasattr(self, key):
                raise ValueError(f"[ERROR] Unexpected key for flag = {key}")
        for key, value in param_dict.items():
            setattr(self, key, value)
        return self

In [None]:
# Overrides applied on top of the Flags defaults for this run.
flags_dict = {
    "debug": False,
    "outdir": "results", 
    "imgdir_name": "vinbigdata-chest-xray-resized-png-256x256",
    #"split_mode": "valid20",
    "iter": 50000,
    "roi_batch_size_per_image": 512,
    #"eval_period": 5000,
    "lr_scheduler_name": "WarmupCosineLR",
    "base_lr": 0.001,
    "num_workers": 4,
    # albumentations class name -> keyword arguments; consumed by AlbumentationsMapper.
    "aug_kwargs": {
        "HorizontalFlip": {"p": 0.5},
        "ShiftScaleRotate": {"scale_limit": 0.15, "rotate_limit": 10, "p": 0.5},
        "RandomBrightnessContrast": {"p": 0.5}
    }
}

In [None]:
from detectron2.engine import DefaultTrainer
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog, DatasetCatalog
from detectron2.utils.logger import setup_logger
import copy
setup_logger()
import os

# Build the run flags: defaults overridden by flags_dict.
flags = Flags().update(flags_dict)
from detectron2.config.config import CfgNode as CN

cfg = get_cfg()
cfg.aug_kwargs = CN(flags.aug_kwargs)  # pass aug_kwargs to cfg

original_output_dir = cfg.OUTPUT_DIR
# The override below is disabled, so both values printed next are the same and flags.outdir is not used here.
#cfg.OUTPUT_DIR = str(outdir)
print(f"cfg.OUTPUT_DIR {original_output_dir} -> {cfg.OUTPUT_DIR}")

# Start from the model-zoo Faster R-CNN (ResNet-50 FPN, 3x schedule) config.
config_name = "COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml"
cfg.merge_from_file(model_zoo.get_config_file(config_name))
cfg.DATASETS.TRAIN = ("vinbigdata_train",)
# No test dataset is configured for the training run.
cfg.DATASETS.TEST=()
cfg.DATALOADER.NUM_WORKERS = flags.num_workers
# Let training initialize from model zoo
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(config_name)
cfg.SOLVER.IMS_PER_BATCH = flags.ims_per_batch
cfg.SOLVER.LR_SCHEDULER_NAME = flags.lr_scheduler_name
cfg.SOLVER.BASE_LR = flags.base_lr  # pick a good LR
cfg.SOLVER.MAX_ITER = flags.iter
# The period (100000) is larger than the iteration count set in flags_dict (50000).
cfg.SOLVER.CHECKPOINT_PERIOD = 100000  # Small value=Frequent save need a lot of storage.
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = flags.roi_batch_size_per_image
cfg.MODEL.ROI_HEADS.NUM_CLASSES = len(thing_classes)
# NOTE: this config means the number of classes,
# but a few popular unofficial tutorials incorrectly use num_classes+1 here.

os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)


In [None]:
# Build the trainer and pull a single batch from the augmented training loader,
# so the augmentations can be inspected in the next cell.
trainer_=Trainer_(cfg)
train_data_loader = trainer_.build_train_loader(cfg)
data_iter = iter(train_data_loader)
batch = next(data_iter)

In [None]:
# visualization

#rows, cols = 1, 2
#plt.figure(figsize=(20,20))

# For up to 4 images of the batch, plot: original image, original with ground-truth boxes,
# augmented image, augmented image with its (augmented) boxes.
for i, per_image in enumerate(batch[:4]):
    
    image_id=per_image['image_id']
    # Linear search for the un-augmented dataset dict with the same image id.
    # NOTE(review): if no dict matches, `d` keeps its value from an earlier iteration/cell (or is undefined).
    for dict_ in train_dict_list:
        if dict_['image_id']==image_id:
           d=dict_
        
    #plt.subplot(rows, cols, i+1)

    
    
    # Original image and its ground-truth annotations.
    image=cv2.imread(d["file_name"])
    visualizer_ = Visualizer(image, metadata=train_metadata, scale=1.0)
    out = visualizer_.draw_dataset_dict(d)
    
    
    # Pytorch tensor is in (C, H, W) format; permute to (H, W, C) for plotting.
    img_aug = per_image["image"].permute(1, 2, 0).cpu().detach().numpy()
    img_aug = utils.convert_image_to_rgb(img_aug, cfg.INPUT.FORMAT)
    
    visualizer = Visualizer(img_aug, metadata=train_metadata, scale=1.0)

    # Ground-truth fields produced by the mapper for the augmented image.
    target_fields = per_image["instances"].get_fields()
    labels = [
                train_metadata.thing_classes[i] for i in target_fields["gt_classes"]
            ]

    out_aug = visualizer.overlay_instances(
        labels=labels,
        boxes=target_fields.get("gt_boxes", None),
        masks=target_fields.get("gt_masks", None),
        keypoints=target_fields.get("gt_keypoints", None),
    )  
    
    fig, axes = plt.subplots(1, 4, figsize=(40,40))
    axes[0].imshow(image)    
    axes[1].imshow(out.get_image()[:, :, ::-1])
    # Cast the float image to int so matplotlib treats the values as 0-255 pixel values.
    axes[2].imshow(img_aug.astype("int"))    
    axes[3].imshow(out_aug.get_image()[:, :, ::-1])
    

# Training the model

In [None]:
#trainer_=Trainer_(cfg)
#trainer_.resume_or_load(resume=False)
#trainer_.train()

# load the trained model

In [None]:
import os
# Use previously trained weights from an attached dataset instead of cfg.OUTPUT_DIR/model_final.pth.
cfg.MODEL.WEIGHTS = '../input/detectron2-vinbigdata-model-final-weights/model_final.pth'#os.path.join(cfg.OUTPUT_DIR, "model_final.pth")  # path to the model we just trained
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.4   # set a custom testing threshold
predictor = DefaultPredictor(cfg)

# The evaluation on the validation dataset 

* As in most object detection problems, the evaluation metric for this competition is mean Average Precision (mAP), computed here at IoU > 0.4.
* To learn more about mAP, refer to these links:
* [https://towardsdatascience.com/breaking-down-mean-average-precision-map-ae462f623a52](https://towardsdatascience.com/breaking-down-mean-average-precision-map-ae462f623a52)
* [https://www.youtube.com/watch?v=FppOzcDvaDI&t=1s](https://www.youtube.com/watch?v=FppOzcDvaDI&t=1s)

In [None]:
from detectron2.evaluation import COCOEvaluator, inference_on_dataset

# Run the loaded model over the registered validation split and score it with the COCO evaluator.
# NOTE(review): the predictor was built with SCORE_THRESH_TEST = 0.4, so low-score boxes are presumably
# already discarded before scoring — confirm this is intended for the reported AP.
evaluator = COCOEvaluator("vinbigdata_val", cfg, False, output_dir=cfg.OUTPUT_DIR)
val_loader = build_detection_test_loader(cfg, "vinbigdata_val")
inference_on_dataset(predictor.model, val_loader, evaluator)#trainer_.model

# The visualization of the output of the trained model

In [None]:
from typing import Any, Union

from typing import Any, Dict, List
from numpy import ndarray

def predict_batch(predictor: DefaultPredictor, im_list: List[ndarray]) -> List:
    """Run the predictor's model on a list of images in a single forward call.

    Each image is an (H, W, C) array in BGR channel order; the channels are
    reversed when the predictor expects RGB input. The predictor's own resize
    augmentation is deliberately not applied: images are fed at their own size.

    Returns whatever ``predictor.model`` returns for the batch of inputs.
    """

    def _to_model_input(bgr_image):
        # Reverse the channel axis only when the model was configured for RGB.
        if predictor.input_format == "RGB":
            frame = bgr_image[:, :, ::-1]
        else:
            frame = bgr_image
        height, width = frame.shape[:2]
        # HWC -> CHW float32 tensor.
        chw = torch.as_tensor(frame.astype("float32").transpose(2, 0, 1))
        return {"image": chw, "height": height, "width": width}

    with torch.no_grad():  # https://github.com/sphinx-doc/sphinx/issues/4258
        inputs_list = [_to_model_input(original_image) for original_image in im_list]
        return predictor.model(inputs_list)
    
# Ids of images that have at least one annotation other than class 14 ("No finding").
anomaly_image_ids = train_df.query("class_id != 14")["image_id"].unique()
# NOTE(review): these positions refer to the full, unsplit train_meta.csv, while train_dict_list was
# built from the 80% split; they do not identify the same images. They only feed the
# commented-out random.sample below.
train_meta = pd.read_csv(imgdir+"/train_meta.csv")
anomaly_inds = np.argwhere(train_meta["image_id"].isin(anomaly_image_ids).values)[:, 0]
anomaly_inds=[index for index in anomaly_inds if index < len(train_dict_list)]
# Fixed positions into train_dict_list so the same six images are shown on every run.
index_list=[1999, 8190, 1544, 9247, 9521, 10621]#random.sample(anomaly_inds, 6)
dicts_=[train_dict_list[index] for index in index_list]

im_list=[cv2.imread(train_dict_list[index]["file_name"]) for index in index_list]

outputs_list = predict_batch(predictor, im_list)    
    

# For each image plot: raw image, ground-truth boxes, predicted boxes.
for img,d,output in zip(im_list,dicts_,outputs_list):
    visualizer = Visualizer(img[:, :, ::-1], metadata=train_metadata, scale=1.0)
    out = visualizer.draw_dataset_dict(d)
    
    v = Visualizer(img[:, :, ::-1],
                   metadata=train_metadata, 
                   scale=1.0, )


    # Predictions are moved to the CPU before drawing.
    out_ = v.draw_instance_predictions(output["instances"].to("cpu"))
    
    fig, axes = plt.subplots(1, 3, figsize=(40,40))
    axes[0].imshow(img)    
    axes[1].imshow(out.get_image()[:, :, ::-1])
    axes[2].imshow(out_.get_image()[:, :, ::-1])
    

In [None]:
# File names present in the resized test image folder.
test_list=os.listdir('../input/vinbigdata-chest-xray-resized-png-256x256/test')

In [None]:
# Number of rows (images) in the test metadata.
len(test_meta_df)

# Making the submission file

In [None]:
from tqdm import tqdm
from math import ceil
# Number of test images sent through the model per forward call.
batch_size=4
# One {"image_id", "PredictionString"} dict per test image, filled by the loop below.
results_list=[]

def format_pred(labels: ndarray, boxes: ndarray, scores: ndarray) -> str:
    """Serialise predictions as "label score xmin ymin xmax ymax" groups joined by spaces.

    ``labels``, ``scores`` and ``boxes`` are iterated in lockstep; each box is
    an array of four coordinates that is cast to int64 (fractions are dropped).
    Returns an empty string when there are no predictions.
    """

    def _one_prediction(label, score, bbox):
        xmin, ymin, xmax, ymax = bbox.astype(np.int64)
        return f"{label} {score} {xmin} {ymin} {xmax} {ymax}"

    return " ".join(
        _one_prediction(label, score, bbox)
        for label, score, bbox in zip(labels, scores, boxes)
    )
# Running position into test_meta_df, advanced once per processed image.
# NOTE(review): this assumes test_dict_list and test_meta_df hold the same images in the same order
# (d["image_id"] is available but is not cross-checked) — confirm, especially if a stale cache is loaded.
index=0

# Process the test set in batches of batch_size images (the last batch may be smaller).
for i in tqdm(range(ceil(len(test_dict_list) / batch_size))):
    inds = list(range(batch_size * i, min(batch_size * (i + 1), len(test_dict_list))))
    dataset_dicts_batch = [test_dict_list[i] for i in inds]
    im_list = [cv2.imread(d["file_name"]) for d in dataset_dicts_batch]
    outputs_list = predict_batch(predictor, im_list)

    for im, outputs, d in zip(im_list, outputs_list, dataset_dicts_batch):
        resized_height, resized_width, ch = im.shape

        # dim0 / dim1 are used below as the original image height / width.
        image_id, dim0, dim1 = test_meta_df.iloc[index].values

        instances = outputs["instances"]
        if len(instances) == 0:
            # No detection: emit the "No finding" row (class 14, score 1.0, box 0 0 1 1).
            result = {"image_id": image_id, "PredictionString": "14 1.0 0 0 1 1"}
        else:
            # Find some bbox...
            # print(f"index={index}, find {len(instances)} bbox.")
            fields: Dict[str, Any] = instances.get_fields()
            pred_classes = fields["pred_classes"]  # (n_boxes,)
            pred_scores = fields["scores"]
            # shape (n_boxes, 4). (xmin, ymin, xmax, ymax)
            pred_boxes = fields["pred_boxes"].tensor

            # Scale the boxes (in place) from the resized image back to the original image size.
            h_ratio = dim0 / resized_height
            w_ratio = dim1 / resized_width
            pred_boxes[:, [0, 2]] *= w_ratio
            pred_boxes[:, [1, 3]] *= h_ratio

            pred_classes_array = pred_classes.cpu().numpy()
            pred_boxes_array = pred_boxes.cpu().numpy()
            pred_scores_array = pred_scores.cpu().numpy()

            result = {
                "image_id": image_id,
                "PredictionString": format_pred(
                    pred_classes_array, pred_boxes_array, pred_scores_array
                ),
            }
        results_list.append(result)
        index += 1

In [None]:
# Collect the per-image results into a two-column table and write it without the index column.
submission_det = pd.DataFrame(results_list, columns=['image_id', 'PredictionString'])
submission_det.to_csv("./submission.csv", index=False)
#submission_det

In [None]:
from IPython.display import FileLink
FileLink(r'./submission.csv')