## Install Detectron2


In [1]:
%%capture
# (%%capture has to remain the first line of the cell; it hides the output of everything below.)
import sys, os, distutils.core
# Note: This is a faster way to install detectron2 in Colab, but it does not include all functionalities (e.g. compiled operators).
# See https://detectron2.readthedocs.io/tutorials/install.html for full installation instructions
# NOTE(review): `distutils` was removed from the standard library in Python 3.12 - confirm the kernel version.
!git clone 'https://github.com/facebookresearch/detectron2'
# Evaluate the cloned setup.py to obtain its Distribution object; only `install_requires` is used below.
dist = distutils.core.run_setup("./detectron2/setup.py")
# Install every declared requirement; each one is single-quoted so version specifiers survive the shell.
!python -m pip install {' '.join([f"'{x}'" for x in dist.install_requires])}
# Make the cloned source tree importable as `detectron2` without installing the package itself.
sys.path.insert(0, os.path.abspath('./detectron2'))

## Import Libraries


In [2]:
from detectron2.utils.memory import retry_if_cuda_oom
from detectron2.utils.logger import setup_logger
from detectron2.checkpoint import DetectionCheckpointer
from detectron2.modeling import build_model
from detectron2.evaluation import COCOEvaluator, inference_on_dataset
import detectron2.data.transforms as T
from detectron2.data import detection_utils as utils
from detectron2.data import DatasetCatalog, MetadataCatalog, build_detection_test_loader, build_detection_train_loader, DatasetMapper
from detectron2.utils.visualizer import Visualizer
from detectron2.structures import BoxMode
from detectron2.engine import DefaultPredictor, DefaultTrainer
from detectron2.config import get_cfg
from detectron2 import model_zoo

import pandas as pd
import numpy as np
from tqdm.notebook import tqdm  # progress bar
import matplotlib.pyplot as plt
import json
import cv2
import copy
from typing import Optional

from IPython.display import FileLink
import sys
# torch
import torch

import gc

import warnings
# Silence warnings for the rest of the session.
# NOTE: 'ignore' without a category suppresses *every* warning, not only
# FutureWarning / pandas DataFrame-slicing warnings.
warnings.filterwarnings('ignore')

# Initialise detectron2's logger; the returned logger object is echoed as the cell output.
setup_logger()

<_Logger detectron2 (DEBUG)>

## Downloading unilm


In [7]:
# Fetch the unilm archive from Google Drive by file ID using gdown.
# Per the cell output, the download is saved as /kaggle/working/unilm.zip.
!pip install gdown
!gdown 1KQTZ6mXstpckzAix3k3XPtY-iEdqyeKD

Collecting gdown
  Using cached gdown-4.7.1-py3-none-any.whl (15 kB)
Installing collected packages: gdown
Successfully installed gdown-4.7.1
Downloading...
From (uriginal): https://drive.google.com/uc?id=1KQTZ6mXstpckzAix3k3XPtY-iEdqyeKD
From (redirected): https://drive.google.com/uc?id=1KQTZ6mXstpckzAix3k3XPtY-iEdqyeKD&confirm=t&uuid=00b595b6-3e33-48f9-8c9e-b42ab9b7778c
To: /kaggle/working/unilm.zip
100%|█████████████████████████████████████████| 108M/108M [00:00<00:00, 167MB/s]


## Unzipping unilm


In [9]:
# Replace '/kaggle/working/unilm.zip' with the actual path to your 'unilm.zip' file
zip_file_path = '/kaggle/working/unilm.zip'

# Replace 'unilm' with the name of the folder where you want to unzip the contents.
# The folder is relative to the current working directory; later cells refer to
# /kaggle/working/unilm/..., so the notebook assumes it runs from /kaggle/working.
output_folder = 'unilm'


# Unzip the file (IPython expands $zip_file_path / $output_folder from the Python variables above)
!unzip $zip_file_path -d $output_folder

## Setting Path


In [10]:
# Put the extracted layoutlmv3 directory on the import path so that
# `examples.object_detection.ditod` can be imported further down.
sys.path.insert(1, "/kaggle/working/unilm/layoutlmv3")


In [11]:
# Patch the extracted source in place: import `Iterable` from `collections.abc`
# rather than `collections` (that alias no longer exists on Python 3.10+).
! sed -i 's/from collections import Iterable/from collections.abc import Iterable/' /kaggle/working/unilm/layoutlmv3/examples/object_detection/ditod/table_evaluation/data_structure.py


## Importing vit config


In [12]:
from examples.object_detection.ditod import add_vit_config


In [14]:
# cfg = get_cfg()
# # Add ViT-specific config keys (add_vit_config)
# add_vit_config(cfg)
# # Load a config from file
# cfg.merge_from_file("unilm/layoutlmv3/examples/object_detection/cascade_layoutlmv3.yaml")
# print(cfg)

## Setting Condition


In [16]:
from datetime import datetime

# Run-control flags.
# NOTE(review): in the cells visible in this notebook only `is_inference` is read;
# the remaining flags and SEED are not referenced - confirm before relying on them.

# if False, model is set to `PRETRAINED_PATH` model
is_train = True

# if True, evaluate on validation dataset
is_evaluate = False

# if True, run inference on test dataset
is_inference = True

# if True and `is_train` == True, `PRETRAINED_PATH` model is trained further
is_resume_training = False

# Perform augmentation
is_augment = False

# Random seed constant (not applied to any library in the visible cells)
SEED = 42

## Defining Path


In [17]:
from pathlib import Path


# Directory holding the test images; joined with each `file_name` when dataset records are built.
TEST_IMG_DIR = Path("/kaggle/input/dlsprint2/badlad/images/test")

# COCO-style JSON describing the test images (loaded into `test_dict` below).
TEST_METADATA_PATH = Path("/kaggle/input/dlsprint2/badlad/badlad-test-metadata.json")

# Training output directory
# NOTE(review): OUTPUT_DIR / OUTPUT_MODEL are not referenced by the visible inference cells.
OUTPUT_DIR = Path("./output")
OUTPUT_MODEL = OUTPUT_DIR/"model_final.pth"

# Path to your pretrained model weights
# Path("") is just the current directory ('.'), i.e. an unset placeholder.
PRETRAINED_PATH = Path("")

## JSON Load


In [18]:
# Parse the test-split metadata JSON into a plain dict.
with open(TEST_METADATA_PATH) as f:
    test_dict = json.load(f)

# Progress marker for the notebook log.
print("#### LABELS AND METADATA LOADED ####")

#### LABELS AND METADATA LOADED ####


## Organizing COCO


In [19]:
def organize_coco_data(data_dict: dict) -> tuple[list[str], list[dict], list[dict]]:
    """Split a COCO-style dict into class names, image metadata and flat annotations.

    Args:
        data_dict: Mapping with the keys 'categories' (each entry has 'name'),
            'images', and 'annotations' (each entry has 'id', 'segmentation',
            'image_id', 'category_id' and a 'bbox' of [left, top, width, height]).

    Returns:
        A 3-tuple ``(thing_classes, images_metadata, data_annotations)``:
        category names in the order they are listed, the 'images' list itself
        (not a copy), and one flat record per annotation whose box is expressed
        as x_min / y_min / x_max / y_max corners.

    Raises:
        KeyError: If a required key is missing.
    """

    def _flatten(ann: dict) -> dict:
        # Identity / label fields are copied over as-is.
        record = {
            "id": ann['id'],                    # annotation ID
            "gt_masks": ann['segmentation'],    # segmentation polygon (x, y) coords
            "image_id": ann['image_id'],        # image this annotation belongs to
            # Category label (0: paragraph, 1: text box, 2: image, 3: table)
            "category_id": ann['category_id'],
        }
        # COCO boxes are [left, top, width, height]; turn them into corners.
        box = ann['bbox']
        record["x_min"] = box[0]
        record["y_min"] = box[1]
        record["x_max"] = box[0] + box[2]
        record["y_max"] = box[1] + box[3]
        return record

    # Class names, positionally aligned with the order of 'categories'.
    thing_classes: list[str] = [category['name'] for category in data_dict['categories']]

    # Image metadata is passed through untouched.
    images_metadata: list[dict] = data_dict['images']

    data_annotations = [_flatten(ann) for ann in data_dict['annotations']]

    return thing_classes, images_metadata, data_annotations

In [20]:
# Only class names and image metadata are needed for the test split;
# the flattened annotation list is discarded.
thing_classes_test, images_metadata_test, _ = organize_coco_data(test_dict)

In [21]:
# One row per test image: keep the four fields the dataset builder unpacks
# positionally and rename `id` to `image_id`.
test_metadata = (
    pd.DataFrame(images_metadata_test)
    .loc[:, ['id', 'file_name', 'width', 'height']]
    .rename(columns={"id": "image_id"})
)
print("test_metadata size=", len(test_metadata))
test_metadata.head(5)

test_metadata size= 13000


Unnamed: 0,image_id,file_name,width,height
0,0,96eee398-1275-4768-be89-ec945e6c8bb0.png,720,1018
1,1,9b77c241-8292-4133-ab7a-0398a99f30a8.png,720,1019
2,2,3a6ac54b-d3f6-4783-9f71-b6ae29c93f7d.png,720,1105
3,3,2d0e29cd-83cb-4426-9663-1368c1975c37.png,1080,1920
4,4,f8c22a4a-6c89-4179-8845-12405bfd0035.png,1080,1920


## Registering Data


In [22]:
# Name under which the test split is registered in DatasetCatalog / MetadataCatalog
# and later handed to build_detection_test_loader.
DATA_REGISTER_TEST     = "badlad_test"


## Detectron2 Format

In [23]:
def convert_coco_to_detectron2_format(
    imgdir: Path,
    metadata_df: pd.DataFrame,
    annot_df: Optional[pd.DataFrame] = None,
    target_indices: Optional[np.ndarray] = None,
):
    """Build a list of detectron2 dataset dicts from image metadata and annotations.

    Args:
        imgdir: Directory containing the image files; joined with each file name.
        metadata_df: One row per image. Rows are unpacked positionally, so the
            frame must have exactly four columns in the order
            image_id, file_name, width, height.
        annot_df: Optional flat annotation table with the columns image_id,
            category_id, gt_masks, x_min, y_min, x_max and y_max. When None
            (e.g. test data) every record gets an empty annotation list.
        target_indices: Optional positions selecting (and ordering) a subset of
            the generated records.

    Returns:
        A list of dicts with the keys file_name, image_id, width, height and
        annotations, where each annotation holds an XYXY_ABS bbox, its
        segmentation and its category_id.
    """
    # Bucket the annotations by image once. Filtering the whole table again for
    # every image (the previous `query` call inside the loop) costs
    # O(images x annotations); a single groupby pass is linear.
    annots_by_image = {}
    if annot_df is not None:
        annots_by_image = {
            key: group for key, group in annot_df.groupby("image_id", sort=False)
        }

    dataset_dicts = []
    for _, meta_row in tqdm(metadata_df.iterrows(), total=len(metadata_df)):
        # Iterate over each image
        image_id, filename, width, height = meta_row.values

        annotations = []

        # Images without annotations (or the test split) simply keep an empty list.
        image_annots = annots_by_image.get(image_id)
        if image_annots is not None:
            for _, ann in image_annots.iterrows():
                annotations.append({
                    "bbox": [
                        float(ann["x_min"]),
                        float(ann["y_min"]),
                        float(ann["x_max"]),
                        float(ann["y_max"]),
                    ],
                    "bbox_mode": BoxMode.XYXY_ABS,
                    "segmentation": ann["gt_masks"],
                    "category_id": ann["category_id"],
                })

        # coco format -> detectron2 format dict
        dataset_dicts.append({
            "file_name": str(imgdir/filename),
            "image_id": image_id,
            "width": width,
            "height": height,
            "annotations": annotations
        })

    if target_indices is not None:
        dataset_dicts = [dataset_dicts[i] for i in target_indices]

    return dataset_dicts

In [24]:
# Register Test Inference data.
# DatasetCatalog.register() refuses to overwrite an existing name, so drop any
# registration left over from an earlier run of this cell; this keeps the cell
# re-runnable without restarting the kernel.
if DATA_REGISTER_TEST in DatasetCatalog.list():
    DatasetCatalog.remove(DATA_REGISTER_TEST)

DatasetCatalog.register(
    DATA_REGISTER_TEST,
    lambda: convert_coco_to_detectron2_format(
        TEST_IMG_DIR,
        test_metadata,
    )
)

# Set Test data categories
MetadataCatalog.get(DATA_REGISTER_TEST).set(
    thing_classes=thing_classes_test
)

# DatasetCatalog.get() invokes the registered lambda, i.e. the records are built here.
dataset_dicts_test = DatasetCatalog.get(DATA_REGISTER_TEST)
metadata_dicts_test = MetadataCatalog.get(DATA_REGISTER_TEST)

print("dicts test size=", len(dataset_dicts_test))
print("################")

  0%|          | 0/13000 [00:00<?, ?it/s]

dicts test size= 13000
################


## Downloading Model Weight and Configs


In [25]:
# Fetch the fine-tuned checkpoint and the LayoutLMv3 config from Google Drive by file ID.
# Per the cell output they are saved as /kaggle/working/final_train_layoutlmmv3.pth
# and /kaggle/working/config.json respectively.
!gdown 1OkOEy7ZoF7Hmd24wAlvzVb7cEszkcrvk  #model weight

!gdown 1CwIgwAFY4s7Nz-ST7Al2KGL1qtrlIhFx  #config of layoutlmv3

Downloading...
From (uriginal): https://drive.google.com/uc?id=1OkOEy7ZoF7Hmd24wAlvzVb7cEszkcrvk
From (redirected): https://drive.google.com/uc?id=1OkOEy7ZoF7Hmd24wAlvzVb7cEszkcrvk&confirm=t&uuid=bb66abe8-d35f-4594-b15c-62eb12bc044d
To: /kaggle/working/final_train_layoutlmmv3.pth
100%|████████████████████████████████████████| 564M/564M [00:08<00:00, 64.7MB/s]
Downloading...
From: https://drive.google.com/uc?id=1CwIgwAFY4s7Nz-ST7Al2KGL1qtrlIhFx
To: /kaggle/working/config.json
100%|██████████████████████████████████████████| 897/897 [00:00<00:00, 4.03MB/s]


## Setting Model Path


In [26]:
# Local path of the checkpoint fetched by gdown in the download cell; used as cfg.MODEL.WEIGHTS.
MODEL_PATH=Path("/kaggle/working/final_train_layoutlmmv3.pth")


## Setting Test Hyperparameters


In [27]:
# Inference config: detectron2 defaults, extended with the ViT keys, then the
# cascade LayoutLMv3 YAML, then the notebook-specific overrides below.
inf_cfg = get_cfg()

add_vit_config(inf_cfg)
# Load a config from file
inf_cfg.merge_from_file("/kaggle/working/unilm/layoutlmv3/examples/object_detection/cascade_layoutlmv3.yaml")
# LayoutLMv3 config.json fetched by gdown in the download cell
inf_cfg.MODEL.CONFIG_PATH="/kaggle/working/config.json"
# NOTE(review): presumably a training-time RoI sampling setting with no effect at inference - confirm.
inf_cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128
# Matches the 4 categories listed in the loader log (paragraph, text_box, image, table)
inf_cfg.MODEL.ROI_HEADS.NUM_CLASSES = 4
# Same value as ACCEPTANCE_THRESHOLD, which get_masks applies again to the scores
inf_cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5
# Hard-coded GPU device; the cell fails on a CPU-only kernel
inf_cfg.MODEL.DEVICE = "cuda"

inf_cfg.DATALOADER.NUM_WORKERS = 1  # lower this if CUDA overflow occurs
inf_cfg.MODEL.WEIGHTS = str(MODEL_PATH)
BATCH = 1 # lower this if CUDA overflow occurs
# Building the loader converts the registered dataset again (see the second progress bar in the output).
test_loader = build_detection_test_loader(inf_cfg, DATA_REGISTER_TEST, batch_size=BATCH)

  0%|          | 0/13000 [00:00<?, ?it/s]

[32m[08/06 15:20:14 d2.data.build]: [0mDistribution of instances among all 4 categories:
[36m|  category  | #instances   |  category  | #instances   |  category  | #instances   |
|:----------:|:-------------|:----------:|:-------------|:----------:|:-------------|
| paragraph  | 0            |  text_box  | 0            |   image    | 0            |
|   table    | 0            |            |              |            |              |
|   total    | 0            |            |              |            |              |[0m
[32m[08/06 15:20:14 d2.data.dataset_mapper]: [0m[DatasetMapper] Augmentations used in inference: [ResizeShortestEdge(short_edge_length=(800, 800), max_size=1333, sample_style='choice')]
[32m[08/06 15:20:14 d2.data.common]: [0mSerializing the dataset using: <class 'detectron2.data.common._TorchSerializedList'>
[32m[08/06 15:20:14 d2.data.common]: [0mSerializing 13000 elements to byte tensors and concatenating them all ...
[32m[08/06 15:20:14 d2.data.common]: 

In [28]:
# Minimum detection score an instance needs for its mask to be kept by get_masks.
ACCEPTANCE_THRESHOLD = 0.5  # for all categories

In [29]:
# Log which checkpoint the inference config points at.
print(f"#### MODEL: {inf_cfg.MODEL.WEIGHTS} FOR INFERENCE ####")


#### MODEL: /kaggle/working/final_train_layoutlmmv3.pth FOR INFERENCE ####


In [30]:
def rebuild_model():
    """Construct the network described by `inf_cfg` and load its checkpoint.

    Returns:
        The model built by `build_model(inf_cfg)` after the weights at
        `inf_cfg.MODEL.WEIGHTS` have been loaded into it. The train/eval mode
        is not changed here.
    """
    fresh_model = build_model(inf_cfg)
    # The value returned by load() is not needed.
    DetectionCheckpointer(fresh_model).load(inf_cfg.MODEL.WEIGHTS)
    return fresh_model


In [31]:
# Build the model and load the checkpoint once; eval mode is switched on later,
# right before the inference loop.
model = rebuild_model()


[32m[08/06 15:20:21 d2.checkpoint.detection_checkpoint]: [0m[DetectionCheckpointer] Loading from /kaggle/working/final_train_layoutlmmv3.pth ...


## CUDA Problems


In [32]:
# `!export ...` runs in a throw-away subshell, so the variables never reached
# this kernel process. Setting them through os.environ (os is imported in the
# install cell) makes them visible to the libraries loaded in the kernel.
# NOTE(review): PyTorch reads PYTORCH_CUDA_ALLOC_CONF when its CUDA allocator
# initialises; the model is already built on the GPU by this point, so this
# cell may need to run before the first CUDA use to take effect - confirm.
os.environ["LRU_CACHE_CAPACITY"] = "1"
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:512"

In [33]:
# Drop leftovers from earlier training / prediction runs so the objects they
# reference can be garbage-collected before inference starts.
vars_to_del = ["trainer", "predictor", "outputs"]

for v in vars_to_del:
    # At the top level of a cell locals() is the same namespace as globals(),
    # so a single globals() check covers every case (a separate locals()
    # branch could never be reached, and deleting from locals() does not
    # reliably unbind a name anyway).
    if v in globals():
        print(f"Deleting {v}")
        del globals()[v]

## Inference Utils


In [34]:
def rle_encode(mask):
    """Run-length encode a 2-D mask in column-major order.

    Args:
        mask: 2-D array-like. Any non-zero element counts as foreground, so
            0/1, 0/255 and boolean masks all encode identically.

    Returns:
        A space-separated string of ``start length`` pairs, where ``start`` is
        the 1-based position of a foreground run in the column-major
        flattening of ``mask``. An all-background or empty mask yields ``''``.
    """
    # Transpose before flattening so pixels are numbered down the columns.
    # Binarising keeps run boundaries correct for masks that are not strictly 0/1.
    pixels = np.asarray(mask).T.flatten() != 0

    # A background sentinel on each side guarantees every run has both a start
    # and an end transition. It also makes empty input safe, since there is no
    # need to inspect pixels[0] / pixels[-1].
    padded = np.concatenate(([False], pixels, [False]))

    # Index i marks padded[i] != padded[i + 1]. Because of the leading
    # sentinel, that is 0-based pixel i, i.e. 1-based position i + 1.
    rle = np.flatnonzero(padded[1:] != padded[:-1]) + 1

    # Entries alternate run start / run end (exclusive); turn ends into lengths.
    rle[1::2] -= rle[::2]
    return ' '.join(str(x) for x in rle)

In [35]:
@retry_if_cuda_oom
def get_masks(prediction):
    """Return one RLE string per category for a single image's prediction.

    Args:
        prediction: Object exposing `scores`, `pred_masks` and `pred_classes`
            (the "instances" entry of a model output).

    Returns:
        A list with `len(thing_classes_test)` entries; entry `cat` is the RLE of
        the union of all kept masks predicted as category `cat`.
    """
    # Keep only the instances that reach the acceptance threshold.
    keep = prediction.scores >= ACCEPTANCE_THRESHOLD
    kept_masks = prediction.pred_masks[keep] != 0
    kept_classes = prediction.pred_classes[keep]

    # OOM-tolerant version of torch.any, used to merge instance masks.
    any_over_instances = retry_if_cuda_oom(torch.any)

    def _encode_category(cat):
        # Union of every mask of this category, collapsed over the instance axis.
        merged = any_over_instances(kept_masks[kept_classes == cat], dim=0)
        return rle_encode(merged.short().to("cpu").numpy())

    return [_encode_category(cat) for cat in range(len(thing_classes_test))]

In [36]:
def run_inference(data):
    """Run the model on one batch and format the submission rows.

    Args:
        data: Batch from the test loader: a sequence of dicts, each carrying at
            least an 'image_id' entry, in the form accepted by `model`.

    Returns:
        A list of strings ``"<image_id>_<category index>,<rle>\\n"``, one per
        image and category, in batch order.
    """
    results = []
    with torch.no_grad():
        outputs = model(data)
        if torch.cuda.is_available():
            # Wait for the queued GPU work to finish before reading the results.
            torch.cuda.synchronize()

        for idx, output in enumerate(outputs):
            rles = get_masks(output["instances"])

            results.extend(
                f"{data[idx]['image_id']}_{cat},{rles[cat]}\n"
                for cat in range(len(thing_classes_test))
            )

    # No explicit `del outputs, output` here: the locals are released on return
    # anyway, and that statement raised UnboundLocalError whenever `outputs`
    # was empty because the loop variable was never bound.
    return results

## Running Inference on Test Data and Creating Submission File


In [38]:
# Collect unreachable Python objects first so any tensors they hold are freed,
# then hand the now-unused cached CUDA blocks back to the driver. In the
# opposite order, memory freed by the collection stays in PyTorch's cache.
gc.collect()
torch.cuda.empty_cache()

0

In [39]:
if is_inference:
    model.eval()

    # Flush the buffered rows roughly every 500 images. max() keeps the modulus
    # at 1 or more, so a BATCH above 500 cannot cause a ZeroDivisionError.
    flush_every = max(1, 500 // BATCH)
    results: list[str] = []

    # The context manager flushes and closes the file even if inference raises
    # part-way through, so rows already written are not lost with the handle.
    with open("submission.csv", "w") as submission_file:
        submission_file.write("Id,Predicted\n")

        for i, data in enumerate(tqdm(test_loader)):
            results.extend(run_inference(data))

            if i % flush_every == 0:
                print(f"Inference on batch {i}/{len(test_loader)} done")
                submission_file.writelines(results)
                results = []

        # Write whatever is left after the last periodic flush.
        submission_file.writelines(results)

  0%|          | 0/13000 [00:00<?, ?it/s]

Inference on batch 0/13000 done


In [None]:
# `exists` has to be *called*: the bare bound method is always truthy, so the
# link used to be shown even when no submission file had been written.
if Path("submission.csv").exists():
    display(FileLink("submission.csv"))