## Install Package

In [1]:
# clear_output is used at the end of this cell to wipe the long pip logs.
from IPython.display import clear_output

# Wheels shipped in the attached `mmdetectron-31-wheel` input dataset.
# --no-index: pip does not consult any package index.
# --no-deps:  pip does not install the wheel's dependencies, so every
#             required package has to be listed explicitly here.
!pip install --no-index --no-deps /kaggle/input/mmdetectron-31-wheel/pycocotools-2.0.6-cp310-cp310-linux_x86_64.whl
!pip install --no-index --no-deps /kaggle/input/mmdetectron-31-wheel/torch-1.12.1+cu116-cp310-cp310-linux_x86_64.whl
!pip install --no-index --no-deps /kaggle/input/mmdetectron-31-wheel/torchvision-0.13.1+cu116-cp310-cp310-linux_x86_64.whl
!pip install --no-index --no-deps /kaggle/input/mmdetectron-31-wheel/mmcv-2.0.1-cp310-cp310-manylinux1_x86_64.whl 
!pip install --no-index --no-deps /kaggle/input/mmdetectron-31-wheel/openmim-0.3.9-py2.py3-none-any.whl
!pip install --no-index --no-deps /kaggle/input/mmdetectron-31-wheel/mmengine-0.7.4-py3-none-any.whl
!pip install --no-index --no-deps /kaggle/input/mmdetectron-31-wheel/addict-2.4.0-py3-none-any.whl
# Unlike the lines around it, yapf is requested by pinned version without
# --no-index / --no-deps, i.e. not from a local wheel path.
!pip install yapf==0.40.1
# Wheels shipped in the attached `mmpretrain` input dataset.
!pip install --no-index --no-deps /kaggle/input/mmpretrain/einops-0.6.1-py3-none-any.whl
!pip install --no-index --no-deps /kaggle/input/mmpretrain/mat4py-0.5.0-py2.py3-none-any.whl
!pip install --no-index --no-deps /kaggle/input/mmpretrain/ordered_set-4.1.0-py3-none-any.whl
!pip install --no-index --no-deps /kaggle/input/mmpretrain/model_index-0.1.11-py3-none-any.whl
!pip install --no-index --no-deps /kaggle/input/mmpretrain/modelindex-0.0.2-py3-none-any.whl
!pip install --no-index --no-deps /kaggle/input/mmpretrain/mmpretrain-1.0.0rc8-py2.py3-none-any.whl

# Discard the installation logs printed above.
clear_output()

## Importing the required packages

In [2]:
from tqdm.notebook import tqdm
import pandas as pd

import matplotlib.pyplot as plt
from PIL import Image

from glob import glob
import matplotlib.pyplot as plt

In [3]:
# Clone MMDetection at the v3.1.0 ref (-b takes the ref name directly).
!git clone -bv3.1.0 https://github.com/open-mmlab/mmdetection.git
# Make the repo root the working directory: later cells use paths relative
# to it (configs/custom, tools/train.py).
%cd /kaggle/working/mmdetection
# Editable (-e) install of the cloned tree, with verbose (-v) pip output.
!pip install -v -e .

# Discard the clone / build logs printed above.
clear_output()

In [4]:
# Sanity check of the installed stack: print the PyTorch version together with
# CUDA availability, the MMDetection version, and the CUDA version reported by
# mmcv's get_compiling_cuda_version().
import torch
import torchvision
import mmdet

cuda_is_available = torch.cuda.is_available()
print("PyTorch version= ", torch.__version__, cuda_is_available)
print("mmdet version= ", mmdet.__version__)

from mmcv.ops import get_compiling_cuda_version

compiled_cuda_version = get_compiling_cuda_version()
print("THE CUDA version ", compiled_cuda_version)

PyTorch version=  1.12.1+cu116 True
mmdet version=  3.1.0
THE CUDA version  11.6


In [5]:
# Upgrade openmim (-U); no version is pinned on this line, so the installed
# version depends on what pip resolves at run time.
!pip install -U openmim
# Discard the pip log (clear_output was imported in the install cell).
clear_output()

## Loading the Dataset

In [6]:
# Copy the dataset into /kaggle/working: the next cell rewrites the annotation
# JSON files in place under this working copy.
!cp -r /kaggle/input/vaihingen-ins-seg-v2/vaihingen-instance-seg-v2 /kaggle/working

In [7]:
import os
import json

# Function to load, process, and save dataset
def process_dataset(dataset_path, category_to_remove='building-UhLA'):
    """Drop one category and its annotations from a COCO JSON file, in place.

    The file at ``dataset_path`` is loaded, every entry of ``categories``
    whose ``name`` equals ``category_to_remove`` is discarded, and only the
    annotations whose ``category_id`` belongs to a surviving category are
    kept. The result is written back to the same path.

    Args:
        dataset_path: Path of a JSON file with top-level ``categories`` and
            ``annotations`` lists. All other top-level keys are preserved.
        category_to_remove: ``name`` of the category to discard. The default
            is ``'building-UhLA'``; note that this is *not* the ``'buildings'``
            class the training configs use, which is left untouched.

    Returns:
        None. The file at ``dataset_path`` is overwritten.

    Raises:
        KeyError: If the JSON lacks ``categories`` or ``annotations``, or an
            entry lacks ``name`` / ``id`` / ``category_id``.
    """
    with open(dataset_path, 'r', encoding='utf-8') as f:
        coco_data = json.load(f)

    categories = coco_data['categories']
    annotations = coco_data['annotations']

    # Categories that survive the removal, and their ids for fast lookup.
    kept_categories = [
        category for category in categories
        if category['name'] != category_to_remove
    ]
    kept_category_ids = {category['id'] for category in kept_categories}

    # Filtering on the surviving ids (instead of on the removed id) also drops
    # annotations whose category_id is not listed in `categories` at all.
    kept_annotations = [
        annotation for annotation in annotations
        if annotation['category_id'] in kept_category_ids
    ]

    coco_data['categories'] = kept_categories
    coco_data['annotations'] = kept_annotations

    # Overwrite the original file; ensure_ascii=False keeps non-ASCII text
    # as-is instead of escaping it.
    with open(dataset_path, 'w', encoding='utf-8') as f:
        json.dump(coco_data, f, ensure_ascii=False, indent=4)

# Annotation files of the three splits inside the working copy of the dataset
train_anno_path = '/kaggle/working/vaihingen-instance-seg-v2/annotations/train_annotations.coco.json'
val_anno_path = '/kaggle/working/vaihingen-instance-seg-v2/annotations/valid_annotations.coco.json'
test_anno_path = '/kaggle/working/vaihingen-instance-seg-v2/annotations/test_annotations.coco.json'

# Rewrite each split's annotation file in place (train, then valid, then test)
for split_anno_path in (train_anno_path, val_anno_path, test_anno_path):
    process_dataset(split_anno_path)

In [8]:
# Show the current working directory; the relative paths used by the following
# cells (configs/custom, tools/train.py) are resolved against it.
%pwd

'/kaggle/working/mmdetection'

In [9]:
# Folder that receives the configs written by the %%writefile cells below;
# -p means no error is raised if the folder already exists.
!mkdir -p configs/custom

## Train Custom Cascade

In [10]:
%%writefile configs/custom/r101_custom_config.py

_base_ = '/kaggle/working/mmdetection/configs/cascade_rcnn/cascade-mask-rcnn_x101-32x4d_fpn_20e_coco.py'

model = dict(
    roi_head=dict(
        bbox_head=[
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=1,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.1, 0.1, 0.2, 0.2]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='CrossEntropyLoss',
                    use_sigmoid=False,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                               loss_weight=1.0)),
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=1,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.05, 0.05, 0.1, 0.1]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='CrossEntropyLoss',
                    use_sigmoid=False,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                               loss_weight=1.0)),
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=1,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.033, 0.033, 0.067, 0.067]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='CrossEntropyLoss',
                    use_sigmoid=False,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                               loss_weight=1.0))
        ],
        mask_head=dict(
            type='FCNMaskHead',
            num_convs=4,
            in_channels=256,
            conv_out_channels=256,
            num_classes=1,
            loss_mask=dict(
                type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))
    )
)

classes = ('buildings',)
backend_args = None
data_root = '/kaggle/working/vaihingen-instance-seg-v2/'
train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=12, val_interval=1)
val_cfg = dict(type='ValLoop')
test_cfg = dict(type='TestLoop')

param_scheduler = [
    dict(type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, end=500),
    dict(
        type='MultiStepLR',
        begin=0,
        end=12,
        by_epoch=True,
        milestones=[8, 11],
        gamma=0.1)
]

train_pipeline = [
    dict(type='LoadImageFromFile', backend_args=backend_args),
    dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
    dict(type='Resize', scale=(1024, 1024), keep_ratio=True),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PhotoMetricDistortion',
         brightness_delta=32, contrast_range=(0.5, 1.5),
         saturation_range=(0.5, 1.5), hue_delta=18),
    dict(type='PackDetInputs')
]

test_pipeline = [
    dict(type='LoadImageFromFile', backend_args=backend_args),
    dict(type='Resize', scale=(1024, 1024), keep_ratio=True),
    dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
    dict(
        type='PackDetInputs',
        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                   'scale_factor'))
]

train_dataloader = dict(
    batch_size=2,
    num_workers=2,
    persistent_workers=True,
    dataset=dict(
        type='CocoDataset',
        data_root=data_root,
        metainfo=dict(classes=classes),
        ann_file=data_root + 'annotations/train_annotations.coco.json',
        data_prefix=dict(img='train/'),
        pipeline=train_pipeline,
        backend_args=backend_args))

val_dataloader = dict(
    batch_size=2,
    num_workers=2,
    persistent_workers=True,
    drop_last=False,
    dataset=dict(
        type='CocoDataset',
        data_root=data_root,
        metainfo=dict(classes=classes),
        ann_file=data_root + 'annotations/valid_annotations.coco.json',
        data_prefix=dict(img='valid/'),
        test_mode=True,
        pipeline=test_pipeline,
        backend_args=backend_args))

test_dataloader = val_dataloader

# Focus on Fusing bounding boxes
test_cfg=dict(
    rpn=dict(
        nms_pre=1000,
        max_per_img=1000,
        nms=dict(type='nms', iou_threshold=0.7),
        min_bbox_size=0),
    rcnn=dict(
        score_thr=0.05,
        nms=dict(type='soft_nms', iou_threshold=0.5),
        max_per_img=100)
)

val_evaluator = dict(
    ann_file=data_root + 'annotations/valid_annotations.coco.json', metric=['bbox', 'segm'])
load_from = 'https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_x101_32x4d_fpn_20e_coco/cascade_mask_rcnn_x101_32x4d_fpn_20e_coco_20200528_083917-ed1f4751.pth'
test_evaluator = val_evaluator
resume_from = None


Writing configs/custom/r101_custom_config.py


In [11]:
# Train with the config written by the previous cell. tools/train.py is
# resolved relative to the mmdetection repo root (the current directory).
!python tools/train.py /kaggle/working/mmdetection/configs/custom/r101_custom_config.py

09/05 17:18:16 - mmengine - [4m[97mINFO[0m - 
------------------------------------------------------------
System environment:
    sys.platform: linux
    Python: 3.10.12 | packaged by conda-forge | (main, Jun 23 2023, 22:40:32) [GCC 12.3.0]
    CUDA available: True
    numpy_random_seed: 1270409352
    GPU 0: Tesla P100-PCIE-16GB
    CUDA_HOME: /usr/local/cuda
    NVCC: Cuda compilation tools, release 11.8, V11.8.89
    GCC: gcc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0
    PyTorch: 1.12.1+cu116
    PyTorch compiling details: PyTorch built with:
  - GCC 9.3
  - C++ Version: 201402
  - Intel(R) Math Kernel Library Version 2020.0.0 Product Build 20191122 for Intel(R) 64 architecture applications
  - Intel(R) MKL-DNN v2.6.0 (Git Hash 52b5f107dd9cf10910aaa19cb47f3abf9b349815)
  - OpenMP 201511 (a.k.a. OpenMP 4.5)
  - LAPACK is enabled (usually provided by MKL)
  - NNPACK is enabled
  - CPU capability usage: AVX2
  - CUDA Runtime 11.6
  - NVCC architecture flags: -genc

### Inference 1st

In [12]:
# import os
# from mmcv import imread
# from mmdet.apis import inference_detector
# from mmdet.visualization import DetLocalVisualizer
# import matplotlib.pyplot as plt
# from IPython.display import display, Image
# from mmdet.apis import init_detector, inference_detector
# from mmengine.config import Config
# from mmcv import imread
# from mmdet.registry import VISUALIZERS

In [13]:
# # Load the configuration file
# cfg = Config.fromfile('/kaggle/working/mmdetection/configs/custom/r101_custom_config.py')

# # Initialize the model
# model = init_detector(cfg, '/kaggle/working/mmdetection/work_dirs/r101_custom_config/epoch_12.pth', device='cuda:0')

In [14]:

# # Directory containing test images
# img_dir = '/kaggle/working/vaihingen-instance-seg-v2/test/'
# save_dir = '/kaggle/working/inf_result/'

# # Create the save directory if it does not exist
# os.makedirs(save_dir, exist_ok=True)

# # Initialize the visualizer (execute this block only once)
# visualizer = VISUALIZERS.build(model.cfg.visualizer)
# visualizer.dataset_meta = model.dataset_meta

# # Set custom visualization properties
# visualizer._VISUALIZATION_COLORS = [(255, 0, 0)]  # Example: Set the color to red
# visualizer.alpha = 0.5  # Set the transparency (alpha) for the segmentation masks

# # Loop through all images in the directory and perform inference
# for img_name in os.listdir(img_dir):
#     if img_name.endswith('.jpg') or img_name.endswith('.png'):  # Ensure it's an image file
#         img_path = os.path.join(img_dir, img_name)
#         image = imread(img_path)
        
#         # Perform inference
#         result = inference_detector(model, image)
        
#         # Custom name for the result
#         result_name = f"resnext101_result_{os.path.splitext(img_name)[0]}.jpg"
        
#         # Visualize the results
#         output_file = os.path.join(save_dir, result_name)
#         visualizer.add_datasample(
#             result_name,  # Use the custom name as the sample name
#             image,
#             data_sample=result,
#             draw_gt=False,
#             show=False,  # Set to True if you want to display each image during processing
#             wait_time=0,
#             out_file=output_file  # Save the result to the output directory with the custom name
#         )
        
#         # Display the result
#         display(Image(filename=output_file))

# print("Inference completed for all images.")

## Improved (ADD DCN + SoftNMS)

In [15]:
%%writefile configs/custom/r101_custom_config_dcn_nms.py

_base_ = '/kaggle/working/mmdetection/configs/cascade_rcnn/cascade-mask-rcnn_x101-32x4d_fpn_20e_coco.py'

model = dict(
    backbone=dict(
        type='ResNeXt',
        depth=101,
        groups=32,
        base_width=4,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,
        norm_cfg=dict(type='BN', requires_grad=True),
        norm_eval=True,
        style='pytorch',
        init_cfg=dict(type='Pretrained', checkpoint='open-mmlab://resnext101_32x4d'),
        dcn=dict(
            type='DCN',
            deformable_groups=1,
            fallback_on_stride=False),
        stage_with_dcn=[False, True, True, True]  # Apply DCN to the last three stages
    ),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        num_outs=5
    ),
    roi_head=dict(
        bbox_head=[
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=1,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.1, 0.1, 0.2, 0.2]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='CrossEntropyLoss',
                    use_sigmoid=False,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                               loss_weight=1.0)),
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=1,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.05, 0.05, 0.1, 0.1]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='CrossEntropyLoss',
                    use_sigmoid=False,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                               loss_weight=1.0)),
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=1,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.033, 0.033, 0.067, 0.067]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='CrossEntropyLoss',
                    use_sigmoid=False,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                               loss_weight=1.0))
        ],
        mask_head=dict(
            type='FCNMaskHead',
            num_convs=4,
            in_channels=256,
            conv_out_channels=256,
            num_classes=1,
            loss_mask=dict(
                type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))
    )
)

classes = ('buildings',)
backend_args = None
data_root = '/kaggle/working/vaihingen-instance-seg-v2/'
train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=12, val_interval=1)
val_cfg = dict(type='ValLoop')
test_cfg = dict(type='TestLoop')

param_scheduler = [
    dict(type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, end=500),
    dict(
        type='MultiStepLR',
        begin=0,
        end=12,
        by_epoch=True,
        milestones=[8, 11],
        gamma=0.1)
]

train_pipeline = [
    dict(type='LoadImageFromFile', backend_args=backend_args),
    dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
    dict(type='Resize', scale=(1024, 1024), keep_ratio=True),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PhotoMetricDistortion',
         brightness_delta=32, contrast_range=(0.5, 1.5),
         saturation_range=(0.5, 1.5), hue_delta=18),
    dict(type='PackDetInputs')
]

test_pipeline = [
    dict(type='LoadImageFromFile', backend_args=backend_args),
    dict(type='Resize', scale=(1024, 1024), keep_ratio=True),
    dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
    dict(
        type='PackDetInputs',
        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                   'scale_factor'))
]

train_dataloader = dict(
    batch_size=2,
    num_workers=2,
    persistent_workers=True,
    dataset=dict(
        type='CocoDataset',
        data_root=data_root,
        metainfo=dict(classes=classes),
        ann_file=data_root + 'annotations/train_annotations.coco.json',
        data_prefix=dict(img='train/'),
        pipeline=train_pipeline,
        backend_args=backend_args))

val_dataloader = dict(
    batch_size=2,
    num_workers=2,
    persistent_workers=True,
    drop_last=False,
    dataset=dict(
        type='CocoDataset',
        data_root=data_root,
        metainfo=dict(classes=classes),
        ann_file=data_root + 'annotations/valid_annotations.coco.json',
        data_prefix=dict(img='valid/'),
        test_mode=True,
        pipeline=test_pipeline,
        backend_args=backend_args))

test_dataloader = val_dataloader

# Focus on Fusing bounding boxes
test_cfg=dict(
    rpn=dict(
        nms_pre=1000,
        max_per_img=1000,
        nms=dict(type='nms', iou_threshold=0.7),
        min_bbox_size=0),
    rcnn=dict(
        score_thr=0.05,
        nms=dict(type='soft_nms', iou_threshold=0.5),
        max_per_img=100)
)

val_evaluator = dict(
    ann_file=data_root + 'annotations/valid_annotations.coco.json', metric=['bbox', 'segm'])
load_from = 'https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_x101_32x4d_fpn_20e_coco/cascade_mask_rcnn_x101_32x4d_fpn_20e_coco_20200528_083917-ed1f4751.pth'
test_evaluator = val_evaluator
resume_from = None


Writing configs/custom/r101_custom_config_dcn_nms.py


In [16]:
# Train with the DCN + soft-NMS config written by the previous cell.
# tools/train.py is resolved relative to the mmdetection repo root.
!python tools/train.py /kaggle/working/mmdetection/configs/custom/r101_custom_config_dcn_nms.py

09/05 17:25:14 - mmengine - [4m[97mINFO[0m - 
------------------------------------------------------------
System environment:
    sys.platform: linux
    Python: 3.10.12 | packaged by conda-forge | (main, Jun 23 2023, 22:40:32) [GCC 12.3.0]
    CUDA available: True
    numpy_random_seed: 1508220089
    GPU 0: Tesla P100-PCIE-16GB
    CUDA_HOME: /usr/local/cuda
    NVCC: Cuda compilation tools, release 11.8, V11.8.89
    GCC: gcc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0
    PyTorch: 1.12.1+cu116
    PyTorch compiling details: PyTorch built with:
  - GCC 9.3
  - C++ Version: 201402
  - Intel(R) Math Kernel Library Version 2020.0.0 Product Build 20191122 for Intel(R) 64 architecture applications
  - Intel(R) MKL-DNN v2.6.0 (Git Hash 52b5f107dd9cf10910aaa19cb47f3abf9b349815)
  - OpenMP 201511 (a.k.a. OpenMP 4.5)
  - LAPACK is enabled (usually provided by MKL)
  - NNPACK is enabled
  - CPU capability usage: AVX2
  - CUDA Runtime 11.6
  - NVCC architecture flags: -genc

### Inference 2nd

In [17]:
# %pwd

In [18]:
# import mmdet
# import mmcv
# import torch

# print("MMDetection Version:", mmdet.__version__)
# print("MMCV Version:", mmcv.__version__)
# print("PyTorch Version:", torch.__version__)


In [19]:
# from mmdet.apis import init_detector, inference_detector
# from mmengine.config import Config
# from mmcv import imread
# from mmdet.registry import VISUALIZERS

In [20]:
# # Load the configuration file
# cfg = Config.fromfile('/kaggle/working/mmdetection/configs/custom/r101_custom_config_dcn_nms.py')

# # Initialize the model
# model = init_detector(cfg, '/kaggle/working/mmdetection/work_dirs/r101_custom_config_dcn_nms/epoch_12.pth', device='cuda:0')


In [21]:
# !mkdir /kaggle/working/inf_result

In [22]:
# import os
# from mmcv import imread
# from mmdet.apis import inference_detector
# from mmdet.visualization import DetLocalVisualizer
# import matplotlib.pyplot as plt
# from IPython.display import display, Image

# # Directory containing test images
# img_dir = '/kaggle/working/vaihingen-instance-seg-v2/test/'
# save_dir = '/kaggle/working/inf_result/'

# # Create the save directory if it does not exist
# os.makedirs(save_dir, exist_ok=True)

# # Initialize the visualizer (execute this block only once)
# visualizer = VISUALIZERS.build(model.cfg.visualizer)
# visualizer.dataset_meta = model.dataset_meta

# # Set custom visualization properties
# visualizer._VISUALIZATION_COLORS = [(255, 0, 0)]  # Example: Set the color to red
# visualizer.alpha = 0.5  # Set the transparency (alpha) for the segmentation masks

# # Loop through all images in the directory and perform inference
# for img_name in os.listdir(img_dir):
#     if img_name.endswith('.jpg') or img_name.endswith('.png'):  # Ensure it's an image file
#         img_path = os.path.join(img_dir, img_name)
#         image = imread(img_path)
        
#         # Perform inference
#         result = inference_detector(model, image)
        
#         # Custom name for the result
#         result_name = f"final_inf_result_{os.path.splitext(img_name)[0]}.jpg"
        
#         # Visualize the results
#         output_file = os.path.join(save_dir, result_name)
#         visualizer.add_datasample(
#             result_name,  # Use the custom name as the sample name
#             image,
#             data_sample=result,
#             draw_gt=False,
#             show=False,  # Set to True if you want to display each image during processing
#             wait_time=0,
#             out_file=output_file  # Save the result to the output directory with the custom name
#         )
        
#         # Display the result
#         display(Image(filename=output_file))

# print("Inference completed for all images.")