Code Author: Ha Eungyeom (eungyeom_ha@yonsei.ac.kr)                
 This code is developed for training and evaluating a Faster-RCNN model on the HOD dataset.

### Training Code - Normal Cases
#### Paper Section: 3.1 Environment Setup

In [None]:
# Displaying the current working directory
!pwd

In [None]:
# Changing to the parent directory
cd ..

In [None]:
# Installing necessary library for file downloading
!pip install down

# Downloading the dataset using Google Drive link
!gdown --id 1NEQWK062dMREwDSbHOPPMx-99iUVebrN -O faster_rcnn_dataset.zip

# Creating a directory for the dataset and extracting the dataset there
!mkdir faster_rcnn_data
!unzip faster_rcnn_dataset.zip -d faster_rcnn_data

In [None]:
!pwd

In [None]:
# Setting up the name for normal case training
name = '/rcnn_normal/'

In [None]:
# Importing required libraries
import os
import xml.etree.ElementTree as ET

# Specifying the directory path for annotations
directory_path = './faster_rcnn_data' + name  + 'Annotations/'

### Function to remove spaces from tags in XML files
#### Paper Section: 3.2 Data Preprocessing

In [None]:
def remove_spaces_from_tags(dir_path):
    """Remove spaces from the <filename> and <path> tags of the XML files in a directory.

    Only files whose name ends with ``.xml`` (case-insensitive) are processed,
    and only tags that are direct children of the XML root are inspected.
    A file is rewritten in place, and reported, only when a tag actually changed.

    Args:
        dir_path: Directory containing the annotation XML files.
    """
    for filename in os.listdir(dir_path):
        if not filename.lower().endswith('.xml'):
            continue

        filepath = os.path.join(dir_path, filename)

        # Parsing the XML file
        tree = ET.parse(filepath)
        root = tree.getroot()

        # Removing spaces from <filename> and <path> tags
        changed = False
        for tag in ('filename', 'path'):
            element = root.find(tag)
            # An empty tag such as <path/> has text None; there is nothing to strip.
            if element is None or element.text is None:
                continue
            stripped = element.text.replace(" ", "")
            if stripped != element.text:
                element.text = stripped
                changed = True

        # Saving the changes back to the XML file only when something was modified
        if changed:
            tree.write(filepath)
            print(f"Changed file: {filename}")

# Executing the function to preprocess XML annotations
remove_spaces_from_tags(directory_path)

### Installation of MMDetection
#### Paper Section: 3.3 Framework Preparation
Detailed steps and explanations for setting up MMDetection, including resolving version-compatibility issues.


In [None]:
# Checking the version of PyTorch
import torch
print(torch.__version__)

In [None]:
# Downgrading PyTorch to 1.13.0+cu116 for compatibility with MMDetection
!pip install torch==1.13.0+cu116 torchvision==0.14.0+cu116 --extra-index-url https://download.pytorch.org/whl/cu116

In [None]:
# Installing mmcv-full
!pip install mmcv-full -f https://download.openmmlab.com/mmcv/dist/cu116/torch1.13/index.html

In [None]:
# Cloning and installing MMDetection (version 2.x)
!git clone --branch 2.x https://github.com/open-mmlab/mmdetection.git
!cd mmdetection; python setup.py install

In [None]:
from mmdet.apis import init_detector, inference_detector
import mmcv

### Conversion of PASCAL VOC dataset to MS-COCO format
#### Paper Section: 3.4 Dataset Conversion
Steps and code snippets for converting the dataset from PASCAL VOC format to MS-COCO format using the voc2coco utility.


In [None]:
!pwd

In [None]:
# Cloning the voc2coco utility
!git clone https://github.com/yukkyo/voc2coco.git # voc -> coco

In [None]:
!pwd

In [None]:
!cat ./faster_rcnn_data/labels.txt

In [None]:
cd ./voc2coco/

In [None]:
# Converting VOC to COCO format for train, validation, and test sets
!python voc2coco.py --ann_dir ../faster_rcnn_data/rcnn_normal/Annotations \
--ann_ids ../faster_rcnn_data/rcnn_normal/ImageSets/Main/train.txt \
--labels ../faster_rcnn_data/labels.txt \
--output ../faster_rcnn_data/rcnn_normal/train.json \
--ext xml

!python voc2coco.py --ann_dir ../faster_rcnn_data/rcnn_normal/Annotations \
--ann_ids ../faster_rcnn_data/rcnn_normal/ImageSets/Main/validation.txt \
--labels ../faster_rcnn_data/labels.txt \
--output ../faster_rcnn_data/rcnn_normal/val.json \
--ext xml

!python voc2coco.py --ann_dir ../faster_rcnn_data/rcnn_normal/Annotations \
--ann_ids ../faster_rcnn_data/rcnn_normal/ImageSets/Main/test.txt \
--labels ../faster_rcnn_data/labels.txt \
--output ../faster_rcnn_data/rcnn_normal/test.json \
--ext xml

In [None]:
!cat ../faster_rcnn_data/rcnn_normal/train.json

In [None]:
!pwd

In [None]:
cd ..

### Configuration Setup and Model Training
#### Paper Section: 4.1 Training Procedure
Detailed code snippets for configuring the training setup, defining the custom dataset class, and initiating the training process.

In [None]:
# Configuring the dataset, model, and training parameters
from mmcv import Config
from mmdet.datasets.builder import DATASETS
from mmdet.datasets.coco import CocoDataset
from mmdet.apis import set_random_seed, train_detector
from mmdet.models import build_detector

# Defining the custom dataset class
@DATASETS.register_module(force=True)
class HOD(CocoDataset):
    """COCO-format dataset restricted to the six HOD object categories.

    ``force=True`` overrides any class already registered under the same
    name, so this notebook cell can be executed more than once.
    """

    # NOTE(review): presumably this order has to match labels.txt used for
    # the VOC -> COCO conversion — confirm.
    CLASSES = (
        'alcohol',
        'insulting_gesture',
        'blood',
        'cigarette',
        'gun',
        'knife',
    )

In [None]:
# Load the configuration file
config_file = './mmdetection/configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py'
checkpoint_file = './mmdetection/checkpoints/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth'

In [None]:
!pwd

In [None]:
!cd ./mmdetection; mkdir checkpoints
!wget -O ./mmdetection/checkpoints/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth http://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth

In [None]:
!ls -lia ./mmdetection/checkpoints

In [None]:
cfg = Config.fromfile(config_file)
print(cfg.pretty_text)

In [None]:
name

In [None]:
!pwd

In [None]:
from mmcv.runner import HOOKS, Hook

# Adding the SaveBestCheckpointHook class
# This class is designed to save the best model checkpoint based on a specified metric (e.g., bbox_mAP).
@HOOKS.register_module(force=True)
class SaveBestCheckpointHook(Hook):
    """Save a checkpoint whenever the monitored metric beats the best value seen so far.

    Registered with ``force=True`` (like the ``HOD`` dataset class) so that
    re-running the notebook cell does not fail on a duplicate registration.

    Args:
        out_dir: Directory in which the best checkpoint file is written.
        metric: Key looked up in ``runner.log_buffer.output`` (e.g. ``bbox_mAP``).
        save_optimizer: Whether to store the optimizer state in the checkpoint.
    """

    def __init__(self, out_dir, metric='bbox_mAP', save_optimizer=True):
        self.out_dir = out_dir  # directory where the best checkpoint will be saved
        self.metric = metric  # metric name to monitor and determine the best model
        self.save_optimizer = save_optimizer  # flag to decide whether to save optimizer state or not
        self.best_score = 0.0  # best metric value observed so far

    def after_train_epoch(self, runner):
        """Write ``best_<metric>.pth`` into ``out_dir`` if this epoch improved the metric.

        Args:
            runner: The mmcv runner driving training; its ``log_buffer.output``
                is read for the monitored metric.
        """
        if not self.every_n_epochs(runner, 1):
            return

        import os.path as osp
        from mmcv.runner import save_checkpoint

        # A missing metric counts as 0, so nothing is saved for that epoch.
        # NOTE(review): the metric is presumably only present once an evaluation
        # hook has run for this epoch — confirm hook priority and the
        # evaluation interval so that this lookup can actually succeed.
        score = runner.log_buffer.output.get(self.metric, 0)
        if score > self.best_score:
            self.best_score = score
            # save_checkpoint expects a file path and an optimizer object (or None),
            # not a directory and a boolean flag.
            filename = osp.join(self.out_dir, f'best_{self.metric}.pth')
            optimizer = runner.optimizer if self.save_optimizer else None
            save_checkpoint(runner.model, filename, optimizer=optimizer)

# Dataset type and root directory shared by the train, val and test splits
dataset_root = './faster_rcnn_data' + name
cfg.dataset_type = 'HOD'
cfg.data_root = dataset_root

# Pointing every split at the HOD dataset class, its COCO-format annotation file and the image folder
for split, ann_file in (('train', 'train.json'), ('val', 'val.json'), ('test', 'test.json')):
    split_cfg = getattr(cfg.data, split)
    split_cfg.type = 'HOD'
    split_cfg.data_root = dataset_root
    split_cfg.ann_file = ann_file
    split_cfg.img_prefix = 'JPEGImages'

# The box head predicts the 6 HOD classes
cfg.model.roi_head.bbox_head.num_classes = 6

# Starting from the pretrained checkpoint; training logs and weight files go to the work directory
cfg.load_from = './mmdetection/checkpoints/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth'
cfg.work_dir = './tutorial_exps_normal'

# Learning-rate settings: lr = 0.02 / 8, warmup disabled
cfg.optimizer.lr = 0.02 / 8
cfg.lr_config.warmup = None
# If the config is loaded twice, the lr_config's policy disappears, so it is set explicitly here
cfg.lr_config.policy = 'step'

# Logging interval
cfg.log_config.interval = 2000

# For CocoDataset the metric must be 'bbox' (not mAP); it reports mAP over IoU thresholds 0.5 to 0.95
cfg.evaluation.metric = 'bbox'
cfg.evaluation.classwise = True  # additionally report per-class results
# NOTE(review): with an epoch-based runner this interval is presumably counted in epochs,
# which would exceed max_epochs below and skip validation entirely — confirm the intended value.
cfg.evaluation.interval = 2000
cfg.checkpoint_config.interval = 5  # periodic checkpoint interval

# Registering the custom hook that keeps the best-performing model in the work directory
cfg.custom_hooks = [
    {
        'type': 'SaveBestCheckpointHook',
        'out_dir': cfg.work_dir,
        'metric': 'bbox_mAP',
        'save_optimizer': True,
    },
]

# Seeding for reproducibility (non-deterministic cuDNN mode)
cfg.seed = 0
set_random_seed(0, deterministic=False)

# Single GPU, running on CUDA
cfg.gpu_ids = range(1)
cfg.device = 'cuda'

# Training for 150 epochs
cfg.runner.max_epochs = 150


In [None]:
print(cfg.pretty_text)

In [None]:
from mmdet.datasets import build_dataset
from mmdet.models import build_detector
from mmdet.apis import train_detector

# Create a dataset for training
datasets = [build_dataset(cfg.data.train)]

In [None]:
# Print the first dataset to check its content
print(datasets[0])

# Using datasets[0].__dict__ to view all the self variables' keys and values.
datasets[0].__dict__.keys()

In [None]:
datasets[0].data_infos

In [None]:
datasets[0].pipeline

In [None]:
model = build_detector(cfg.model, train_cfg=cfg.get('train_cfg'), test_cfg=cfg.get('test_cfg'))
model.CLASSES = datasets[0].CLASSES
print(model.CLASSES)

In [None]:
import os.path as osp
mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))
train_detector(model, datasets, cfg, distributed=False, validate=True) 

In [None]:
!pwd

### Inference and Result Visualization
#### Paper Section: 4.3 Testing Procedure

In [None]:
import cv2
from mmdet.apis import inference_detector, init_detector, show_result_pyplot

# Loading a sample image for inference
img_path = './faster_rcnn_data/rcnn_normal/JPEGImages/img_hod_001544.jpg'
img = cv2.imread(img_path)
# cv2.imread returns None instead of raising when the file is missing or unreadable
if img is None:
    raise FileNotFoundError(f'Could not read image: {img_path}')

# Setting the configuration for the model (inference_detector reads it from model.cfg)
model.cfg = cfg
# Making sure the model is in evaluation mode; it comes straight from training here
# rather than from init_detector
model.eval()

# Performing inference on the sample image
result = inference_detector(model, img)

# Visualizing the inference results
show_result_pyplot(model, img, result)

#### This marks the end of code snippet for training a Faster-RCNN model on the HOD dataset for normal cases.