Code Author: Ha Eungyeom (eungyeom_ha@yonsei.ac.kr)        
This code is developed for training and evaluating a YOLOv5 model on the HOD dataset. [GitHub Repository](https://github.com/poori-nuna/HOD-Benchmark-Dataset)

------

### Importing Necessary Libraries and Preparing Environment
#### Paper Section: 3.1 Environment Setup
This section sets up the environment by installing required libraries and downloading necessary files.

In [None]:
!pip install gdown

!gdown --id 1SPqvYv0azxPLS-UfFDEyD3F2RokvXmqQ -O data.zip
!mkdir data
!unzip data.zip -d data

In [None]:
import os

# Expose only GPU index 2 to CUDA for everything launched from this notebook.
# NOTE(review): the index is hard-coded; presumably it matches the author's
# machine -- adjust it for the hardware you are running on.
os.environ["CUDA_VISIBLE_DEVICES"] = "2"

# Directory created by the `git clone` below. The existence check must use
# this name (not "yolov5"), otherwise the clone is attempted on every run.
repo_dir = "HOD_yolov5"

# When the cell is re-run the working directory is already the repository;
# cloning/chdir-ing again would create and enter a nested HOD_yolov5/HOD_yolov5
# and break the relative ../data paths used by the later cells.
if os.path.basename(os.getcwd()) != repo_dir:
    if not os.path.exists(repo_dir):
        os.system("git clone https://github.com/poori-nuna/HOD_yolov5")
    os.chdir(repo_dir)

# Install the repository's Python dependencies (quiet mode).
os.system("pip install -qr requirements.txt")

### Training on all Cases (Normal Cases +  Hard Cases)
#### Paper Section: 4.1 Training Procedure
This section starts the training process on the full dataset (normal cases + hard cases).

In [None]:
# Experiment configuration. These top-level names are read again by the
# validation, test and detection cells further down.
names, model = 'all', 'yolov5s.yaml'
img_size, batch, epoch = 416, 32, 100

# 'yolov5s.yaml' -> 'yolov5s': used for both the weights file and the run name.
weights_stem = model.split('.')[0]
run_name = f"{names}_project_yolov5_{img_size}_{batch}_{epoch}_{weights_stem}"

# Assemble the train.py command line one option at a time.
train_args = [
    "python train.py",
    f"--img {img_size}",
    f"--batch {batch}",
    f"--epochs {epoch}",
    f"--data ../data/yolo_{names}/data_all_to_normal.yaml",
    f"--cfg ./models/custom_{model}",
    f"--weights {weights_stem}.pt",
    f"--name {run_name}",
    "--cache",
]
cmd = " ".join(train_args)

# Echo the exact command before launching it.
print(cmd)
os.system(cmd)

### Validation on all Cases
#### Paper Section: 4.2 Validation Strategy
This section carries out validation to select the optimal confidence threshold.

In [None]:
import subprocess
import re

# Output directory of the training run; built from the same pieces as the
# --name argument passed to train.py.
weight_path = f"runs/train/{names}_project_yolov5_{img_size}_{batch}_{epoch}_{model.split('.')[0]}"
conf_values = [0.25, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8]

# Pattern for the "all" summary row printed by val.py:
#   all <int> <int> <metric> <metric> <mAP50> <mAP50-95>
# A metric may be printed without a decimal point ("0", "1") or in scientific
# notation ("1e-05") -- upstream YOLOv5 formats these columns with %.3g
# (NOTE(review): confirm the HOD_yolov5 fork keeps that format). Requiring
# "digits.digits" would silently skip such thresholds.
number = r"\d+(?:\.\d+)?(?:[eE][-+]?\d+)?"
summary_re = re.compile(rf"\ball\s+\d+\s+\d+\s+{number}\s+{number}\s+({number})\s+({number})")

best_conf = 0
best_map = 0

# Run val.py once per confidence threshold and keep the one with highest mAP50.
for conf in conf_values:
    cmd = f"python val.py --data ../data/yolo_{names}/data_all_to_normal.yaml --img {img_size} --weight {weight_path}/weights/best.pt --conf {conf} --half"
    # stderr is merged into stdout so the summary row is found wherever
    # val.py writes it.
    result = subprocess.run(cmd, shell=True, text=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT).stdout

    print("===============================================================================")
    print("confidence score :", conf)
    match = summary_re.search(result)
    if match is None:
        print(f"Failed to extract mAP and 6th value for conf: {conf}")
        # The raw output is otherwise never shown in this cell; print it so
        # the reason for the failure (bad path, crash, ...) is visible.
        print(result)
        print("===========================================================================================")
        continue

    mAP_value = float(match.group(1))    # mAP50 column
    sixth_value = float(match.group(2))  # mAP50-95 column
    print(f"For conf: {conf}, mAP50: {mAP_value}, mAP50-95: {sixth_value}")
    print("===========================================================================================")

    # Strict '>' keeps the first (lowest) threshold on ties.
    if mAP_value > best_map:
        best_map = mAP_value
        best_conf = conf

print(f"Best --conf value: {best_conf} with mAP: {best_map}")

### Testing - Normal Cases
#### Paper Section: 4.3 Testing Procedure
This section performs testing on the Normal dataset.

In [None]:
conf_values = [0.25, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8]

# Pattern for the "all" summary row printed by val.py:
#   all <int> <int> <metric> <metric> <mAP50> <mAP50-95>
# A metric may be printed without a decimal point ("0", "1") or in scientific
# notation ("1e-05") -- upstream YOLOv5 formats these columns with %.3g
# (NOTE(review): confirm the HOD_yolov5 fork keeps that format). Requiring
# "digits.digits" would silently skip such thresholds.
number = r"\d+(?:\.\d+)?(?:[eE][-+]?\d+)?"
summary_re = re.compile(rf"\ball\s+\d+\s+\d+\s+{number}\s+{number}\s+({number})\s+({number})")

# NOTE(review): this sweep runs with --task test and reassigns best_conf /
# best_map, overwriting any values set by an earlier cell.
best_conf = 0
best_map = 0

# Evaluate the trained weights on the normal-case test configuration once per
# confidence threshold and track the threshold with the highest mAP50.
for conf in conf_values:
    cmd = f"python val.py --task test --data ../data/yolo_{names}/data_all_to_normal.yaml --img {img_size} --weight {weight_path}/weights/best.pt --conf {conf} --half"
    # stderr is merged into stdout so the summary row is found wherever
    # val.py writes it.
    result = subprocess.run(cmd, shell=True, text=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT).stdout
    print(result)

    print("===============================================================================")
    print("confidence score :", conf)
    match = summary_re.search(result)
    if match is None:
        print(f"Failed to extract mAP and 6th value for conf: {conf}")
        # Close the separator pair on the failure path as well.
        print("===========================================================================================")
        continue

    mAP_value = float(match.group(1))    # mAP50 column
    sixth_value = float(match.group(2))  # mAP50-95 column
    print(f"For conf: {conf}, mAP50: {mAP_value}, mAP50-95: {sixth_value}")
    print("===========================================================================================")

    # Strict '>' keeps the first (lowest) threshold on ties.
    if mAP_value > best_map:
        best_map = mAP_value
        best_conf = conf

print(f"Best --conf value: {best_conf} with mAP: {best_map}")

### Image Extraction - Normal Cases
#### Paper Section: 4.4 Result Interpretation
This section runs detection on the Normal test images with the final confidence threshold chosen from the validation results and saves the detection outputs.

In [None]:
# Confidence threshold passed to detect.py. It is set by hand here (the
# author's chosen "best confidence"), not read from best_conf.
final_conf = 0.25

# Sub-directory name for this run's results under ./output.
detect_name = f"{names}_to_normal_test_result_{img_size}_{batch}_{epoch}_{model.split('.')[0]}"

# Assemble the detect.py command line one option at a time.
cmd = " ".join([
    "python detect.py",
    f"--conf {final_conf}",
    f"--weights {weight_path}/weights/best.pt",
    f"--img {img_size}",
    "--source ../data/yolo_normal/test/images",
    "--save-txt",
    "--save-conf",
    "--project ./output",
    f"--name={detect_name}",
    "--exist-ok",
])
os.system(cmd)

### Testing - Hard Cases
#### Paper Section: 4.3 Testing Procedure (continued)
This section performs testing on the hard dataset.

In [None]:
conf_values = [0.25, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8]

# Pattern for the "all" summary row printed by val.py:
#   all <int> <int> <metric> <metric> <mAP50> <mAP50-95>
# A metric may be printed without a decimal point ("0", "1") or in scientific
# notation ("1e-05") -- upstream YOLOv5 formats these columns with %.3g
# (NOTE(review): confirm the HOD_yolov5 fork keeps that format). Requiring
# "digits.digits" would silently skip such thresholds.
number = r"\d+(?:\.\d+)?(?:[eE][-+]?\d+)?"
summary_re = re.compile(rf"\ball\s+\d+\s+\d+\s+{number}\s+{number}\s+({number})\s+({number})")

# NOTE(review): this sweep runs with --task test and reassigns best_conf /
# best_map, overwriting any values set by an earlier cell.
best_conf = 0
best_map = 0

# Evaluate the trained weights on the hard-case test configuration once per
# confidence threshold and track the threshold with the highest mAP50.
for conf in conf_values:
    cmd = f"python val.py --task test --data ../data/yolo_{names}/data_all_to_hard.yaml --img {img_size} --weight {weight_path}/weights/best.pt --conf {conf} --half"
    # stderr is merged into stdout so the summary row is found wherever
    # val.py writes it.
    result = subprocess.run(cmd, shell=True, text=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT).stdout
    print(result)

    print("===============================================================================")
    print("confidence score :", conf)
    match = summary_re.search(result)
    if match is None:
        print(f"Failed to extract mAP and 6th value for conf: {conf}")
        # Close the separator pair on the failure path as well.
        print("===========================================================================================")
        continue

    mAP_value = float(match.group(1))    # mAP50 column
    sixth_value = float(match.group(2))  # mAP50-95 column
    print(f"For conf: {conf}, mAP50: {mAP_value}, mAP50-95: {sixth_value}")
    print("===========================================================================================")

    # Strict '>' keeps the first (lowest) threshold on ties.
    if mAP_value > best_map:
        best_map = mAP_value
        best_conf = conf

print(f"Best --conf value: {best_conf} with mAP: {best_map}")

### Image Extraction - Hard Cases
#### Paper Section: 4.4 Result Interpretation (continued)
This section runs detection on the Hard test images with the final confidence threshold chosen from the validation results and saves the detection outputs.

In [None]:
# Confidence threshold passed to detect.py. It is set by hand here (the
# author's chosen "best confidence"), not read from best_conf.
final_conf = 0.25

# Sub-directory name for this run's results under ./output.
detect_name = f"{names}_to_hard_test_result_{img_size}_{batch}_{epoch}_{model.split('.')[0]}"

# Assemble the detect.py command line one option at a time.
cmd = " ".join([
    "python detect.py",
    f"--conf {final_conf}",
    f"--weights {weight_path}/weights/best.pt",
    f"--img {img_size}",
    "--source ../data/yolo_hard/test/images",
    "--save-txt",
    "--save-conf",
    "--project ./output",
    f"--name={detect_name}",
    "--exist-ok",
])
os.system(cmd)