<a href="https://colab.research.google.com/github/rahiakela/computer-vision-research-and-practice/blob/main/hands-on-computer-vision-with-detectron2/02-data-preparation/02_annotation_format_conversions.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Annotation format conversions

In [None]:
!pip install pylabel

In [None]:
# Download the dataset and unzip it
!wget https://github.com/PacktPublishing/Hands-On-Computer-Vision-with-Detectron2/raw/main/datasets/yolo.zip
!unzip yolo.zip

In [None]:
!sudo apt-get install tree

In [4]:
# Show the layout of the extracted YOLO dataset (classes file, images, labels).
!tree yolo

[01;34myolo[0m
├── [00mclasses.txt[0m
├── [01;34mimages[0m
│   └── [01;34mtest[0m
│       ├── [01;35m00000_102.jpg[0m
│       └── [01;35m00003_154.jpg[0m
└── [01;34mlabels[0m
    └── [01;34mtest[0m
        ├── [00m00000_102.txt[0m
        └── [00m00003_154.txt[0m

4 directories, 5 files


In [5]:
import os
import shutil
from glob import glob
from tqdm import tqdm

from pylabel import importer

## Converting YOLO to COCO datasets

In [6]:
# Source folders of the YOLO dataset: one .txt label file per image.
annotations_path = "yolo/labels/test"
images_path = "yolo/images/test"
# Target folder that will hold the images and the COCO annotation file.
coco_dir = "coco/test"
# exist_ok=True keeps this cell re-runnable when the folder already exists.
os.makedirs(coco_dir, exist_ok=True)

In [7]:
# Gather the YOLO label files and the matching images.
txt_files = glob(os.path.join(annotations_path, "*.txt"))
img_files = glob(os.path.join(images_path, "*.jpg"))

# Stage everything side by side in coco_dir: labels first, then images
# (one progress bar per group).
for group in (txt_files, img_files):
  for src in tqdm(group):
    shutil.copy(src, coco_dir)

100%|██████████| 2/2 [00:00<00:00, 1160.89it/s]
100%|██████████| 2/2 [00:00<00:00, 2559.06it/s]


In [8]:
# Read the class names from the YOLO dataset, one name per line.
# Blank lines are skipped: splitting the raw text on "\n" turns a trailing
# newline into an empty class name that would be handed to the importer.
# strip() also drops a stray "\r" from files with Windows line endings.
with open("yolo/classes.txt", "r") as f:
  classes = [line.strip() for line in f if line.strip()]

# Load the YOLO labels and images staged in coco_dir into a pylabel dataset.
dataset = importer.ImportYoloV5(path=coco_dir, cat_names=classes, name="brain tumors")

Importing YOLO files...: 100%|██████████| 4/4 [00:00<00:00, 87.48it/s]


In [9]:
# Export the loaded dataset to a single COCO-format JSON file inside coco_dir.
coco_file = os.path.join(coco_dir, "_annotations.coco.json")
# Detectron requires the label index to start with index 1,
# hence cat_id_index=1. The call is the last expression of the cell, so its
# return value (the list of written files) is displayed as the cell output.
dataset.export.ExportToCoco(coco_file, cat_id_index=1)

Exporting to COCO file...: 100%|██████████| 2/2 [00:00<00:00, 324.74it/s]


['coco/test/_annotations.coco.json']

In [10]:
# Remove the staged YOLO .txt label copies from the COCO directory; the
# exported COCO JSON now carries the annotations.
for f in txt_files:
  # Build the target from the file name instead of str.replace on the whole
  # path, which rewrites every occurrence of the source prefix.
  staged_copy = os.path.join(coco_dir, os.path.basename(f))
  if os.path.exists(staged_copy):  # already removed when the cell is re-run
    os.remove(staged_copy)

The conversion is now complete.

In [11]:
# Inspect the resulting COCO directory layout.
!tree coco

[01;34mcoco[0m
└── [01;34mtest[0m
    ├── [01;35m00000_102.jpg[0m
    ├── [01;35m00003_154.jpg[0m
    └── [00m_annotations.coco.json[0m

1 directory, 3 files


## Converting Pascal VOC to COCO datasets

In [None]:
# Download the dataset and unzip it
!wget https://github.com/PacktPublishing/Hands-On-Computer-Vision-with-Detectron2/raw/main/datasets/voc.zip
!unzip voc.zip

In [13]:
# Show the layout of the extracted VOC dataset (one .xml annotation per .jpg image).
!tree voc

[01;34mvoc[0m
└── [01;34mtest[0m
    ├── [01;35m00000_102.jpg[0m
    ├── [00m00000_102.xml[0m
    ├── [01;35m00003_154.jpg[0m
    └── [00m00003_154.xml[0m

1 directory, 4 files


In [14]:
# Source folder of the Pascal VOC dataset (images and .xml annotations together).
voc_dir = "voc/test"
# Target folder for the COCO output.
# NOTE(review): this is the same folder the YOLO conversion above wrote to, so
# the export below overwrites that section's _annotations.coco.json and the
# importer sees any files still left there — confirm this reuse is intended.
coco_dir = "coco/test"
# exist_ok=True keeps this cell re-runnable when the folder already exists.
os.makedirs(coco_dir, exist_ok=True)

In [16]:
# Gather the VOC annotation files and the images that sit next to them.
xml_files = glob(os.path.join(voc_dir, "*.xml"))
img_files = glob(os.path.join(voc_dir, "*.jpg"))

# Stage everything in coco_dir: annotations first, then images
# (one progress bar per group).
for group in (xml_files, img_files):
  for src in tqdm(group):
    shutil.copy(src, coco_dir)

100%|██████████| 2/2 [00:00<00:00, 3470.67it/s]
100%|██████████| 2/2 [00:00<00:00, 2611.65it/s]


In [17]:
# Load the VOC .xml annotations staged in coco_dir into a pylabel dataset.
dataset = importer.ImportVOC(coco_dir, name="brain tumors")

Importing VOC files...: 100%|██████████| 7/7 [00:00<00:00, 813.44it/s]


In [18]:
# Export the loaded dataset to a single COCO-format JSON file inside coco_dir.
coco_file = os.path.join(coco_dir, "_annotations.coco.json")

# Detectron requires the label index to start with index 1,
# hence cat_id_index=1. The call is the last expression of the cell, so its
# return value (the list of written files) is displayed as the cell output.
dataset.export.ExportToCoco(coco_file, cat_id_index=1)

Exporting to COCO file...: 100%|██████████| 2/2 [00:00<00:00, 280.72it/s]


['coco/test/_annotations.coco.json']

In [19]:
# Remove the staged VOC .xml annotation copies from the COCO directory; the
# exported COCO JSON now carries the annotations.
for f in xml_files:
  # Build the target from the file name instead of str.replace on the whole
  # path, which rewrites every occurrence of the source prefix.
  staged_copy = os.path.join(coco_dir, os.path.basename(f))
  if os.path.exists(staged_copy):  # already removed when the cell is re-run
    os.remove(staged_copy)

In [20]:
# Inspect the resulting COCO directory layout.
!tree coco

[01;34mcoco[0m
└── [01;34mtest[0m
    ├── [01;35m00000_102.jpg[0m
    ├── [00m00000_102.txt[0m
    ├── [01;35m00003_154.jpg[0m
    ├── [00m00003_154.txt[0m
    └── [00m_annotations.coco.json[0m

1 directory, 5 files
