## Custom COCO data to YOLO format 

In [None]:
from pycocotools.coco import COCO
import requests
import shutil
import os
import json

### make YOLO structure

In [None]:
# YOLO dataset layout: images and label files are kept in parallel
# train/val folders under ./coco.
train_image_dir = './coco/images/train'
val_image_dir = './coco/images/val'
train_anno_dir = './coco/labels/train'
val_anno_dir = './coco/labels/val'

In [None]:
# Create the four YOLO folders; exist_ok=True makes re-running this cell safe.
for yolo_dir in (train_image_dir, val_image_dir, train_anno_dir, val_anno_dir):
    os.makedirs(yolo_dir, exist_ok=True)

In [None]:
# Load the train and val instance-annotation files through the pycocotools
# COCO helper; these objects are used below to select images by category.
coco = COCO('annotations/instances_train2017.json')
coco_val = COCO('annotations/instances_val2017.json')

### copy images to YOLO structure

In [None]:
# COCO category names whose images are copied into the YOLO folders.
# Edit this list to select different classes. Label rows are only written
# for categories that also appear in matching_category_ids / label_check.

class_name = ["dining table", "couch", "bed", "tv", "microwave", "toaster", "refrigerator", "oven", "hair drier"]

In [None]:
# Copy every train/val image that contains at least one of the selected
# classes from the raw COCO folders into the YOLO image folders.
for label in class_name:
    name = [label]

    # Category name -> category ids -> image ids -> image records (train).
    catIds = coco.getCatIds(catNms=name)
    imgIds = coco.getImgIds(catIds=catIds)
    images = coco.loadImgs(imgIds)

    # Same lookup chain for the validation split.
    catIds_val = coco_val.getCatIds(catNms=name)
    imgIds_val = coco_val.getImgIds(catIds=catIds_val)
    images_val = coco_val.loadImgs(imgIds_val)

    # Iterate the records directly (no manual index counter) and copy into
    # the configured YOLO folders instead of repeating the path literals.
    for im in images:
        shutil.copy('./train2017/' + im["file_name"], train_image_dir)
    print(len(images),"train images copy complete")

    for im in images_val:
        shutil.copy('./val2017/' + im["file_name"], val_image_dir)
    print(len(images_val),"validation images copy complete")

### make annotations : Train

In [None]:
# COCO class name -> class index written to the YOLO label files.
# The indices are chosen so that this dataset can be combined with one built
# from OpenImageDatasetV7.

class_mapping_custom = {
    "dining table": 12,
    "couch": 6,
    "bed": 1,
    "tv": 26,
    "microwave": 14,
    "toaster": 27,
    "refrigerator": 18,
    "oven": 15,
    "hair drier": 9    
}

In [None]:
# COCO category_id -> COCO class name, for the classes that are exported.
matching_category_ids = {
    67: "dining table",
    63: "couch",
    65: "bed",
    72: "tv",
    78: "microwave",
    80: "toaster",
    82: "refrigerator",
    79: "oven",
    89: "hair drier",
}

# Category ids to keep when writing labels: exactly the keys above, in the
# same order, so the two can never drift apart.
label_check = list(matching_category_ids)

In [None]:
# The annotation file has the top-level keys: info, licenses, images,
# annotations, categories.
# Each entry of categories has: supercategory, id, name (the class name).

# Pass the encoding explicitly so the read does not depend on the platform's
# default locale encoding.
with open('./annotations/instances_train2017.json', "r", encoding="utf-8") as f:
    train_data = json.load(f)

In [None]:
def get_img_ann(image_id):
    """Collect every training annotation that belongs to ``image_id``.

    Each annotation record carries the fields: segmentation, area, iscrowd,
    image_id, bbox, category_id, id.

    Returns the matching annotations in dataset order, or ``None`` when the
    image has no annotation at all.
    """
    matches = [
        entry
        for entry in train_data['annotations']
        if entry['image_id'] == image_id
    ]
    # An empty list is reported as None.
    return matches or None

In [None]:
def get_img(filename):
    """Return the training image record whose ``file_name`` is ``filename``.

    The record holds, among others, the image ``id``, ``width`` and
    ``height``. Returns ``None`` when no image has that file name.
    """
    candidates = (
        record
        for record in train_data['images']
        if record['file_name'] == filename
    )
    # First match wins.
    return next(candidates, None)

In [None]:
def get_class(image_id):
    """Return the ``category_id`` of the first training annotation of an image.

    Returns ``None`` when the image has no annotation.
    """
    category_ids = (
        entry['category_id']
        for entry in train_data['annotations']
        if entry['image_id'] == image_id
    )
    return next(category_ids, None)

In [None]:
# Names of the files currently in the YOLO training image folder; the
# label-writing loop below iterates over them.
file_names = os.listdir('./coco/images/train')

In [None]:
# Write one YOLO label file per copied training image. Each row is
# "<class> <x_centre> <y_centre> <width> <height>", with the box values
# normalised to 0~1 by the image size.

# Index the dataset once: a dict lookup per image replaces a full scan of
# train_data for every file, and the loop no longer depends on which
# get_img / get_img_ann definition (train or val) was executed last.
train_images_by_name = {}
for img in train_data['images']:
    # setdefault keeps the first record per file name, like a linear search.
    train_images_by_name.setdefault(img['file_name'], img)

train_anns_by_image = {}
for ann in train_data['annotations']:
    train_anns_by_image.setdefault(ann['image_id'], []).append(ann)

for filename in file_names:
    img = train_images_by_name.get(filename)
    if img is None:
        # Not a known training image (e.g. a stray file in the folder).
        continue

    img_w = img['width']
    img_h = img['height']

    img_ann = train_anns_by_image.get(img['id'])
    if not img_ann:
        continue

    # Strip the extension instead of assuming a 12-character stem.
    label_name = os.path.splitext(filename)[0]

    # NOTE(review): mode 'a' appends, so re-running this cell adds duplicate
    # rows to existing label files - confirm whether 'w' is intended.
    with open(f"./coco/labels/train/{label_name}.txt", 'a') as file_object:
        for ann in img_ann:
            cat_id = ann['category_id']

            # custom classes only
            if cat_id not in label_check:
                continue

            cn = matching_category_ids[cat_id]
            current_category = class_mapping_custom[cn]

            # bbox is read as [x, y, width, height] in pixels.
            x, y, w, h = ann['bbox']

            # Midpoints, normalised to 0~1.
            x_centre = (x + (x + w)) / 2 / img_w
            y_centre = (y + (y + h)) / 2 / img_h

            # Six decimal places per value.
            file_object.write(
                f"{current_category} {x_centre:.6f} {y_centre:.6f} "
                f"{w / img_w:.6f} {h / img_h:.6f}\n"
            )

### make annotations : Val

In [None]:
# Load the raw validation annotations (same structure as the training file).
# Pass the encoding explicitly so the read does not depend on the platform's
# default locale encoding.
with open('./annotations/instances_val2017.json', "r", encoding="utf-8") as f:
    val_data = json.load(f)

In [None]:
# Names of the files currently in the YOLO validation image folder; the
# label-writing loop below iterates over them.
val_file_names = os.listdir('./coco/images/val')

In [None]:
def get_img_ann(image_id):
    """Collect every validation annotation that belongs to ``image_id``.

    This redefinition replaces the training-set function of the same name,
    so from here on lookups read ``val_data``.

    Returns the matching annotations in dataset order, or ``None`` when the
    image has no annotation at all.
    """
    matches = [
        entry
        for entry in val_data['annotations']
        if entry['image_id'] == image_id
    ]
    # An empty list is reported as None.
    return matches or None

In [None]:
def get_img(filename):
    """Return the validation image record whose ``file_name`` is ``filename``.

    This redefinition replaces the training-set function of the same name.
    Returns ``None`` when no image has that file name.
    """
    candidates = (
        record
        for record in val_data['images']
        if record['file_name'] == filename
    )
    # First match wins.
    return next(candidates, None)

In [None]:
def get_class(image_id):
    """Return the ``category_id`` of the first validation annotation of an image.

    This redefinition replaces the training-set function of the same name.
    Returns ``None`` when the image has no annotation.
    """
    category_ids = (
        entry['category_id']
        for entry in val_data['annotations']
        if entry['image_id'] == image_id
    )
    return next(category_ids, None)

In [None]:
# Write one YOLO label file per copied validation image. Each row is
# "<class> <x_centre> <y_centre> <width> <height>", with the box values
# normalised to 0~1 by the image size.

# Index the dataset once: a dict lookup per image replaces a full scan of
# val_data for every file, and the loop no longer depends on which
# get_img / get_img_ann definition (train or val) was executed last.
val_images_by_name = {}
for img in val_data['images']:
    # setdefault keeps the first record per file name, like a linear search.
    val_images_by_name.setdefault(img['file_name'], img)

val_anns_by_image = {}
for ann in val_data['annotations']:
    val_anns_by_image.setdefault(ann['image_id'], []).append(ann)

for filename in val_file_names:
    img = val_images_by_name.get(filename)
    if img is None:
        # Not a known validation image (e.g. a stray file in the folder).
        continue

    img_w = img['width']
    img_h = img['height']

    img_ann = val_anns_by_image.get(img['id'])
    if not img_ann:
        continue

    # Strip the extension instead of assuming a 12-character stem.
    label_name = os.path.splitext(filename)[0]

    # NOTE(review): mode 'a' appends, so re-running this cell adds duplicate
    # rows to existing label files - confirm whether 'w' is intended.
    with open(f"./coco/labels/val/{label_name}.txt", 'a') as file_object:
        for ann in img_ann:
            cat_id = ann['category_id']

            # custom classes only
            if cat_id not in label_check:
                continue

            cn = matching_category_ids[cat_id]
            current_category = class_mapping_custom[cn]

            # bbox is read as [x, y, width, height] in pixels.
            x, y, w, h = ann['bbox']

            # Midpoints, normalised to 0~1.
            x_centre = (x + (x + w)) / 2 / img_w
            y_centre = (y + (y + h)) / 2 / img_h

            # Six decimal places per value.
            file_object.write(
                f"{current_category} {x_centre:.6f} {y_centre:.6f} "
                f"{w / img_w:.6f} {h / img_h:.6f}\n"
            )