# 플라스틱 이미지 Annotation 

## 1. Label Studio(https://labelstud.io) 설치 및 실행
1. 설치: `!pip install -U label-studio`
1. 실행: `label-studio`
1. Plastic file 5장 `COCO *.json` 포맷으로 annotation

## 2. Annotation 완료 디렉토리 구성
> **plastic/** <br>
ㄴ annotations/ <br>
ㄴ images/

## 3. COCO JSON to YOLO TXT

In [4]:
import os
import json
import glob
from tqdm import tqdm
import shutil
from pycocotools.coco import COCO

def make_folders(path="output"):
    """Create a fresh, empty directory at *path* and return *path*.

    Anything that already exists at *path* is removed (recursively) first,
    so the returned directory never contains leftovers from an earlier run.
    Missing parent directories are created as needed.
    """
    already_present = os.path.exists(path)
    if already_present:
        # Start from a clean slate rather than mixing old and new files.
        shutil.rmtree(path)

    os.makedirs(path)
    return path

def convert_bbox_coco2yolo(img_width, img_height, bbox):
    """Convert one COCO bounding box into YOLO format.

    Args:
        img_width: Width of the image in pixels.
        img_height: Height of the image in pixels.
        bbox: COCO box ``[x_top_left, y_top_left, width, height]`` in pixels.

    Returns:
        ``[x_center, y_center, width, height]`` where every value is divided
        by the corresponding image dimension (x/width by ``img_width``,
        y/height by ``img_height``).
    """
    left, top, box_w, box_h = bbox

    # Scale factors that map pixel coordinates to image-relative fractions.
    scale_x = 1.0 / img_width
    scale_y = 1.0 / img_height

    # YOLO anchors the box at its center instead of its top-left corner.
    center_x = left + box_w / 2.0
    center_y = top + box_h / 2.0

    return [
        center_x * scale_x,
        center_y * scale_y,
        box_w * scale_x,
        box_h * scale_y,
    ]


def convert_coco_json_to_yolo_bbox(input_path, output_path):
    """Convert COCO JSON annotations into YOLO txt labels.

    Reads every ``*.json`` file in ``input_path/annotations`` and copies
    ``input_path/images`` to ``output_path/images``. Under *output_path* it
    writes:

    * ``labels/<image name without extension>.txt`` - one line per
      annotation: ``class x_center y_center width height`` (normalized).
    * ``_darknet.labels`` - one category name per line. The file is rewritten
      for every JSON file, so the last processed file wins.

    Existing ``labels`` and ``images`` folders under *output_path* are deleted
    first, so the conversion can be re-run safely.

    Args:
        input_path: Dataset root containing ``annotations/`` and ``images/``.
        output_path: Destination root for the YOLO-formatted dataset.

    Raises:
        KeyError: If an annotation references a ``category_id`` that is not
            listed in the same file's ``categories``.
    """
    labels_dir = make_folders(output_path + '/labels')

    # copytree refuses to write into an existing directory, so clear a copy
    # left behind by a previous run (same policy as make_folders).
    images_dir = output_path + '/images'
    if os.path.exists(images_dir):
        shutil.rmtree(images_dir)
    shutil.copytree(input_path + '/images', images_dir)

    # Sorted so that "last file wins" for _darknet.labels is deterministic.
    json_files = sorted(glob.glob(input_path + '/annotations/*.json'))

    for json_file in json_files:
        # JSON interchange is UTF-8; do not depend on the platform locale.
        with open(json_file, encoding="utf-8") as f:
            json_data = json.load(f)

        # YOLO class indices are 0-based positions in the names file. Map each
        # COCO category id to its position instead of assuming ids start at 1,
        # so 0-based or non-contiguous ids stay consistent with the names file.
        class_index = {}
        label_file = os.path.join(output_path, "_darknet.labels")
        with open(label_file, "w", encoding="utf-8") as f:
            categories = tqdm(json_data["categories"], desc="Categories")
            for index, category in enumerate(categories):
                category_name = category["name"]
                f.write(f"{category_name}\n")
                class_index[category["id"]] = index

        # Group annotations by image once instead of rescanning per image.
        annos_by_image = {}
        for anno in json_data["annotations"]:
            annos_by_image.setdefault(anno["image_id"], []).append(anno)

        for image in tqdm(json_data["images"], desc="Annotation txt for each iamge"):
            img_id = image["id"]
            img_name = image["file_name"]
            img_width = image["width"]
            img_height = image["height"]

            # file_name may carry a directory prefix (with either separator)
            # and extra dots; keep only the bare file name minus its extension.
            base_name = os.path.basename(img_name.replace("\\", "/"))
            stem = os.path.splitext(base_name)[0]
            anno_txt = os.path.join(labels_dir, stem + ".txt")

            # A label file is written even for images without annotations.
            with open(anno_txt, "w") as f:
                for anno in annos_by_image.get(img_id, []):
                    category = class_index[anno["category_id"]]
                    bbox_COCO = anno["bbox"]  # [x_top_left, y_top_left, w, h] in pixels
                    x, y, w, h = convert_bbox_coco2yolo(img_width, img_height, bbox_COCO)
                    f.write(f"{category} {x:.6f} {y:.6f} {w:.6f} {h:.6f}\n")

    print("Converting COCO Json to YOLO BBox txt finished!")

In [5]:
# Source dataset (COCO layout) and destination for the YOLO-formatted copy.
input_path = '../dataset/plastic'
output_path = '../dataset/plastic-yolo'

convert_coco_json_to_yolo_bbox(input_path, output_path)

Categories: 100%|██████████| 4/4 [00:00<?, ?it/s]
Annotation txt for each iamge: 100%|██████████| 1/1 [00:00<00:00, 1001.27it/s]
Categories: 100%|██████████| 4/4 [00:00<?, ?it/s]
Annotation txt for each iamge: 100%|██████████| 1/1 [00:00<00:00, 264.49it/s]
Categories: 100%|██████████| 4/4 [00:00<?, ?it/s]
Annotation txt for each iamge: 100%|██████████| 1/1 [00:00<00:00, 324.91it/s]
Categories: 100%|██████████| 4/4 [00:00<?, ?it/s]
Annotation txt for each iamge: 100%|██████████| 1/1 [00:00<00:00, 499.32it/s]
Categories: 100%|██████████| 4/4 [00:00<?, ?it/s]
Annotation txt for each iamge: 100%|██████████| 1/1 [00:00<00:00, 500.27it/s]
Categories: 100%|██████████| 4/4 [00:00<?, ?it/s]
Annotation txt for each iamge: 100%|██████████| 1/1 [00:00<00:00, 1001.27it/s]
Categories: 100%|██████████| 4/4 [00:00<?, ?it/s]
Annotation txt for each iamge: 100%|██████████| 1/1 [00:00<00:00, 232.14it/s]
Categories: 100%|██████████| 4/4 [00:00<?, ?it/s]
Annotation txt for each iamge: 100%|██████████| 1/1 

Converting COCO Json to YOLO BBox txt finished!





## 4. Train/Val Split 폴더로 이동 

In [6]:
import glob
import random
import os
import shutil

# Collect the image and label files produced by the COCO -> YOLO conversion.
PATH = '../dataset/plastic-yolo/'
img_paths = sorted(glob.glob(PATH+'images/**.jpg'))
txt_paths = sorted(glob.glob(PATH+'labels/**.txt'))

# Fail early with a clear message instead of an obscure unpacking error below.
if not img_paths:
    raise FileNotFoundError(f"No .jpg images found in {PATH}images")


def _stem(path):
    """Return the file name of *path* without directory and extension."""
    return os.path.splitext(os.path.basename(path))[0]


# Pair every image with its label by file name rather than by sorted position:
# positional pairing silently mismatches as soon as one file is missing.
txt_by_stem = {_stem(p): p for p in txt_paths}
img_stems = [_stem(p) for p in img_paths]
unpaired = sorted(set(img_stems) ^ set(txt_by_stem))
if unpaired:
    # Stop before any file is moved so the dataset is not left half-split.
    raise ValueError(f"Image/label files without a counterpart: {unpaired}")
img_txt = [(img, txt_by_stem[stem]) for img, stem in zip(img_paths, img_stems)]

# Calculate number of files for training; the remainder is used for validation
data_size = len(img_txt)
r = 0.7  # fraction of the data assigned to the training split
train_size = int(data_size * r)

# Shuffle images and labels together (fixed seed keeps the split reproducible)
random.seed(43)
random.shuffle(img_txt)
img_paths, txt_paths = zip(*img_txt)

# Now split them
train_img_paths = img_paths[:train_size]
train_txt_paths = txt_paths[:train_size]

valid_img_paths = img_paths[train_size:]
valid_txt_paths = txt_paths[train_size:]

# Destination folders for the train / validation splits
train_images_folder = PATH+'train/images'
train_labels_folder = PATH+'train/labels'
valid_images_folder = PATH+'val/images'
valid_labels_folder = PATH+'val/labels'

# makedirs also creates the train/ and val/ parents and, unlike mkdir,
# tolerates folders that already exist.
os.makedirs(train_images_folder, exist_ok=True)
os.makedirs(train_labels_folder, exist_ok=True)
os.makedirs(valid_images_folder, exist_ok=True)
os.makedirs(valid_labels_folder, exist_ok=True)

def move(paths, folder):
    """Move every file listed in *paths* into the directory *folder*."""
    for source in paths:
        shutil.move(source, folder)

# Relocate each split (images and labels) into its destination folder.
for paths, folder in (
    (train_img_paths, train_images_folder),
    (train_txt_paths, train_labels_folder),
    (valid_img_paths, valid_images_folder),
    (valid_txt_paths, valid_labels_folder),
):
    move(paths, folder)

# Remove the source folders; os.rmdir only succeeds on empty directories,
# so leftover files surface as an error instead of being deleted.
for leftover in ('images', 'labels'):
    os.rmdir(PATH+leftover)