In [1]:
import os
from pathlib import Path
import shutil

import cv2
import numpy as np
from PIL import Image
from sklearn.model_selection import train_test_split

## Transfer annotated data to cvat_annotation

* Removes images with no annotations
* Combines multiple annotation folders

In [2]:
# Class being imported; it also names the output folder under cvat_annotation.
class_name = "table"
# Lookup used by the later label-rewrite cells to turn a numeric class id
# (the first field of each label row) into a class name.
class_names = ["table", "gas_cylinder", "burner", "wok", "box"]

# Export folders to merge: one folder name per downloaded task.
data_dirs = [
    Path(r"C:\Users\Admin\Downloads") / export_name
    for export_name in ("task_label table_combined-2021_05_26_01_41_19-yolo 1.1",)
]

# Output layout:
#   <output_dir>/                              merged images + label files
#   <output_dir>/data_split/{train,test}/{images,labels}/
output_dir = Path.cwd().parents[1] / "cvat_annotation" / class_name
data_split_dir = output_dir / "data_split"
train_dir, test_dir = data_split_dir / "train", data_split_dir / "test"
train_images_dir, train_labels_dir = train_dir / "images", train_dir / "labels"
test_images_dir, test_labels_dir = test_dir / "images", test_dir / "labels"

# Create every folder the following cells write into (no-op when present).
for required_dir in (
    output_dir,
    train_images_dir,
    train_labels_dir,
    test_images_dir,
    test_labels_dir,
):
    required_dir.mkdir(parents=True, exist_ok=True)

In [3]:
# Convert all to jpg
# Every PNG in the export folders is re-encoded as an RGB JPEG next to the
# original and the PNG is deleted, so the steps below only deal with *.jpg.
for data_dir in data_dirs:
    # sorted() takes a snapshot of the listing before the loop adds and
    # removes files in the same directory.
    for path in sorted((data_dir / "obj_train_data").glob("*.png")):
        # Close the source image before deleting it; a handle that is still
        # open can make os.remove fail on Windows.
        with Image.open(path) as im:
            im.convert("RGB").save(path.with_suffix(".jpg"), "JPEG")
        os.remove(path)

# Copy each image and its same-stem label file into output_dir under a
# sequential "<class_name>_scraped_<n>" name. The listing is sorted so the
# numbering is the same on every run.
count = 0
for data_dir in data_dirs:
    for path in sorted((data_dir / "obj_train_data").glob("*.jpg")):
        shutil.copy(path, output_dir / f"{class_name}_scraped_{count}.jpg")
        shutil.copy(path.with_suffix(".txt"), output_dir / f"{class_name}_scraped_{count}.txt")
        count += 1

# Re-encode any collected file that reports itself as animated into a plain
# RGB JPEG (presumably multi-frame files that carry a .jpg suffix - confirm).
for path in sorted(output_dir.glob("*.jpg")):
    with Image.open(path) as im:
        # Image types without animation support may not define is_animated,
        # hence getattr with a default instead of catching AttributeError
        # around the whole conversion (which also hid unrelated errors).
        rgb_frame = im.convert("RGB") if getattr(im, "is_animated", False) else None
    if rgb_frame is not None:
        # Written only after the source handle is closed, because the target
        # is the very file that was just read.
        rgb_frame.save(path, "JPEG")

In [4]:
# Split data
# Sort before splitting: glob() yields entries in filesystem order, and the
# fixed random_state only reproduces the same split when the input order is
# itself stable.
image_list = sorted(output_dir.glob("*.jpg"))
# 70 % of the images go to train, 30 % to test.
image_train, image_test = train_test_split(image_list, test_size=0.3, random_state=1234)


In [5]:
# Copy every image, together with its same-stem .txt label file, into the
# images/ and labels/ folders of the split it was assigned to.
for split_paths, images_dir, labels_dir in (
    (image_train, train_images_dir, train_labels_dir),
    (image_test, test_images_dir, test_labels_dir),
):
    for path in split_paths:
        shutil.copy(path, images_dir / path.name)
        shutil.copy(path.with_suffix(".txt"), labels_dir / f"{path.stem}.txt")

In [6]:
# Rewrite every train label file in place: drop duplicate rows and replace the
# leading numeric class id of each row with its name from class_names.
for path in train_labels_dir.iterdir():
    with open(path, "r+") as file:
        rows = [line.split() for line in file.read().strip().splitlines()]
        file.seek(0)
        # np.unique(..., axis=0) removes repeated rows and returns them sorted.
        for line_parts in np.unique(rows, axis=0):
            token = str(line_parts[0])
            # A first field that is already a class name comes from an earlier
            # run of this cell; keep it so that re-running does not fail on
            # int("<name>"). Anything else must be a valid index.
            label = token if token in class_names else class_names[int(token)]
            file.write(f"{label} {' '.join(line_parts[1:])}\n")
        # Cut the file at the end of the new content. Without this, a rewrite
        # that is shorter than the old text (e.g. after duplicates are
        # removed) leaves stale bytes from the old content at the end.
        file.truncate()

In [7]:
# Rewrite every test label file in place: drop duplicate rows and replace the
# leading numeric class id of each row with its name from class_names.
for path in test_labels_dir.iterdir():
    with open(path, "r+") as file:
        rows = [line.split() for line in file.read().strip().splitlines()]
        file.seek(0)
        # np.unique(..., axis=0) removes repeated rows and returns them sorted.
        for line_parts in np.unique(rows, axis=0):
            token = str(line_parts[0])
            # A first field that is already a class name comes from an earlier
            # run of this cell; keep it so that re-running does not fail on
            # int("<name>"). Anything else must be a valid index.
            label = token if token in class_names else class_names[int(token)]
            file.write(f"{label} {' '.join(line_parts[1:])}\n")
        # Cut the file at the end of the new content. Without this, a rewrite
        # that is shorter than the old text (e.g. after duplicates are
        # removed) leaves stale bytes from the old content at the end.
        file.truncate()

## Transfer from syndata to oidv6_data_converted

Single class

In [4]:
# Class name used as file-name prefix and as suffix of the output folder.
class_name = "cart"

# Input: "hawking_output_dir", a sibling of the working directory.
data_dir = Path.cwd().parent / "hawking_output_dir"
# Output: per-class dataset folder inside the grandparent of the working directory.
output_dir = Path.cwd().parents[1] / f"oidv6_data_converted_{class_name}"
output_image_dir, output_label_dir = output_dir / "images", output_dir / "labels"

# Make sure both output roots exist (no-op when already present).
for required_dir in (output_image_dir, output_label_dir):
    required_dir.mkdir(parents=True, exist_ok=True)

In [5]:
# Everything from the synthetic-data folder is placed in the "train" subset.
subset = "train"
image_subset_dir = output_image_dir / subset
label_subset_dir = output_label_dir / subset
image_subset_dir.mkdir(parents=True, exist_ok=True)
label_subset_dir.mkdir(parents=True, exist_ok=True)

for path in (data_dir / "images").glob("*.jpg"):
    path_stem = path.stem
    shutil.copy(path, image_subset_dir / f"{class_name}_{path_stem}.jpg")

    # The annotation file is named after the part of the image stem that
    # precedes the first underscore.
    annotation_path = data_dir / "annotations" / f"{path_stem.split('_')[0]}.txt"
    with open(annotation_path, "r") as infile:
        with open(label_subset_dir / f"{class_name}_{path_stem}.txt", "w") as outfile:
            rows = [line.split() for line in infile.read().strip().splitlines()]
            # De-duplicate the rows and overwrite the first field (class id) with 0.
            outfile.writelines(
                f"0 {' '.join(fields[1:])}\n" for fields in np.unique(rows, axis=0)
            )

## Transfer from cvat annotated to oidv6_data_converted

Single class

In [2]:
# Class to export from the CVAT data into its own single-class dataset.
class_name = "burner"
# True: keep only rows labelled class_name and write them with class id 0.
# False: copy the label files unchanged.
rewrite = True

# Input and output both live in the grandparent of the working directory.
data_dir = Path.cwd().parents[1] / "cvat_annotation"
output_dir = Path.cwd().parents[1] / f"oidv6_data_converted_{class_name}"
output_image_dir, output_label_dir = output_dir / "images", output_dir / "labels"

# Make sure both output roots exist (no-op when already present).
for required_dir in (output_image_dir, output_label_dir):
    required_dir.mkdir(parents=True, exist_ok=True)

In [3]:
# Copy the train/test split of class_name from the CVAT folder into the
# single-class dataset, either filtering the labels or copying them verbatim.
for subset in ("train", "test"):
    image_subset_dir = output_image_dir / subset
    label_subset_dir = output_label_dir / subset
    image_subset_dir.mkdir(parents=True, exist_ok=True)
    label_subset_dir.mkdir(parents=True, exist_ok=True)

    source_images = data_dir / class_name / "data_split" / subset / "images"
    for path in source_images.glob("*.jpg"):
        shutil.copy(path, image_subset_dir / path.name)

        # Labels sit in the "labels" folder beside the "images" folder.
        source_label = path.parents[1] / "labels" / f"{path.stem}.txt"
        target_label = label_subset_dir / f"{path.stem}.txt"

        if not rewrite:
            shutil.copy(source_label, target_label)
            continue

        with open(source_label, "r") as infile, open(target_label, "w") as outfile:
            rows = [line.split() for line in infile.read().strip().splitlines()]
            # Unique rows only; rows of other classes are dropped and the
            # remaining ones get class id 0.
            for fields in np.unique(rows, axis=0):
                if fields[0] == class_name:
                    outfile.write(f"0 {' '.join(fields[1:])}\n")

## Transfer from cvat annotated to oidv6_data_converted

Multiclass

In [16]:
# Class whose CVAT split is exported; it also names the input/output folders.
class_name = "table"
# Label order of the multiclass dataset: a name's position in this list is the
# class id written to the output label files.
class_names = ["apple", "banana", "orange", "wheelchair", "wok", "box", "table", "tissue", "gas_cylinder", "burner", "cart"]

# All folders live in the grandparent of the working directory.
cvat_data_dir = Path.cwd().parents[1] / "cvat_annotation"
single_class_data_dir = Path.cwd().parents[1] / f"oidv6_data_converted_{class_name}"
output_dir = Path.cwd().parents[1] / f"oidv6_data_converted_{class_name}_multi"
output_image_dir, output_label_dir = output_dir / "images", output_dir / "labels"

# Make sure both output roots exist (no-op when already present).
for required_dir in (output_image_dir, output_label_dir):
    required_dir.mkdir(parents=True, exist_ok=True)

In [17]:
# Copy the train/test split of class_name from the CVAT folder into the
# multiclass dataset, translating class names into their class_names index.
for subset in ("train", "test"):
    image_subset_dir = output_image_dir / subset
    label_subset_dir = output_label_dir / subset
    image_subset_dir.mkdir(parents=True, exist_ok=True)
    label_subset_dir.mkdir(parents=True, exist_ok=True)

    source_images = cvat_data_dir / class_name / "data_split" / subset / "images"
    for path in source_images.glob("*.jpg"):
        shutil.copy(path, image_subset_dir / path.name)

        # Labels sit in the "labels" folder beside the "images" folder.
        source_label = path.parents[1] / "labels" / f"{path.stem}.txt"
        with open(source_label, "r") as infile, open(
            label_subset_dir / f"{path.stem}.txt", "w"
        ) as outfile:
            rows = [line.split() for line in infile.read().strip().splitlines()]
            # Unique rows only; rows whose class is not in class_names are skipped.
            known_rows = (
                fields for fields in np.unique(rows, axis=0) if fields[0] in class_names
            )
            outfile.writelines(
                f"{class_names.index(fields[0])} {' '.join(fields[1:])}\n"
                for fields in known_rows
            )

In [7]:
# Add the images of the single-class dataset whose names match
# "<class_name>_<digit>..." to the multiclass train subset, relabelling every
# row with the class_names index of class_name.
subset = "train"
source_image_dir = single_class_data_dir / "images" / subset
source_label_dir = single_class_data_dir / "labels" / subset

for path in source_image_dir.glob(f"{class_name}_[0-9]*.jpg"):
    shutil.copy(path, output_image_dir / subset / path.name)
    with open(source_label_dir / f"{path.stem}.txt", "r") as infile, open(
        output_label_dir / subset / f"{path.stem}.txt", "w"
    ) as outfile:
        rows = [line.split() for line in infile.read().strip().splitlines()]
        # Unique rows only; the original first field is discarded.
        for fields in np.unique(rows, axis=0):
            outfile.write(f"{class_names.index(class_name)} {' '.join(fields[1:])}\n")

## Transfer from single class oidv6_data_converted to multiclass oidv6_data_converted
Includes relabeling

In [2]:
# Class whose single-class dataset is merged into the combined dataset.
class_name = "gas_cylinder"
# Label order of the combined dataset: the position of class_name in this list
# is the class id written to the relabelled files.
# NOTE(review): this list holds ten names while the output folder is called
# "9classes" - confirm which one is intended.
classes = ["apple", "banana", "orange", "wheelchair", "wok", "box", "table", "tissue", "gas_cylinder", "cart"]

# Both datasets live in the grandparent of the working directory.
dataset_root = Path.cwd().parents[1]
data_dir = dataset_root / f"oidv6_data_converted_{class_name}"
output_dir = dataset_root / f"oidv6_data_converted_9classes"

In [4]:
# Merge the single-class dataset of class_name into the combined dataset,
# relabelling every row with the index of class_name in classes.

# Looked up once, before any file is touched, so that a class missing from
# classes raises ValueError before anything has been copied.
class_index = classes.index(class_name)

for subset in ("train", "test"):
    image_subset_dir = output_dir / "images" / subset
    label_subset_dir = output_dir / "labels" / subset
    # Create the target folders first: shutil.copy and open(..., "w") raise
    # FileNotFoundError when the destination folder does not exist yet.
    image_subset_dir.mkdir(parents=True, exist_ok=True)
    label_subset_dir.mkdir(parents=True, exist_ok=True)

    for path in (data_dir / "images" / subset).glob("*.jpg"):
        shutil.copy(path, image_subset_dir / path.name)
        with open(data_dir / "labels" / subset / f"{path.stem}.txt", "r") as infile, open(
            label_subset_dir / f"{path.stem}.txt", "w"
        ) as outfile:
            rows = [line.split() for line in infile.read().strip().splitlines()]
            # Unique rows only; the original first field (class id) is
            # replaced by the combined-dataset index.
            for line_parts in np.unique(rows, axis=0):
                outfile.write(f"{class_index} {' '.join(line_parts[1:])}\n")