In [1]:
import os
from pathlib import Path
import shutil

import cv2
import numpy as np
from PIL import Image
from sklearn.model_selection import train_test_split

## Transfer annotated data to cvat_annotation

* Removes images with no annotations
* Combines multiple annotation folders

In [54]:
# Class handled by this section; also used as the output folder name.
class_name = "gas_cylinder"

# Task export folders to merge.
# NOTE(review): the download root is an absolute, machine-specific path.
downloads_dir = Path(r"C:\Users\Admin\Downloads")
export_names = (
    "task_label food_stall_gas_cylinder-2021_05_19_05_39_58-yolo 1.1",
    "task_label gas_cylinder_peddler-2021_05_19_09_36_37-yolo 1.1",
    "task_label gas_cylinder_street_vendor-2021_05_19_04_58_51-yolo 1.1",
    "task_label roadside_food_stall_gas_cylinder-2021_05_19_05_47_18-yolo 1.1",
)
data_dirs = [downloads_dir / export_name for export_name in export_names]

# Output layout:
#   <output_dir>/                      merged images and labels
#   <output_dir>/data_split/<subset>/  images/ and labels/ for train and test
output_dir = Path.cwd().parents[1] / "cvat_annotation" / class_name
data_split_dir = output_dir / "data_split"
train_dir = data_split_dir / "train"
test_dir = data_split_dir / "test"
train_images_dir = train_dir / "images"
train_labels_dir = train_dir / "labels"
test_images_dir = test_dir / "images"
test_labels_dir = test_dir / "labels"

# Create every folder that later cells write into.
for directory in (
    output_dir,
    train_images_dir,
    train_labels_dir,
    test_images_dir,
    test_labels_dir,
):
    directory.mkdir(parents=True, exist_ok=True)

In [47]:
# Convert all to jpg
for data_dir in data_dirs:
    # Materialize the listing first: the loop adds and removes files in this folder.
    for path in sorted((data_dir / "obj_train_data").glob("*.png")):
        # Close the PNG before deleting it; an open handle makes os.remove fail on Windows.
        with Image.open(path) as im:
            im.convert("RGB").save(path.with_suffix(".jpg"), "JPEG")
        os.remove(path)

# Merge every export into output_dir as "<class_name>_scraped_<n>.jpg/.txt".
# Sorting keeps the numbering stable between runs.
count = 0
for data_dir in data_dirs:
    for path in sorted((data_dir / "obj_train_data").glob("*.jpg")):
        label_path = path.with_suffix(".txt")
        # Skip images with no annotations (label file missing or empty), as the
        # section description promises. This also avoids copying an image and
        # then failing on its missing label file.
        if not label_path.is_file() or not label_path.read_text().strip():
            continue
        shutil.copy(path, output_dir / f"{class_name}_scraped_{count}.jpg")
        shutil.copy(label_path, output_dir / f"{class_name}_scraped_{count}.txt")
        count += 1

In [52]:
# Split data
# Sort first: glob order depends on the filesystem, and a fixed seed only gives
# a reproducible split when the input order is stable too.
image_list = sorted(output_dir.glob("*.jpg"))
# 70/30 train/test split; random_state pins the shuffle so a re-run reproduces it.
image_train, image_test = train_test_split(image_list, test_size=0.3, random_state=42)


In [55]:
# Copy each split's images and their same-named .txt label files into the
# images/ and labels/ folders of that split (train first, then test).
split_targets = (
    (image_train, train_images_dir, train_labels_dir),
    (image_test, test_images_dir, test_labels_dir),
)
for split_images, images_dir, labels_dir in split_targets:
    for path in split_images:
        shutil.copy(path, images_dir / path.name)
        shutil.copy(path.with_suffix(".txt"), labels_dir / f"{path.stem}.txt")

## Transfer from syndata to oidv6_data_converted

Single class

In [5]:
# Class handled by this section; it is part of the output folder name.
class_name = "gas_cylinder"

# Input folder (read from its images/ and annotations/ subfolders below) and
# the single-class dataset root that receives the converted files.
data_dir = Path.cwd().parent / "hawking_output_dir"
output_dir = Path.cwd().parents[1] / f"oidv6_data_converted_{class_name}"
output_image_dir = output_dir / "images"
output_label_dir = output_dir / "labels"

# Make sure both output roots exist before anything is copied.
for directory in (output_image_dir, output_label_dir):
    directory.mkdir(parents=True, exist_ok=True)

In [6]:
# Everything from this source goes into the train subset.
subset = "train"
subset_image_dir = output_image_dir / subset
subset_label_dir = output_label_dir / subset
subset_image_dir.mkdir(parents=True, exist_ok=True)
subset_label_dir.mkdir(parents=True, exist_ok=True)

for path in (data_dir / "images").glob("*.jpg"):
    path_stem = path.stem
    shutil.copy(path, subset_image_dir / f"{class_name}_{path_stem}.jpg")

    # The annotation file is named after the part of the image stem before the
    # first underscore.
    annotation_path = data_dir / "annotations" / f"{path_stem.split('_')[0]}.txt"
    label_path = subset_label_dir / f"{class_name}_{path_stem}.txt"
    with open(annotation_path, "r") as infile, open(label_path, "w") as outfile:
        rows = [line.split() for line in infile.read().strip().splitlines()]
        # np.unique drops duplicate rows (and sorts them); the first column is
        # replaced with class id 0.
        outfile.writelines(
            f"0 {' '.join(row[1:])}\n" for row in np.unique(rows, axis=0)
        )

## Transfer from cvat annotated to oidv6_data_converted

Single class

In [2]:
# Class handled by this section; it selects the input subfolder and names the output root.
class_name = "gas_cylinder"

# Shared grandparent of the working directory holds both input and output trees.
base_dir = Path.cwd().parents[1]
data_dir = base_dir / "cvat_annotation"
output_dir = base_dir / f"oidv6_data_converted_{class_name}"
output_image_dir = output_dir / "images"
output_label_dir = output_dir / "labels"

# Make sure both output roots exist before anything is copied.
for directory in (output_image_dir, output_label_dir):
    directory.mkdir(parents=True, exist_ok=True)

In [4]:
# Copy the already-split images and labels into the single-class dataset,
# keeping file names unchanged.
for subset in ("train", "test"):
    image_out_dir = output_image_dir / subset
    label_out_dir = output_label_dir / subset
    image_out_dir.mkdir(parents=True, exist_ok=True)
    label_out_dir.mkdir(parents=True, exist_ok=True)

    split_dir = data_dir / class_name / "data_split" / subset
    for path in (split_dir / "images").glob("*.jpg"):
        label_name = f"{path.stem}.txt"
        shutil.copy(path, image_out_dir / path.name)
        # Labels sit in the sibling "labels" folder of the split.
        shutil.copy(split_dir / "labels" / label_name, label_out_dir / label_name)

## Transfer from single class oidv6_data_converted to multiclass oidv6_data_converted
Includes relabeling: each annotation's class id is replaced with the index of `class_name` in `classes`.

In [2]:
# Class whose single-class dataset is merged into the multiclass dataset.
class_name = "gas_cylinder"
# Class list of the multiclass dataset; a class's position in it is its label id.
classes = ["apple", "banana", "orange", "wheelchair", "wok", "box", "table", "tissue", "gas_cylinder"]

data_dir = Path.cwd().parents[1] / f"oidv6_data_converted_{class_name}"
# Derive the class count from `classes` so the folder name cannot drift from the list.
output_dir = Path.cwd().parents[1] / f"oidv6_data_converted_{len(classes)}classes"

In [4]:
# Label id of this class in the multiclass dataset; the lookup is loop-invariant.
class_id = classes.index(class_name)

for subset in ("train", "test"):
    image_out_dir = output_dir / "images" / subset
    label_out_dir = output_dir / "labels" / subset
    # Create the target folders so the cell also works on a fresh output tree.
    image_out_dir.mkdir(parents=True, exist_ok=True)
    label_out_dir.mkdir(parents=True, exist_ok=True)

    for path in (data_dir / "images" / subset).glob("*.jpg"):
        shutil.copy(path, image_out_dir / path.name)
        with open(data_dir / "labels" / subset / f"{path.stem}.txt", "r") as infile, open(
            label_out_dir / f"{path.stem}.txt", "w"
        ) as outfile:
            rows = [line.split() for line in infile.read().strip().splitlines()]
            # np.unique drops duplicate rows (and sorts them); the first column
            # is replaced with this class's multiclass id.
            for line_parts in np.unique(rows, axis=0):
                outfile.write(f"{class_id} {' '.join(line_parts[1:])}\n")

## Legacy functions

In [17]:
data_dir = Path.cwd().parent / "oidv6_data" / "multidata"
output_dir = Path.cwd().parent / "oidv6_data_converted_7classes"

classes = ["apple", "banana", "orange", "wheelchair", "wok", "box", "table"]
old_class_name = "kitchen_&_dining_room_table"
class_name = "table"

# Copy the test images and labels, replacing the first len(old_class_name)
# characters of each file name with class_name and rewriting the class id.
# NOTE(review): assumes every file name starts with old_class_name - confirm.
prefix_length = len(old_class_name)
class_id = classes.index(class_name)
test_image_out = output_dir / "images" / "test"
test_label_out = output_dir / "labels" / "test"

for path in (data_dir / "test").glob("*.jpg"):
    shutil.copy(path, test_image_out / f"{class_name}{path.name[prefix_length:]}")
    source_label = data_dir / "test" / "labels" / f"{path.stem}.txt"
    target_label = test_label_out / f"{class_name}{path.stem[prefix_length:]}.txt"
    with open(source_label, "r") as infile, open(target_label, "w") as outfile:
        rows = [line.split() for line in infile.read().strip().splitlines()]
        # np.unique drops duplicate rows (and sorts them).
        for row in np.unique(rows, axis=0):
            outfile.write(f"{class_id} {' '.join(row[1:])}\n")


In [23]:
def convert_labels_xyxy_to_yolo(label_path, image_width, image_height):
    """Rewrite a label file in place from corner boxes to normalized centre boxes.

    Each input line is read as ``<class> <x_min> <y_min> <x_max> <y_max>``; each
    output line is ``<class> <x_center> <y_center> <width> <height>`` with the x
    values divided by ``image_width`` and the y values by ``image_height``.
    Duplicate lines are dropped (``np.unique`` also sorts the rows).

    Running this twice on the same file converts the values twice, so call it
    only on files that still hold corner coordinates.
    """
    with open(label_path, "r+") as file:
        rows = [line.split() for line in file.read().strip().splitlines()]
        file.seek(0)
        for line_parts in np.unique(rows, axis=0):
            x_min, y_min, x_max, y_max = map(float, line_parts[1:])
            file.write(
                f"{line_parts[0]} "
                f"{(x_min + x_max) / 2 / image_width} "
                f"{(y_min + y_max) / 2 / image_height} "
                f"{(x_max - x_min) / image_width} "
                f"{(y_max - y_min) / image_height}\n"
            )
        # The rewritten content can be shorter than the original (duplicates
        # removed, shorter numbers); cut the stale bytes left after it.
        file.truncate()


data_dir = Path.cwd().parent / "oidv6_data_converted_7classes"
class_name = "table"
subset = "test"
for path in (data_dir / "images" / subset).glob("*.jpg"):
    # NOTE(review): the length check presumably selects files whose names are
    # longer than the "table_<n>_poisson" pattern - confirm which files that is.
    if f"{class_name}_" in path.name and len(path.stem) > len("table_999_poisson"):
        curr_image = cv2.imread(str(path))
        if curr_image is None:
            # cv2.imread returns None instead of raising when it cannot read a file.
            raise ValueError(f"could not read image: {path}")
        image_height, image_width = curr_image.shape[:2]
        convert_labels_xyxy_to_yolo(
            data_dir / "labels" / subset / f"{path.stem}.txt", image_width, image_height
        )

In [10]:
# Class list and folders used by the legacy cells that follow.
classes = ["table"]

working_dir = Path.cwd()
data_dir = working_dir / "hawking_output_dir"
output_dir = working_dir.parent / "oidv6_data_converted_7classes"
# Alternative folders kept from earlier runs:
# data_dir = Path.cwd().parent / "oidv6_data_converted_boxes"
# output_dir = Path.cwd().parent / "oidv6_data_converted_apple_banana_orange_wheelchair_wok_box"

In [8]:
class_name = "table"
train_image_out = output_dir / "images" / "train"
train_label_out = output_dir / "labels" / "train"

# Copy every image into the train split with a "<class_name>_" prefix and write
# its labels with class id 0.
for path in (data_dir / "images").glob("*.jpg"):
    shutil.copy(path, train_image_out / f"{class_name}_{path.name}")
    # The annotation file is named after the part of the image stem before the
    # first underscore.
    annotation_path = data_dir / "annotations" / f"{path.stem.split('_')[0]}.txt"
    label_path = train_label_out / f"{class_name}_{path.stem}.txt"
    with open(annotation_path, "r") as infile, open(label_path, "w") as outfile:
        rows = [line.split() for line in infile.read().strip().splitlines()]
        # np.unique drops duplicate rows (and sorts them).
        for row in np.unique(rows, axis=0):
            outfile.write(f"0 {' '.join(row[1:])}\n")

## Relabel class

In [18]:
# Rewrite every label file of both splits in place so that all boxes carry
# class id 0. Duplicate lines are dropped (np.unique also sorts the rows).
for split_name in ("train", "test"):
    for label_file in (output_dir / "labels" / split_name).glob("*.txt"):
        with open(label_file, "r+") as file:
            rows = [line.split() for line in file.read().strip().splitlines()]
            file.seek(0)
            for line_parts in np.unique(rows, axis=0):
                file.write(f"0 {' '.join(line_parts[1:])}\n")
            # The new content can be shorter than the old one (duplicates
            # removed, multi-digit class id replaced by "0"); without this the
            # tail of the old content would remain in the file.
            file.truncate()

## Combine Dataset

In [10]:
class_name = "wok"
train_image_out = output_dir / "images" / "train"
train_label_out = output_dir / "labels" / "train"

# Copy the train split into the combined dataset with a "<class_name>_" prefix.
# NOTE(review): the class id is hard-coded to 1 rather than looked up in
# `classes` - confirm it matches the class list used for training.
for path in (data_dir / "images" / "train").glob("*.jpg"):
    shutil.copy(path, train_image_out / f"{class_name}_{path.name}")
    source_label = data_dir / "labels" / "train" / f"{path.stem}.txt"
    target_label = train_label_out / f"{class_name}_{path.stem}.txt"
    with open(source_label, "r") as infile, open(target_label, "w") as outfile:
        rows = [line.split() for line in infile.read().strip().splitlines()]
        # np.unique drops duplicate rows (and sorts them).
        for row in np.unique(rows, axis=0):
            outfile.write(f"1 {' '.join(row[1:])}\n")


In [11]:
# Same copy as for the train split, applied to the test split. Relies on
# `class_name` still being set by the preceding cell.
# NOTE(review): the class id is hard-coded to 1 rather than looked up in
# `classes` - confirm it matches the class list used for training.
test_image_out = output_dir / "images" / "test"
test_label_out = output_dir / "labels" / "test"

for path in (data_dir / "images" / "test").glob("*.jpg"):
    shutil.copy(path, test_image_out / f"{class_name}_{path.name}")
    source_label = data_dir / "labels" / "test" / f"{path.stem}.txt"
    target_label = test_label_out / f"{class_name}_{path.stem}.txt"
    with open(source_label, "r") as infile, open(target_label, "w") as outfile:
        rows = [line.split() for line in infile.read().strip().splitlines()]
        # np.unique drops duplicate rows (and sorts them).
        for row in np.unique(rows, axis=0):
            outfile.write(f"1 {' '.join(row[1:])}\n")

In [1]:
# Class list of the combined dataset; a class's position in it is its label id.
classes = [
    "apple",
    "banana",
    "orange",
    "wheelchair",
    "wok",
    "box",
    "table",
]

In [17]:
class_name = "box"
subset = "train"
image_out_dir = output_dir / "images" / subset
label_out_dir = output_dir / "labels" / subset

# Copy the subset into the combined dataset with a "<class_name>_" prefix and
# relabel every box with the class's index in `classes`.
for path in (data_dir / "images" / subset).glob("*.jpg"):
    shutil.copy(path, image_out_dir / f"{class_name}_{path.name}")
    source_label = data_dir / "labels" / subset / f"{path.stem}.txt"
    target_label = label_out_dir / f"{class_name}_{path.stem}.txt"
    with open(source_label, "r") as infile, open(target_label, "w") as outfile:
        rows = [line.split() for line in infile.read().strip().splitlines()]
        # np.unique drops duplicate rows (and sorts them).
        for row in np.unique(rows, axis=0):
            outfile.write(f"{classes.index(class_name)} {' '.join(row[1:])}\n")

In [18]:
# Same copy as the previous cell for the test subset. Relies on `class_name`
# still being set by that cell.
subset = "test"
image_out_dir = output_dir / "images" / subset
label_out_dir = output_dir / "labels" / subset

for path in (data_dir / "images" / subset).glob("*.jpg"):
    shutil.copy(path, image_out_dir / f"{class_name}_{path.name}")
    source_label = data_dir / "labels" / subset / f"{path.stem}.txt"
    target_label = label_out_dir / f"{class_name}_{path.stem}.txt"
    with open(source_label, "r") as infile, open(target_label, "w") as outfile:
        rows = [line.split() for line in infile.read().strip().splitlines()]
        # np.unique drops duplicate rows (and sorts them).
        for row in np.unique(rows, axis=0):
            outfile.write(f"{classes.index(class_name)} {' '.join(row[1:])}\n")

In [13]:
def prefixed_stem(stem, class_name):
    """Return ``stem`` with a ``"<class_name>_"`` prefix unless it already contains that marker."""
    marker = f"{class_name}_"
    return stem if marker in stem else f"{marker}{stem}"


data_dir = Path.cwd().parent / "oidv6_data_converted_table"
class_name = "table"
subset = "train"

# Copy the subset into the combined dataset and relabel every box with the
# class's index in `classes`. Files already carrying the "<class_name>_" marker
# keep their name; all others get the prefix. The original check used a plain
# string instead of an f-string, so it never matched and every file was prefixed.
for path in (data_dir / "images" / subset).glob("*.jpg"):
    out_stem = prefixed_stem(path.stem, class_name)
    shutil.copy(path, output_dir / "images" / subset / f"{out_stem}{path.suffix}")
    with open(data_dir / "labels" / subset / f"{path.stem}.txt", "r") as infile, open(
        output_dir / "labels" / subset / f"{out_stem}.txt", "w"
    ) as outfile:
        rows = [line.split() for line in infile.read().strip().splitlines()]
        # np.unique drops duplicate rows (and sorts them).
        for line_parts in np.unique(rows, axis=0):
            outfile.write(f"{classes.index(class_name)} {' '.join(line_parts[1:])}\n")

In [14]:
data_dir = Path.cwd().parent / "oidv6_data_converted_table"
class_name = "table"
subset = "test"

# Copy the subset into the combined dataset and relabel every box with the
# class's index in `classes`. Files already carrying the "<class_name>_" marker
# keep their name; all others get the prefix. The original check used a plain
# string instead of an f-string, so it never matched and every file was prefixed.
for path in (data_dir / "images" / subset).glob("*.jpg"):
    already_prefixed = f"{class_name}_" in path.name
    out_stem = path.stem if already_prefixed else f"{class_name}_{path.stem}"
    shutil.copy(path, output_dir / "images" / subset / f"{out_stem}{path.suffix}")
    with open(data_dir / "labels" / subset / f"{path.stem}.txt", "r") as infile, open(
        output_dir / "labels" / subset / f"{out_stem}.txt", "w"
    ) as outfile:
        rows = [line.split() for line in infile.read().strip().splitlines()]
        # np.unique drops duplicate rows (and sorts them).
        for line_parts in np.unique(rows, axis=0):
            outfile.write(f"{classes.index(class_name)} {' '.join(line_parts[1:])}\n")