In [8]:
from pathlib import Path
from PIL import Image
import numpy as np


def get_image_shape(text_file: str | Path, outfolder: str | Path):
    text_file = Path(text_file)
    components = str(text_file.name).split(".")
    if components[-1].lower() != "txt":
        raise OSError("Must be a text file")

    image_id = components[0]
    image_path =  Path(outfolder) / (image_id + ".jpg")

    if not Path(image_path).exists():
        raise FileNotFoundError("Image file not found")

    image = Image.open(image_path)
    width, height = image.size
    return width, height


def process_text(text_file: str | Path, outfolder: str | Path, test_only: bool = False):
    text_file = Path(text_file)
    with text_file.open("r") as infile: 
        width, height = get_image_shape(text_file, outfolder)
        data = ""
        new_data = ""

        for line in infile:
            if test_only:
                data += f"{line}\n"

            line_data = line.strip().split(",")
            coords = line_data[:8]
            coords[::2] = [str(round(float(coord) / width, 6)) for coord in coords[::2]]
            coords[1::2] = [str(round(float(coord) / height, 6)) for coord in coords[1::2]]

            new_data += f"0 {' '.join(coords)}\n"
    
    if test_only:
        print(data)
        print(new_data)
    else:
        out_text_file = Path(outfolder) / text_file.name
        with out_text_file.open("w") as outfile:
            outfile.write(new_data)

In [9]:
# Count the entries listed in the train_gt directory.
! ls /mnt/data/data/Data_Ve_Chu/Data_Ve_Chu/train_gt | wc -l

9011


In [10]:
from PIL import Image

# Spot-check one training image. Image.open leaves the file handle open, so the
# size is read inside a context manager and displayed afterwards.
with Image.open("/mnt/data/data/Data_Ve_Chu/Data_Ve_Chu/train_img/G0013473.jpg") as image:
    image_size = image.size  # (width, height)

image_size

(158, 159)

In [11]:
from shutil import copy
from pathlib import Path


def copy_images(src, dst):
    """Copy every regular file found directly inside ``src`` into ``dst``.

    ``dst`` is created first (including missing parents) if it does not exist.
    Sub-directories of ``src`` are skipped; the copy is not recursive.

    Args:
        src: Folder to copy from.
        dst: Folder to copy into.
    """
    source_dir, target_dir = Path(src), Path(dst)
    target_dir.mkdir(parents=True, exist_ok=True)

    regular_files = (entry for entry in source_dir.glob("*") if entry.is_file())
    for entry in regular_files:
        copy(entry, target_dir)

In [12]:
# Source and destination folders for the training images.
src_img, dst_img = (
    "/mnt/data/data/Data_Ve_Chu/Data_Ve_Chu/train_img",
    "/mnt/data/data/Data_Ve_Chu/YOLO_2309/train",
)
# The copy itself is left disabled; uncomment the call to run it.
# copy_images(src_img, dst_img)

In [13]:
# Source and destination folders for the validation images.
# NOTE(review): the second path was previously assigned to src_val as well,
# which left dst_val undefined for the call below. It is assumed to be the
# copy destination, mirroring the train cell — confirm.
src_val = "/mnt/data/data/Data_Ve_Chu/YOLO_2608/val"
dst_val = "/mnt/data/data/Data_Ve_Chu/YOLO_2309/val"
# copy_images(src_val, dst_val)

In [14]:
# Dry run on a single annotation file: with test_only=True the raw and the
# converted text are printed and nothing is written to disk.
process_text(
    text_file="/mnt/data/data/Data_Ve_Chu/Data_Ve_Chu/train_gt/2.txt",
    outfolder="/mnt/data/data/Data_Ve_Chu/YOLO_2309/train",
    test_only=True,
)

59,74,203,96,202,125,66,103,0


0 0.205575 0.406593 0.707317 0.527473 0.703833 0.686813 0.229965 0.565934



In [15]:
def norm_text_files(folder: str, outfolder: str):
    """Run ``process_text`` on every ``.txt`` file directly inside ``folder``.

    Args:
        folder: Directory to scan (not recursive).
        outfolder: Passed through to ``process_text`` unchanged.

    Any exception raised by ``process_text`` propagates and stops the run.
    """
    # sorted() makes the processing order reproducible; iterdir() yields
    # entries in arbitrary order.
    for file in sorted(Path(folder).iterdir()):
        # Compare the extension case-insensitively, matching the check in
        # get_image_shape, so a file such as "X.TXT" is not silently skipped.
        if file.is_file() and file.suffix.lower() == ".txt":
            process_text(file, outfolder)

In [16]:
# Convert every annotation file in train_gt and write the results into the
# YOLO_2309/train folder.
norm_text_files(
    folder="/mnt/data/data/Data_Ve_Chu/Data_Ve_Chu/train_gt",
    outfolder="/mnt/data/data/Data_Ve_Chu/YOLO_2309/train",
)

In [17]:
# Count the .txt files present in the YOLO_2309/train folder.
! ls /mnt/data/data/Data_Ve_Chu/YOLO_2309/train/*.txt | wc -l

9011
