In [1]:
import json
import cv2
import os
import matplotlib.pyplot as plt
import shutil
import pandas as pd
from tqdm import tqdm

In [8]:
# Dataset variant to convert. The value is used as the sub-directory name under
# both the input and output roots and as the prefix of the per-fold COCO
# annotation file names read by the conversion loop.
sample_type = "brightfield_sample"

In [9]:
# Root directory that holds both the COCO source dataset and the YOLOv8 output
# tree. Set the MICROSCOPY_DATA_ROOT environment variable to run the notebook on
# another machine; when it is unset the original location is used, so the
# resulting paths are unchanged.
data_root = os.environ.get(
    "MICROSCOPY_DATA_ROOT",
    "/mnt/Enterprise/safal/AI_assisted_microscopy_system",
)
# Source: per-fold COCO annotation files plus the shared "train" image directory.
input_path = f"{data_root}/cysts_dataset_all/{sample_type}"
# Destination: one images/labels tree per fold and split.
output_path = f"{data_root}/yolov8/{sample_type}"


In [10]:
def _coco_bbox_to_yolo(bbox, img_width, img_height):
    """Convert a pixel-space ``[x, y, width, height]`` box to YOLO label values.

    The first two entries are treated as the box's top-left corner. Returns
    ``(x_center, y_center, width, height)``, each divided by the matching image
    dimension.
    """
    x, y, w, h = bbox[:4]
    return (
        (x + w / 2) / img_width,
        (y + h / 2) / img_height,
        w / img_width,
        h / img_height,
    )


# For every fold and split: copy the listed images into <out>/<split>/images and
# write one YOLO label file per annotated image into <out>/<split>/labels.
for fold in range(1, 6):
    print(f"Processing fold {fold}")
    in_fold_path = os.path.join(input_path, f"fold_{fold}")
    out_fold_path = os.path.join(output_path, f"fold_{fold}")

    for split in ["train", "val"]:
        print(f"Processing split {split}")
        coco_annos_path = os.path.join(in_fold_path, f"{sample_type}_coco_annos_{split}.json")
        # Context manager so the annotation file handle is closed promptly.
        with open(coco_annos_path, "r") as annos_fp:
            coco_annos = json.load(annos_fp)

        images_df = pd.DataFrame(coco_annos["images"])
        # Keep only the base name; images are looked up in a single flat directory.
        images_df["file_name"] = images_df["file_name"].apply(lambda x: x.split("/")[-1])

        out_images_path = os.path.join(out_fold_path, split, "images")
        out_labels_path = os.path.join(out_fold_path, split, "labels")
        os.makedirs(out_images_path, exist_ok=True)
        os.makedirs(out_labels_path, exist_ok=True)

        # Both splits read their images from "<input_path>/train".
        for image in tqdm(images_df["file_name"].values):
            shutil.copy(os.path.join(input_path, "train", image), out_images_path)

        print("Finished copying images")

        # Group annotations by image id once, instead of filtering the full
        # annotation table for every image. This also copes with an empty
        # "annotations" list, which would otherwise have no "image_id" column.
        annos_by_image = {}
        for anno in coco_annos["annotations"]:
            annos_by_image.setdefault(anno["image_id"], []).append(anno)

        # .tolist() yields native Python values so ids compare/hash exactly like
        # the ids parsed from the JSON annotations.
        image_records = list(zip(images_df["file_name"].tolist(), images_df["id"].tolist()))

        for image, image_id in tqdm(image_records):
            img_annos = annos_by_image.get(image_id, [])
            if not img_annos:
                # No label file is written for images without annotations, so
                # there is no need to decode the image either.
                continue

            img_file = os.path.join(input_path, "train", image)
            img = cv2.imread(img_file)
            if img is None:
                # cv2.imread signals failure by returning None rather than raising.
                raise ValueError(f"Could not read image: {img_file}")
            img_height, img_width = img.shape[:2]

            # splitext strips only the final extension, so "a.b.jpg" -> "a.b.txt"
            # and the label stem keeps matching the image stem.
            img_id = os.path.splitext(image)[0]
            out_file = os.path.join(out_labels_path, f"{img_id}.txt")
            with open(out_file, "w") as f:
                for anno in img_annos:
                    x_center, y_center, w, h = _coco_bbox_to_yolo(
                        anno["bbox"], img_width, img_height
                    )
                    # NOTE(review): category_id is written unchanged; YOLO expects
                    # zero-based class indices — confirm the COCO ids already are.
                    f.write(f"{anno['category_id']} {x_center} {y_center} {w} {h}\n")


Processing fold 1
Processing split train


100%|██████████| 805/805 [00:01<00:00, 408.18it/s]


Finished copying images


100%|██████████| 805/805 [00:06<00:00, 126.06it/s]


Processing split val


100%|██████████| 202/202 [00:00<00:00, 210.83it/s]


Finished copying images


100%|██████████| 202/202 [00:01<00:00, 129.72it/s]


Processing fold 2
Processing split train


100%|██████████| 805/805 [00:00<00:00, 3683.03it/s]


Finished copying images


100%|██████████| 805/805 [00:06<00:00, 127.17it/s]


Processing split val


100%|██████████| 202/202 [00:00<00:00, 2023.86it/s]


Finished copying images


100%|██████████| 202/202 [00:01<00:00, 127.79it/s]


Processing fold 3
Processing split train


100%|██████████| 806/806 [00:00<00:00, 2818.77it/s]


Finished copying images


100%|██████████| 806/806 [00:06<00:00, 128.30it/s]


Processing split val


100%|██████████| 201/201 [00:00<00:00, 3605.88it/s]


Finished copying images


100%|██████████| 201/201 [00:01<00:00, 131.56it/s]


Processing fold 4
Processing split train


100%|██████████| 806/806 [00:00<00:00, 3077.76it/s]


Finished copying images


100%|██████████| 806/806 [00:06<00:00, 126.29it/s]


Processing split val


100%|██████████| 201/201 [00:00<00:00, 3733.34it/s]


Finished copying images


100%|██████████| 201/201 [00:01<00:00, 135.13it/s]


Processing fold 5
Processing split train


100%|██████████| 806/806 [00:00<00:00, 3579.78it/s]


Finished copying images


100%|██████████| 806/806 [00:06<00:00, 125.34it/s]


Processing split val


100%|██████████| 201/201 [00:00<00:00, 3015.87it/s]


Finished copying images


100%|██████████| 201/201 [00:01<00:00, 134.46it/s]
