In [9]:
import os
import shutil
from sklearn.model_selection import train_test_split

In [10]:
# === CHANGE THESE PATHS ===
# Directory searched for the image that belongs to each label file.
image_dir = "D:/SOP_BIO/dataset/images/train"
# Directory scanned for ".txt" label files.
label_dir = "D:/SOP_BIO/wbc/txt"
# Root under which images/<split> and labels/<split> are created and filled.
output_dir = "D:/SOP_BIO/WBC_dataset"
# Passed to train_test_split as train_size; the remaining pairs form the validation set.
split_ratio = 0.8

In [11]:
# === Create output folders ===
# Build <output_dir>/<kind>/<split> for every split/kind combination up front,
# so the copy step can write straight into them.
for split in ('train', 'val'):
    for kind in ('images', 'labels'):
        target_dir = os.path.join(output_dir, kind, split)
        os.makedirs(target_dir, exist_ok=True)

# === Collect valid image-label pairs ===
# Extensions tried, in order, when looking for the image that belongs to a
# label. '.jpg' comes first, so datasets that only contain .jpg files resolve
# exactly as before.
image_extensions = ('.jpg', '.jpeg', '.png')
label_suffix = ".txt"

image_label_pairs = []
labels_without_image = []

# Sort the listing: os.listdir() returns entries in arbitrary order, and the
# seeded split below is only reproducible when its input order is stable.
for label_file in sorted(os.listdir(label_dir)):
    if not label_file.endswith(label_suffix):
        continue

    # Strip only the trailing suffix. str.replace() would also rewrite a
    # ".txt" that happens to occur in the middle of the file name.
    stem = label_file[:-len(label_suffix)]
    label_path = os.path.join(label_dir, label_file)

    candidates = (os.path.join(image_dir, stem + image_ext) for image_ext in image_extensions)
    image_path = next((path for path in candidates if os.path.exists(path)), None)

    if image_path is None:
        # Keep track of orphaned labels instead of dropping them silently.
        labels_without_image.append(label_file)
        continue

    image_label_pairs.append((image_path, label_path))

if labels_without_image:
    print(f"⚠️ {len(labels_without_image)} label file(s) had no matching image and were skipped.")

# === Split the data ===
# Fail early with an actionable message; otherwise train_test_split raises a
# generic error about an empty train set.
if not image_label_pairs:
    raise ValueError(f"No image/label pairs found (labels: {label_dir}, images: {image_dir}).")

# random_state is fixed so the same input list always yields the same split.
train_pairs, val_pairs = train_test_split(image_label_pairs, train_size=split_ratio, random_state=42)

# === Copy files ===
def copy_files(pairs, split):
    """Copy every (image, label) pair into the output folders of one split.

    Args:
        pairs: Iterable of ``(image_path, label_path)`` tuples.
        split: Name of the sub-folder placed under ``images`` and ``labels``
            inside the module-level ``output_dir`` (e.g. ``'train'``).

    Files keep their base names. The destination folders are created when
    missing, so the function does not depend on them having been prepared
    by earlier code.
    """
    image_dest = os.path.join(output_dir, 'images', split)
    label_dest = os.path.join(output_dir, 'labels', split)

    # Without this, shutil.copy raises FileNotFoundError for any split whose
    # folders were not created beforehand.
    os.makedirs(image_dest, exist_ok=True)
    os.makedirs(label_dest, exist_ok=True)

    for img_path, label_path in pairs:
        shutil.copy(img_path, os.path.join(image_dest, os.path.basename(img_path)))
        shutil.copy(label_path, os.path.join(label_dest, os.path.basename(label_path)))

# Copy each split into its own folder, training set first.
for split_name, split_pairs in (('train', train_pairs), ('val', val_pairs)):
    copy_files(split_pairs, split_name)

# Report how many pairs ended up in each split.
n_train, n_val = len(train_pairs), len(val_pairs)
print(f"✅ Copied {n_train} training and {n_val} validation pairs.")

✅ Copied 41 training and 11 validation pairs.


In [7]:
import os
import json
from PIL import Image

# === CHANGE THESE ===
# Directory holding the images referenced by the JSON annotation entries.
image_dir = "D:/SOP_BIO/dataset/images/train"
# Directory scanned for ".json" annotation files.
json_dir = "D:/SOP_BIO/wbc"
# Directory that receives one ".txt" label file per converted image.
output_txt_dir = "D:/SOP_BIO/wbc/txt"
# Create the label directory up front so the conversion loop can write into it.
os.makedirs(output_txt_dir, exist_ok=True)

# NOTE(review): output_txt_dir is the same path as label_dir in the train/val
# split cell, so this conversion has to run before that split is executed.


In [8]:
#  === Iterate through JSON files ===
# Every box is written with the same class id: 'wbc' → class 0.
cls_id = 0

# Sorted so that files are processed (and logged) in a stable order;
# os.listdir() makes no ordering guarantee.
for json_file in sorted(os.listdir(json_dir)):
    if not json_file.endswith(".json"):
        continue

    json_path = os.path.join(json_dir, json_file)

    # Open in binary mode and let json.load() detect the encoding
    # (UTF-8/16/32). Text mode would use the platform default (e.g. cp1252 on
    # Windows) and can fail or mis-decode non-ASCII image names.
    with open(json_path, 'rb') as f:
        data = json.load(f)

    # Each JSON contains a list of entries (one per image)
    for entry in data:
        image_name = entry['image']
        annotations = entry['annotations']
        img_path = os.path.join(image_dir, image_name)

        if not os.path.exists(img_path):
            print(f"⚠️ Image not found: {image_name}")
            continue

        # Pixel dimensions are needed to normalise the box values.
        with Image.open(img_path) as img:
            img_w, img_h = img.size

        # One label file per image, named after the image stem.
        txt_path = os.path.join(output_txt_dir, os.path.splitext(image_name)[0] + ".txt")

        # An entry without annotations still produces an (empty) label file.
        with open(txt_path, 'w', encoding='utf-8') as out:
            for ann in annotations:
                coords = ann['coordinates']
                # NOTE(review): 'x'/'y' are used directly as the box centre,
                # in pixels — confirm this matches the annotation tool's export.
                x_center = coords['x'] / img_w
                y_center = coords['y'] / img_h
                width = coords['width'] / img_w
                height = coords['height'] / img_h

                out.write(f"{cls_id} {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}\n")

        print(f"✅ Converted: {image_name} → {txt_path}")

✅ Converted: 20190404151312.jpg → D:/SOP_BIO/wbc/txt\20190404151312.txt
✅ Converted: 20190404151511.jpg → D:/SOP_BIO/wbc/txt\20190404151511.txt
✅ Converted: 20190404151641.jpg → D:/SOP_BIO/wbc/txt\20190404151641.txt
✅ Converted: 20190404152216.jpg → D:/SOP_BIO/wbc/txt\20190404152216.txt
✅ Converted: 20190404152626.jpg → D:/SOP_BIO/wbc/txt\20190404152626.txt
✅ Converted: 20190404152850.jpg → D:/SOP_BIO/wbc/txt\20190404152850.txt
✅ Converted: 20190404153102.jpg → D:/SOP_BIO/wbc/txt\20190404153102.txt
✅ Converted: 20190404153544.jpg → D:/SOP_BIO/wbc/txt\20190404153544.txt
✅ Converted: 20190404154509.jpg → D:/SOP_BIO/wbc/txt\20190404154509.txt
✅ Converted: 20190404154805.jpg → D:/SOP_BIO/wbc/txt\20190404154805.txt
✅ Converted: 20190404155033.jpg → D:/SOP_BIO/wbc/txt\20190404155033.txt
✅ Converted: 20190404155314.jpg → D:/SOP_BIO/wbc/txt\20190404155314.txt
✅ Converted: 20190404155447.jpg → D:/SOP_BIO/wbc/txt\20190404155447.txt
✅ Converted: 20190404160306.jpg → D:/SOP_BIO/wbc/txt\20190404160