In [1]:
import os
import shutil
from pathlib import Path, PureWindowsPath

import cv2
import pandas as pd
from sklearn.model_selection import train_test_split

In [9]:
# ====== CONFIG ======
# Root folder of the dataset. Set the HYPERBOLA_DATASET_ROOT environment variable to
# point the notebook at another location without editing this cell; when it is not
# set, the original location below is used.
DATASET_ROOT = os.environ.get(
    'HYPERBOLA_DATASET_ROOT',
    r'C:\Users\prita\OneDrive\Desktop\HYPERBOLA DETECTION LAB DATASET',
)

# Inputs
IMG_DIR = os.path.join(DATASET_ROOT, '2. augmented images')     # Your image folder
CSV_PATH = os.path.join(DATASET_ROOT, 'annotations_yolo.csv')   # Your CSV file

# Outputs (created by the setup cell if they do not exist)
TRAIN_IMG_DIR = os.path.join(DATASET_ROOT, 'images_train')
TEST_IMG_DIR = os.path.join(DATASET_ROOT, 'images_test')
TRAIN_LABEL_DIR = os.path.join(DATASET_ROOT, 'labels_train')
TEST_LABEL_DIR = os.path.join(DATASET_ROOT, 'labels_test')

# Value passed as test_size to train_test_split.
TEST_SIZE = 0.2

In [3]:
# ====== SETUP FOLDERS ======
# Make sure each output directory exists; directories already present are left as they are.
for folder in (
    TRAIN_IMG_DIR,
    TEST_IMG_DIR,
    TRAIN_LABEL_DIR,
    TEST_LABEL_DIR,
):
    os.makedirs(folder, exist_ok=True)

# ====== READ ANNOTATIONS ======
# Load the annotation CSV into a DataFrame.
df = pd.read_csv(filepath_or_buffer=CSV_PATH)

In [17]:
# Extract just the image filename from the full Windows-style path.
# PureWindowsPath splits on both '\' and '/' regardless of the OS running the
# notebook, whereas Path only treats '\' as a separator when running on Windows.
df['filename'] = df['path'].map(lambda p: PureWindowsPath(p).name)

# Get list of unique images (one entry per image, however many boxes it has)
unique_images = df['filename'].unique()


In [18]:
# ====== SPLIT TRAIN/TEST ======
# Split at image level so all boxes of one image land in the same subset;
# the fixed random_state keeps the split reproducible between runs.
train_imgs, test_imgs = train_test_split(
    unique_images,
    test_size=TEST_SIZE,
    random_state=42,
)

In [19]:
# ====== YOLO CONVERSION FUNCTION ======
def convert_to_yolo(row, w, h):
    """Turn one corner-format box into a YOLO label line.

    Args:
        row: Mapping (e.g. a DataFrame row) with numeric 'x1', 'y1', 'x2', 'y2'
            entries giving two opposite corners of the box, in pixels.
        w: Image width in pixels, used to normalise x values.
        h: Image height in pixels, used to normalise y values.

    Returns:
        The string "0 <x_center> <y_center> <width> <height>" terminated by a real
        newline, with all four values divided by the image size and formatted to
        six decimals. The class id is always 0.
    """
    x1, y1, x2, y2 = row['x1'], row['y1'], row['x2'], row['y2']

    # Order the corners so a swapped pair cannot produce a negative width/height.
    left, right = min(x1, x2), max(x1, x2)
    top, bottom = min(y1, y2), max(y1, y2)

    x_center = (left + right) / 2 / w
    y_center = (top + bottom) / 2 / h
    width = (right - left) / w
    height = (bottom - top) / h

    # "\n" must be a real line break: each box has to sit on its own line of the label file.
    return f"0 {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}\n"  # class 0

In [20]:
# ====== PROCESS FUNCTION ======
def process_subset(image_list, subset_name):
    """Copy one subset's images and write a YOLO label file for each of them.

    For every name in ``image_list`` the image is looked up in ``IMG_DIR``, its
    size is read with OpenCV, the file is copied to the subset's image folder and
    a ``<stem>.txt`` file containing one ``convert_to_yolo`` line per matching row
    of ``df`` is written to the subset's label folder.

    Images that are missing or cannot be decoded are reported and skipped
    entirely, so an image is never copied without its label file.

    Args:
        image_list: Iterable of image file names, matching ``df['filename']``.
        subset_name: ``'train'`` selects the train folders; any other value
            selects the test folders.

    Returns:
        None.
    """
    is_train = subset_name == 'train'
    img_dst = TRAIN_IMG_DIR if is_train else TEST_IMG_DIR
    label_dst = TRAIN_LABEL_DIR if is_train else TEST_LABEL_DIR

    # Group the annotations once instead of re-scanning the whole frame per image.
    rows_by_image = {name: group for name, group in df.groupby('filename', sort=False)}
    no_rows = df.iloc[0:0]  # used for names without annotations -> empty label file

    for img_name in image_list:
        src_img = os.path.join(IMG_DIR, img_name)
        dst_img = os.path.join(img_dst, img_name)
        label_file = os.path.join(label_dst, Path(img_name).stem + '.txt')

        if not os.path.exists(src_img):
            print(f"⚠️ Missing image: {src_img}")
            continue

        # Load image to get width and height. This happens BEFORE the copy so that
        # an undecodable file is not left in the dataset without a label file.
        img = cv2.imread(src_img)
        if img is None:
            print(f"⚠️ Can't read image: {src_img}")
            continue
        h, w = img.shape[:2]

        # Copy image
        shutil.copy2(src_img, dst_img)

        # Write label file in YOLO format, one line per annotation of this image
        rows = rows_by_image.get(img_name, no_rows)
        with open(label_file, 'w') as f:
            f.writelines(convert_to_yolo(row, w, h) for _, row in rows.iterrows())


In [21]:
# ====== RUN ======
# Build the train subset first, then the test subset.
for split_name, split_images in (('train', train_imgs), ('test', test_imgs)):
    process_subset(split_images, split_name)

print("✅ Done! Images and labels saved in train/test folders.")

✅ Done! Images and labels saved in train/test folders.
