In [1]:
import os
import shutil
from pathlib import Path
import random

In [7]:
project_root = r"D:\5TH SEM\DL\DL PROJECT\DL_Project_Equine Pain\EQUINE PAIN CODE"
TASK2_DIR = os.path.join(project_root, "dataset", "annotations", "task2_labels")

IMG_DIR  = os.path.join(TASK2_DIR, "images")   # ← your images are here
LBL_DIR  = os.path.join(TASK2_DIR, "labels")   # ← your .txt files are here

YOLO_DATASET_DIR = os.path.join(project_root, "dataset", "yolo_landmarks")

In [3]:
# ---  Create the required folders --- 
for split in ['train', 'val']:
    os.makedirs(os.path.join(YOLO_DATASET_DIR, 'images', split), exist_ok=True)
    os.makedirs(os.path.join(YOLO_DATASET_DIR, 'labels', split), exist_ok=True)

In [None]:
# --- Load images and match with labels by filename stem ---
img_path = Path(IMG_DIR)
lbl_path = Path(LBL_DIR)

In [None]:
# --- Build dictionary: filename_without_extension → full .txt path ---
label_dict = {f.stem: f for f in lbl_path.glob("*.txt")}

In [None]:
# --- Supported image extensions (covers everything Label Studio uses) ---
image_extensions = ["*.jpg", "*.jpeg", "*.JPG", "*.JPEG", "*.png", "*.PNG"]

pairs = []

for pattern in image_extensions:
    for img_file in img_path.glob(pattern):
        if img_file.stem in label_dict:
            pairs.append((img_file, label_dict[img_file.stem]))
        else:
            print(f"Warning: No label found for image: {img_file.name}")

print(f"\nFound {len(pairs)} valid image-label pairs")

if len(pairs) == 0:
    print("ERROR: No pairs found! Check your folder paths and file extensions.")
    exit()


Found 480 valid image-label pairs


In [18]:
# --- Reproducible train/val split (90/10) ---
random.seed(42)          # Crucial for reproducibility
random.shuffle(pairs)

split_idx = int(0.9 * len(pairs))
train_pairs = pairs[:split_idx]
val_pairs   = pairs[split_idx:]

In [None]:
# --- Copy to YOLO format ---
def copy_split(pair_list, split_name):
    img_dest = os.path.join(YOLO_DATASET_DIR, 'images', split_name)
    lbl_dest = os.path.join(YOLO_DATASET_DIR, 'labels', split_name)
    
    for img_file, lbl_file in pair_list:
        shutil.copy(img_file, os.path.join(img_dest, img_file.name))
        shutil.copy(lbl_file, os.path.join(lbl_dest, lbl_file.name))

copy_split(train_pairs, 'train')
copy_split(val_pairs,   'val')


Success! Train = 432, Val = 48
YOLO dataset created at:
D:\5TH SEM\DL\DL PROJECT\DL_Project_Equine Pain\EQUINE PAIN CODE\dataset\yolo_landmarks


In [22]:
# 5. Final report
print(f"\nSUCCESS!")
print(f"   Train images : {len(train_pairs)}")
print(f"   Val images   : {len(val_pairs)}")
print(f"   Total        : {len(pairs)}")
print(f"\nYOLOv8 dataset ready at:")
print(f"   {YOLO_DATASET_DIR}")


SUCCESS!
   Train images : 432
   Val images   : 48
   Total        : 480

YOLOv8 dataset ready at:
   D:\5TH SEM\DL\DL PROJECT\DL_Project_Equine Pain\EQUINE PAIN CODE\dataset\yolo_landmarks
