In [None]:
import os
import random
import shutil

def split_yolo_dataset(
    labels_dir="dataset/labels",
    images_dir="NepaliDevanagariText",
    output_dir="data",
    train_ratio=0.8,
    val_ratio=0.1,
    test_ratio=0.1,
    seed=None,
):
    """
    Split an annotated YOLO OBB dataset into train/val/test sets (8:1:1 by default).

    Every ``*.txt`` file in ``labels_dir`` is shuffled and copied to
    ``<output_dir>/labels/<split>``. The image with the same base name
    (``.jpg``, ``.jpeg`` or ``.png``, tried in that order) is copied from
    ``images_dir`` to ``<output_dir>/images/<split>``. A label whose image
    cannot be found is still copied, and a warning is printed for it.

    Args:
        labels_dir: directory holding the YOLO ``.txt`` annotation files.
        images_dir: directory holding the actual images.
        output_dir: root of the new dataset; created if missing.
        train_ratio: fraction of the label files assigned to ``train``.
        val_ratio: fraction of the label files assigned to ``val``.
        test_ratio: range-checked only. The ``test`` split always receives
            every file left over after ``train`` and ``val``, so integer
            rounding never drops a file.
        seed: optional seed for the shuffle. When given, the same set of
            label files always produces the same split. ``None`` keeps the
            previous behaviour of shuffling with the module-level ``random``
            state.

    Raises:
        ValueError: if any ratio lies outside the range [0, 1].
    """
    for ratio_name, ratio in (
        ("train_ratio", train_ratio),
        ("val_ratio", val_ratio),
        ("test_ratio", test_ratio),
    ):
        if not 0 <= ratio <= 1:
            raise ValueError(f"{ratio_name} must be between 0 and 1, got {ratio!r}")

    # os.listdir returns entries in arbitrary order; sort first so that a given
    # seed always yields the same split regardless of the file system.
    label_files = sorted(f for f in os.listdir(labels_dir) if f.endswith(".txt"))
    if seed is None:
        random.shuffle(label_files)
    else:
        # A private generator keeps the global random state untouched.
        random.Random(seed).shuffle(label_files)

    # Create output structure
    for split in ["train", "val", "test"]:
        os.makedirs(os.path.join(output_dir, "images", split), exist_ok=True)
        os.makedirs(os.path.join(output_dir, "labels", split), exist_ok=True)

    # Split sizes (test takes the remainder)
    total = len(label_files)
    train_end = int(total * train_ratio)
    val_end = train_end + int(total * val_ratio)

    splits = {
        "train": label_files[:train_end],
        "val": label_files[train_end:val_end],
        "test": label_files[val_end:]
    }

    img_extensions = [".jpg", ".jpeg", ".png"]

    # Copy files
    for split, files in splits.items():
        for label_file in files:
            label_src = os.path.join(labels_dir, label_file)
            label_dst = os.path.join(output_dir, "labels", split, label_file)

            shutil.copy(label_src, label_dst)

            # Find corresponding image: first matching extension wins
            base_name = os.path.splitext(label_file)[0]

            img_found = False
            for ext in img_extensions:
                img_src = os.path.join(images_dir, base_name + ext)
                if os.path.exists(img_src):
                    img_dst = os.path.join(output_dir, "images", split, base_name + ext)
                    shutil.copy(img_src, img_dst)
                    img_found = True
                    break

            if not img_found:
                print(f"[WARNING] No image found for {label_file}")

    print(f"✅ Dataset split complete! Saved in '{output_dir}'")
    print(f"Train: {len(splits['train'])}, Val: {len(splits['val'])}, Test: {len(splits['test'])}")

# Run the split with the default directories and ratios declared in the function signature.
split_yolo_dataset()


✅ Dataset split complete! Saved in 'data'
Train: 56, Val: 7, Test: 8


In [None]:
import os
import shutil
%pip install pyyaml

In [None]:
import yaml


In [None]:
# def create_yolo_yaml_config(yaml_filepath, dataset_path, dataset_labels):

#     data = {'path':dataset_path,
#             'train': os.path.join('images', 'train'),
#             'validation': os.path.join('images', 'validation'),
#             'names':{i:label for i, label in enumerate(dataset_labels)}
#             }

#     # Save the changes to the file
#     with open(yaml_filepath, 'w') as fp:
#         yaml.dump(data, fp, sort_keys=False)


def create_yolo_yaml_config(
    yaml_filepath,
    dataset_path,
    dataset_labels,
    train_split="images/train",
    val_split="images/val",
    test_split="images/test",
    nc=None,
    task="detect",        # "detect", "segment", "classify", "obb"
    model="yolov8n.pt",   # default model, can change to yolov8s.pt, yolov8m.pt, etc.
    epochs=100,
    batch=16,
    img_size=640,
    lr0=0.01,
    optimizer="SGD"       # SGD, Adam, AdamW, RMSProp
):
    """
    Write a YOLOv8 dataset YAML, together with a record of training settings.

    Args:
        yaml_filepath: destination path of the YAML file (overwritten if present).
        dataset_path: dataset root, stored under the ``path`` key.
        dataset_labels: sequence of class names; position ``i`` becomes class id ``i``.
        train_split / val_split / test_split: image folders stored under
            ``train`` / ``val`` / ``test``.
        nc: number of classes. ``None`` (default) derives it from
            ``len(dataset_labels)`` so that ``nc`` and ``names`` cannot
            disagree; an explicit value is written unchanged.
        task, model, epochs, batch, img_size, lr0, optimizer: written to the
            file under ``task``, ``model``, ``epochs``, ``batch``, ``imgsz``,
            ``lr0`` and ``optimizer``.

    The file is written as UTF-8 with ``allow_unicode=True`` so that non-ASCII
    class names (e.g. Devanagari) stay human-readable instead of being escaped.
    """

    if nc is None:
        nc = len(dataset_labels)

    data = {
        # dataset setup
        "path": dataset_path,
        "train": train_split,
        "val": val_split,
        "test": test_split,
        "nc": nc,
        "names": {i: label for i, label in enumerate(dataset_labels)},

        # training settings, recorded for reference.
        # NOTE(review): when this file is passed as `data=...`, Ultralytics
        # presumably consumes only the dataset keys above; pass these values to
        # `model.train(...)` / the CLI explicitly for them to take effect — confirm.
        "task": task,
        "model": model,
        "epochs": epochs,
        "batch": batch,
        "imgsz": img_size,
        "lr0": lr0,
        "optimizer": optimizer,

        # augmentation defaults (can be tuned)
        "hsv_h": 0.015,  # image HSV-Hue augmentation (fraction)
        "hsv_s": 0.7,    # HSV-Saturation augmentation (fraction)
        "hsv_v": 0.4,    # HSV-Value augmentation (fraction)
        "flipud": 0.0,   # image flip up-down (probability)
        "fliplr": 0.5,   # image flip left-right (probability)
        "mosaic": 1.0,   # mosaic augmentation (probability)
        "mixup": 0.1     # mixup augmentation (probability)
    }

    # Save the YAML file; explicit UTF-8 avoids depending on the platform's default encoding
    with open(yaml_filepath, "w", encoding="utf-8") as fp:
        yaml.dump(data, fp, sort_keys=False, allow_unicode=True)

    print(f"✅ YOLO config YAML saved to {yaml_filepath}")


In [None]:
# Class names; the list position is the YOLO class id (0 -> 'line').
names = ['line']

# An absolute path is recommended for the dataset root.
dataset_path = os.path.abspath('data')
yaml_filepath = os.path.join('.', 'config.yaml')

create_yolo_yaml_config(
    yaml_filepath=yaml_filepath,
    dataset_path=dataset_path,
    dataset_labels=names,
)

# NOTE(review): not referenced in the visible cells — presumably the id of the single class; confirm.
class_ids = 0

In [None]:
# Show where the dataset root and the generated config file are expected.
print(f"{dataset_path} {yaml_filepath}")

In [None]:
%pip install ultralytics
import ultralytics
# Environment sanity check before training.
# NOTE(review): presumably prints the Ultralytics/Python/torch versions and hardware summary — confirm against the Ultralytics docs.
ultralytics.checks()

In [None]:
from ultralytics import YOLO


# Load a model
model = YOLO("yolov8m.pt")  # loads pretrained YOLOv8m weights (a ".yaml" model path is what builds a new model from scratch)

# Use the model
# NOTE(review): only `data` and `epochs` are passed here; the other settings written
# into config.yaml (model, batch, lr0, optimizer, ...) are presumably not applied by
# this call — confirm, and pass them explicitly if they are meant to take effect.
results = model.train(data='./config.yaml', 
                      epochs=100)  # train the model
