# In [None]:
"""
Batch YOLO Dataset Creator with Global Normalization
===================================================
Processes all GeoTIFF files in a directory using ONE shapefile.
Applies GLOBAL P2–P98 normalization (scene-consistent).
Safe for RGB, thermal, and RGB–Thermal composites.
"""

import os
import random
from pathlib import Path
from yolo_dataset_creator import YoloDatasetFromShp

# ============================================================
# CONFIGURATION
# ============================================================

# --- Paths (raw strings keep the Windows backslashes literal) ---
# Folder scanned for input .tif files.
TIF_DIRECTORY: str = r"C:\2025_Datasets"
# The one shapefile passed along with every image.
SHAPEFILE_PATH: str = r"C:\labels_shp\labels.shp"
# Parent folder; each image gets its own "Dataset_<stem>" sub-folder here.
OUTPUT_BASE_DIR: str = r"C:\IFA\2025_Datasets\YOLO_dataset"

# --- Dataset parameters (forwarded unchanged to YoloDatasetFromShp) ---
CROP_SIZE: int = 640
# Train / validation / test split fractions.
TRAIN_RATIO, VALID_RATIO, TEST_RATIO = 0.8, 0.1, 0.1
CLASS_NAMES: list = ["ifa_mound"]
# Seeds the shuffle of the file list and is also passed as random_seed.
RANDOM_SEED: int = 42

# --- Normalization parameters ---
NORMALIZE: bool = True
# Passed as norm_mode; "global" is described in this file as GLOBAL P2–P98
# normalization (implemented by YoloDatasetFromShp, not visible here).
NORM_MODE: str = "global"

# ============================================================
# MAIN
# ============================================================
def collect_tif_files(directory):
    """Return the ``.tif`` files located directly inside *directory*, sorted.

    The extension match is case-insensitive (``.tif``, ``.TIF``, ...). Only
    regular files are considered and sub-directories are not descended into.
    Paths are resolved and gathered in a set, so identical resolved paths
    collapse into a single entry.

    Args:
        directory: Folder to scan, as a ``str`` or ``Path``.

    Returns:
        A sorted list of resolved ``Path`` objects; empty when nothing matches.

    Raises:
        FileNotFoundError: If *directory* does not exist or is not a directory.
    """
    root = Path(directory)
    if not root.is_dir():
        raise FileNotFoundError(f"Input directory not found: {root}")

    return sorted({
        f.resolve()
        for f in root.iterdir()
        if f.is_file() and f.suffix.lower() == ".tif"
    })


if __name__ == "__main__":
    import traceback  # local import: only the script entry point needs it

    # --------------------------------------------------------
    # Validate inputs once, before any output is created.
    # A missing shapefile would otherwise surface as the same
    # error repeated for every image in the batch.
    # --------------------------------------------------------
    if not Path(SHAPEFILE_PATH).is_file():
        raise FileNotFoundError(f"Shapefile not found: {SHAPEFILE_PATH}")

    # --------------------------------------------------------
    # Collect GeoTIFFs safely (no duplicates, case-insensitive)
    # --------------------------------------------------------
    tif_files = collect_tif_files(TIF_DIRECTORY)

    if not tif_files:
        raise RuntimeError(" No .tif files found in input directory")

    os.makedirs(OUTPUT_BASE_DIR, exist_ok=True)

    # Reproducible order: sorted list shuffled with a fixed seed
    random.seed(RANDOM_SEED)
    random.shuffle(tif_files)

    print("\n" + "=" * 70)
    print(f"Found {len(tif_files)} GeoTIFF file(s)")
    print(f"Normalization: {NORM_MODE} P2–P98" if NORMALIZE else "No normalization")
    print("=" * 70 + "\n")

    # --------------------------------------------------------
    # Process each image
    # --------------------------------------------------------
    failed = []  # (file name, "ExceptionType: message") for every image that raised

    for idx, tif_file in enumerate(tif_files, start=1):

        print(f"[{idx}/{len(tif_files)}] Processing: {tif_file.name}\n")

        # NOTE(review): the folder name is derived from the stem only, so two
        # inputs that differ just in extension case (a.tif / a.TIF on a
        # case-sensitive file system) would share one output directory.
        output_dir = os.path.join(
            OUTPUT_BASE_DIR,
            f"Dataset_{tif_file.stem}"
        )

        try:
            creator = YoloDatasetFromShp(
                input_tif=str(tif_file),
                input_shp=SHAPEFILE_PATH,
                output_dir=output_dir,
                crop_size=CROP_SIZE,
                train_ratio=TRAIN_RATIO,
                valid_ratio=VALID_RATIO,
                test_ratio=TEST_RATIO,
                class_names=CLASS_NAMES,
                normalize=NORMALIZE,
                norm_mode=NORM_MODE,
                random_seed=RANDOM_SEED
            )

            creator.create_dataset()

        except Exception as e:
            # Best-effort batch: one bad image must not stop the others, but
            # the failure is recorded and the full traceback is shown so the
            # cause can actually be diagnosed.
            print(f" Error processing {tif_file.name}: {e}")
            print(traceback.format_exc())
            failed.append((tif_file.name, f"{type(e).__name__}: {e}"))

        print("=" * 70 + "\n")

    # --------------------------------------------------------
    # Summary: only claim full success when nothing failed
    # --------------------------------------------------------
    if failed:
        succeeded = len(tif_files) - len(failed)
        print(f" {succeeded}/{len(tif_files)} dataset(s) created; {len(failed)} failed:")
        for failed_name, reason in failed:
            print(f"   - {failed_name}: {reason}")
        print(f"\n Output directory:\n{OUTPUT_BASE_DIR}\n")
    else:
        print(f" All datasets saved to:\n{OUTPUT_BASE_DIR}\n")
