# In [None]:
import os
from collections import defaultdict
from tqdm import tqdm

# =========================
# CONFIGURATION
# =========================
# Root folder scanned by main(); each immediate sub-directory is treated as one class.
TRAIN_DIR = r"D:\Final_Semester_Project\AI_Attendance_System\ai-ml-model\DataSets\raw"
# Lower-case extensions (leading dot included) that is_image_file() accepts.
IMAGE_EXTENSIONS = {".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".webp"}
# Width of each image-count bucket: 0–99, 100–199, 200–299, ...
BUCKET_STEP = 100
# Exclusive upper bound for bucket start values; with the values here, counts
# of 1000 or more fall outside every generated range (extend if needed).
MAX_BUCKET = 1000

# =========================
# UTILITY FUNCTIONS
# =========================
def is_image_file(filename: str) -> bool:
    """Return True when *filename* ends in a recognised image extension.

    The check is case-insensitive: the name is lower-cased before its
    extension is looked up in ``IMAGE_EXTENSIONS``.
    """
    _, extension = os.path.splitext(filename.lower())
    return extension in IMAGE_EXTENSIONS


def count_images_in_directory(dir_path: str) -> int:
    """Count the image files located directly inside *dir_path*.

    Only entries that are files (including symlinks that resolve to a file)
    and whose name passes ``is_image_file`` are counted. Sub-directories are
    not descended into and are never counted, even if their name happens to
    end in an image extension.

    Raises:
        OSError: propagated from ``os.scandir`` when *dir_path* cannot be
            listed (missing, not a directory, permission denied, ...).
    """
    # scandir entries know their own type, so directories can be filtered
    # out; the context manager closes the directory handle promptly.
    with os.scandir(dir_path) as entries:
        return sum(
            1
            for entry in entries
            # Cheap name test first; only then ask whether it is a file.
            if is_image_file(entry.name) and entry.is_file()
        )


# =========================
# MAIN LOGIC
# =========================
def main():
    """Report how many images each class directory under ``TRAIN_DIR`` holds.

    Every immediate sub-directory of ``TRAIN_DIR`` is treated as one class.
    The function prints the per-class image count (ascending) followed by a
    histogram of classes per image-count bucket. Buckets are ``BUCKET_STEP``
    wide and start at 0; classes whose count lies beyond the last bucket are
    reported in a final open-ended "N+" row instead of being dropped.

    Raises:
        FileNotFoundError: if ``TRAIN_DIR`` is not an existing directory.
    """
    if not os.path.isdir(TRAIN_DIR):
        raise FileNotFoundError(f"Train directory not found: {TRAIN_DIR}")

    class_dirs = [
        d for d in os.listdir(TRAIN_DIR)
        if os.path.isdir(os.path.join(TRAIN_DIR, d))
    ]

    print(f"Found {len(class_dirs)} class directories.\n")

    class_image_counts = {}
    range_counts = defaultdict(int)
    overflow_count = 0

    # Define ranges: (low, high), contiguous and starting at 0.
    ranges = [
        (i, i + BUCKET_STEP - 1)
        for i in range(0, MAX_BUCKET, BUCKET_STEP)
    ]
    # Smallest count that no longer fits into any of the ranges above.
    overflow_start = ranges[-1][1] + 1 if ranges else 0

    for class_name in tqdm(class_dirs, desc="Processing classes", unit="class"):
        class_path = os.path.join(TRAIN_DIR, class_name)
        image_count = count_images_in_directory(class_path)
        class_image_counts[class_name] = image_count

        if image_count >= overflow_start:
            # Too large for every bucket: keep it visible in the report.
            overflow_count += 1
            continue

        # Ranges are contiguous from 0, so the bucket follows arithmetically.
        low = image_count - image_count % BUCKET_STEP
        range_counts[(low, low + BUCKET_STEP - 1)] += 1

    # =========================
    # RESULTS
    # =========================
    print("\n=== Image Count Per Class ===")
    for cls, count in sorted(class_image_counts.items(), key=lambda x: x[1]):
        print(f"{cls:<30} {count}")

    print("\n=== Directory Distribution (Ranges) ===")
    for low, high in ranges:
        print(f"Classes with {low:3d}-{high:3d} images : {range_counts[(low, high)]}")
    # Open-ended row for classes beyond the last bucket.
    overflow_label = f"{overflow_start}+"
    print(f"Classes with {overflow_label:<7} images : {overflow_count}")

    print("\nProcessing completed successfully.")


# Run the report only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
