# In [None]:
import sys
from pathlib import Path

def analyze_directory(root_path_str: "str | Path") -> None:
    """Print a per-subdirectory file-count summary for a root directory.

    Every immediate subdirectory of the root is treated as one group, and the
    regular files located directly inside it (nested folders are not
    descended into) are counted as its samples.  A table with the count and
    the share of the overall total for each subdirectory is printed to
    stdout, sorted by subdirectory name.

    Args:
        root_path_str: Directory to analyze.  Despite the name, anything
            accepted by ``pathlib.Path`` (a ``str`` or a path object) works.

    Returns:
        None.  The summary goes to stdout; problems (missing root, root that
        is not a directory, unreadable directories) are reported on stderr
        rather than raised.  Subdirectories that cannot be read are left out
        of the table.
    """
    # 40 + 10 + 15 column widths plus two " | " separators.
    table_width = 71
    heavy_rule = "=" * table_width
    light_rule = "-" * table_width

    root_path = Path(root_path_str)

    # Validate the target before touching its contents.
    if not root_path.exists():
        print(f"[ERROR] Path '{root_path}' does not exist.", file=sys.stderr)
        return
    if not root_path.is_dir():
        print(f"[ERROR] Path '{root_path}' is not a directory.", file=sys.stderr)
        return

    # List the subdirectories up front, sorted by name, so that warnings and
    # table rows come out in a deterministic order and an unreadable root is
    # reported the same way as every other failure in this function.
    try:
        subdirs = sorted(
            (entry for entry in root_path.iterdir() if entry.is_dir()),
            key=lambda entry: entry.name,
        )
    except OSError as e:
        print(f"[ERROR] Failed to read '{root_path}': {e}", file=sys.stderr)
        return

    counts_by_subdir = {}
    for subdir in subdirs:
        try:
            # Count only files; sub-folders inside the subdirectory are ignored.
            file_count = sum(1 for entry in subdir.iterdir() if entry.is_file())
        except PermissionError:
            print(f"[WARN] Permission denied for '{subdir.name}'.", file=sys.stderr)
            continue
        except OSError as e:
            print(f"[ERROR] Failed to read '{subdir.name}': {e}", file=sys.stderr)
            continue
        counts_by_subdir[subdir.name] = file_count

    total_samples = sum(counts_by_subdir.values())

    print(heavy_rule)
    print("SAMPLE DISTRIBUTION SUMMARY")
    print(f"Target Directory: {root_path}")
    print(heavy_rule)

    if total_samples == 0:
        print("[INFO] No samples found in subdirectories.")
        print(light_rule)
        return

    print(f"{'Subdirectory':<40} | {'Samples':>10} | {'Percentage (%)':>15}")
    print(light_rule)

    # The dict was filled in sorted order, so plain iteration is alphabetical.
    # The number takes 14 columns and the '%' sign the 15th, which keeps the
    # rows exactly as wide as the header and the rules.
    for subdir_name, count in counts_by_subdir.items():
        percentage = count / total_samples * 100
        print(f"{subdir_name:<40} | {count:>10} | {percentage:>14.2f}%")

    print(light_rule)
    print(f"{'TOTAL':<40} | {total_samples:>10} | {100:>14.2f}%")
    print(heavy_rule)


# --- EXECUTION ---

if __name__ == "__main__":
    # Directory to summarize, given relative to the current working
    # directory.  The example below points at the pseudo-labels generated by
    # ResNet18; edit it to analyze a different tree.
    target_path = Path("results/Carrier_C1_675/pseudo_labels/resnet18")

    analyze_directory(root_path_str=target_path)