## Notebook to save only files of interest from /resnick/groups/sxs/vtommasi/EccRedTest2025

In [12]:
import os
import h5py
import re

base_dir = "/resnick/groups/sxs/vtommasi/EccRedTest2025"
output_file = "EccRedTest2025FilesofInterest.h5"

def ensure_unique_path(hdf, path):
    """Ensure dataset path is unique by appending a suffix if needed."""
    if path not in hdf:
        return path
    i = 1
    while f"{path}_{i}" in hdf:
        i += 1
    return f"{path}_{i}"

total_files = 0
for root, dirs, files in os.walk(base_dir):
    total_files += len(files)

print(f"Total number of files: {total_files}")

with h5py.File(output_file, 'w') as hdf:
    count = 0

    # Go through each EccRedTest2025*** directory
    for subdir in sorted(os.listdir(base_dir)):
        if not subdir.startswith("EccRedTest2025") or not subdir[-3:].isdigit():
            continue

        subdir_path = os.path.join(base_dir, subdir)
        if not os.path.isdir(subdir_path):
            continue

        # Extract relevant Ecc* directories
        def extract_ecc_number(dirname):
            match = re.match(r"Ecc(\d+)", dirname)
            return int(match.group(1)) if match else float('inf')

        ecc_dirs = [d for d in os.listdir(subdir_path)
                    if os.path.isdir(os.path.join(subdir_path, d)) and d.startswith("Ecc")]
        ecc_dirs.sort(key=extract_ecc_number)

        first_ecc = ecc_dirs[0] if ecc_dirs else None
        last_ecc = ecc_dirs[-1] if ecc_dirs else None

        # Get files of interest
        targets = [
            os.path.join(subdir_path, first_ecc, "Params.input"),
            os.path.join(subdir_path, last_ecc, "Params.input"),
            os.path.join(subdir_path, last_ecc, "Ev", "JoinedForEcc", "Fit_F2cos2_SS.dat"),
        ]

        for path in targets:
            if os.path.isfile(path):
                try:
                    with open(path, 'r') as f:
                        data = f.read()
                    rel_path = os.path.relpath(path, base_dir)
                    unique_path = ensure_unique_path(hdf, rel_path)
                    hdf.create_dataset(unique_path, data=data)
                    print(f"✅ Stored: {unique_path}")
                    count += 1
                except Exception as e:
                    print(f"⚠️ Failed to read {path}: {e}")
            else:
                print(f"❌ Missing file: {path}")

    print(f"\n🎉 Total files saved: {count}")


Total number of files: 137420
✅ Stored: EccRedTest2025001/Ecc0/Params.input
✅ Stored: EccRedTest2025001/Ecc0/Params.input_1
❌ Missing file: /resnick/groups/sxs/vtommasi/EccRedTest2025/EccRedTest2025001/Ecc0/Ev/JoinedForEcc/Fit_F2cos2_SS.dat
✅ Stored: EccRedTest2025002/Ecc0/Params.input
✅ Stored: EccRedTest2025002/Ecc3/Params.input
✅ Stored: EccRedTest2025002/Ecc3/Ev/JoinedForEcc/Fit_F2cos2_SS.dat
✅ Stored: EccRedTest2025003/Ecc0/Params.input
✅ Stored: EccRedTest2025003/Ecc1/Params.input
✅ Stored: EccRedTest2025003/Ecc1/Ev/JoinedForEcc/Fit_F2cos2_SS.dat
✅ Stored: EccRedTest2025004/Ecc0/Params.input
✅ Stored: EccRedTest2025004/Ecc3/Params.input
✅ Stored: EccRedTest2025004/Ecc3/Ev/JoinedForEcc/Fit_F2cos2_SS.dat
✅ Stored: EccRedTest2025005/Ecc0/Params.input
✅ Stored: EccRedTest2025005/Ecc2/Params.input
✅ Stored: EccRedTest2025005/Ecc2/Ev/JoinedForEcc/Fit_F2cos2_SS.dat
✅ Stored: EccRedTest2025006/Ecc0/Params.input
✅ Stored: EccRedTest2025006/Ecc1/Params.input
✅ Stored: EccRedTest2025006/Ecc