# In [None]:
import os
import re
import glob
import pickle
from pesummary.io import read

# Directory holding the downloaded release files to scan.
data_dir = "/data/wiay/gw-data-releases/2023/8177023/"

# Collect only the *_cosmo.h5 files. glob.glob() returns its matches in an
# arbitrary, filesystem-dependent order, so sort them to make the processing
# order (and therefore the order of the saved events) reproducible.
h5_files = sorted(glob.glob(os.path.join(data_dir, "*_cosmo.h5")))

# Maps the key looked up in each event's posterior samples to the key used in
# the saved per-event dictionary. Every parameter currently keeps its name, so
# the mapping is built as an identity map; switch to an explicit dict literal
# if an output key ever needs to differ from the posterior key.
param_map = {
    name: name
    for name in (
        "mass_1",
        "mass_2",
        "mass_ratio",
        "a_1",
        "a_2",
        "cos_tilt_1",
        "cos_tilt_2",
        "chi_eff",
        "chi_p",
        "redshift",
        "luminosity_distance",
        "ra",
        "dec",
    )
}

# Events excluded from the output. Entries are short names: "GW" plus the
# six-digit date, without any "_HHMMSS" suffix.
# NOTE(review): presumably these are the events with a neutron-star-mass
# component -- confirm the selection criterion with the analysis owner.
skip_events = {
    "GW170817",
    "GW190425",
    "GW190426",
    "GW190814",
    "GW190917",
    "GW200105",
    "GW200115",
}

# One dictionary per retained event:
#   {"event": <event name>, <output key>: <posterior samples>, ...}
event_data = []

# Compiled once, outside the loop. Matches "GW" followed by six digits and an
# optional "_" + six-digit suffix, e.g. GW190521 or GW190521_074359.
event_name_pattern = re.compile(r"(GW\d{6}(?:_\d{6})?)")

for path in h5_files:
    basename = os.path.basename(path)

    # Extract the event name from the file name.
    match = event_name_pattern.search(basename)
    if match is None:
        print(f"Warning: No event name found in {basename}")
        continue
    event_name = match.group(1)

    # Characters 2-3 are the first two digits after "GW" (the two-digit year
    # of the event date). The regex guarantees they are digits, so int()
    # cannot raise here and no ValueError handling is needed.
    year = int(event_name[2:4])
    if year < 19:
        # Drop everything dated before 2019.
        continue

    # skip_events holds short names (no "_HHMMSS" suffix) while event_name may
    # carry the suffix, so compare both the full name and its date-only
    # prefix. Comparing only the full name would never match suffixed events.
    # NOTE(review): prefix matching skips every event sharing that date --
    # list full names in skip_events if finer control is needed.
    short_name = event_name.partition("_")[0]
    if event_name in skip_events or short_name in skip_events:
        continue

    # Load the file and pick the "C01:Mixed" entry of its samples_dict.
    # NOTE(review): presumably "C01:Mixed" is the analysis label of the
    # combined posterior -- confirm against the data release documentation.
    try:
        data = read(path)
        samples_dict = data.samples_dict["C01:Mixed"]
    except (KeyError, AttributeError) as e:
        print(f"Skipping {event_name} due to error reading samples: {e}")
        continue

    # Copy over every requested parameter that is present; warn about (but
    # tolerate) missing ones so a partial event is still recorded.
    event_dict = {"event": event_name}
    for posterior_key, injection_key in param_map.items():
        if posterior_key in samples_dict:
            event_dict[injection_key] = samples_dict[posterior_key]
        else:
            print(f"Warning: {posterior_key} missing in {event_name}, skipping parameter.")

    event_data.append(event_dict)

# Serialise the collected per-event dictionaries to a pickle file in the
# current working directory.
with open("O3b_all_parameter_posteriors.pkl", mode="wb") as pickle_file:
    pickle.dump(event_data, pickle_file)
