In [None]:
import h5py
import json
import sys

# Path of the HDF5 file opened below as the source of the convergence data.
CONVERGENCE_FILE = "./pp_verify_convergence_dc_2.h5"
# Path of the HDF5 file opened below as the source of the EOS transferability data.
EOS_FILE = "./pp_verify_transferability_eos_200.h5"
# Path of the JSON file the collected data is written to at the end.
OUTPUT_JSON = "si_convergence.json"

In [None]:
def eprint(*args, **kwargs):
    """Behave like the built-in ``print`` but write to standard error.

    All positional and keyword arguments are forwarded unchanged to
    ``print``; only the destination stream is fixed to ``sys.stderr``.
    """
    # Looked up at call time so that a redirected ``sys.stderr`` is honoured.
    error_stream = sys.stderr
    print(*args, file=error_stream, **kwargs)

In [None]:
# Maps the value of an entry's ``element`` attribute to the list of names of
# the top-level entries carrying that value (filled by ``curated_by_element``).
element_pps_mapping = {}


def curated_by_element(name: str, obj) -> None:
    """Register a top-level entry under its element in ``element_pps_mapping``.

    Written to be used as a ``visititems`` callback: it is called with the
    name of each visited entry and the entry object itself.

    Args:
        name: Name of the visited entry. Names containing ``"/"`` (nested
            entries) are ignored.
        obj: The visited object; only its ``attrs`` mapping is read.

    Returns:
        Always ``None``.

    Raises:
        ValueError: If a top-level entry has no ``element`` attribute.
    """
    if "/" in name:
        return
    el = obj.attrs.get("element")
    if el is None:
        raise ValueError(f"element attr of {obj} is None")
    names = element_pps_mapping.setdefault(el, [])
    # Registering is idempotent: visiting the same file again (e.g. when a
    # notebook cell is re-run) must not list an entry twice.
    if name not in names:
        names.append(name)

In [None]:
# Both files are only read from, so open them explicitly read-only instead of
# relying on the h5py default mode, which is not the same in every h5py
# release (older releases defaulted to a mode that allows writing).
converge_h5 = h5py.File(CONVERGENCE_FILE, "r")
eos_h5 = h5py.File(EOS_FILE, "r")

In [None]:
# Selection of the data to export.
element = "Si"
# NOTE(review): ``dual`` and ``pseudos`` are not referenced anywhere else in
# the visible code - confirm whether they are still needed.
dual = 8
# Label stored (upper-cased) under the "conff" key of the exported JSON.
conff = "dc"

pseudos = []

# Rebuild the element -> entry-names index from scratch so that re-running
# this cell neither duplicates entries nor keeps stale ones from a previous run.
element_pps_mapping.clear()
converge_h5.visititems(curated_by_element)

pps = element_pps_mapping.get(element, [])
if not pps:
    # An empty selection would otherwise silently produce an empty export.
    eprint(f"no entries found for element {element} in {CONVERGENCE_FILE}")


# Color, as a "#rrggbb" hex string, associated with each library identifier
# (the value of an entry's ``lib_name`` attribute).  Identifiers missing here
# fall back to "#000000" where the mapping is looked up.
# NOTE(review): entries tagged TBD look like provisional colors that are nearly
# identical to a sibling entry - confirm the final values.
lib_color_mapping = {
    "nc-dojo-v0.4.1-std": "#ffa500",
    "nc-spms-oncvpsp4": "#7f8001",  # XXX: new
    "nc-dojo-v0.4.1-str": "#ffb500",  #: TBD
    "nc-dojo-v0.5.0-std": "#ffc500",  #: TBD
    "nc-sg15-oncvpsp4": "#000000",
    "us-gbrv-v1.x-upf2": "#00cdcd",
    "us-psl-v1.0.0-high": "#ff0000",
    "us-psl-v1.0.0-low": "#fa0000",  # TBD
    "us-psl-v0.x": "#0000ff",
    "paw-jth-v1.1-std": "#984ea3",  # XXX: new TBD
    "paw-jth-v1.1-str": "#984fa3",  # TBD
    "paw-lanthanides-wentzcovitch": "#610b5e",
    "paw-psl-v0.x": "#ff00ff",
    "paw-psl-v1.0.0-high": "#008b00",
    "paw-psl-v1.0.0-low": "#008c00",  # TBD
    "paw-actinides-marburg": "#ea388e",
}

# Short display name for each library identifier (the value of an entry's
# ``lib_name`` attribute); it becomes the "name" field of the exported record.
# It has the same keys as ``lib_color_mapping``.  Identifiers missing here
# fall back to the raw identifier when the record is built.
lib_abbr_name_mapping = {
    "nc-dojo-v0.4.1-std": "DOJO-041-std",
    "nc-spms-oncvpsp4": "SPMS",
    "nc-dojo-v0.4.1-str": "DOJO-041-str",
    "nc-dojo-v0.5.0-std": "DOJO-050-std",
    "nc-sg15-oncvpsp4": "SG15",
    "us-gbrv-v1.x-upf2": "GBRV-1.X",
    "us-psl-v1.0.0-high": "PSL-US-v1-high",
    "us-psl-v1.0.0-low": "PSL-US-v1-low",
    "us-psl-v0.x": "PSL-US-v0.x",
    "paw-jth-v1.1-std": "JTH-1.1-std",
    "paw-jth-v1.1-str": "JTH-1.1-str",
    "paw-lanthanides-wentzcovitch": "Wentzcovitch",
    "paw-psl-v0.x": "PSL-PAW-v0.x",
    "paw-psl-v1.0.0-high": "PSL-PAW-v1-high",
    "paw-psl-v1.0.0-low": "PSL-PAW-v1-low",
    "paw-actinides-marburg": "MARBURG",
}

# NOTE(review): MAX_CUTOFF and MIN_CUTOFF are not referenced anywhere else in
# the visible code - presumably bounds on the cutoff axis for a later
# filtering/plotting step; confirm or remove.
MAX_CUTOFF = 200
MIN_CUTOFF = 30

# Skeleton of the exported JSON document: the upper-cased ``conff`` label and
# the list that receives one record per processed entry.
output = {
    "conff": conff.upper(),
    "pseudos": [],
}

# Build one JSON record per entry selected in ``pps``.  Every quantity is read
# on a best-effort basis: when reading one fails, the failure is reported on
# stderr and only that quantity is left out of the record.
for pp_name in pps:
    dataset = converge_h5[pp_name]
    lib_name = dataset.attrs.get("lib_name")
    z_valence = dataset.attrs.get("z_valence")

    if lib_name is None:
        raise ValueError(f"lib_name of {dataset} is None")
    if z_valence is None:
        # Same explicit failure as for lib_name, instead of an opaque
        # TypeError raised by int(None) below.
        raise ValueError(f"z_valence of {dataset} is None")

    pp_data = {
        # Libraries unknown to the mappings fall back to their raw identifier
        # and to black.
        "name": lib_abbr_name_mapping.get(lib_name, lib_name),
        "color": lib_color_mapping.get(lib_name, "#000000"),
        "Z": int(z_valence),
        "quantities": {},
    }

    # Metadata: statistics over the "nu" attribute of every configuration
    # stored under "transferability_eos" in the EOS file.
    try:
        eos_dataset = eos_h5[pp_name]
        avg_nu = 0
        n_nu = 0
        avg_nu_wo_xo3 = 0
        n_nu_wo_xo3 = 0
        max_nu = 0
        max_conf = "n/a"
        for conf, data in eos_dataset["transferability_eos"].items():
            nu = data.attrs.get("nu")
            if nu is None:
                raise ValueError(f"nu attr of configuration {conf} is None")
            # Plain Python float, so that the value is always JSON-serializable
            # whatever numeric type the attribute is stored as.
            nu = float(nu)
            avg_nu += nu
            n_nu += 1
            if conf != "XO3":
                avg_nu_wo_xo3 += nu
                n_nu_wo_xo3 += 1

            if nu > max_nu:
                max_nu = nu
                max_conf = conf
        if n_nu == 0 or n_nu_wo_xo3 == 0:
            raise ValueError("no configurations to average nu over")
        avg_nu /= n_nu
        avg_nu_wo_xo3 /= n_nu_wo_xo3
        pp_data["quantities"]["metadata"] = {
            "avg_nu": avg_nu,
            "max_nu": max_nu,
            # Average over all configurations except "XO3".
            "ang_nu": avg_nu_wo_xo3,
            "max_conf": max_conf,
        }
    except Exception as exc:
        # Reported like every other quantity rather than silently dropped.
        eprint(f"metadata missing for {pp_name}: {exc}")

    # Phonon frequencies
    try:
        phonon = dataset["convergence_phonon_frequencies"]
        xs = phonon["xs"][()]
        ys = phonon["ys"][()]
        max_diff = phonon["ys_relative_max_diff"][()]
        ref = phonon["ys_omega_max"][-1]
        pp_data["quantities"]["phonon_frequencies"] = {
            "cutoffs": xs.tolist(),
            "values": ys.tolist(),
            "error": abs(abs(max_diff) - abs(ys)).tolist(),
            # Converted like the cohesive-energy reference below so that the
            # final JSON dump cannot fail on a numpy scalar.
            "ref": float(ref),
        }
    except Exception as exc:
        eprint(f"phonon frequencies missing for {pp_name}: {exc}")

    # Pressure
    try:
        pressure = dataset["convergence_pressure"]
        xs = pressure["xs"][()]
        ys = pressure["ys"][()]
        pp_data["quantities"]["pressure"] = {
            "cutoffs": xs.tolist(),
            "values": ys.tolist(),
        }
    except Exception as exc:
        eprint(f"pressure missing for {pp_name}: {exc}")

    # Cohesive energy
    try:
        cohesive = dataset["convergence_cohesive_energy"]
        xs = cohesive["xs"][()]
        ys = cohesive["ys"][()]
        ref = cohesive["ys_cohesive_energy_per_atom"][-1]
        pp_data["quantities"]["cohesive_energy"] = {
            "cutoffs": xs.tolist(),
            "values": ys.tolist(),
            "ref": float(ref),
        }
    except Exception as exc:
        eprint(f"cohesive energy missing for {pp_name}: {exc}")

    # EOS
    try:
        eos = dataset["convergence_eos"]
        xs = eos["xs"][()]
        ys = eos["ys"][()]
        pp_data["quantities"]["eos"] = {
            "cutoffs": xs.tolist(),
            "values": ys.tolist(),
        }
    except Exception as exc:
        eprint(f"eos missing for {pp_name}: {exc}")

    # Bands data
    try:
        bands = dataset["convergence_bands"]
        xs = bands["xs"][()].tolist()
        eta_c = bands["ys_eta_c"][()].tolist()
        max_diff_c = bands["ys_max_diff_c"][()].tolist()
        pp_data["quantities"]["bands"] = {
            "cutoffs": xs,
            "eta_c": eta_c,
            "max_diff_c": max_diff_c,
        }
    except Exception as exc:
        eprint(f"bands missing for {pp_name}: {exc}")

    output["pseudos"].append(pp_data)

# Dump to JSON.  The document is serialized completely before the file is
# opened, so a serialization error cannot leave a truncated, invalid file
# behind (and cannot clobber a previous good export).
payload = json.dumps(output, indent=2)
with open(OUTPUT_JSON, "w") as fh:
    fh.write(payload)

# Report the element that was actually exported rather than a fixed label.
print(f"{element} data written to {OUTPUT_JSON}")
