diff --git a/dev_scripts/potcar_scrambler.py b/dev_scripts/potcar_scrambler.py index 0cf09dc5b77..7ca860f0495 100644 --- a/dev_scripts/potcar_scrambler.py +++ b/dev_scripts/potcar_scrambler.py @@ -27,7 +27,7 @@ class PotcarScrambler: Used to generate copyright-compliant POTCARs for PMG tests. - In case of questions, contact Aaron Kaplan . + In case of questions, contact Aaron Kaplan . Recommended use: PotcarScrambler.from_file( @@ -196,3 +196,21 @@ def potcar_cleanser() -> None: if __name__ == "__main__": potcar_cleanser() # generate_fake_potcar_libraries() + + """ + Note that vaspout.h5 files also contain full POTCARs. While the + Vaspout class in `pymatgen.io.vasp.outputs` contains a method to + replace the POTCAR with its spec (`remove_potcar_and_write_file`), + for test purposes, its often useful to have a fake POTCAR in place + of the real one. + + To use the scrambler on a vaspout.h5: + ``` + vout = Vaspout("< path to vaspout.h5>") + scrambled = PotcarScrambler(vout.potcar) + vout.remove_potcar_and_write_file( + filename = "< path to output vaspout.h5>", + fake_potcar_str = scrambled.scrambled_potcars_str + ) + ``` + """ diff --git a/src/pymatgen/io/vasp/__init__.py b/src/pymatgen/io/vasp/__init__.py index 59ca377e99e..39d3ef9b931 100644 --- a/src/pymatgen/io/vasp/__init__.py +++ b/src/pymatgen/io/vasp/__init__.py @@ -17,6 +17,7 @@ Oszicar, Outcar, Procar, + Vaspout, Vasprun, VolumetricData, Wavecar, diff --git a/src/pymatgen/io/vasp/inputs.py b/src/pymatgen/io/vasp/inputs.py index eee5d425b31..0241c4c7cab 100644 --- a/src/pymatgen/io/vasp/inputs.py +++ b/src/pymatgen/io/vasp/inputs.py @@ -2200,22 +2200,28 @@ def data_stats(data_list: Sequence) -> dict: data_match_tol: float = 1e-6 for ref_psp in possible_potcar_matches: - key_match = all( - set(ref_psp["keywords"][key]) == set(self._summary_stats["keywords"][key]) for key in ["header", "data"] - ) - - data_diff = [ - abs(ref_psp["stats"][key][stat] - self._summary_stats["stats"][key][stat]) - for stat in ["MEAN", "ABSMEAN", "VAR", "MIN", "MAX"] - for key in ["header", "data"] - ] - data_match = all(np.array(data_diff) < data_match_tol) - - if key_match and data_match: + if self.compare_potcar_stats(ref_psp, self._summary_stats, tolerance=data_match_tol): return True return False + def spec(self, extra_spec: Sequence[str] | None = None) -> dict[str, Any]: + """ + POTCAR spec used in vasprun.xml. + + Args: + extra_spec : Sequence[str] or None (default) + A list of extra POTCAR fields to include in the spec. + If None, defaults to no extra spec. + Returns: + dict of POTCAR spec + """ + extra_spec = extra_spec or [] + spec = {"titel": self.TITEL, "hash": self.md5_header_hash, "summary_stats": self._summary_stats} + for attr in extra_spec: + spec[attr] = getattr(self, attr, None) + return spec + def write_file(self, filename: str) -> None: """Write PotcarSingle to a file. @@ -2325,6 +2331,47 @@ def verify_potcar(self) -> tuple[bool, bool]: return has_sha256, hash_is_valid + @staticmethod + def compare_potcar_stats( + potcar_stats_1: dict, + potcar_stats_2: dict, + tolerance: float = 1.0e-6, + check_potcar_fields: Sequence[str] = ["header", "data"], + ) -> bool: + """ + Compare PotcarSingle._summary_stats to assess if they are the same within a tolerance. + + Args: + potcar_stats_1 : dict + Dict of potcar summary stats from the first PotcarSingle, from the PotcarSingle._summary stats attr + potcar_stats_2 : dict + Second dict of summary stats + tolerance : float = 1.e-6 + Tolerance to assess equality of numeric statistical values + check_potcar_fields : Sequence[str] = ["header", "data"] + The specific fields of the POTCAR to check, whether just the "header", just the "data", or both + + Returns: + bool + Whether the POTCARs are identical according to their summary stats. + """ + + key_match = all( + set(potcar_stats_1["keywords"].get(key)) == set(potcar_stats_2["keywords"].get(key)) + for key in check_potcar_fields + ) + + data_match = False + if key_match: + data_diff = [ + abs(potcar_stats_1["stats"].get(key, {}).get(stat) - potcar_stats_2["stats"].get(key, {}).get(stat)) + for stat in ["MEAN", "ABSMEAN", "VAR", "MIN", "MAX"] + for key in check_potcar_fields + ] + data_match = all(np.array(data_diff) < tolerance) + + return key_match and data_match + def identify_potcar( self, mode: Literal["data", "file"] = "data", @@ -2362,19 +2409,9 @@ def identify_potcar( if self.VRHFIN.replace(" ", "") != ref_psp["VRHFIN"]: continue - key_match = all( - set(ref_psp["keywords"][key]) == set(self._summary_stats["keywords"][key]) for key in check_modes - ) - - data_diff = [ - abs(ref_psp["stats"][key][stat] - self._summary_stats["stats"][key][stat]) - for stat in ["MEAN", "ABSMEAN", "VAR", "MIN", "MAX"] - for key in check_modes - ] - - data_match = all(np.array(data_diff) < data_tol) - - if key_match and data_match: + if self.compare_potcar_stats( + ref_psp, self._summary_stats, tolerance=data_tol, check_potcar_fields=check_modes + ): identity["potcar_functionals"].append(func) identity["potcar_symbols"].append(ref_psp["symbol"]) @@ -2621,8 +2658,28 @@ def symbols(self, symbols: Sequence[str]) -> None: @property def spec(self) -> list[dict]: - """The atomic symbols and hash of all the atoms in the POTCAR file.""" - return [{"symbol": psingle.symbol, "hash": psingle.md5_computed_file_hash} for psingle in self] + """ + POTCAR spec for all POTCARs in this instance. + + Args: + extra_spec : Sequence[str] or None (default) + A list of extra POTCAR fields to include in the spec. + If None, defaults to ["symbol"] (needed for compatibility with LOBSTER). + + Return: + list[dict], a list of PotcarSingle.spec dicts + """ + return [psingle.spec(extra_spec=["symbol"]) for psingle in self] + + def write_potcar_spec(self, filename: str = "POTCAR.spec.json.gz") -> None: + """ + Write POTCAR spec to file. + + Args: + filename : str = "POTCAR.spec.json.gz" + The name of a file to write the POTCAR spec to. + """ + dumpfn(self.spec, filename) def as_dict(self) -> dict: """MSONable dict representation.""" @@ -2645,22 +2702,19 @@ def from_dict(cls, dct: dict) -> Self: return Potcar(symbols=dct["symbols"], functional=dct["functional"]) @classmethod - def from_file(cls, filename: PathLike) -> Self: + def from_str(cls, data: str): """ - Reads Potcar from file. + Read Potcar from a string. - Args: - filename: Filename + :param data: Potcar as a string. Returns: Potcar """ - with zopen(filename, mode="rt") as file: - fdata = file.read() - potcar = cls() - functionals: list[str | None] = [] - for psingle_str in fdata.split("End of Dataset"): + + functionals = [] + for psingle_str in data.split("End of Dataset"): if p_strip := psingle_str.strip(): psingle = PotcarSingle(f"{p_strip}\nEnd of Dataset\n") potcar.append(psingle) @@ -2672,6 +2726,20 @@ def from_file(cls, filename: PathLike) -> Self: potcar.functional = functionals[0] return potcar + @classmethod + def from_file(cls, filename: str): + """ + Reads Potcar from file. + + :param filename: Filename + + Returns: + Potcar + """ + with zopen(filename, mode="rt") as file: + fdata = file.read() + return cls.from_str(fdata) + def write_file(self, filename: PathLike) -> None: """Write Potcar to a file. @@ -2708,6 +2776,42 @@ def set_symbols( else: self.extend(PotcarSingle.from_symbol_and_functional(el, functional) for el in symbols) + @classmethod + def from_spec(cls, potcar_spec: list[dict], functionals: list[str] | None = None) -> Potcar: + """ + Generate a POTCAR from a list of POTCAR spec dicts. + + If a set of POTCARs *for the same functional* cannot be found, raises a ValueError. + Args: + potcar_spec: list[dict] + List of POTCAR specs, from Potcar.spec or [PotcarSingle.spec] + functionals : list[str] or None (default) + If a list of strings, the functionals to restrict the search to. + + Returns: + Potcar, a POTCAR using a single functional that matches the input spec. + """ + + functionals = functionals or list(PotcarSingle._potcar_summary_stats) + for functional in functionals: + potcar = Potcar() + matched = [False for _ in range(len(potcar_spec))] + for ispec, spec in enumerate(potcar_spec): + titel = spec.get("titel", "") + titel_no_spc = titel.replace(" ", "") + symbol = titel.split(" ")[1].strip() + + for stats in PotcarSingle._potcar_summary_stats[functional].get(titel_no_spc, []): + if PotcarSingle.compare_potcar_stats(spec["summary_stats"], stats): + potcar.append(PotcarSingle.from_symbol_and_functional(symbol=symbol, functional=functional)) + matched[ispec] = True + break + + if all(matched): + return potcar + + raise ValueError("Cannot match the give POTCAR spec to a set of POTCARs generated with the same functional.") + class UnknownPotcarWarning(UserWarning): """Warning raised when POTCAR hashes do not pass validation.""" diff --git a/src/pymatgen/io/vasp/outputs.py b/src/pymatgen/io/vasp/outputs.py index 4ade52f6c9a..34726406a54 100644 --- a/src/pymatgen/io/vasp/outputs.py +++ b/src/pymatgen/io/vasp/outputs.py @@ -6,6 +6,7 @@ import math import os import re +import subprocess import warnings import xml.etree.ElementTree as ET from collections import defaultdict @@ -18,6 +19,7 @@ from typing import TYPE_CHECKING, cast import numpy as np +from monty.dev import requires from monty.io import reverse_readfile, zopen from monty.json import MSONable, jsanitize from monty.os.path import zpath @@ -37,14 +39,19 @@ from pymatgen.entries.computed_entries import ComputedEntry, ComputedStructureEntry from pymatgen.io.common import VolumetricData as BaseVolumetricData from pymatgen.io.core import ParseError -from pymatgen.io.vasp.inputs import Incar, Kpoints, Poscar, Potcar +from pymatgen.io.vasp.inputs import Incar, Kpoints, KpointsSupportedModes, Poscar, Potcar from pymatgen.io.wannier90 import Unk from pymatgen.util.io_utils import clean_lines, micro_pyawk from pymatgen.util.num import make_symmetric_matrix_from_upper_tri from pymatgen.util.typing import Kpoint, Tuple3Floats, Vector3D +try: + import h5py +except ImportError: + h5py = None + if TYPE_CHECKING: - from collections.abc import Callable + from collections.abc import Callable, Sequence from typing import Any, Literal # Avoid name conflict with pymatgen.core.Element @@ -1204,13 +1211,13 @@ def update_potcar_spec(self, path: PathLike | bool) -> None: Args: path (PathLike | bool): Path to search for POTCARs. + + Note that the vasprun.xml spec typically hasn't included the POTCAR symbols, + since these are derived from the TITEL. """ if potcar := self.get_potcars(path): self.potcar_spec = [ - {"titel": sym, "hash": ps.md5_header_hash, "summary_stats": ps._summary_stats} - for sym in self.potcar_symbols - for ps in potcar - if ps.symbol == sym.split()[1] + ps.spec(extra_spec=[]) for sym in self.potcar_symbols for ps in potcar if ps.symbol == sym.split()[1] ] def update_charge_from_potcar(self, path: PathLike | bool) -> None: @@ -5355,3 +5362,459 @@ def from_file(cls, filename: str) -> Self: class UnconvergedVASPWarning(Warning): """Warning for unconverged VASP run.""" + + +@requires(h5py is not None, "h5py must be installed to read vaspout.h5") +class Vaspout(Vasprun): + """ + Class to read vaspout.h5 files. + + This class inherits from Vasprun, as the vaspout.h5 file is intended + to be a forward-looking replacement for vasprun.xml. + + Thus to later accommodate a smooth transition to vaspout.h5, this class + uses the same structure as Vasprun but overrides many of its methods. + + Parameters + ----------- + filename : str or Path + The name of the vaspout.h5 file to parse, can be compressed. + occu_tol: float = 1e-8 + Sets the minimum tol for the determination of the + vbm and cbm. Usually the default of 1e-8 works well enough, + but there may be pathological cases. + parse_dos : bool = True + Whether to parse the dos. Defaults to True. Set + to False to shave off significant time from the parsing if you + are not interested in getting those data. + parse_eigen : bool = True + Whether to parse the eigenvalues. Defaults to + True. Set to False to shave off significant time from the + parsing if you are not interested in getting those data. + parse_projected_eigen : bool = False + Whether to parse the projected + eigenvalues and magnetization. Defaults to False. Set to True to obtain + projected eigenvalues and magnetization. **Note that this can take an + extreme amount of time and memory.** So use this wisely. + separate_spins : bool + Whether the band gap, CBM, and VBM should be + reported for each individual spin channel. Defaults to False, + which computes the eigenvalue band properties independent of + the spin orientation. If True, the calculation must be spin-polarized. + store_potcar : bool + Whether to store the full POTCAR data. + """ + + def __init__( + self, + filename: str | Path, + occu_tol: float = 1e-8, + parse_dos: bool = True, + parse_eigen: bool = True, + parse_projected_eigen: bool = False, + separate_spins: bool = False, + store_potcar: bool = True, + ) -> None: + self.filename = str(filename) + self.occu_tol = occu_tol + self.separate_spins = separate_spins + self.store_potcar = store_potcar + + self._parse(parse_dos, parse_eigen, parse_projected_eigen) + + @classmethod + def _parse_hdf5_value(cls, val: Any) -> Any: + """ + Parse HDF5 values recursively, turning it into a dict-like entry. + + This could be a staticmethod, but a recursive staticmethod seems to only + work in python >= 3.10. Using a classmethod to work with 3.9. + + Args: + val (Any), input value + Returns: + Any, output value. Recursion is performed until a bytes-like object is input. + """ + if hasattr(val, "items"): + val = {k: cls._parse_hdf5_value(v) for k, v in val.items()} + else: + val = np.array(val).tolist() + if isinstance(val, bytes): + val = val.decode() + elif isinstance(val, list): + val = [cls._parse_hdf5_value(x) for x in val] + return val + + def _parse(self, parse_dos: bool, parse_eigen: bool, parse_projected_eigen: bool) -> None: # type: ignore[override] + """ + Parse data contained in vaspout.h5. + + Args: + parse_dos (bool) + Whether to parse the DOS + parse_eigen (bool) + Whether to parse the bandstructure / electronic eigenvalues + parse_projected_eigen (bool) + Whether to parse the projected bandstructure. + TODO: this information is not currently included in vaspout.h5, add later? + """ + with zopen(self.filename, "rb") as vout_file, h5py.File(vout_file, "r") as h5_file: + data = self._parse_hdf5_value(h5_file) + + self._parse_params(data["input"]) + self._get_ionic_steps(data["intermediate"]["ion_dynamics"]) + + # ----- + # TODO: determine if these following fields are stored in vaspout.h5 + self.kpoints_opt_props = None + self.md_data = [] + # ----- + + self._parse_results(data["results"]) + + if parse_dos: + try: + self._parse_dos( + electron_dos=data["results"]["electron_dos"], + projectors=data["results"].get("projectors", {}).get("lchar", None), + ) + self.dos_has_errors = False + except Exception: + self.dos_has_errors = True + + if parse_eigen: + self.eigenvalues = self._parse_eigen(data["results"]["electron_eigenvalues"]) + + self.projected_eigenvalues = None + self.projected_magnetisation = None + if parse_projected_eigen: + # TODO: are these contained in vaspout.h5? + self.projected_eigenvalues = None + self.projected_magnetisation = None + + self.vasp_version = ".".join(f"{data['version'].get(tag,'')}" for tag in ("major", "minor", "patch")) + # TODO: are the other generator tags, like computer platform, stored in vaspout.h5? + self.generator = {"version": self.vasp_version} + + @staticmethod + def _parse_structure(positions: dict) -> Structure: # type: ignore[override] + """ + Parse the structure from vaspout format. + + Args: + positions (dict), dict representation of POSCAR + Returns: + pymatgen Structure + """ + species = [] + for ispecie, specie in enumerate(positions["ion_types"]): + species += [specie for _ in range(positions["number_ion_types"][ispecie])] + + # TODO : figure out how site_properties are stored in vaspout + site_properties: dict[str, list] = {} + if positions["selective_dynamics"] == 1: + site_properties["selective_dynamics"] = [] + + return Structure( + lattice=Lattice(positions["scale"] * np.array(positions["lattice_vectors"])), + species=species, + coords=positions["position_ions"], + coords_are_cartesian=(positions["direct_coordinates"] == 1), + ) + + @staticmethod + def _parse_kpoints(kpoints: dict) -> tuple[Kpoints, Sequence, Sequence]: # type: ignore[override] + _kpoints_style_from_mode = { + KpointsSupportedModes.Reciprocal: {"mode": "e", "coordinate_space": "R"}, + KpointsSupportedModes.Automatic: {"mode": "a"}, + KpointsSupportedModes.Gamma: {"mode": "g"}, + KpointsSupportedModes.Line_mode: {"mode": "l"}, + KpointsSupportedModes.Monkhorst: {"mode": "m"}, + } + kpts = {"comment": kpoints.get("system", "Unknown")} + + for kpoints_style, props in _kpoints_style_from_mode.items(): + if all(kpoints.get(k) == v for k, v in props.items()): + kpts["style"] = kpoints_style + break + + if not kpts.get("style"): + raise ValueError("Could not identify KPOINTS style.") + + if coord_type := kpoints.get("coordinate_space"): + kpts["coord_type"] = "Reciprocal" if coord_type == "R" else "Cartesian" + + actual_kpoints = [None] + actual_kpoint_weights = [None] + if kpoints.get("coordinates_kpoints"): + kpts["num_kpts"] = len(kpoints["coordinates_kpoints"]) + kpts["labels"] = [None for _ in range(kpts["num_kpts"])] + for i, idx in enumerate(kpoints.get("positions_labels_kpoints", [])): + kpts["labels"][idx - 1] = kpoints["labels_kpoints"][i] + kpts["kpts"] = kpoints["coordinates_kpoints"] + actual_kpoints = kpoints["coordinates_kpoints"] + actual_kpoint_weights = kpoints["weights_kpoints"] + + elif all(kpoints.get(f"nkp{axis}") for axis in ("x", "y", "z")): + kpts["num_kpts"] = kpoints["number_kpoints"] + kpts["kpts"] = [[kpoints[f"nkp{axis}"] for axis in ("x", "y", "z")]] + + return ( + Kpoints(**kpts), + actual_kpoints, + actual_kpoint_weights, + ) + + @staticmethod + def _parse_atominfo(composition: Composition): + # TODO: this function seems irrelevant but is used in Vasprun, do we need this? + atom_symbols = [] + for element in composition: + atom_symbols += [str(element) for _ in range(int(composition[element]))] + return atom_symbols + + def _parse_params(self, input_data: dict): # type: ignore[override] + self.incar = Incar(input_data["incar"]) + + # TODO: set defaults in parameters to match vasprun? + self.parameters = Incar.from_dict(self.incar.as_dict()) # type: ignore[attr-defined] + + self.kpoints: list[Any] | None = None # type: ignore[assignment] + self.actual_kpoints: list[Any] | None = None # type: ignore[assignment] + self.actual_kpoints_weights: Sequence[float] = None # type: ignore[assignment] + if not self.incar.get("KSPACING"): + ( + self.kpoints, + self.actual_kpoints, + self.actual_kpoints_weights, + ) = self._parse_kpoints(input_data["kpoints"]) # type: ignore[assignment] + + self.initial_structure = self._parse_structure(input_data["poscar"]) + self.atomic_symbols = self._parse_atominfo(self.initial_structure.composition) + + self.potcar = None + self.potcar_symbols = [] + self.potcar_spec = [] + if input_data["potcar"].get("content"): + # Unmodified vaspout.h5 with full POTCAR + calc_potcar = Potcar.from_str(input_data["potcar"]["content"]) + self.potcar = calc_potcar if self.store_potcar else None + # The `potcar_symbols` attr is extraordinarily confusingly + # named, these are really TITELs + self.potcar_symbols = [potcar.TITEL for potcar in calc_potcar] + + # For parity with vasprun.xml, we do not store the POTCAR symbols in + # the vaspout.h5 POTCAR spec. These are derived from the TITEL fields + # and are thus redundant. + self.potcar_spec = [p.spec(extra_spec=[]) for p in calc_potcar] + + elif input_data["potcar"].get("spec"): + # modified vaspout.h5 with only POTCAR spec + import json + + self.potcar_spec = json.loads(input_data["potcar"]["spec"]) + self.potcar_symbols = [spec["titel"] for spec in self.potcar_spec] + + # TODO: do we want POSCAR stored? + self.poscar = Poscar( + structure=self.initial_structure, + comment=input_data["poscar"].get("system"), + selective_dynamics=self.initial_structure.site_properties.get("selective_dynamics"), + velocities=self.initial_structure.site_properties.get("velocities"), + ) + + def _get_ionic_steps(self, ion_dynamics) -> None: + # use same key accession as in vasprun.xml + vasp_key_to_pmg = { + "free energy TOTEN": "e_fr_energy", + "energy without entropy": "e_wo_entrp", + "energy(sigma->0)": "e_0_energy", + } + + # label s, p, d,... contributions to charge and magnetic moment in same way as Outcar + _to_outcar_tag = {"total charge": "charge", "magnetization (x)": "magnetization"} + + self.nionic_steps = len(ion_dynamics["energies"]) + self.ionic_steps = [] + + ionic_step_keys = [ + key + for key in ( + "forces", + "stresses", + ) + if ion_dynamics.get(key) + ] + + for istep in range(self.nionic_steps): + step = { + **{ + vasp_key_to_pmg[ion_dynamics["energies_tags"][ivalue]]: value + for ivalue, value in enumerate(ion_dynamics["energies"][istep]) + }, + **{key: ion_dynamics[key][istep] for key in ionic_step_keys}, + "structure": Structure( + lattice=Lattice(ion_dynamics["lattice_vectors"][istep]), + species=self.initial_structure.species, + coords=ion_dynamics["position_ions"][istep], + coords_are_cartesian=False, # TODO check this is always False + ), + # Placeholder - there's currently no info about electronic steps + # in vaspout.h5 + "electronic_steps": [], + } + if chg_dens_props := ion_dynamics.get("magnetism"): + for ik, k in enumerate(chg_dens_props["component_tags"]): + site_prop = [ + { + orb: chg_dens_props["moments"][istep][ik][iion][iorb] + for iorb, orb in enumerate(chg_dens_props["orbital_tags"]) + } + for iion in range(len(self.poscar.structure)) + ] + for iion in range(len(self.poscar.structure)): + site_prop[iion]["tot"] = sum(site_prop[iion].values()) + step["structure"].add_site_property(_to_outcar_tag.get(k), site_prop) + + self.ionic_steps += [step] + + def _parse_results(self, results: dict) -> None: + self.final_structure = self._parse_structure(results["positions"]) + + def _parse_dos(self, electron_dos: dict, projectors: list | None = None): # type: ignore[override] + self.efermi = electron_dos["efermi"] + + densities: dict = {} + for dos_type in ( + "dos", + "dosi", + ): + if electron_dos.get(dos_type): + densities[dos_type] = {} + for ispin in range(len(electron_dos[dos_type])): + densities[dos_type][Spin((-1) ** ispin)] = electron_dos[dos_type][ispin] + + self.tdos = Dos(self.efermi, electron_dos["energies"], densities["dos"]) + self.idos = Dos(self.efermi, electron_dos["energies"], densities["dosi"]) + + self.pdos = [] + + # for whatever reason, the naming of orbitals is different in vaspout.h5 + vasp_to_pmg_orb = { + "x2-y2": "dx2", + "fy3x2": "f_3", + "fxyz": "f_2", + "fyz2": "f_1", + "fz3": "f0", + "fxz2": "f1", + "fzx2": "f2", + "fx3": "f3", + } + + if pdos := electron_dos.get("dospar"): + projectors = projectors or [] + projectors = [char.strip() for char in projectors] + orbtyp = Orbital if any("x" in char for char in projectors) else OrbitalType + for site_pdos in pdos: + site_res_pdos: dict = defaultdict(dict) + for ispin in range(len(site_pdos)): + for ilm in range(len(site_pdos[ispin])): + orb_str = projectors[ilm] + orb_idx = orbtyp.__members__[vasp_to_pmg_orb.get(orb_str, orb_str)] + site_res_pdos[orb_idx][Spin((-1) ** ispin)] = np.array(site_pdos[ispin][ilm]) + self.pdos += [site_res_pdos] + + @staticmethod + def _parse_eigen(eigenvalues_complete: dict): # type: ignore[override] + eigenvalues = {} + for ispin in range(eigenvalues_complete["ispin"]): + eigenvalues[Spin.up if ispin == 0 else Spin.down] = np.array( + [ + [ + [ + eigenvalues_complete["eigenvalues"][ispin][i][j], + eigenvalues_complete["fermiweights"][ispin][i][j], + ] + for j in range(eigenvalues_complete["nb_tot"]) + ] + for i in range(eigenvalues_complete["kpoints"]) + ] + ) + return eigenvalues + + @property + @unitized("eV") + def final_energy(self): + """Final energy from vaspout.""" + return self.ionic_steps[-1]["e_0_energy"] + + def remove_potcar_and_write_file( + self, filename: str | Path | None = None, fake_potcar_str: str | None = None + ) -> None: + """ + Utility function to replace the full POTCAR with its spec, and write a vaspout.h5. + + This is needed for applications where one might upload VASP output + to a public database. Since vaspout.h5 includes the full POTCAR, it's necessary + to replace it here with just the spec. + + Args: + filename : str, Path, or None (default) + Name of the output file. If None, defaults to self.filename (in-place modification). + fake_potcar_str : str or None (default) + If a str, a POTCAR represented as a str. Used in the context of tests to replace + a POTCAR with a scrambled/fake POTCAR. If None, the Vaspout.potcar Field + ("/input/potcar/content" field of vaspout.h5) is removed. + """ + import json + + def recursive_to_dataset(h5_obj, level, obj): + if hasattr(obj, "items"): + if level != "/": + h5_obj.create_group(level) + for k, v in obj.items(): + recursive_to_dataset(h5_obj[level], k, v) + else: + if isinstance(obj, str): + obj = obj.encode() + data = np.array(obj) + if "U" in str(data.dtype): + data = data.astype("S") + h5_obj.create_dataset(level, data=data) + + filename = filename or self.filename + fname_prefix, fname_ext = os.path.splitext(filename) # type: ignore[type-var] + + # determine if output file is to be compressed + fname_ext = fname_ext.upper() # type: ignore[union-attr] + compressor = None + if fname_ext == ".BZ2": + compressor = "bzip" + elif fname_ext in (".GZ", ".Z"): + compressor = "gzip" + elif fname_ext in (".XZ", ".LZMA"): + compressor = "lzma" + + with zopen(self.filename, "rb") as vout_file, h5py.File(vout_file, "r") as h5_file: + hdf5_data = self._parse_hdf5_value(h5_file) + + if fake_potcar_str: + hdf5_data["input"]["potcar"]["content"] = fake_potcar_str + potcar_spec = [psingle.spec() for psingle in Potcar.from_str(fake_potcar_str)] + else: + del hdf5_data["input"]["potcar"]["content"] + potcar_spec = self.potcar_spec + + # rather than define custom HDF5 hierarchy for POTCAR spec, just dump JSONable dict to str + hdf5_data["input"]["potcar"]["spec"] = json.dumps(potcar_spec) + + # if file is to be compressed, first write uncompressed file + with h5py.File(fname_prefix if compressor else filename, "w") as h5_file: + recursive_to_dataset(h5_file, "/", hdf5_data) + + # now compress the file + if compressor: + if os.path.isfile(filename): # type: ignore[arg-type] + warnings.warn(f"File {filename} already exists, skipping compression.") + else: + subprocess.Popen(f"{compressor} {fname_prefix}").wait() diff --git a/tests/files/io/vasp/outputs/vaspout.line_mode_band_structure.h5.gz b/tests/files/io/vasp/outputs/vaspout.line_mode_band_structure.h5.gz new file mode 100644 index 00000000000..9722b298e9f Binary files /dev/null and b/tests/files/io/vasp/outputs/vaspout.line_mode_band_structure.h5.gz differ diff --git a/tests/io/vasp/test_inputs.py b/tests/io/vasp/test_inputs.py index 75fc6c0b001..de3aef908fb 100644 --- a/tests/io/vasp/test_inputs.py +++ b/tests/io/vasp/test_inputs.py @@ -1265,6 +1265,16 @@ def test_copy(self): assert psingle == self.psingle_Mn_pv assert psingle is not self.psingle_Mn_pv + def test_spec(self): + for psingle in [self.psingle_Fe, self.psingle_Fe_54, self.psingle_Mn_pv]: + expected_spec = { + "titel": psingle.TITEL, + "hash": psingle.md5_header_hash, + "summary_stats": psingle._summary_stats, + "symbol": psingle.symbol, + } + assert expected_spec == psingle.spec(extra_spec=["symbol"]) + class TestPotcar(PymatgenTest): def setUp(self): @@ -1328,6 +1338,16 @@ def test_set_symbol(self): def test_pickle(self): pickle.dumps(self.potcar) + def test_from_spec(self): + orig_potcar = Potcar(symbols=["Fe", "P", "O"], functional="PBE") + new_potcar = Potcar.from_spec(orig_potcar.spec) + assert str(new_potcar) == str(orig_potcar) + assert all( + [PotcarSingle.compare_potcar_stats(p._summary_stats, orig_potcar[ip]._summary_stats)] + for ip, p in enumerate(new_potcar) + if p.TITEL == orig_potcar[ip].TITEL + ) + # def tearDown(self): # SETTINGS["PMG_DEFAULT_FUNCTIONAL"] = "PBE" diff --git a/tests/io/vasp/test_outputs.py b/tests/io/vasp/test_outputs.py index 1492f9d726f..5f8028ae59f 100644 --- a/tests/io/vasp/test_outputs.py +++ b/tests/io/vasp/test_outputs.py @@ -34,6 +34,7 @@ Outcar, Procar, UnconvergedVASPWarning, + Vaspout, VaspParseError, Vasprun, Wavecar, @@ -2048,3 +2049,61 @@ def test_consistency(self): assert np.linalg.norm([r, i]) > 0.999 else: assert np.linalg.norm([r, i]) < 0.001 + + +try: + import h5py +except ImportError: + h5py = None + + +@pytest.mark.skipif(condition=h5py is None, reason="h5py must be installed to use the .Vaspout class.") +class TestVaspout(PymatgenTest): + def setUp(self): + self.vaspout = Vaspout(f"{VASP_OUT_DIR}/vaspout.line_mode_band_structure.h5.gz") + + def test_parse(self): + from pymatgen.io.vasp.inputs import Incar + + assert self.vaspout.final_energy == approx(-8.953035077096956) + assert self.vaspout.kpoints.num_kpts == 163 + assert all(not attr for attr in [self.vaspout.is_spin, self.vaspout.is_hubbard]) + + input_docs = [(self.vaspout.incar, Incar), (self.vaspout.kpoints, Kpoints), (self.vaspout.potcar, Potcar)] + assert all(isinstance(*doc) for doc in input_docs) + + assert len(self.vaspout.ionic_steps) == 1 + + # double check that these POTCARs have been scrambled + assert all("FAKE" in psingle.data for psingle in self.vaspout.potcar) + + def test_as_dict(self): + vout_dict = self.vaspout.as_dict() + assert isinstance(vout_dict, dict) + assert all( + key in vout_dict + for key in [ + "vasp_version", + "has_vasp_completed", + "nsites", + "unit_cell_formula", + "reduced_cell_formula", + "pretty_formula", + "is_hubbard", + "hubbards", + "elements", + "nelements", + "run_type", + "input", + "output", + ] + ) + + def test_remove_potcar(self): + new_vaspout_file = f"{self.tmp_path}/vaspout.h5.gz" + self.vaspout.remove_potcar_and_write_file(filename=new_vaspout_file) + cleansed_vout = Vaspout(new_vaspout_file) + + cleansed_vout_d = cleansed_vout.as_dict() + assert all(cleansed_vout_d[k] == v for k, v in self.vaspout.as_dict().items() if k != "potcar") + assert cleansed_vout.potcar is None