Skip to content

Commit

Permalink
replace check for potcar hash with check for potcar summary stats (#966)
Browse files Browse the repository at this point in the history
* replace check for potcar hash with check for potcar summary stats

squash

* rename vasp hashes to vasp stats in emmet settings

* linting fixes

* remove second CalcType import

* Add legacy potcar checking using md5 header hashes

* Revert "Add legacy potcar checking using md5 header hashes"

This reverts commit 2c13dc9.

* Undo precommit changes

* Add deprecation warning for when space group of crystal in input can't be determined

* add missing test file

* update potcar validation field name for EmmetBuildSettings

* try catch for get_potcar_stats

* pre-commit format

* mypy fixes

---------

Co-authored-by: esoteric-ephemera <aaron.kaplan.physics@gmail.com>
  • Loading branch information
tsmathis and esoteric-ephemera committed Mar 20, 2024
1 parent d334889 commit cf9f606
Show file tree
Hide file tree
Showing 9 changed files with 185 additions and 64 deletions.
31 changes: 31 additions & 0 deletions emmet-builders/emmet/builders/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@
from itertools import chain, combinations
from pymatgen.core import Structure
from pymatgen.analysis.diffusion.neb.full_path_mapper import MigrationGraph
from pymatgen.io.vasp.inputs import PotcarSingle

from emmet.builders.settings import EmmetBuildSettings


def maximal_spanning_non_intersecting_subsets(sets) -> Set[Set]:
Expand Down Expand Up @@ -211,3 +214,31 @@ def __enter__(self):
def __exit__(self, exc_type, exc_val, exc_tb):
sys.stdout.close()
sys.stdout = self._original_stdout


def get_potcar_stats():
    """
    Build reference POTCAR summary statistics for the default VASP input sets.

    A default ``EmmetBuildSettings`` is created and each entry of its
    ``VASP_DEFAULT_INPUT_SETS`` is instantiated with no arguments. For every
    POTCAR symbol listed in that input set's ``CONFIG["POTCAR"]``, a
    ``PotcarSingle`` is loaded for the input set's ``POTCAR_FUNCTIONAL`` and
    a copy of its summary stats is stored, extended with two extra keys:

        - ``"titel"``: the POTCAR's ``TITEL``
        - ``"hash"``: the POTCAR's md5 header hash

    Returns:
        Nested dict: calculation type -> POTCAR symbol -> summary stats entry.

    NOTE(review): loading a ``PotcarSingle`` presumably requires a local
    POTCAR library; errors raised by pymatgen are not caught here.
    """
    settings = EmmetBuildSettings()

    reference: dict[str, dict] = {}  # type: ignore

    for calc_type, input_set_cls in settings.VASP_DEFAULT_INPUT_SETS.items():
        input_set_obj = input_set_cls()

        per_symbol = reference[calc_type] = {}
        xc_functional = input_set_obj._config_dict["POTCAR_FUNCTIONAL"]

        for symbol in input_set_obj.CONFIG["POTCAR"].values():
            single = PotcarSingle.from_symbol_and_functional(
                symbol=symbol, functional=xc_functional
            )
            # Copy so the extra keys below never leak into the PotcarSingle.
            entry = single._summary_stats.copy()
            # Fallback data for validation - header hash and symbol.
            # Note that the potcar_spec assigns PotcarSingle.symbol to "titel".
            entry["titel"] = single.TITEL
            entry["hash"] = single.md5_header_hash
            per_symbol[symbol] = entry

    return reference
35 changes: 10 additions & 25 deletions emmet-builders/emmet/builders/vasp/task_validator.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
from typing import Dict, Optional
from collections import defaultdict

from maggma.builders import MapBuilder
from maggma.core import Store

from emmet.builders.settings import EmmetBuildSettings
from emmet.core.vasp.task_valid import TaskDocument
from emmet.builders.utils import get_potcar_stats
from emmet.core.vasp.calc_types.enums import CalcType
from emmet.core.vasp.task_valid import TaskDocument
from emmet.core.vasp.validation import DeprecationMessage, ValidationDoc


Expand All @@ -15,7 +15,7 @@ def __init__(
self,
tasks: Store,
task_validation: Store,
potcar_hashes: Optional[Dict[CalcType, Dict[str, str]]] = None,
potcar_stats: Optional[Dict[CalcType, Dict[str, str]]] = None,
settings: Optional[EmmetBuildSettings] = None,
query: Optional[Dict] = None,
**kwargs,
Expand All @@ -26,37 +26,22 @@ def __init__(
Args:
tasks: Store of task documents
task_validation: Store of task_types for tasks
potcar_hashes: Optional dictionary of potcar hash data.
potcar_stats: Optional dictionary of potcar summary stats data.
Mapping is calculation type -> potcar symbol -> summary stats.
"""
self.tasks = tasks
self.task_validation = task_validation
self.settings = EmmetBuildSettings.autoload(settings)
self.query = query
self.kwargs = kwargs
self.potcar_hashes = potcar_hashes
self.potcar_stats = potcar_stats

# Set up potcar cache if appropriate
if self.settings.VASP_VALIDATE_POTCAR_HASHES:
if not self.potcar_hashes:
from pymatgen.io.vasp.inputs import PotcarSingle

hashes = defaultdict(dict) # type: dict

for (
calc_type,
input_set,
) in self.settings.VASP_DEFAULT_INPUT_SETS.items():
functional = input_set.CONFIG["POTCAR_FUNCTIONAL"]
for potcar_symbol in input_set.CONFIG["POTCAR"].values():
potcar = PotcarSingle.from_symbol_and_functional(
symbol=potcar_symbol, functional=functional
)
hashes[calc_type][potcar_symbol] = potcar._summary_stats

self.potcar_hashes = potcar_hashes
if self.settings.VASP_VALIDATE_POTCAR_STATS:
if not self.potcar_stats:
self.potcar_stats = get_potcar_stats()
else:
self.potcar_hashes = None
self.potcar_stats = None

super().__init__(
source=tasks,
Expand Down Expand Up @@ -88,7 +73,7 @@ def unary_function(self, item):
input_sets=self.settings.VASP_DEFAULT_INPUT_SETS,
LDAU_fields=self.settings.VASP_CHECKED_LDAU_FIELDS,
max_allowed_scf_gradient=self.settings.VASP_MAX_SCF_GRADIENT,
potcar_hashes=self.potcar_hashes,
potcar_stats=self.potcar_stats,
)

bad_tags = list(set(task_doc.tags).intersection(self.settings.DEPRECATED_TAGS))
Expand Down
2 changes: 1 addition & 1 deletion emmet-builders/tests/test_materials.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ def tasks_store(test_dir):

@pytest.fixture(scope="session")
def validation_store(tasks_store):
settings = EmmetBuildSettings(VASP_VALIDATE_POTCAR_HASHES=False)
settings = EmmetBuildSettings(VASP_VALIDATE_POTCAR_STATS=False)
validation_store = MemoryStore()
builder = TaskValidator(
tasks=tasks_store, task_validation=validation_store, settings=settings
Expand Down
27 changes: 27 additions & 0 deletions emmet-builders/tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,12 @@
chemsys_permutations,
maximal_spanning_non_intersecting_subsets,
get_hop_cutoff,
get_potcar_stats,
)
from pymatgen.analysis.diffusion.neb.full_path_mapper import MigrationGraph
from numpy.testing import assert_almost_equal
from monty.serialization import loadfn
from emmet.core.settings import EmmetSettings


def test_maximal_spanning_non_intersecting_subsets():
Expand Down Expand Up @@ -55,3 +57,28 @@ def test_get_hop_cutoff(test_dir):
check_mg = MigrationGraph.with_distance(nasicon_mg.structure, "Mg", d)
assert_almost_equal(d, 4.59, decimal=2)
assert len(check_mg.unique_hops) == 6


def test_get_potcar_stats():
    """
    Check that ``get_potcar_stats`` covers every default input set and that
    each POTCAR entry exposes exactly the fields used for validation.

    The test returns early (silently passing) when the raised error message
    contains "No POTCAR for", i.e. when no POTCAR library is available.
    """
    default_input_sets = EmmetSettings().VASP_DEFAULT_INPUT_SETS

    try:
        potcar_stats = get_potcar_stats()
    except Exception as exc:
        if "No POTCAR for" not in str(exc):
            raise exc
        # No Potcar library available, skip test
        return

    # ensure that all calc types are included in potcar_stats
    assert potcar_stats.keys() == default_input_sets.keys()

    # each entry needs the fields for both legacy and modern potcar validation
    required_fields = {"hash", "keywords", "titel", "stats"}
    for per_symbol in potcar_stats.values():
        assert all(set(entry) == required_fields for entry in per_symbol.values())
2 changes: 1 addition & 1 deletion emmet-builders/tests/test_vasp.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ def validation_store():


def test_validator(tasks_store, validation_store):
settings = EmmetBuildSettings(VASP_VALIDATE_POTCAR_HASHES=False)
settings = EmmetBuildSettings(VASP_VALIDATE_POTCAR_STATS=False)
builder = TaskValidator(
tasks=tasks_store, task_validation=validation_store, settings=settings
)
Expand Down
4 changes: 2 additions & 2 deletions emmet-core/emmet/core/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,8 +149,8 @@ class EmmetSettings(BaseSettings):
description="Default input sets for task validation",
)

VASP_VALIDATE_POTCAR_HASHES: bool = Field(
True, description="Whether to validate POTCAR hash values."
VASP_VALIDATE_POTCAR_STATS: bool = Field(
True, description="Whether to validate POTCAR stat values."
)

VASP_CHECKED_LDAU_FIELDS: List[str] = Field(
Expand Down
71 changes: 50 additions & 21 deletions emmet-core/emmet/core/vasp/validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,10 @@

class DeprecationMessage(DocEnum):
MANUAL = "M", "Manual deprecation"
SYMMETRY = (
"S001",
"Could not determine crystalline space group, needed for input set check.",
)
KPTS = "C001", "Too few KPoints"
KSPACING = "C002", "KSpacing not high enough"
ENCUT = "C002", "ENCUT too low"
Expand Down Expand Up @@ -66,7 +70,7 @@ def from_task_doc(
input_sets: Dict[str, ImportString] = SETTINGS.VASP_DEFAULT_INPUT_SETS,
LDAU_fields: List[str] = SETTINGS.VASP_CHECKED_LDAU_FIELDS,
max_allowed_scf_gradient: float = SETTINGS.VASP_MAX_SCF_GRADIENT,
potcar_hashes: Optional[Dict[CalcType, Dict[str, str]]] = None,
potcar_stats: Optional[Dict[CalcType, Dict[str, str]]] = None,
) -> "ValidationDoc":
"""
Determines if a calculation is valid based on expected input parameters from a pymatgen inputset
Expand All @@ -80,7 +84,7 @@ def from_task_doc(
LDAU_fields: LDAU fields to check for consistency
max_allowed_scf_gradient: maximum uphill gradient allowed for SCF steps after the
initial equilibration period
potcar_hashes: Dictionary of potcar hash data. Mapping is calculation type -> potcar symbol -> hash value.
potcar_stats: Dictionary of potcar stat data. Mapping is calculation type -> potcar symbol -> summary stats.
"""

bandgap = task_doc.output.bandgap
Expand Down Expand Up @@ -110,10 +114,19 @@ def from_task_doc(
reasons.append(DeprecationMessage.SET)
valid_input_set = None

try:
# Sometimes spglib can't determine space group with the default
# `symprec` and `angle_tolerance`. In these cases,
# `Structure.get_space_group_info()` fails
valid_input_set.structure.get_space_group_info()
except Exception:
reasons.append(DeprecationMessage.SYMMETRY)
valid_input_set = None

if valid_input_set:
# Checking POTCAR summary_stats if a directory is supplied
if potcar_hashes:
if _potcar_hash_check(task_doc, potcar_hashes):
if potcar_stats:
if _potcar_stats_check(task_doc, potcar_stats):
if task_type in [
TaskType.NSCF_Line,
TaskType.NSCF_Uniform,
Expand All @@ -130,6 +143,7 @@ def from_task_doc(
if task_type != task_type.NSCF_Line:
# Not validating k-point data for line-mode calculations as constructing
# the k-path is too costly for the builder and the uniform input set is used.

if valid_input_set.kpoints is not None:
if _kpoint_check(
valid_input_set,
Expand Down Expand Up @@ -311,7 +325,7 @@ def _kspacing_warnings(input_set, inputs, data, warnings, kspacing_tolerance):
)


def _potcar_hash_check(task_doc, potcar_hashes):
def _potcar_stats_check(task_doc, potcar_stats: dict):
"""
Checks to make sure the POTCAR summary stats is equal to the correct
value from the pymatgen input set.
Expand All @@ -325,32 +339,47 @@ def _potcar_hash_check(task_doc, potcar_hashes):
# Assume it is an old calculation without potcar_spec data and treat it as passing POTCAR hash check
return False

use_legacy_hash_check = False
if any(len(entry.get("summary_stats", {})) == 0 for entry in potcar_details):
# potcar_spec doesn't include summary_stats kwarg needed to check potcars
# fall back to header hash checking
use_legacy_hash_check = True

all_match = True
for entry in potcar_details:
symbol = entry["titel"].split(" ")[1]
ref_summ_stats = potcar_hashes[str(task_doc.calc_type)].get(symbol, None)
ref_summ_stats = potcar_stats[str(task_doc.calc_type)].get(symbol, None)

if not ref_summ_stats:
# Symbol differs from reference set - deprecate
all_match = False
break

key_match = all(
set(ref_summ_stats["keywords"][key])
== set(entry["summary_stats"]["keywords"][key])
for key in ["header", "data"]
)
if use_legacy_hash_check:
all_match = all(
entry[key] == ref_summ_stats[key]
for key in (
"hash",
"titel",
)
)

data_match = all(
abs(
ref_summ_stats["stats"][key][stat]
- entry["summary_stats"]["stats"][key][stat]
else:
all_match = all(
set(ref_summ_stats["keywords"][key])
== set(entry["summary_stats"]["keywords"][key])
for key in ["header", "data"]
) and all(
abs(
ref_summ_stats["stats"][key][stat]
- entry["summary_stats"]["stats"][key][stat]
)
< data_tol
for stat in ["MEAN", "ABSMEAN", "VAR", "MIN", "MAX"]
for key in ["header", "data"]
)
< data_tol
for stat in ["MEAN", "ABSMEAN", "VAR", "MIN", "MAX"]
for key in ["header", "data"]
)

if (not key_match) or (not data_match):
all_match = False
if not all_match:
break

return not all_match
Expand Down

0 comments on commit cf9f606

Please sign in to comment.