Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Reconcile changes on updates branch #199

Merged
merged 27 commits into from
May 28, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
527c46b
add manual deprecation into validation
shyamd May 27, 2021
6a5daf4
switch default deprecated tags to setting
shyamd May 27, 2021
fc8a94c
move use_statics to settings
shyamd May 27, 2021
f929bf9
clean up imports
shyamd May 27, 2021
a590c4e
add warnings for kspacing and ismear
shyamd May 27, 2021
cfc7176
update test since default for use_statics is now True
shyamd May 27, 2021
97e7fee
Merge branch 'main' into reconcile
shyamd May 27, 2021
0210fc2
speed up PD construction
shyamd May 27, 2021
8585077
remove deprecation from core since its in builders
shyamd May 27, 2021
870dad4
Add oxidation states to thermo builder
shyamd May 27, 2021
08db446
add prechunk to thermo and materials
shyamd May 27, 2021
d0fc435
jsanitize in process to not bog down main process
shyamd May 27, 2021
e74f55f
remove vestige of sandbox
shyamd May 27, 2021
aad5eeb
clean up
shyamd May 27, 2021
0791b3b
reraise exception if failure
shyamd May 27, 2021
801f920
add oxidation state builder
shyamd May 27, 2021
9e47131
compatability will select run_type
shyamd May 27, 2021
00e4cce
clean up
shyamd May 27, 2021
d7cab8a
update builder names
shyamd May 27, 2021
b119add
fix misc bugs
shyamd May 27, 2021
30ce56a
formatting bug
shyamd May 27, 2021
ddf6fa1
mark when ismear differs
shyamd May 28, 2021
8bc1999
add unrecognized task_type
shyamd May 28, 2021
03d3512
fix minor conversion bugs
shyamd May 28, 2021
2994cb4
fix counting of chemsys to process
shyamd May 28, 2021
fabbe34
catch warnings from compatability
shyamd May 28, 2021
cb293cd
fix bad structures into oxidation doc test
shyamd May 28, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
54 changes: 54 additions & 0 deletions emmet-builders/emmet/builders/materials/oxidation_states.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
from typing import Dict, List, Optional

from maggma.builders.map_builder import MapBuilder
from maggma.core import Store
from pymatgen.core import Structure

from pymatgen.core import __version__ as pymatgen_version


from emmet.core.oxidation_states import OxidationStateDoc


class OxidationStatesBuilder(MapBuilder):
    """
    Map builder that produces one oxidation state document per materials document.
    """

    def __init__(
        self,
        materials: Store,
        oxidation_states: Store,
        **kwargs,
    ):
        """
        Creates Oxidation State documents from materials

        Args:
            materials: Store of materials docs (used as the source)
            oxidation_states: Store to update with oxidation state documents
                (used as the target)
            **kwargs: additional keyword arguments, kept on ``self.kwargs`` and
                forwarded unchanged to ``MapBuilder.__init__``
        """
        self.materials = materials
        self.oxidation_states = oxidation_states
        self.kwargs = kwargs

        # Source and target are both forced to key on material_id
        for store in (materials, oxidation_states):
            store.key = "material_id"

        # Only the "structure" field is requested from the source documents
        super().__init__(
            source=materials,
            target=oxidation_states,
            projection=["structure"],
            **kwargs,
        )

    def unary_function(self, item):
        """
        Build the oxidation state document for a single materials document.

        Args:
            item: source document; its "structure" entry is read as a
                Structure dictionary

        Returns:
            The OxidationStateDoc as a dictionary, with "pymatgen_version"
            and "successful" entries added
        """
        structure = Structure.from_dict(item["structure"])
        doc = OxidationStateDoc.from_structure(structure).dict()

        # Record which pymatgen produced the result and flag it as successful
        doc["pymatgen_version"] = pymatgen_version
        doc["successful"] = True

        return doc
55 changes: 37 additions & 18 deletions emmet-builders/emmet/builders/vasp/materials.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,13 @@
from datetime import datetime
from itertools import chain
from operator import itemgetter
from typing import Dict, Iterator, List, Optional
from typing import Dict, Iterable, Iterator, List, Optional

from maggma.builders import Builder
from maggma.stores import Store
from pymatgen.core import Structure
from pymatgen.analysis.structure_analyzer import oxide_type
from pymatgen.analysis.structure_matcher import ElementComparator, StructureMatcher
from maggma.utils import grouper

from emmet.builders.utils import maximal_spanning_non_intersecting_subsets

# from emmet.core import SETTINGS
from emmet.builders import SETTINGS
from emmet.builders.settings import EmmetBuildSettings
from emmet.core.utils import group_structures, jsanitize
from emmet.core.vasp.calc_types import TaskType
from emmet.core.vasp.material import MaterialsDoc
from emmet.core.vasp.task import TaskDocument

Expand All @@ -33,11 +25,8 @@ class MaterialsBuilder(Builder):
1.) Find all documents with the same formula
2.) Select only task documents for the task_types we can select properties from
3.) Aggregate task documents based on structure similarity
4.) Convert task docs to property docs with metadata for selection and aggregation
5.) Select the best property doc for each property
6.) Build material document from best property docs
7.) Post-process material document
8.) Validate material document
4.) Create a MaterialDoc from the group of task documents
5.) Validate material document

"""

Expand All @@ -56,6 +45,7 @@ def __init__(
materials: Store of materials documents to generate
task_validation: Store for storing task validation results
query: dictionary to limit tasks to be analyzed
settings: EmmetSettings to use in the build process
"""

self.tasks = tasks
Expand Down Expand Up @@ -84,13 +74,42 @@ def ensure_indexes(self):
# Search index for materials
self.materials.ensure_index("material_id")
self.materials.ensure_index("last_updated")
self.materials.ensure_index("sandboxes")
self.materials.ensure_index("task_ids")

if self.task_validation:
self.task_validation.ensure_index("task_id")
self.task_validation.ensure_index("valid")

def prechunk(self, number_splits: int) -> Iterable[Dict]:
    """
    Prechunk the materials builder for distributed computation

    Finds the tasks matching the build query that are not yet listed in the
    "task_ids" of any materials document, collects their distinct
    "formula_pretty" values, and splits those formulas into at most
    ``number_splits`` chunks.

    Args:
        number_splits: number of chunks to divide the work into; values
            below 1 are treated as 1

    Yields:
        One query dictionary per chunk, of the form
        ``{"formula_pretty": {"$in": [...]}}``
    """
    from math import ceil

    temp_query = dict(self.query)
    temp_query["state"] = "successful"

    build_tags = self.settings.BUILD_TAGS
    excluded_tags = self.settings.EXCLUDED_TAGS
    # NOTE(review): EXCLUDED_TAGS is only applied when BUILD_TAGS is also
    # non-empty -- confirm this is intended.
    if build_tags and excluded_tags:
        temp_query["$and"] = [
            {"tags": {"$in": build_tags}},
            {"tags": {"$nin": excluded_tags}},
        ]
    elif build_tags:
        temp_query["tags"] = {"$in": build_tags}

    self.logger.info("Finding tasks to process")
    all_tasks = {
        doc[self.tasks.key]
        for doc in self.tasks.query(temp_query, [self.tasks.key])
    }
    processed_tasks = {
        t_id
        for d in self.materials.query({}, ["task_ids"])
        for t_id in d.get("task_ids", [])
    }
    to_process_tasks = all_tasks - processed_tasks
    to_process_forms = list(
        self.tasks.distinct(
            "formula_pretty", {self.tasks.key: {"$in": list(to_process_tasks)}}
        )
    )

    if not to_process_forms:
        # Nothing left to build, so there are no chunks to hand out
        return

    # grouper's second argument is the size of each chunk, not the number of
    # chunks, so convert the requested number of splits into a chunk size.
    chunk_size = ceil(len(to_process_forms) / max(number_splits, 1))

    for formula_chunk in grouper(to_process_forms, chunk_size):
        yield {"formula_pretty": {"$in": list(formula_chunk)}}

def get_items(self) -> Iterator[List[Dict]]:
"""
Gets all items to process into materials documents.
Expand Down Expand Up @@ -213,7 +232,7 @@ def process_item(self, tasks: List[Dict]) -> List[Dict]:
)
self.logger.debug(f"Produced {len(materials)} materials for {formula}")

return [mat.dict() for mat in materials]
return jsanitize([mat.dict() for mat in materials], allow_bson=True)

def update_targets(self, items: List[List[Dict]]):
"""
Expand All @@ -235,7 +254,7 @@ def update_targets(self, items: List[List[Dict]]):
self.logger.info(f"Updating {len(items)} materials")
self.materials.remove_docs({self.materials.key: {"$in": material_ids}})
self.materials.update(
docs=jsanitize(items, allow_bson=True),
docs=items,
key=["material_id"],
)
else:
Expand Down
7 changes: 4 additions & 3 deletions emmet-builders/emmet/builders/vasp/task_validator.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
from typing import Dict, List, Union, Optional
from typing import Dict, List, Optional, Union

import numpy as np
from maggma.builders import MapBuilder
from maggma.core import Store
from pymatgen.core import Structure

from emmet.builders import SETTINGS
from emmet.builders.settings import EmmetBuildSettings
from emmet.core.vasp.calc_types import run_type, task_type
from emmet.core.vasp.task import TaskDocument
from emmet.core.vasp.validation import DeprecationMessage, ValidationDoc
from emmet.builders.settings import EmmetBuildSettings


class TaskValidator(MapBuilder):
Expand Down Expand Up @@ -40,8 +40,9 @@ def __init__(
projection=[
"orig_inputs",
"output.structure",
"output.bandgap",
"input.parameters",
"calcs_reversed.output.ionic_steps.e_fr_energy",
"calcs_reversed.output.ionic_steps.electronic_steps.e_fr_energy",
"tags",
],
query=query,
Expand Down