Skip to content

Commit

Permalink
Merge pull request #199 from materialsproject/reconcile
Browse files Browse the repository at this point in the history
Reconcile changes on updates branch
  • Loading branch information
shyamd committed May 28, 2021
2 parents 9082fc4 + cb293cd commit 1d187e6
Show file tree
Hide file tree
Showing 13 changed files with 293 additions and 126 deletions.
54 changes: 54 additions & 0 deletions emmet-builders/emmet/builders/materials/oxidation_states.py
@@ -0,0 +1,54 @@
from typing import Dict, List, Optional

from maggma.builders.map_builder import MapBuilder
from maggma.core import Store
from pymatgen.core import Structure

from pymatgen.core import __version__ as pymatgen_version


from emmet.core.oxidation_states import OxidationStateDoc


class OxidationStatesBuilder(MapBuilder):
    """Map builder producing one oxidation state document per material."""

    def __init__(
        self,
        materials: Store,
        oxidation_states: Store,
        **kwargs,
    ):
        """
        Creates Oxidation State documents from materials

        Args:
            materials: Store of materials docs
            oxidation_states: Store to update with oxidation state document
            **kwargs: extra keyword arguments handed unchanged to the
                MapBuilder constructor, such as a query on materials to
                limit the search
        """
        self.materials = materials
        self.oxidation_states = oxidation_states
        self.kwargs = kwargs

        # Source and target are both keyed on material_id, overriding
        # whatever key the stores were configured with.
        for store in (self.materials, self.oxidation_states):
            store.key = "material_id"

        # Only the structure is projected from each material document.
        super().__init__(
            source=materials,
            target=oxidation_states,
            projection=["structure"],
            **kwargs,
        )

    def unary_function(self, item):
        """
        Build the oxidation state document for a single material.

        Args:
            item: materials document containing a "structure" entry in
                dictionary form

        Returns:
            The OxidationStateDoc as a dictionary, with the pymatgen
            version and a "successful" flag added.
        """
        parsed_structure = Structure.from_dict(item["structure"])
        result = OxidationStateDoc.from_structure(parsed_structure).dict()

        # Record provenance alongside the computed fields
        result["pymatgen_version"] = pymatgen_version
        result["successful"] = True

        return result
55 changes: 37 additions & 18 deletions emmet-builders/emmet/builders/vasp/materials.py
@@ -1,21 +1,13 @@
from datetime import datetime
from itertools import chain
from operator import itemgetter
from typing import Dict, Iterator, List, Optional
from typing import Dict, Iterable, Iterator, List, Optional

from maggma.builders import Builder
from maggma.stores import Store
from pymatgen.core import Structure
from pymatgen.analysis.structure_analyzer import oxide_type
from pymatgen.analysis.structure_matcher import ElementComparator, StructureMatcher
from maggma.utils import grouper

from emmet.builders.utils import maximal_spanning_non_intersecting_subsets

# from emmet.core import SETTINGS
from emmet.builders import SETTINGS
from emmet.builders.settings import EmmetBuildSettings
from emmet.core.utils import group_structures, jsanitize
from emmet.core.vasp.calc_types import TaskType
from emmet.core.vasp.material import MaterialsDoc
from emmet.core.vasp.task import TaskDocument

Expand All @@ -33,11 +25,8 @@ class MaterialsBuilder(Builder):
1.) Find all documents with the same formula
2.) Select only task documents for the task_types we can select properties from
3.) Aggregate task documents based on structure similarity
4.) Convert task docs to property docs with metadata for selection and aggregation
5.) Select the best property doc for each property
6.) Build material document from best property docs
7.) Post-process material document
8.) Validate material document
4.) Create a MaterialDoc from the group of task documents
5.) Validate material document
"""

Expand All @@ -56,6 +45,7 @@ def __init__(
materials: Store of materials documents to generate
task_validation: Store for storing task validation results
query: dictionary to limit tasks to be analyzed
settings: EmmetSettings to use in the build process
"""

self.tasks = tasks
Expand Down Expand Up @@ -84,13 +74,42 @@ def ensure_indexes(self):
# Search index for materials
self.materials.ensure_index("material_id")
self.materials.ensure_index("last_updated")
self.materials.ensure_index("sandboxes")
self.materials.ensure_index("task_ids")

if self.task_validation:
self.task_validation.ensure_index("task_id")
self.task_validation.ensure_index("valid")

def prechunk(self, number_splits: int) -> Iterable[Dict]:
    """
    Prechunk the materials builder for distributed computation.

    Finds the successful tasks (restricted by the build / excluded tags
    from the settings) whose ids are not yet listed in any material's
    ``task_ids``, collects their distinct ``formula_pretty`` values and
    divides those formulas into at most ``number_splits`` chunks.

    Args:
        number_splits: number of chunks to divide the work into; values
            below 1 are treated as 1

    Yields:
        One dictionary per chunk of the form
        ``{"formula_pretty": {"$in": [...]}}``. Nothing is yielded when
        there are no formulas left to process.
    """
    # NOTE(review): the shape of the yielded dictionaries is kept as-is;
    # confirm it matches what the distributed runner expects from prechunk.

    # Work on a copy so the builder's own query is never mutated
    temp_query = dict(self.query)
    temp_query["state"] = "successful"

    build_tags = self.settings.BUILD_TAGS
    excluded_tags = self.settings.EXCLUDED_TAGS
    if build_tags and excluded_tags:
        temp_query["$and"] = [
            {"tags": {"$in": build_tags}},
            {"tags": {"$nin": excluded_tags}},
        ]
    elif build_tags:
        temp_query["tags"] = {"$in": build_tags}

    self.logger.info("Finding tasks to process")
    all_tasks = {
        doc[self.tasks.key]
        for doc in self.tasks.query(temp_query, [self.tasks.key])
    }
    processed_tasks = {
        t_id
        for d in self.materials.query({}, ["task_ids"])
        for t_id in d.get("task_ids", [])
    }
    to_process_tasks = all_tasks - processed_tasks
    to_process_forms = list(
        self.tasks.distinct(
            "formula_pretty", {self.tasks.key: {"$in": list(to_process_tasks)}}
        )
    )

    if not to_process_forms:
        return

    # number_splits is the desired NUMBER of chunks, so the chunk LENGTH
    # is ceil(n_formulas / number_splits); chunking by number_splits
    # directly would produce far too many (tiny) chunks.
    splits = max(1, number_splits)
    chunk_size = -(-len(to_process_forms) // splits)

    for start in range(0, len(to_process_forms), chunk_size):
        formula_chunk = to_process_forms[start : start + chunk_size]
        yield {"formula_pretty": {"$in": formula_chunk}}

def get_items(self) -> Iterator[List[Dict]]:
"""
Gets all items to process into materials documents.
Expand Down Expand Up @@ -213,7 +232,7 @@ def process_item(self, tasks: List[Dict]) -> List[Dict]:
)
self.logger.debug(f"Produced {len(materials)} materials for {formula}")

return [mat.dict() for mat in materials]
return jsanitize([mat.dict() for mat in materials], allow_bson=True)

def update_targets(self, items: List[List[Dict]]):
"""
Expand All @@ -235,7 +254,7 @@ def update_targets(self, items: List[List[Dict]]):
self.logger.info(f"Updating {len(items)} materials")
self.materials.remove_docs({self.materials.key: {"$in": material_ids}})
self.materials.update(
docs=jsanitize(items, allow_bson=True),
docs=items,
key=["material_id"],
)
else:
Expand Down
7 changes: 4 additions & 3 deletions emmet-builders/emmet/builders/vasp/task_validator.py
@@ -1,15 +1,15 @@
from typing import Dict, List, Union, Optional
from typing import Dict, List, Optional, Union

import numpy as np
from maggma.builders import MapBuilder
from maggma.core import Store
from pymatgen.core import Structure

from emmet.builders import SETTINGS
from emmet.builders.settings import EmmetBuildSettings
from emmet.core.vasp.calc_types import run_type, task_type
from emmet.core.vasp.task import TaskDocument
from emmet.core.vasp.validation import DeprecationMessage, ValidationDoc
from emmet.builders.settings import EmmetBuildSettings


class TaskValidator(MapBuilder):
Expand Down Expand Up @@ -40,8 +40,9 @@ def __init__(
projection=[
"orig_inputs",
"output.structure",
"output.bandgap",
"input.parameters",
"calcs_reversed.output.ionic_steps.e_fr_energy",
"calcs_reversed.output.ionic_steps.electronic_steps.e_fr_energy",
"tags",
],
query=query,
Expand Down

0 comments on commit 1d187e6

Please sign in to comment.