Skip to content

Commit

Permalink
added working sgroup builder
Browse files: browse the repository at this point in the history
  • Loading branch information
jmmshn committed Feb 3, 2021
1 parent 4b98a19 commit 70b1fd4
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 15 deletions.
26 changes: 14 additions & 12 deletions emmet-builders/emmet/builders/materials/electrodes.py
@@ -1,4 +1,5 @@
import operator
import math
from collections import namedtuple
from datetime import datetime
from functools import lru_cache
Expand All @@ -13,6 +14,7 @@
from pymatgen.analysis.structure_matcher import StructureMatcher, ElementComparator
from pymatgen.apps.battery.insertion_battery import InsertionElectrode
from pymatgen.core import Structure
from pymatgen.entries.computed_entries import ComputedStructureEntry

__author__ = "Jimmy Shen"
__email__ = "jmmshn@lbl.gov"
Expand Down Expand Up @@ -180,11 +182,11 @@ def get_items(self):
if self.check_newer:
all_target_docs = list(
self.sgroups.query(
criteria=chemsys_query,
criteria={"chemsys": chemsys},
properties=[
"task_id",
self.sgroups.last_updated_field,
"grouped_task_ids",
"grouped_ids",
],
)
)
Expand Down Expand Up @@ -215,27 +217,27 @@ def get_items(self):
# If any material id is missing or if any material id has been updated
target_mat_ids = set()
for g_doc in all_target_docs:
target_mat_ids |= set(g_doc["grouped_task_ids"])
target_mat_ids |= set(g_doc["grouped_ids"])

self.logger.debug(
f"There are {len(mat_ids)} material ids in the source database vs {len(target_mat_ids)} in the target database."
)
if mat_ids == target_mat_ids and max_mat_time < min_target_time:
self.logger.debug(
continue
else:
self.logger.info(
f"Nuking all {len(target_mat_ids)} documents in chemsys {chemsys} in the target database."
)
self._remove_targets(target_mat_ids)
else:
continue
self._remove_targets(list(target_mat_ids))

yield {"chemsys": chemsys, "materials": all_mats_in_chemsys}

def update_targets(self, items: List):
items = list(filter(None, chain.from_iterable(items)))
# items = list(filter(None, chain.from_iterable(items)))
if len(items) > 0:
self.logger.info("Updating {} sgroups documents".format(len(items)))
for k in items:
k[self.sgroups.last_updated_field] = datetime.utcnow()
for struct_group_dict in items:
struct_group_dict[self.sgroups.last_updated_field] = datetime.utcnow()
self.sgroups.update(docs=items, key=["task_id"])
else:
self.logger.info("No items to update")
Expand All @@ -262,8 +264,8 @@ def process_item(self, item: Any) -> Any:
)
# append the working_ion to the group ids
for sg in s_groups:
sg.task_id = f"{sg.task_id}_{self.working_id}"
return s_groups
sg.task_id = f"{sg.task_id}_{self.working_ion}"
return [sg.dict() for sg in s_groups]

def _remove_targets(self, rm_ids):
self.sgroups.remove_docs({"task_id": {"$in": rm_ids}})
Expand Down
6 changes: 3 additions & 3 deletions emmet-core/emmet/core/structure_group.py
Expand Up @@ -2,7 +2,7 @@
import operator
from datetime import datetime
from itertools import groupby
from typing import List, Union
from typing import Iterable, List, Union

from monty.json import MontyDecoder
from pydantic import BaseModel, Field, validator
Expand Down Expand Up @@ -126,7 +126,7 @@ def from_grouped_entries(
"structure_matched": structure_matched,
"framework_formula": framework_str,
"ignored_species": sorted(ignored_species),
"chemsys": "-".join(sorted(all_atoms)),
"chemsys": "-".join(sorted(all_atoms | set(ignored_species))),
"has_distinct_compositions": len(all_comps) > 1,
}

Expand Down Expand Up @@ -202,7 +202,7 @@ def from_ungrouped_structure_entries(

def group_entries_with_structure_matcher(
g, struct_matcher
) -> Iterator[List[Union[ComputedStructureEntry]]]:
) -> Iterable[List[Union[ComputedStructureEntry]]]:
"""
Group the entries together based on similarity of the primitive cells
Args:
Expand Down

0 comments on commit 70b1fd4

Please sign in to comment.