
Commit

Merge 10e5a72 into ee99da2
shyamd committed Oct 26, 2018
2 parents ee99da2 + 10e5a72 commit 2665606
Showing 32 changed files with 18,027 additions and 1,343 deletions.
2 changes: 1 addition & 1 deletion emmet/abinit/phonon.py
@@ -65,7 +65,7 @@ def get_items(self):
# All relevant materials that have been updated since phonon data was last calculated
q = dict(self.query)
q.update(self.materials.lu_filter(self.phonon))
mats = list(self.materials().find(q, {"mp_id": 1}))
mats = list(self.materials.find(q, {"mp_id": 1}))
self.logger.info("Found {} new materials for phonon data".format(len(mats)))

# list of properties queried from the results DB
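Aside: the one-line change above fixes how the materials store is addressed — self.materials is a maggma store object and is used directly rather than called. A minimal sketch of the corrected incremental query, assuming a maggma-style store whose lu_filter builds a last-updated criterion against the target store; the method body is abridged from the hunk above:

def get_items(self):
    # Restrict to materials updated since phonon data was last built.
    q = dict(self.query)
    q.update(self.materials.lu_filter(self.phonon))
    # Use the store directly; calling it (self.materials()) was the bug.
    mats = list(self.materials.find(q, {"mp_id": 1}))
    self.logger.info("Found {} new materials for phonon data".format(len(mats)))
    # ... remainder of get_items (querying and yielding documents) not shown in this hunk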
141 changes: 32 additions & 109 deletions emmet/materials/basic_descriptors.py
@@ -10,7 +10,7 @@
# 1) Add checking OPs present in current implementation of site fingerprints.
# 2) Complete documentation!!!

from maggma.builder import Builder
from maggma.examples.builders import MapBuilder

__author__ = "Nils E. R. Zimmermann <nerz@lbl.gov>"

@@ -20,9 +20,9 @@
"EconNN"]


class BasicDescriptorsBuilder(Builder):
class BasicDescriptorsBuilder(MapBuilder):

def __init__(self, materials, descriptors, mat_query=None, **kwargs):
def __init__(self, materials, descriptors, **kwargs):
"""
Calculates site-based descriptors (e.g., coordination numbers
with different near-neighbor finding approaches) for materials and
@@ -44,97 +44,35 @@ def __init__(self, materials, descriptors, mat_query=None, **kwargs):

self.materials = materials
self.descriptors = descriptors
self.mat_query = mat_query if mat_query else {}

# Set up all targeted site descriptors.
self.sds = {}
for nn in nn_target_classes:
nn_ = getattr(local_env, nn)
k = 'cn_{}'.format(nn)
self.sds[k] = CoordinationNumber(nn_(), use_weights='none')
k = 'cn_wt_{}'.format(nn)
self.sds[k] = CoordinationNumber(nn_(), use_weights='sum')
self.all_output_pieces = {'site_descriptors': [k for k in self.sds.keys()]}
self.sds['csf'] = CrystalNNFingerprint.from_preset('ops',
k = "cn_{}".format(nn)
self.sds[k] = CoordinationNumber(nn_(), use_weights="none")
k = "cn_wt_{}".format(nn)
self.sds[k] = CoordinationNumber(nn_(), use_weights="sum")
self.all_output_pieces = {"site_descriptors": [k for k in self.sds.keys()]}
self.sds["csf"] = CrystalNNFingerprint.from_preset("ops",
distance_cutoffs=None,
x_diff_weight=None)
self.all_output_pieces['statistics'] = ['csf']
self.all_output_pieces["statistics"] = ["csf"]

# Set up all targeted composition descriptors.
self.cds = {}
self.cds["magpie"] = ElementProperty.from_preset('magpie')
self.all_output_pieces['composition_descriptors'] = ['magpie']
self.cds["magpie"] = ElementProperty.from_preset("magpie")
self.all_output_pieces["composition_descriptors"] = ["magpie"]

self.all_output_pieces['meta'] = ['atomate']
self.all_output_pieces["meta"] = ["atomate"]

super().__init__(sources=[materials],
targets=[descriptors],
super().__init__(source=materials,
target=descriptors,
ufn=self.calc,
projection=["structure"],
**kwargs)

def get_items(self):
"""
Gets all materials that need new descriptors.
For example, entirely new materials and materials
for which certain descriptors in the current Store
are still missing.
Returns:
generator of materials to calculate basic descriptors
and of the target quantities to be calculated
(e.g., CN with the minimum distance near neighbor
(MinimumDistanceNN) finding class from pymatgen which has label
"cn_mdnn").
"""

self.logger.info("Basic-Descriptors Builder Started")

self.logger.info("Setting indexes")

# All relevant materials that have been updated since descriptors
# were last calculated

q = dict(self.mat_query)
all_task_ids = list(self.materials.distinct(self.materials.key, q))
q.update(self.materials.lu_filter(self.descriptors))
new_task_ids = list(self.materials.distinct(self.materials.key, q))
self.logger.info(
"Found {} entirely new materials for descriptors data".format(
len(new_task_ids)))
for task_id in all_task_ids:
if task_id in new_task_ids:
any_piece = True

else: # Any piece of info missing?
data_present = self.descriptors.query(
properties=[self.descriptors.key,
"meta",
"composition_descriptors",
"site_descriptors",
"statistics"],
criteria={self.descriptors.key: task_id}).limit(1)[0]
any_piece = False
for k, v in self.all_output_pieces.items():
if k not in list(data_present.keys()):
any_piece = True
break
else:
any_piece = False
for e in v:
if e not in data_present[k]:
any_piece = True
break
if not any_piece:
for l in self.sds['csf'].feature_labels():
for fpi in data_present['site_descriptors']['csf']:
if l not in fpi.keys():
any_piece = True
break
if any_piece:
yield self.materials.query(
properties=[self.materials.key, "structure"],
criteria={self.materials.key: task_id}).limit(1)[0]

def process_item(self, item):
def calc(self, item):
"""
Calculates all basic descriptors for the structures
@@ -148,45 +86,30 @@ def process_item(self, item):
self.logger.debug("Calculating basic descriptors for {}".format(
item[self.materials.key]))

struct = Structure.from_dict(item['structure'])
struct = Structure.from_dict(item["structure"])

descr_doc = {'structure': struct.copy()}
descr_doc['meta'] = {'atomate': get_meta_from_structure(struct)}
descr_doc = {"structure": struct.copy()}
descr_doc["meta"] = {"atomate": get_meta_from_structure(struct)}
try:
comp_descr = [{'name': 'magpie'}]
comp_descr = [{"name": "magpie"}]
labels = self.cds["magpie"].feature_labels()
values = self.cds["magpie"].featurize(struct.composition)
for label, value in zip(labels, values):
comp_descr[0][label] = value
descr_doc['composition_descriptors'] = comp_descr
descr_doc["composition_descriptors"] = comp_descr
except Exception as e:
self.logger.error("Failed getting Magpie descriptors: "
"{}".format(e))
descr_doc['site_descriptors'] = \
descr_doc["site_descriptors"] = \
self.get_site_descriptors_from_struct(
descr_doc['structure'])
descr_doc['statistics'] = \
descr_doc["structure"])
descr_doc["statistics"] = \
self.get_statistics(
descr_doc['site_descriptors'])
descr_doc["site_descriptors"])
descr_doc[self.descriptors.key] = item[self.materials.key]

return descr_doc

def update_targets(self, items):
"""
Inserts the new descriptor docs into the descriptors collection.
Args:
items ([dict]): a list of descriptor dictionaries to update.
"""
items = list(filter(None, items))

if len(items) > 0:
self.logger.info("Updating {} basic-descriptors docs".format(len(items)))
self.descriptors.update(docs=items)
else:
self.logger.info("No items to update")

def get_site_descriptors_from_struct(self, structure):
doc = {}

@@ -196,7 +119,7 @@ def get_site_descriptors_from_struct(self, structure):
d = []
l = sd.feature_labels()
for i, s in enumerate(structure.sites):
d.append({'site': i})
d.append({"site": i})
for j, desc in enumerate(sd.featurize(structure, i)):
d[i][l[j]] = desc
doc[k] = d
@@ -207,7 +130,7 @@ def get_statistics(self, site_descr, fps=('csf', )):

return doc

def get_statistics(self, site_descr, fps=('csf', )):
def get_statistics(self, site_descr, fps=("csf", )):
doc = {}

# Compute site-descriptor statistics.
@@ -223,9 +146,9 @@ def get_statistics(self, site_descr, fps=('csf', )):
tmp[l].append(v)
d = []
for k, l in tmp.items():
dtmp = {'name': k}
dtmp['mean'] = np.mean(tmp[k])
dtmp['std'] = np.std(tmp[k])
dtmp = {"name": k}
dtmp["mean"] = np.mean(tmp[k])
dtmp["std"] = np.std(tmp[k])
d.append(dtmp)
doc[fp] = d

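The refactor above drops the hand-rolled get_items/process_item/update_targets methods in favor of maggma's MapBuilder, which handles incremental document selection and target updates itself and only needs a per-document function (ufn) plus a projection of source fields. A minimal sketch of that pattern, assuming the maggma.examples.builders.MapBuilder API used in this commit (source, target, ufn, projection); the builder and field names are illustrative:

from maggma.examples.builders import MapBuilder


class SquareBuilder(MapBuilder):
    """Toy builder: writes the square of each source doc's "value" field."""

    def __init__(self, source, target, **kwargs):
        super().__init__(source=source,
                         target=target,
                         ufn=self.calc,          # applied to every new/updated source doc
                         projection=["value"],   # fields to pull from the source store
                         **kwargs)

    def calc(self, item):
        # MapBuilder is expected to fill in the key and last-updated fields
        # (as in the bond_valence builder below), so calc only returns the
        # newly computed fields.
        return {"value_squared": item["value"] ** 2}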
100 changes: 35 additions & 65 deletions emmet/materials/bond_valence.py
@@ -1,95 +1,65 @@
import os.path
from monty.serialization import loadfn

from pymatgen.core.structure import Structure
from pymatgen.analysis.bond_valence import BVAnalyzer
from pymatgen.core.periodic_table import Specie
from pymatgen import __version__ as pymatgen_version

from maggma.builder import Builder
from maggma.examples.builders import MapBuilder
from maggma.validator import JSONSchemaValidator


BOND_VALENCE_SCHEMA = {
"title": "bond_valence",
"type": "object",
"properties":
{
"task_id": {"type": "string"},
"method": {"type": "string"},
"possible_species": {"type": "array", "items": {"type": "strinig"}},
"possible_valences": {"type": "array", "items": {"type": "number"}},
"successful": {"type": "boolean"},
"pymatgen_version": {"type": "string"},
},
"required": ["task_id", "successful", "pymatgen_version"]
}
MODULE_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)))
BOND_VALENCE_SCHEMA = os.path.join(MODULE_DIR, "schema", "bond_valence.json")


class BondValenceBuilder(Builder):
class BondValenceBuilder(MapBuilder):
"""
Calculate plausible oxidation states from structures.
"""

def __init__(self, materials, bond_valence,
query=None, **kwargs):
def __init__(self, materials, bond_valence, **kwargs):

self.materials = materials
self.bond_valence = bond_valence
self.query = query or {}

super().__init__(sources=[materials],
targets=[bond_valence],
**kwargs)

def get_items(self):
self.bond_valence.validator = JSONSchemaValidator(loadfn(BOND_VALENCE_SCHEMA))
super().__init__(source=materials, target=bond_valence, ufn=self.calc, projection=["structure"], **kwargs)

materials = self.materials.query(criteria=self.query,
properties=["task_id", "structure"])
# All relevant materials that have been updated since bond valences
# were last calculated
q = dict(self.query)
q.update(self.materials.lu_filter(self.bond_valence))
new_keys = list(self.materials.distinct(self.materials.key, q))

materials = self.materials.query(criteria={self.materials.key: {'$in': new_keys}},
properties=["task_id", "structure"])

self.total = materials.count()
self.logger.info("Found {} new materials for bond valence analysis".format(self.total))
def calc(self, item):
s = Structure.from_dict(item['structure'])

for material in materials:
yield material
d = {"pymatgen_version": pymatgen_version, "successful": False}

def process_item(self, item):
s = Structure.from_dict(item['structure'])
try:
bva = BVAnalyzer()
valences = bva.get_valences(s)
possible_species = {str(Specie(s[idx].specie, oxidation_state=valence))
for idx, valence in enumerate(valences)}
possible_species = {
str(Specie(s[idx].specie, oxidation_state=valence)) for idx, valence in enumerate(valences)
}

method = "BVAnalyzer"

d["successful"] = True
d["bond_valence"] = {
"possible_species": list(possible_species),
"possible_valences": valences,
"method": "oxi_state_guesses"
}

except ValueError:
try:
first_oxi_state_guess = s.composition.oxi_state_guesses()[0]
valences = [first_oxi_state_guess[site.species_string] for site in s]
possible_species = {str(Specie(el, oxidation_state=valence))
for el, valence in first_oxi_state_guess.items()}
method = "oxi_state_guesses"
except:
return {
"task_id": item['task_id'],
"pymatgen_version": pymatgen_version,
"successful": False
possible_species = {
str(Specie(el, oxidation_state=valence)) for el, valence in first_oxi_state_guess.items()
}
d["successful"] = True
d["bond_valence"] = {
"possible_species": list(possible_species),
"possible_valences": valences,
"method": "oxi_state_guesses"
}
except:
pass

return {
"task_id": item['task_id'],
"possible_species": list(possible_species),
"possible_valences": valences,
"method": method,
"pymatgen_version": pymatgen_version,
"successful": True
}

def update_targets(self, items):
self.logger.debug("Updating {} bond valence documents".format(len(items)))
self.bond_valence.update(docs=items, key=['task_id'])
return d
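For reference, the inline BOND_VALENCE_SCHEMA dict removed above now lives in schema/bond_valence.json and is attached to the target store as a validator in __init__. A minimal sketch of that wiring, with the expected schema contents reconstructed from the deleted dict (the actual JSON file added by this commit is not shown on this page):

import os.path

from monty.serialization import loadfn
from maggma.validator import JSONSchemaValidator

MODULE_DIR = os.path.dirname(os.path.abspath(__file__))
BOND_VALENCE_SCHEMA = os.path.join(MODULE_DIR, "schema", "bond_valence.json")

# Expected shape of schema/bond_valence.json, per the dict deleted above:
# {
#   "title": "bond_valence",
#   "type": "object",
#   "properties": {
#     "task_id": {"type": "string"},
#     "method": {"type": "string"},
#     "possible_species": {"type": "array", "items": {"type": "string"}},
#     "possible_valences": {"type": "array", "items": {"type": "number"}},
#     "successful": {"type": "boolean"},
#     "pymatgen_version": {"type": "string"}
#   },
#   "required": ["task_id", "successful", "pymatgen_version"]
# }


def attach_bond_valence_validator(bond_valence_store):
    # Same call the builder makes in __init__: every doc written to the store
    # is validated against the JSON schema on update.
    bond_valence_store.validator = JSONSchemaValidator(loadfn(BOND_VALENCE_SCHEMA))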
