
Commit

Merge 10e5a72 into ee99da2
shyamd committed Oct 26, 2018
2 parents ee99da2 + 10e5a72 commit 2665606
Showing 32 changed files with 18,027 additions and 1,343 deletions.
2 changes: 1 addition & 1 deletion emmet/abinit/phonon.py
@@ -65,7 +65,7 @@ def get_items(self):
# All relevant materials that have been updated since phonon data was last calculated
q = dict(self.query)
q.update(self.materials.lu_filter(self.phonon))
mats = list(self.materials().find(q, {"mp_id": 1}))
mats = list(self.materials.find(q, {"mp_id": 1}))
self.logger.info("Found {} new materials for phonon data".format(len(mats)))

# list of properties queried from the results DB
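Aside: the one-line change above fixes how the materials store is addressed — self.materials is a maggma store object and is used directly rather than called. A minimal sketch of the corrected incremental query, assuming a maggma-style store whose lu_filter builds a last-updated criterion against the target store; the method body is abridged from the hunk above:

def get_items(self):
    # Restrict to materials updated since phonon data was last built.
    q = dict(self.query)
    q.update(self.materials.lu_filter(self.phonon))
    # Use the store directly; calling it (self.materials()) was the bug.
    mats = list(self.materials.find(q, {"mp_id": 1}))
    self.logger.info("Found {} new materials for phonon data".format(len(mats)))
    # ... remainder of get_items (querying and yielding documents) not shown in this hunk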
141 changes: 32 additions & 109 deletions emmet/materials/basic_descriptors.py
@@ -10,7 +10,7 @@
# 1) Add checking OPs present in current implementation of site fingerprints.
# 2) Complete documentation!!!

from maggma.builder import Builder
from maggma.examples.builders import MapBuilder

__author__ = "Nils E. R. Zimmermann <nerz@lbl.gov>"

@@ -20,9 +20,9 @@
"EconNN"]


class BasicDescriptorsBuilder(Builder):
class BasicDescriptorsBuilder(MapBuilder):

def __init__(self, materials, descriptors, mat_query=None, **kwargs):
def __init__(self, materials, descriptors, **kwargs):
"""
Calculates site-based descriptors (e.g., coordination numbers
with different near-neighbor finding approaches) for materials and
@@ -44,97 +44,35 @@ def __init__(self, materials, descriptors, mat_query=None, **kwargs):

self.materials = materials
self.descriptors = descriptors
self.mat_query = mat_query if mat_query else {}

# Set up all targeted site descriptors.
self.sds = {}
for nn in nn_target_classes:
nn_ = getattr(local_env, nn)
k = 'cn_{}'.format(nn)
self.sds[k] = CoordinationNumber(nn_(), use_weights='none')
k = 'cn_wt_{}'.format(nn)
self.sds[k] = CoordinationNumber(nn_(), use_weights='sum')
self.all_output_pieces = {'site_descriptors': [k for k in self.sds.keys()]}
self.sds['csf'] = CrystalNNFingerprint.from_preset('ops',
k = "cn_{}".format(nn)
self.sds[k] = CoordinationNumber(nn_(), use_weights="none")
k = "cn_wt_{}".format(nn)
self.sds[k] = CoordinationNumber(nn_(), use_weights="sum")
self.all_output_pieces = {"site_descriptors": [k for k in self.sds.keys()]}
self.sds["csf"] = CrystalNNFingerprint.from_preset("ops",
distance_cutoffs=None,
x_diff_weight=None)
self.all_output_pieces['statistics'] = ['csf']
self.all_output_pieces["statistics"] = ["csf"]

# Set up all targeted composition descriptors.
self.cds = {}
self.cds["magpie"] = ElementProperty.from_preset('magpie')
self.all_output_pieces['composition_descriptors'] = ['magpie']
self.cds["magpie"] = ElementProperty.from_preset("magpie")
self.all_output_pieces["composition_descriptors"] = ["magpie"]

self.all_output_pieces['meta'] = ['atomate']
self.all_output_pieces["meta"] = ["atomate"]

super().__init__(sources=[materials],
targets=[descriptors],
super().__init__(source=materials,
target=descriptors,
ufn=self.calc,
projection=["structure"],
**kwargs)

def get_items(self):
"""
Gets all materials that need new descriptors.
For example, entirely new materials and materials
for which certain descriptors in the current Store
are still missing.
Returns:
generator of materials to calculate basic descriptors
and of the target quantities to be calculated
(e.g., CN with the minimum distance near neighbor
(MinimumDistanceNN) finding class from pymatgen which has label
"cn_mdnn").
"""

self.logger.info("Basic-Descriptors Builder Started")

self.logger.info("Setting indexes")

# All relevant materials that have been updated since descriptors
# were last calculated

q = dict(self.mat_query)
all_task_ids = list(self.materials.distinct(self.materials.key, q))
q.update(self.materials.lu_filter(self.descriptors))
new_task_ids = list(self.materials.distinct(self.materials.key, q))
self.logger.info(
"Found {} entirely new materials for descriptors data".format(
len(new_task_ids)))
for task_id in all_task_ids:
if task_id in new_task_ids:
any_piece = True

else: # Any piece of info missing?
data_present = self.descriptors.query(
properties=[self.descriptors.key,
"meta",
"composition_descriptors",
"site_descriptors",
"statistics"],
criteria={self.descriptors.key: task_id}).limit(1)[0]
any_piece = False
for k, v in self.all_output_pieces.items():
if k not in list(data_present.keys()):
any_piece = True
break
else:
any_piece = False
for e in v:
if e not in data_present[k]:
any_piece = True
break
if not any_piece:
for l in self.sds['csf'].feature_labels():
for fpi in data_present['site_descriptors']['csf']:
if l not in fpi.keys():
any_piece = True
break
if any_piece:
yield self.materials.query(
properties=[self.materials.key, "structure"],
criteria={self.materials.key: task_id}).limit(1)[0]

def process_item(self, item):
def calc(self, item):
"""
Calculates all basic descriptors for the structures
@@ -148,45 +86,30 @@ def process_item(self, item):
self.logger.debug("Calculating basic descriptors for {}".format(
item[self.materials.key]))

struct = Structure.from_dict(item['structure'])
struct = Structure.from_dict(item["structure"])

descr_doc = {'structure': struct.copy()}
descr_doc['meta'] = {'atomate': get_meta_from_structure(struct)}
descr_doc = {"structure": struct.copy()}
descr_doc["meta"] = {"atomate": get_meta_from_structure(struct)}
try:
comp_descr = [{'name': 'magpie'}]
comp_descr = [{"name": "magpie"}]
labels = self.cds["magpie"].feature_labels()
values = self.cds["magpie"].featurize(struct.composition)
for label, value in zip(labels, values):
comp_descr[0][label] = value
descr_doc['composition_descriptors'] = comp_descr
descr_doc["composition_descriptors"] = comp_descr
except Exception as e:
self.logger.error("Failed getting Magpie descriptors: "
"{}".format(e))
descr_doc['site_descriptors'] = \
descr_doc["site_descriptors"] = \
self.get_site_descriptors_from_struct(
descr_doc['structure'])
descr_doc['statistics'] = \
descr_doc["structure"])
descr_doc["statistics"] = \
self.get_statistics(
descr_doc['site_descriptors'])
descr_doc["site_descriptors"])
descr_doc[self.descriptors.key] = item[self.materials.key]

return descr_doc

def update_targets(self, items):
"""
Inserts the new descriptor docs into the descriptors collection.
Args:
items ([dict]): a list of descriptor dictionaries to update.
"""
items = list(filter(None, items))

if len(items) > 0:
self.logger.info("Updating {} basic-descriptors docs".format(len(items)))
self.descriptors.update(docs=items)
else:
self.logger.info("No items to update")

def get_site_descriptors_from_struct(self, structure):
doc = {}

@@ -196,7 +119,7 @@ def get_site_descriptors_from_struct(self, structure):
d = []
l = sd.feature_labels()
for i, s in enumerate(structure.sites):
d.append({'site': i})
d.append({"site": i})
for j, desc in enumerate(sd.featurize(structure, i)):
d[i][l[j]] = desc
doc[k] = d
@@ -207,7 +130,7 @@ def get_statistics(self, site_descr, fps=('csf', )):

return doc

def get_statistics(self, site_descr, fps=('csf', )):
def get_statistics(self, site_descr, fps=("csf", )):
doc = {}

# Compute site-descriptor statistics.
@@ -223,9 +146,9 @@ def get_statistics(self, site_descr, fps=('csf', )):
tmp[l].append(v)
d = []
for k, l in tmp.items():
dtmp = {'name': k}
dtmp['mean'] = np.mean(tmp[k])
dtmp['std'] = np.std(tmp[k])
dtmp = {"name": k}
dtmp["mean"] = np.mean(tmp[k])
dtmp["std"] = np.std(tmp[k])
d.append(dtmp)
doc[fp] = d

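The refactor above drops the hand-rolled get_items/process_item/update_targets methods in favor of maggma's MapBuilder, which handles incremental document selection and target updates itself and only needs a per-document function (ufn) plus a projection of source fields. A minimal sketch of that pattern, assuming the maggma.examples.builders.MapBuilder API used in this commit (source, target, ufn, projection); the builder and field names are illustrative:

from maggma.examples.builders import MapBuilder


class SquareBuilder(MapBuilder):
    """Toy builder: writes the square of each source doc's "value" field."""

    def __init__(self, source, target, **kwargs):
        super().__init__(source=source,
                         target=target,
                         ufn=self.calc,          # applied to every new/updated source doc
                         projection=["value"],   # fields to pull from the source store
                         **kwargs)

    def calc(self, item):
        # MapBuilder is expected to fill in the key and last-updated fields
        # (as in the bond_valence builder below), so calc only returns the
        # newly computed fields.
        return {"value_squared": item["value"] ** 2}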
100 changes: 35 additions & 65 deletions emmet/materials/bond_valence.py
@@ -1,95 +1,65 @@
import os.path
from monty.serialization import loadfn

from pymatgen.core.structure import Structure
from pymatgen.analysis.bond_valence import BVAnalyzer
from pymatgen.core.periodic_table import Specie
from pymatgen import __version__ as pymatgen_version

from maggma.builder import Builder
from maggma.examples.builders import MapBuilder
from maggma.validator import JSONSchemaValidator


BOND_VALENCE_SCHEMA = {
"title": "bond_valence",
"type": "object",
"properties":
{
"task_id": {"type": "string"},
"method": {"type": "string"},
"possible_species": {"type": "array", "items": {"type": "strinig"}},
"possible_valences": {"type": "array", "items": {"type": "number"}},
"successful": {"type": "boolean"},
"pymatgen_version": {"type": "string"},
},
"required": ["task_id", "successful", "pymatgen_version"]
}
MODULE_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)))
BOND_VALENCE_SCHEMA = os.path.join(MODULE_DIR, "schema", "bond_valence.json")


class BondValenceBuilder(Builder):
class BondValenceBuilder(MapBuilder):
"""
Calculate plausible oxidation states from structures.
"""

def __init__(self, materials, bond_valence,
query=None, **kwargs):
def __init__(self, materials, bond_valence, **kwargs):

self.materials = materials
self.bond_valence = bond_valence
self.query = query or {}

super().__init__(sources=[materials],
targets=[bond_valence],
**kwargs)

def get_items(self):
self.bond_valence.validator = JSONSchemaValidator(loadfn(BOND_VALENCE_SCHEMA))
super().__init__(source=materials, target=bond_valence, ufn=self.calc, projection=["structure"], **kwargs)

materials = self.materials.query(criteria=self.query,
properties=["task_id", "structure"])
# All relevant materials that have been updated since bond valences
# were last calculated
q = dict(self.query)
q.update(self.materials.lu_filter(self.bond_valence))
new_keys = list(self.materials.distinct(self.materials.key, q))

materials = self.materials.query(criteria={self.materials.key: {'$in': new_keys}},
properties=["task_id", "structure"])

self.total = materials.count()
self.logger.info("Found {} new materials for bond valence analysis".format(self.total))
def calc(self, item):
s = Structure.from_dict(item['structure'])

for material in materials:
yield material
d = {"pymatgen_version": pymatgen_version, "successful": False}

def process_item(self, item):
s = Structure.from_dict(item['structure'])
try:
bva = BVAnalyzer()
valences = bva.get_valences(s)
possible_species = {str(Specie(s[idx].specie, oxidation_state=valence))
for idx, valence in enumerate(valences)}
possible_species = {
str(Specie(s[idx].specie, oxidation_state=valence)) for idx, valence in enumerate(valences)
}

method = "BVAnalyzer"

d["successful"] = True
d["bond_valence"] = {
"possible_species": list(possible_species),
"possible_valences": valences,
"method": "oxi_state_guesses"
}

except ValueError:
try:
first_oxi_state_guess = s.composition.oxi_state_guesses()[0]
valences = [first_oxi_state_guess[site.species_string] for site in s]
possible_species = {str(Specie(el, oxidation_state=valence))
for el, valence in first_oxi_state_guess.items()}
method = "oxi_state_guesses"
except:
return {
"task_id": item['task_id'],
"pymatgen_version": pymatgen_version,
"successful": False
possible_species = {
str(Specie(el, oxidation_state=valence)) for el, valence in first_oxi_state_guess.items()
}
d["successful"] = True
d["bond_valence"] = {
"possible_species": list(possible_species),
"possible_valences": valences,
"method": "oxi_state_guesses"
}
except:
pass

return {
"task_id": item['task_id'],
"possible_species": list(possible_species),
"possible_valences": valences,
"method": method,
"pymatgen_version": pymatgen_version,
"successful": True
}

def update_targets(self, items):
self.logger.debug("Updating {} bond valence documents".format(len(items)))
self.bond_valence.update(docs=items, key=['task_id'])
return d
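For reference, the inline BOND_VALENCE_SCHEMA dict removed above now lives in schema/bond_valence.json and is attached to the target store as a validator in __init__. A minimal sketch of that wiring, with the expected schema contents reconstructed from the deleted dict (the actual JSON file added by this commit is not shown on this page):

import os.path

from monty.serialization import loadfn
from maggma.validator import JSONSchemaValidator

MODULE_DIR = os.path.dirname(os.path.abspath(__file__))
BOND_VALENCE_SCHEMA = os.path.join(MODULE_DIR, "schema", "bond_valence.json")

# Expected shape of schema/bond_valence.json, per the dict deleted above:
# {
#   "title": "bond_valence",
#   "type": "object",
#   "properties": {
#     "task_id": {"type": "string"},
#     "method": {"type": "string"},
#     "possible_species": {"type": "array", "items": {"type": "string"}},
#     "possible_valences": {"type": "array", "items": {"type": "number"}},
#     "successful": {"type": "boolean"},
#     "pymatgen_version": {"type": "string"}
#   },
#   "required": ["task_id", "successful", "pymatgen_version"]
# }


def attach_bond_valence_validator(bond_valence_store):
    # Same call the builder makes in __init__: every doc written to the store
    # is validated against the JSON schema on update.
    bond_valence_store.validator = JSONSchemaValidator(loadfn(BOND_VALENCE_SCHEMA))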
