diff --git a/emmet-builders/emmet/builders/materials/provenance.py b/emmet-builders/emmet/builders/materials/provenance.py
new file mode 100644
index 0000000000..e1e7552e6e
--- /dev/null
+++ b/emmet-builders/emmet/builders/materials/provenance.py
@@ -0,0 +1,213 @@
+from collections import defaultdict
+from itertools import chain
+from typing import Dict, Iterator, List, Optional, Tuple
+
+from maggma.core import Builder, Store
+from maggma.utils import grouper
+from pymatgen.core import Structure
+from pymatgen.util.provenance import StructureNL
+
+from emmet.builders.settings import EmmetBuildSettings
+from emmet.core.provenance import ProvenanceDoc
+from emmet.core.utils import group_structures
+
+
+class ProvenanceBuilder(Builder):
+    def __init__(
+        self,
+        materials: Store,
+        provenance: Store,
+        source_snls: List[Store],
+        settings: Optional[EmmetBuildSettings] = None,
+        query: Optional[Dict] = None,
+        **kwargs,
+    ):
+        """
+        Creates provenance docs from source SNLs and materials.
+
+        Args:
+            materials: Store of materials docs to tag with SNLs
+            provenance: Store to update with provenance data
+            source_snls: List of locations to grab SNLs
+            settings: EmmetBuildSettings to use for this build
+            query: query on materials to limit search
+        """
+        self.materials = materials
+        self.provenance = provenance
+        self.source_snls = source_snls
+        self.settings = EmmetBuildSettings.autoload(settings)
+        # Default to an empty dict so the query can be safely merged below
+        self.query = query if query else {}
+        self.kwargs = kwargs
+
+        super().__init__(
+            sources=[materials, *source_snls], targets=[provenance], **kwargs
+        )
+
+    def ensure_indexes(self):
+
+        self.materials.ensure_index("material_id", unique=True)
+        self.materials.ensure_index("formula_pretty")
+
+        self.provenance.ensure_index("material_id", unique=True)
+        self.provenance.ensure_index("formula_pretty")
+
+        for s in self.source_snls:
+            s.ensure_index("snl_id")
+            s.ensure_index("formula_pretty")
+
+    def get_items(self) -> Iterator[Tuple[List[Dict], List[Dict]]]:
+        """
+        Gets all materials to associate with SNLs.
+
+        Returns:
+            generator of materials and SNLs that could match
+        """
+        self.logger.info("Provenance Builder Started")
+
+        self.logger.info("Setting indexes")
+        self.ensure_indexes()
+
+        # Find all formulas for materials that have been updated since this
+        # builder was last run
+        q = {**self.query, "property_name": ProvenanceDoc.property_name}
+        updated_materials = self.provenance.newer_in(
+            self.materials,
+            criteria=q,
+            exhaustive=True,
+        )
+        forms_to_update = set(
+            self.materials.distinct(
+                "formula_pretty", {"material_id": {"$in": updated_materials}}
+            )
+        )
+
+        # Find all new SNL formulas since the builder was last run
+        for source in self.source_snls:
+            new_snls = self.provenance.newer_in(source)
+            forms_to_update |= set(source.distinct("formula_pretty", new_snls))
+
+        # Now reduce to the set of formulas we actually have
+        forms_avail = set(self.materials.distinct("formula_pretty", self.query))
+        forms_to_update = forms_to_update & forms_avail
+
+        self.logger.info(f"Found {len(forms_to_update)} new/updated systems to process")
+
+        self.total = len(forms_to_update)
+
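+        # Process formulas in chunks: every SNL and every material sharing a
+        # formula travels in the same item, so process_item can structure-match
+        # them without any further database queries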
"last_updated", + "structure", + "initial_structures", + "formula_pretty", + ], + criteria={"formula_pretty": {"$in": formulas}}, + ) + ) + + form_groups = defaultdict(list) + for snl in snls: + form_groups[snl["formula_pretty"]].append(snl) + + mat_groups = defaultdict(list) + for mat in mats: + mat_groups[mat["formula_pretty"]].append(mat) + + for formula, snl_group in form_groups.items(): + + mat_group = mat_groups[formula] + + self.logger.debug( + f"Found {len(snl_group)} snls and {len(mat_group)} mats" + ) + yield mat_group, snl_group + + def process_item(self, item) -> List[Dict]: + """ + Matches SNLS and Materials + Args: + item (tuple): a tuple of materials and snls + Returns: + list(dict): a list of collected snls with material ids + """ + mats, source_snls = item + formula_pretty = mats[0]["formula_pretty"] + snl_docs = list() + self.logger.debug(f"Finding Provenance {formula_pretty}") + + # Match up SNLS with materials + for mat in mats: + matched_snls = list(self.match(source_snls, mat)) + if len(matched_snls) > 0: + doc = ProvenanceDoc.from_SNLs( + material_id=mat["material_id"], snls=matched_snls + ) + + doc.authors.append(self.settings.DEFAULT_AUTHOR) + doc.history.append(self.settings.DEFAULT_HISTORY) + doc.references.append(self.settings.DEFAULT_REFERENCE) + + snl_docs.append(doc.dict()) + + return snl_docs + + def match(self, snls, mat): + """ + Finds a material doc that matches with the given snl + Args: + snl ([dict]): the snls list + mat (dict): a materials doc + Returns: + generator of materials doc keys + """ + + m_strucs = [Structure.from_dict(mat["structure"])] + [ + Structure.from_dict(init_struc) for init_struc in mat["initial_structures"] + ] + snl_strucs = [StructureNL.from_dict(snl) for snl in snls] + + groups = group_structures( + m_strucs + snl_strucs, + ltol=self.settings.LTOL, + stol=self.settings.STOL, + angle_tol=self.settings.ANGLE_TOL, + ) + matched_groups = [ + group + for group in groups + if any(isinstance(struc, Structure) for struc in group) + ] + snls = [ + struc + for struc in group + for group in matched_groups + if isinstance(struc, StructureNL) + ] + + self.logger.debug(f"Found {len(snls)} SNLs for {mat['material_id']}") + return snls + + def update_targets(self, items): + """ + Inserts the new SNL docs into the SNL collection + """ + + snls = list(filter(None, chain.from_iterable(items))) + + if len(snls) > 0: + self.logger.info(f"Found {len(snls)} SNLs to update") + self.provenance.update(snls) + else: + self.logger.info("No items to update") diff --git a/emmet-builders/emmet/builders/settings.py b/emmet-builders/emmet/builders/settings.py index 4b6c3cf6c0..72aa1cc6d1 100644 --- a/emmet-builders/emmet/builders/settings.py +++ b/emmet-builders/emmet/builders/settings.py @@ -5,6 +5,7 @@ from pydantic.fields import Field from emmet.core.settings import EmmetSettings from emmet.core.vasp.calc_types import TaskType +from emmet.core.provenance import Author, History class EmmetBuildSettings(EmmetSettings): @@ -30,3 +31,31 @@ class EmmetBuildSettings(EmmetSettings): [t.value for t in TaskType], description="Allowed task_types to build materials from", ) + + DEFAULT_REFERENCE: str = Field( + "@article{Jain2013,\nauthor = {Jain, Anubhav and Ong, Shyue Ping and " + "Hautier, Geoffroy and Chen, Wei and Richards, William Davidson and " + "Dacek, Stephen and Cholia, Shreyas and Gunter, Dan and Skinner, David " + "and Ceder, Gerbrand and Persson, Kristin a.},\n" + "doi = {10.1063/1.4812323},\nissn = {2166532X},\n" + "journal = {APL Materials},\nnumber = 
+        groups = group_structures(
+            m_strucs + snl_strucs,
+            ltol=self.settings.LTOL,
+            stol=self.settings.STOL,
+            angle_tol=self.settings.ANGLE_TOL,
+        )
+        matched_groups = [
+            group
+            for group in groups
+            if any(isinstance(struc, Structure) for struc in group)
+        ]
+        snls = [
+            struc
+            for group in matched_groups
+            for struc in group
+            if isinstance(struc, StructureNL)
+        ]
+
+        self.logger.debug(f"Found {len(snls)} SNLs for {mat['material_id']}")
+        return snls
+
+    def update_targets(self, items):
+        """
+        Inserts the new provenance docs into the provenance collection
+        """
+
+        snls = list(filter(None, chain.from_iterable(items)))
+
+        if len(snls) > 0:
+            self.logger.info(f"Found {len(snls)} provenance docs to update")
+            self.provenance.update(snls)
+        else:
+            self.logger.info("No items to update")
diff --git a/emmet-builders/emmet/builders/settings.py b/emmet-builders/emmet/builders/settings.py
index 4b6c3cf6c0..72aa1cc6d1 100644
--- a/emmet-builders/emmet/builders/settings.py
+++ b/emmet-builders/emmet/builders/settings.py
@@ -5,6 +5,7 @@
 from pydantic.fields import Field
 
 from emmet.core.settings import EmmetSettings
 from emmet.core.vasp.calc_types import TaskType
+from emmet.core.provenance import Author, History
 
 class EmmetBuildSettings(EmmetSettings):
@@ -30,3 +31,31 @@ class EmmetBuildSettings(EmmetSettings):
         [t.value for t in TaskType],
         description="Allowed task_types to build materials from",
     )
+
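+    # These defaults are appended to every provenance doc assembled by
+    # ProvenanceBuilder.process_item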
+    DEFAULT_REFERENCE: str = Field(
+        "@article{Jain2013,\nauthor = {Jain, Anubhav and Ong, Shyue Ping and "
+        "Hautier, Geoffroy and Chen, Wei and Richards, William Davidson and "
+        "Dacek, Stephen and Cholia, Shreyas and Gunter, Dan and Skinner, David "
+        "and Ceder, Gerbrand and Persson, Kristin a.},\n"
+        "doi = {10.1063/1.4812323},\nissn = {2166532X},\n"
+        "journal = {APL Materials},\nnumber = {1},\npages = {011002},\n"
+        "title = {{The Materials Project: A materials genome approach to "
+        "accelerating materials innovation}},\n"
+        "url = {http://link.aip.org/link/AMPADS/v1/i1/p011002/s1\\&Agg=doi},\n"
+        "volume = {1},\nyear = {2013}\n}\n\n@misc{MaterialsProject,\n"
+        "title = {{Materials Project}},\nurl = {http://www.materialsproject.org}\n}",
+        description="Default bibtex citation for all provenance",
+    )
+
+    DEFAULT_AUTHOR: Author = Field(
+        Author(name="Materials Project", email="feedback@materialsproject.org"),
+        description="Default author for provenance",
+    )
+
+    DEFAULT_HISTORY: History = Field(
+        History(
+            name="Materials Project Optimized Structure",
+            url="http://www.materialsproject.org",
+        ),
+        description="Default history node for provenance",
+    )
diff --git a/emmet-builders/emmet/builders/vasp/thermo.py b/emmet-builders/emmet/builders/vasp/thermo.py
index 124c83222d..191a9955d6 100644
--- a/emmet-builders/emmet/builders/vasp/thermo.py
+++ b/emmet-builders/emmet/builders/vasp/thermo.py
@@ -19,6 +19,7 @@
 from emmet.core.thermo import ThermoDoc
 from emmet.core.vasp.calc_types import run_type
 
+
 class Thermo(Builder):
     def __init__(
         self,
@@ -143,7 +144,9 @@ def process_item(self, item: Tuple[List[str], List[ComputedEntry]]):
             )
             return []
         except Exception as e:
-            self.logger.error(f"Got unexpected error while processing {[ent_.entry_id for ent_ in entries]}: {e}")
+            self.logger.error(
+                f"Got unexpected error while processing {[ent_.entry_id for ent_ in entries]}: {e}"
+            )
             return []
 
         return [d.dict() for d in docs]
diff --git a/emmet-builders/setup.py b/emmet-builders/setup.py
index ae5d444266..2f1655a2a4 100644
--- a/emmet-builders/setup.py
+++ b/emmet-builders/setup.py
@@ -1,6 +1,7 @@
 import datetime
 from pathlib import Path
 from setuptools import setup, find_namespace_packages
+
 required = []
 
 with open(Path(__file__).parent / "requirements.txt") as f:
diff --git a/emmet-cli/emmet/cli/calc.py b/emmet-cli/emmet/cli/calc.py
index f17a2d37a7..5b33d72e34 100644
--- a/emmet-cli/emmet/cli/calc.py
+++ b/emmet-cli/emmet/cli/calc.py
@@ -37,6 +37,7 @@ def get_format(fname):
 def load_canonical_structures(ctx, full_name, formula):
     from emmet.core.vasp.calc_types import task_type  # TODO import error
+
     collection = ctx.obj["COLLECTIONS"][full_name]
 
     if formula not in canonical_structures[full_name]:
@@ -169,7 +170,7 @@ def calc(ctx, specs, nmax, skip):
     help="Author to assign to all structures.",
 )
 @click.pass_context
-def prep(ctx, archive, authors):
+def prep(ctx, archive, authors):  # noqa: C901
     """prep structures from an archive for submission"""
     run = ctx.obj["RUN"]
     collections = ctx.obj["COLLECTIONS"]
diff --git a/emmet-cli/emmet/cli/decorators.py b/emmet-cli/emmet/cli/decorators.py
index 22bfb31f12..ea9f60dd2a 100644
--- a/emmet-cli/emmet/cli/decorators.py
+++ b/emmet-cli/emmet/cli/decorators.py
@@ -106,7 +106,10 @@ def wrapper(*args, **kwargs):
         run = ctx.grand_parent.params["run"]
         ntries = ctx.grand_parent.params["ntries"]
         if run:
-            click.secho(f"SBATCH MODE! Submitting to SLURM queue with {ntries} tries.", fg="green")
+            click.secho(
+                f"SBATCH MODE! Submitting to SLURM queue with {ntries} tries.",
+                fg="green",
+            )
 
             directory = ctx.parent.params.get("directory")
             if not directory:
diff --git a/emmet-cli/emmet/cli/entry_point.py b/emmet-cli/emmet/cli/entry_point.py
index 610998c28f..0e8cd963c4 100644
--- a/emmet-cli/emmet/cli/entry_point.py
+++ b/emmet-cli/emmet/cli/entry_point.py
@@ -31,7 +31,12 @@ def opt_prompt():
 @click.option("--run", is_flag=True, help="Run DB/filesystem write operations.")
 @click.option("--issue", type=int, help="Production tracker issue (required if --run).")
 @click.option("--sbatch", is_flag=True, help="Switch to SBatch mode.")
-@click.option("--ntries", default=1, show_default=True, help="Number of jobs (for walltime > 48h).")
+@click.option(
+    "--ntries",
+    default=1,
+    show_default=True,
+    help="Number of jobs (for walltime > 48h).",
+)
 @click.option("--bb", is_flag=True, help="Use burst buffer.")
 @click.option("--yes", is_flag=True, help="Automatic yes to all prompts.")
 @click.option("--no-dupe-check", is_flag=True, help="Skip duplicate check(s).")
@@ -66,7 +71,7 @@ def emmet(spec_or_dbfile, run, issue, sbatch, ntries, bb, yes, no_dupe_check, ve
 
     if run:
         if not issue:
-            raise EmmetCliError(f"Need issue number via --issue!")
+            raise EmmetCliError("Need issue number via --issue!")
 
     ctx.obj["LOG_STREAM"] = StringIO()
     memory_handler = logging.StreamHandler(ctx.obj["LOG_STREAM"])
diff --git a/emmet-cli/emmet/cli/tasks.py b/emmet-cli/emmet/cli/tasks.py
index 47e75fc386..02e9d1fa59 100644
--- a/emmet-cli/emmet/cli/tasks.py
+++ b/emmet-cli/emmet/cli/tasks.py
@@ -110,11 +110,15 @@ def check_pattern(nested_allowed=False):
     if not nested_allowed and os.sep in pattern:
         raise EmmetCliError(f"Nested pattern ({pattern}) not allowed!")
     elif not any(pattern.startswith(p) for p in PREFIXES):
-        raise EmmetCliError(f"Pattern ({pattern}) only allowed to start with one of {PREFIXES}!")
+        raise EmmetCliError(
+            f"Pattern ({pattern}) only allowed to start with one of {PREFIXES}!"
+        )
 
 
 def load_block_launchers():
-    prefix = "block_"  # TODO old prefixes (e.g. res/aflow) might not be needed for backup
+    prefix = (
+        "block_"  # TODO old prefixes (e.g. res/aflow) might not be needed for backup
+    )
     block_launchers = defaultdict(list)
     gen = VaspDirsGenerator()
     for idx, vasp_dir in enumerate(gen):
@@ -136,7 +140,7 @@ def extract_filename(line):
 @sbatch
 @click.option("--clean", is_flag=True, help="Remove original launchers.")
 @click.option("--check", is_flag=True, help="Check backup consistency.")
-def backup(clean, check):
+def backup(clean, check):  # noqa: C901
     """Backup directory to HPSS"""
     ctx = click.get_current_context()
     run = ctx.parent.parent.params["run"]
@@ -232,7 +236,7 @@ def backup(clean, check):
     default=FILE_FILTERS_DEFAULT,
     help="Set the file filter(s) to match files against in each launcher.",
 )
-def restore(inputfile, file_filter):
+def restore(inputfile, file_filter):  # noqa: C901
     """Restore launchers from HPSS"""
     ctx = click.get_current_context()
     run = ctx.parent.parent.params["run"]
@@ -357,7 +361,7 @@ def restore(inputfile, file_filter):
     default=STORE_VOLUMETRIC_DATA,
     help="Store any of CHGCAR, LOCPOT, AECCAR0, AECCAR1, AECCAR2, ELFCAR.",
 )
-def parse(task_ids, snl_metas, nproc, store_volumetric_data):
+def parse(task_ids, snl_metas, nproc, store_volumetric_data):  # noqa: C901
     """Parse VASP launchers into tasks"""
     ctx = click.get_current_context()
     if "CLIENT" not in ctx.obj:
@@ -398,7 +402,9 @@ def parse(task_ids, snl_metas, nproc, store_volumetric_data):
     # insert empty doc with max ID + 1 into target collection for parallel SLURM jobs
     # NOTE use regex first to reduce size of distinct below 16MB
     q = {"task_id": {"$regex": r"^mp-\d{7,}$"}}
-    all_task_ids = [t["task_id"] for t in target.collection.find(q, {"_id": 0, "task_id": 1})]
+    all_task_ids = [
+        t["task_id"] for t in target.collection.find(q, {"_id": 0, "task_id": 1})
+    ]
     if not all_task_ids:
         all_task_ids = target.collection.distinct("task_id")
diff --git a/emmet-cli/emmet/cli/utils.py b/emmet-cli/emmet/cli/utils.py
index 1155b09efc..edf3e5e760 100644
--- a/emmet-cli/emmet/cli/utils.py
+++ b/emmet-cli/emmet/cli/utils.py
@@ -327,7 +327,7 @@ def reconstruct_command(sbatch=False):
     return " ".join(command).strip().strip("\\")
 
 
-def parse_vasp_dirs(vaspdirs, tag, task_ids, snl_metas):
+def parse_vasp_dirs(vaspdirs, tag, task_ids, snl_metas):  # noqa: C901
     process = multiprocessing.current_process()
     name = process.name
     chunk_idx = int(name.rsplit("-")[1]) - 1
@@ -345,7 +345,7 @@ def parse_vasp_dirs(vaspdirs, tag, task_ids, snl_metas):
     count = 0
     drone = VaspDrone(
         additional_fields={"tags": tags},
-        store_volumetric_data=ctx.params['store_volumetric_data']
+        store_volumetric_data=ctx.params["store_volumetric_data"],
     )
 
     for vaspdir in vaspdirs:
@@ -393,7 +393,9 @@ def parse_vasp_dirs(vaspdirs, tag, task_ids, snl_metas):
             snl_meta = snl_metas.get(launcher)
             if snl_meta:
                 references = snl_meta.get("references")
-                authors = snl_meta.get("authors", ["Materials Project <feedback@materialsproject.org>"])
+                authors = snl_meta.get(
+                    "authors", ["Materials Project <feedback@materialsproject.org>"]
+                )
                 kwargs = {"projects": [tag]}
                 if references:
                     kwargs["references"] = references
@@ -416,7 +418,11 @@ def parse_vasp_dirs(vaspdirs, tag, task_ids, snl_metas):
                 target.insert_task(task_doc, use_gridfs=True)
             except DocumentTooLarge:
                 output = dotty(task_doc["calcs_reversed"][0]["output"])
-                pop_keys = ["normalmode_eigenvecs", "force_constants", "outcar.onsite_density_matrices"]
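+                # Drop the largest arrays one at a time until the task doc
+                # fits under MongoDB's 16MB document limit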
+                pop_keys = [
+                    "normalmode_eigenvecs",
+                    "force_constants",
+                    "outcar.onsite_density_matrices",
+                ]
 
                 for k in pop_keys:
                     if k not in output:
@@ -436,7 +442,9 @@ def parse_vasp_dirs(vaspdirs, tag, task_ids, snl_metas):
             if target.collection.count(query):
                 if snl_dct:
                     result = snl_collection.insert_one(snl_dct)
-                    logger.info(f"SNL {result.inserted_id} inserted into {snl_collection.full_name}.")
+                    logger.info(
+                        f"SNL {result.inserted_id} inserted into {snl_collection.full_name}."
+                    )
 
                 shutil.rmtree(vaspdir)
                 logger.info(f"{name} Successfully parsed and removed {launcher}.")
diff --git a/emmet-core/emmet/core/provenance.py b/emmet-core/emmet/core/provenance.py
index 6e0da58e39..b83bb6f330 100644
--- a/emmet-core/emmet/core/provenance.py
+++ b/emmet-core/emmet/core/provenance.py
@@ -1,12 +1,18 @@
 """ Core definition of a Provenance Document """
+import warnings
+from collections import defaultdict
 from datetime import datetime
-from typing import ClassVar, Dict, List
+from typing import ClassVar, Dict, List, Optional, Union
 
 from pybtex.database import BibliographyData, parse_string
-from pydantic import BaseModel, EmailStr, Field, HttpUrl, validator
+from pydantic import BaseModel, EmailStr, Field, validator
+from pydash.objects import get
+from pymatgen.core import Structure
+from pymatgen.util.provenance import StructureNL
 
 from emmet.core.material_property import PropertyDoc
-from emmet.core.utils import ValueEnum
+from emmet.core.mpid import MPID
+from emmet.core.utils import ValueEnum, group_structures
 
 
 class Database(ValueEnum):
@@ -15,7 +21,7 @@ class Database(ValueEnum):
     """
 
     ICSD = "icsd"
-    PaulingFiles = "pf"
+    Pauling_Files = "pf"
     COD = "cod"
 
 
@@ -34,13 +40,13 @@ class History(BaseModel):
     """
 
     name: str
-    url: HttpUrl
-    description: Dict = Field(
-        None, description="Dictionary of exra data for this history node"
-    )
+    url: str
+    description: Optional[Dict] = Field(
+        None, description="Dictionary of extra data for this history node"
+    )
 
 
-class Provenance(PropertyDoc):
+class ProvenanceDoc(PropertyDoc):
     """
     A provenance property block
     """
 
     property_name: ClassVar[str] = "provenance"
 
     created_at: datetime = Field(
-        None,
+        ...,
         description="creation date for the first structure corresponding to this material",
     )
 
-    projects: List[str] = Field(
-        None, description="List of projects this material belongs to"
-    )
-    bibtex_string: str = Field(
-        None, description="Bibtex reference string for this material"
+    references: List[str] = Field(
+        [], description="Bibtex reference strings for this material"
     )
+
+    authors: List[Author] = Field([], description="List of authors for this material")
+
     remarks: List[str] = Field(
-        None, description="List of remarks for the provenance of this material"
+        [], description="List of remarks for the provenance of this material"
     )
-    authors: List[Author] = Field(None, description="List of authors for this material")
+
+    tags: List[str] = Field(
+        [], description="Short remarks (under 140 characters) used as tags"
+    )
 
     theoretical: bool = Field(
         True, description="If this material has any experimental provenance or not"
     )
 
     database_IDs: Dict[Database, List[str]] = Field(
-        None, description="Database IDs corresponding to this material"
+        dict(), description="Database IDs corresponding to this material"
    )
 
     history: List[History] = Field(
-        None,
-        description="List of history nodes specifying the transformations or orignation of this material",
+        [],
+        description="List of history nodes specifying the transformations or origination"
+        " of this material for the entry closest matching the material input",
     )
 
     @validator("authors")
     def remove_duplicate_authors(cls, authors):
         authors_dict = {entry.name.lower(): entry for entry in authors}
-        return list(authors_dict.items())
+        return list(authors_dict.values())
+
+    @classmethod
+    def from_SNLs(
+        cls,
+        material_id: Union[MPID, int],
+        snls: List[Dict],
+    ) -> "ProvenanceDoc":
+        """
+        Converts legacy pymatgen SNL dicts into a single provenance document
+        """
+
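+        # Aggregation strategy: the earliest SNL wins for created_at and
+        # history, while references, remarks, authors, and database IDs are
+        # merged across all SNLs with duplicates removed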
+        # Choose earliest created_at
+        created_at = min(
+            get(snl, "about.created_at.string", datetime.max) for snl in snls
+        )
+
+        # Choose the history of the earliest SNL
+        history = min(
+            snls, key=lambda snl: get(snl, "about.created_at.string", datetime.max)
+        )["about"]["history"]
+
+        # Aggregate all references into one dict to remove duplicates
+        refs = {}
+        for snl in snls:
+            try:
+                entries = parse_string(snl["about"]["references"], bib_format="bibtex")
+                refs.update(entries.entries)
+            except Exception:
+                warnings.warn(f"Failed parsing bibtex: {snl['about']['references']}")
+
+        bib_data = BibliographyData(entries=refs)
+        references = [entry.to_string("bibtex") for entry in bib_data.entries.values()]
+
+        # TODO: Maybe we should combine this with robocrystallographer?
+        # TODO: Refine these tags / remarks
+        remarks = list({remark for snl in snls for remark in snl["about"]["remarks"]})
+        tags = [r for r in remarks if len(r) < 140]
+
+        # Aggregate all authors - converting to a dict first removes
+        # duplicate names
+        authors_dict = {
+            entry["name"].lower(): entry["email"]
+            for snl in snls
+            for entry in snl["about"]["authors"]
+        }
+        authors = [
+            {"name": name.title(), "email": email}
+            for name, email in authors_dict.items()
+        ]
+
+        # Aggregate all the database IDs
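+        # snl_ids carry their source database as a prefix (e.g. "icsd-2"),
+        # so a substring check against each Database value recovers the IDs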
+        snl_ids = [snl.get("snl_id", "") for snl in snls]
+        db_ids = {
+            Database(db_id): [snl_id for snl_id in snl_ids if db_id in snl_id]
+            for db_id in map(str, Database)
+        }
+
+        # Remove Nones and empty lists
+        db_ids = {k: list(filter(None, v)) for k, v in db_ids.items()}
+        db_ids = {k: v for k, v in db_ids.items() if len(v) > 0}
+
+        # Get experimental bool
+        experimental = any(
+            get(snl, "about.history.0.experimental", False) for snl in snls
+        )
+
+        snl_fields = {
+            "created_at": created_at,
+            "references": references,
+            "authors": authors,
+            "remarks": remarks,
+            "tags": tags,
+            "database_IDs": db_ids,
+            "theoretical": not experimental,
+            "history": history,
+        }
+
+        return ProvenanceDoc(material_id=material_id, **snl_fields)
diff --git a/emmet-core/emmet/core/vasp/material.py b/emmet-core/emmet/core/vasp/material.py
index 7613abd3c4..1eac69d829 100644
--- a/emmet-core/emmet/core/vasp/material.py
+++ b/emmet-core/emmet/core/vasp/material.py
@@ -4,6 +4,7 @@
 from typing import ClassVar, List, Mapping, Optional, Sequence, Tuple, TypeVar, Union
 
 from pydantic import BaseModel, Field, create_model
+from pymatgen.symmetry.analyzer import SpacegroupAnalyzer
 from pymatgen.analysis.structure_matcher import ElementComparator, StructureMatcher
 from pymatgen.core import Structure
 from pymatgen.entries.computed_entries import ComputedStructureEntry
@@ -54,7 +55,7 @@ def from_tasks(
             quality_scores: quality scores for various calculation types
             use_statics: Use statics to define a material
         """
-        if task_group == 0:
-            raise Exception("Must have more than one task in the group.")
+        if len(task_group) == 0:
+            raise Exception("Must have at least one task in the group.")
 
         # Material ID
@@ -105,7 +106,9 @@ def _structure_eval(task: TaskDocument):
         )
 
         best_structure_calc = sorted(structure_calcs, key=_structure_eval)[0]
-        structure = best_structure_calc.output.structure
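+        # Standardize the best structure to its conventional cell so that
+        # stored material structures share a consistent symmetry setting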
+        structure = SpacegroupAnalyzer(
+            best_structure_calc.output.structure, symprec=0.1
+        ).get_conventional_standard_structure()
 
         # Initial Structures
         initial_structures = [task.input.structure for task in task_group]
diff --git a/emmet-core/emmet/core/vasp/task.py b/emmet-core/emmet/core/vasp/task.py
index c8b289582f..82bb793e6b 100644
--- a/emmet-core/emmet/core/vasp/task.py
+++ b/emmet-core/emmet/core/vasp/task.py
@@ -1,7 +1,7 @@
 """ Core definition of a VASP Task Document """
 from datetime import datetime
 from functools import lru_cache, partial
-from typing import ClassVar, Dict, List, Optional, Union, Any
+from typing import Any, ClassVar, Dict, List, Optional, Union
 
 from pydantic import BaseModel, Field, validator
 from pymatgen.analysis.magnetism import CollinearMagneticStructureAnalyzer, Ordering
diff --git a/setup.cfg b/setup.cfg
index 75e9591afa..531b69dc09 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -19,23 +19,5 @@ profile=black
 
 [pydocstyle]
 ignore = D105,D2,D4
 
-[mypy-numpy.*]
-ignore_missing_imports = True
-
-[mypy-bson.*]
-ignore_missing_imports = True
-
-[mypy-pymatgen.*]
-ignore_missing_imports = True
-
-[mypy-pytest]
-ignore_missing_imports = True
-
-[mypy-monty.*]
-ignore_missing_imports = True
-
-[mypy-pybtex.*]
-ignore_missing_imports = True
-
-[mypy-ruamel.*]
+[mypy]
 ignore_missing_imports = True
diff --git a/tests/emmet-core/test_provenance.py b/tests/emmet-core/test_provenance.py
new file mode 100644
index 0000000000..30111c5896
--- /dev/null
+++ b/tests/emmet-core/test_provenance.py
@@ -0,0 +1,51 @@
+from datetime import datetime
+
+import pytest
+from pymatgen.core import Lattice, Structure
+from pymatgen.util.provenance import Author, HistoryNode, StructureNL
+
+from emmet.core.provenance import Database, ProvenanceDoc
+
+
+@pytest.fixture
+def structure():
+    test_latt = Lattice.cubic(3.0)
+    test_struc = Structure(lattice=test_latt, species=["Fe"], coords=[[0, 0, 0]])
+    return test_struc
+
+
+@pytest.fixture
+def snls(structure):
+
+    docs = [
+        StructureNL(
+            structure,
+            authors=[Author(f"test{i}", "test@test.com").as_dict()],
+            history=[HistoryNode("nothing", "url.com", {})],
+            created_at=datetime.utcnow(),
+        ).as_dict()
+        for i in range(3)
+    ]
+    docs[0]["snl_id"] = "icsd-2"
+    docs[1]["snl_id"] = "user-1"
+    docs[2]["snl_id"] = "pf-3"
+
+    return docs
+
+
+def test_from_snls(snls):
+
+    doc = ProvenanceDoc.from_SNLs(material_id="mp-3", snls=snls)
+
+    assert isinstance(doc, ProvenanceDoc)
+    assert doc.property_name == "provenance"
+    assert doc.material_id == "mp-3"
+    assert doc.theoretical is True
+    assert doc.database_IDs == {
+        Database.ICSD: ["icsd-2"],
+        Database.Pauling_Files: ["pf-3"],
+    }
+
+    # Test experimental detection
+    snls[0]["about"]["history"][0]["experimental"] = True
+    assert ProvenanceDoc.from_SNLs(material_id="mp-3", snls=snls).theoretical is False
diff --git a/tests/emmet-core/test_settings.py b/tests/emmet-core/test_settings.py
index c98c8aeaca..0a9df9b37c 100644
--- a/tests/emmet-core/test_settings.py
+++ b/tests/emmet-core/test_settings.py
@@ -38,7 +38,7 @@ def test_from_url():
 
     os.environ[
         "EMMET_CONFIG_FILE"
-    ] = "https://raw.githubusercontent.com/materialsproject/emmet/master/tests/emmet-core/test_settings.json"
+    ] = "https://raw.githubusercontent.com/materialsproject/emmet/master/tests/test_files/test_settings.json"
 
     test_config = EmmetSettings()
diff --git a/tests/emmet-core/test_settings.json b/tests/test_files/test_settings.json
similarity index 100%
rename from tests/emmet-core/test_settings.json
rename to tests/test_files/test_settings.json