From 329740e48b7fa2f8045d398bdafa92a641a08816 Mon Sep 17 00:00:00 2001 From: Haoyan Huo Date: Thu, 15 Jul 2021 15:56:30 -0700 Subject: [PATCH] Fixes the problem of weird synthesis search results when querying for a range (#324) * Add new synthesis recipes schema. * [WIP] add models to synthesis recipes and implement query classes * [WIP] add query class for synthesis-type, experimental operations, and paragraph keywords (half-completed). * [WIP] add script to convert dataset from the public repo to MP database. * Change synthesis type and operations into enum type. * Add experimental conditions query class. * Only keep one API endpoint for all recipe calls. * Fix ellipsis function for removing heading characters. * Remove debugging print statement. * Return total number of hits. * Add adaptor that converts synpro collections. * Allow min/max value to be set as None. * handle cases when aggregate returns zero docs * Let mongodb return all highlights and handle char limits by ourselves * Use str for targets_formula/precursors_formula * Fix mypy and comment ensure_index calls * Add docstrings and comments to data adaptors. * Fix synthesis query dict that returns weird range query results * Add test for synthesis recipes adaptor * Add tests for synthesis recipes rester. --- src/mp_api/routes/synthesis/client.py | 59 ++- src/mp_api/routes/synthesis/client.pyi | 24 +- src/mp_api/routes/synthesis/data_adaptor.py | 4 +- .../routes/synthesis/data_adaptor_synpro.py | 17 +- src/mp_api/routes/synthesis/models/core.py | 5 + .../routes/synthesis/query_operators.py | 114 +++-- test_files/synth_doc_adaptor.json | 237 +++++++++ test_files/synth_doc_adaptor_synpro.json | 477 ++++++++++++++++++ tests/synthesis/test_adaptor.py | 21 + tests/synthesis/test_adaptor_synpro.py | 98 ++++ tests/synthesis/test_client.py | 148 +++++- tests/synthesis/test_query_operators.py | 12 +- 12 files changed, 1128 insertions(+), 88 deletions(-) create mode 100644 test_files/synth_doc_adaptor.json create mode 100644 test_files/synth_doc_adaptor_synpro.json create mode 100644 tests/synthesis/test_adaptor.py create mode 100644 tests/synthesis/test_adaptor_synpro.py diff --git a/src/mp_api/routes/synthesis/client.py b/src/mp_api/routes/synthesis/client.py index 81c0c3e0..13dedb52 100644 --- a/src/mp_api/routes/synthesis/client.py +++ b/src/mp_api/routes/synthesis/client.py @@ -1,26 +1,71 @@ +from typing import List, Optional + from mp_api.core.client import BaseRester, MPRestError -from mp_api.routes.synthesis.models import SynthesisSearchResultModel -from typing import List +from mp_api.routes.synthesis.models import SynthesisSearchResultModel, SynthesisTypeEnum, OperationTypeEnum class SynthesisRester(BaseRester): - suffix = "synthesis" document_model = SynthesisSearchResultModel # type: ignore - def search_synthesis_text(self, keywords: List[str]): + def search_synthesis_text( + self, + keywords: Optional[List[str]] = None, + synthesis_type: Optional[List[SynthesisTypeEnum]] = None, + target_formula: Optional[str] = None, + precursor_formula: Optional[str] = None, + operations: Optional[List[OperationTypeEnum]] = None, + condition_heating_temperature_min: Optional[float] = None, + condition_heating_temperature_max: Optional[float] = None, + condition_heating_time_min: Optional[float] = None, + condition_heating_time_max: Optional[float] = None, + condition_heating_atmosphere: Optional[List[str]] = None, + condition_mixing_device: Optional[List[str]] = None, + condition_mixing_media: Optional[List[str]] = None, + ): """ Search synthesis recipe text. Arguments: - keywords (List[str]): List of search keywords + keywords (Optional[List[str]]): List of string keywords to search synthesis paragraph text with + synthesis_type (Optional[List[SynthesisTypeEnum]]): Type of synthesis to include + target_formula (Optional[str]): Chemical formula of the target material + precursor_formula (Optional[str]): Chemical formula of the precursor material + operations (Optional[List[OperationTypeEnum]]): List of operations that syntheses must have + condition_heating_temperature_min (Optional[float]): Minimal heating temperature + condition_heating_temperature_max (Optional[float]): Maximal heating temperature + condition_heating_time_min (Optional[float]): Minimal heating time + condition_heating_time_max (Optional[float]): Maximal heating time + condition_heating_atmosphere (Optional[List[str]]): Required heating atmosphere, such as "air", "argon" + condition_mixing_device (Optional[List[str]]): Required mixing device, such as "zirconia", "Al2O3". + condition_mixing_media (Optional[List[str]]): Required mixing media, such as "alcohol", "water" Returns: synthesis_docs ([SynthesisDoc]): List of synthesis documents """ - keyword_string = ",".join(keywords) + # Turn None and empty list into None + keywords = keywords or None + synthesis_type = synthesis_type or None + operations = operations or None + condition_heating_atmosphere = condition_heating_atmosphere or None + condition_mixing_device = condition_mixing_device or None + condition_mixing_media = condition_mixing_media or None synthesis_docs = self._query_resource( - criteria={"keywords": keyword_string}, use_document_model=True, + criteria={ + "keywords": keywords, + "synthesis_type": synthesis_type, + "target_formula": target_formula, + "precursor_formula": precursor_formula, + "operations": operations, + "condition_heating_temperature_min": condition_heating_temperature_min, + "condition_heating_temperature_max": condition_heating_temperature_max, + "condition_heating_time_min": condition_heating_time_min, + "condition_heating_time_max": condition_heating_time_max, + "condition_heating_atmosphere": condition_heating_atmosphere, + "condition_mixing_device": condition_mixing_device, + "condition_mixing_media": condition_mixing_media, + }, + use_document_model=True, ).get("data", None) if synthesis_docs is None: diff --git a/src/mp_api/routes/synthesis/client.pyi b/src/mp_api/routes/synthesis/client.pyi index 6522d4a5..43a5d326 100644 --- a/src/mp_api/routes/synthesis/client.pyi +++ b/src/mp_api/routes/synthesis/client.pyi @@ -1,12 +1,24 @@ from typing import List, Optional -from mp_api.routes.synthesis.models import SynthesisSearchResultModel + +from mp_api.routes.synthesis.models import ( + SynthesisTypeEnum, OperationTypeEnum, SynthesisSearchResultModel +) class SynthesisRester: - def get_document_by_id( - self, - document_id: str, - fields: Optional[List[str]] = None, - monty_decode: bool = True, + def search_synthesis_text( + self, + keywords: Optional[List[str]] = None, + synthesis_type: Optional[List[SynthesisTypeEnum]] = None, + target_formula: Optional[str] = None, + precursor_formula: Optional[str] = None, + operations: Optional[List[OperationTypeEnum]] = None, + condition_heating_temperature_min: Optional[float] = None, + condition_heating_temperature_max: Optional[float] = None, + condition_heating_time_min: Optional[float] = None, + condition_heating_time_max: Optional[float] = None, + condition_heating_atmosphere: Optional[List[str]] = None, + condition_mixing_device: Optional[List[str]] = None, + condition_mixing_media: Optional[List[str]] = None, ) -> SynthesisSearchResultModel: ... diff --git a/src/mp_api/routes/synthesis/data_adaptor.py b/src/mp_api/routes/synthesis/data_adaptor.py index 2e0e4091..78eabfa6 100644 --- a/src/mp_api/routes/synthesis/data_adaptor.py +++ b/src/mp_api/routes/synthesis/data_adaptor.py @@ -31,7 +31,7 @@ def convert_recipe(recipe): print('Cannot process materials: ', targets_string) raise - recipe['targets_formula'] = [json.loads(x.to_json()) for x in target_comps] + recipe['targets_formula'] = [x.formula for x in target_comps] recipe['targets_formula_s'] = [x.reduced_formula for x in target_comps] del recipe['targets_string'] @@ -43,7 +43,7 @@ def convert_recipe(recipe): except (CompositionError, ValueError): print('Cannot process precursor material: ', precursor['material_formula']) continue - recipe['precursors_formula'].append(json.loads(comp.to_json())) + recipe['precursors_formula'].append(comp.formula) recipe['precursors_formula_s'].append(comp.reduced_formula) return recipe diff --git a/src/mp_api/routes/synthesis/data_adaptor_synpro.py b/src/mp_api/routes/synthesis/data_adaptor_synpro.py index 6d8ffd78..51379f31 100644 --- a/src/mp_api/routes/synthesis/data_adaptor_synpro.py +++ b/src/mp_api/routes/synthesis/data_adaptor_synpro.py @@ -9,7 +9,6 @@ from pymatgen.core.composition import CompositionError, Composition from pymongo import MongoClient -from tqdm import tqdm def convert_value(val): @@ -74,7 +73,7 @@ def convert_material(mat): 'amounts_vars': {x: convert_mat_value(y) for x, y in mat['amounts_vars'].items()}, 'elements_vars': {x: [str(z.strip()) for z in y if z.strip()] for x, y in mat['elements_vars'].items()}, 'additives': [str(x.strip()) for x in mat['additives'] if x.strip()], - 'oxygen_deficiency': str(mat['oxygen_deficiency']) or None + 'oxygen_deficiency': str(mat['oxygen_deficiency']) if mat['oxygen_deficiency'] else None, } @@ -84,13 +83,13 @@ def get_material_formula(mat): formula = re.sub(r'·\d*H2O', '', formula) try: return Composition(formula) - except CompositionError: + except (CompositionError, ValueError): q = None for comp in mat['composition']: if q is None: - q = Composition({x: float(y) for x, y in comp['elements'].items()}) + q = Composition({x: float(y) for x, y in comp['elements'].items()}) * float(comp['amount']) else: - q += Composition({x: float(y) for x, y in comp['elements'].items()}) + q += Composition({x: float(y) for x, y in comp['elements'].items()}) * float(comp['amount']) return q @@ -135,11 +134,11 @@ def convert_one(doc): 'material': str(x['material']) } for x in doc['reaction']['right']], }, - 'targets_formula': [json.loads(x.to_json()) for x in target_comps(doc)], + 'targets_formula': [x.formula for x in target_comps(doc)], 'target': convert_material(doc['target']), 'targets_formula_s': [x.reduced_formula for x in target_comps(doc)], 'precursors_formula_s': [x.reduced_formula for x in precursor_comps(doc)], - 'precursors_formula': [json.loads(x.to_json()) for x in precursor_comps(doc)], + 'precursors_formula': [x.formula for x in precursor_comps(doc)], 'precursors': [convert_material(x) for x in doc['precursors']], 'operations': [convert_op(x) for x in doc.get('operations', [])] } @@ -154,9 +153,9 @@ def main(): synthesis_recipes = [] - for item in tqdm(synpro_db.Reactions_Solid_State.find()): + for item in synpro_db.Reactions_Solid_State.find(): synthesis_recipes.append(convert_one(item)) - for item in tqdm(synpro_db.Reactions_Sol_Gel.find()): + for item in synpro_db.Reactions_Sol_Gel.find(): synthesis_recipes.append(convert_one(item)) with open('synthesis_recipes.json', 'w') as f: diff --git a/src/mp_api/routes/synthesis/models/core.py b/src/mp_api/routes/synthesis/models/core.py index ebd40e02..2544f23a 100644 --- a/src/mp_api/routes/synthesis/models/core.py +++ b/src/mp_api/routes/synthesis/models/core.py @@ -59,6 +59,11 @@ class SynthesisRecipe(BaseModel): class SynthesisSearchResultModel(SynthesisRecipe): + """ + Model for a document containing synthesis recipes + data and additional keyword search results + """ + search_score: Optional[float] = Field( None, description="Search score.", ) diff --git a/src/mp_api/routes/synthesis/query_operators.py b/src/mp_api/routes/synthesis/query_operators.py index 9dcd0126..92e57e4f 100644 --- a/src/mp_api/routes/synthesis/query_operators.py +++ b/src/mp_api/routes/synthesis/query_operators.py @@ -16,49 +16,49 @@ class SynthesisSearchQuery(QueryOperator): """ def query( - self, - keywords: Optional[str] = Query( - None, - description="Comma delimited string keywords to search synthesis paragraph text with.", - ), - synthesis_type: Optional[List[SynthesisTypeEnum]] = Query( - None, description="Type of synthesis to include." - ), - target_formula: Optional[str] = Query( - None, description="Chemical formula of the target material." - ), - precursor_formula: Optional[str] = Query( - None, description="Chemical formula of the precursor material." - ), - operations: Optional[List[OperationTypeEnum]] = Query( - None, description="List of operations that syntheses must have." - ), - condition_heating_temperature_min: Optional[float] = Query( - None, description="Minimal heating temperature." - ), - condition_heating_temperature_max: Optional[float] = Query( - None, description="Maximal heating temperature." - ), - condition_heating_time_min: Optional[float] = Query( - None, description="Minimal heating time." - ), - condition_heating_time_max: Optional[float] = Query( - None, description="Maximal heating time." - ), - condition_heating_atmosphere: Optional[List[str]] = Query( - None, description='Required heating atmosphere, such as "air", "argon".' - ), - condition_mixing_device: Optional[List[str]] = Query( - None, description='Required mixing device, such as "zirconia", "Al2O3".' - ), - condition_mixing_media: Optional[List[str]] = Query( - None, description='Required mixing media, such as "alcohol", "water".' - ), - skip: int = Query(0, description="Number of entries to skip in the search"), - limit: int = Query( - 10, - description="Max number of entries to return in a single query. Limited to 10.", - ), + self, + keywords: Optional[str] = Query( + None, + description="Comma delimited string keywords to search synthesis paragraph text with.", + ), + synthesis_type: Optional[List[SynthesisTypeEnum]] = Query( + None, description="Type of synthesis to include." + ), + target_formula: Optional[str] = Query( + None, description="Chemical formula of the target material." + ), + precursor_formula: Optional[str] = Query( + None, description="Chemical formula of the precursor material." + ), + operations: Optional[List[OperationTypeEnum]] = Query( + None, description="List of operations that syntheses must have." + ), + condition_heating_temperature_min: Optional[float] = Query( + None, description="Minimal heating temperature." + ), + condition_heating_temperature_max: Optional[float] = Query( + None, description="Maximal heating temperature." + ), + condition_heating_time_min: Optional[float] = Query( + None, description="Minimal heating time." + ), + condition_heating_time_max: Optional[float] = Query( + None, description="Maximal heating time." + ), + condition_heating_atmosphere: Optional[List[str]] = Query( + None, description='Required heating atmosphere, such as "air", "argon".' + ), + condition_mixing_device: Optional[List[str]] = Query( + None, description='Required mixing device, such as "zirconia", "Al2O3".' + ), + condition_mixing_media: Optional[List[str]] = Query( + None, description='Required mixing media, such as "alcohol", "water".' + ), + skip: int = Query(0, description="Number of entries to skip in the search"), + limit: int = Query( + 10, + description="Max number of entries to return in a single query. Limited to 10.", + ), ): project_dict: Dict[str, Union[Dict, int]] = { "_id": 0, @@ -131,21 +131,25 @@ def query( if operations: crit["operations.type"] = {"$all": operations} if condition_heating_temperature_min is not None: - crit["operations.conditions.heating_temperature.values"] = { - "$gte": condition_heating_temperature_min - } + field = "operations.conditions.heating_temperature.values" + if field not in crit: + crit[field] = {"$elemMatch": {}} + crit[field]["$elemMatch"]["$gte"] = condition_heating_temperature_min if condition_heating_temperature_max is not None: - crit["operations.conditions.heating_temperature.values"] = { - "$lte": condition_heating_temperature_max - } + field = "operations.conditions.heating_temperature.values" + if field not in crit: + crit[field] = {"$elemMatch": {}} + crit[field]["$elemMatch"]["$lte"] = condition_heating_temperature_max if condition_heating_time_min is not None: - crit["operations.conditions.heating_time.values"] = { - "$gte": condition_heating_time_min - } + field = "operations.conditions.heating_time.values" + if field not in crit: + crit[field] = {"$elemMatch": {}} + crit[field]["$elemMatch"]["$gte"] = condition_heating_time_min if condition_heating_time_max is not None: - crit["operations.conditions.heating_time.values"] = { - "$lte": condition_heating_time_max - } + field = "operations.conditions.heating_time.values" + if field not in crit: + crit[field] = {"$elemMatch": {}} + crit[field]["$elemMatch"]["$lte"] = condition_heating_time_max if condition_heating_atmosphere: crit["operations.conditions.heating_atmosphere"] = { "$all": condition_heating_atmosphere diff --git a/test_files/synth_doc_adaptor.json b/test_files/synth_doc_adaptor.json new file mode 100644 index 00000000..70ddccd8 --- /dev/null +++ b/test_files/synth_doc_adaptor.json @@ -0,0 +1,237 @@ +{ + "src": { + "synthesis_type": "solid-state", + "targets_string": [ + "Li4Ti5O12" + ], + "reaction": { + "element_substitution": {}, + "left_side": [ + { + "material": "TiO2", + "amount": "5" + }, + { + "material": "Li2CO3", + "amount": "2" + } + ], + "right_side": [ + { + "material": "Li4Ti5O12", + "amount": "1" + }, + { + "material": "CO2", + "amount": "2" + } + ] + }, + "reaction_string": "2 Li2CO3 + 5 TiO2 == 1 Li4Ti5O12 + 2 CO2", + "doi": "10.1149/1.1383553", + "operations": [ + { + "type": "StartingSynthesis", + "token": "fabricated", + "conditions": { + "heating_temperature": null, + "heating_time": null, + "heating_atmosphere": null, + "mixing_device": null, + "mixing_media": null + } + } + ], + "target": { + "material_string": "Li4Ti5O12", + "material_name": "", + "material_formula": "Li4Ti5O12", + "phase": null, + "additives": [], + "oxygen_deficiency": null, + "is_acronym": false, + "amounts_vars": {}, + "elements_vars": {}, + "composition": [ + { + "formula": "Li4Ti5O12", + "amount": "1", + "elements": { + "Li": "4", + "Ti": "5", + "O": "12" + } + } + ] + }, + "precursors": [ + { + "material_string": "TiO2", + "material_name": "", + "material_formula": "TiO2", + "phase": null, + "additives": [], + "oxygen_deficiency": null, + "is_acronym": false, + "amounts_vars": {}, + "elements_vars": {}, + "composition": [ + { + "formula": "TiO2", + "amount": "1", + "elements": { + "Ti": "1", + "O": "2" + } + } + ] + }, + { + "material_string": "Li2CO3", + "material_name": "", + "material_formula": "Li2CO3", + "phase": null, + "additives": [], + "oxygen_deficiency": null, + "is_acronym": false, + "amounts_vars": {}, + "elements_vars": {}, + "composition": [ + { + "formula": "Li2CO3", + "amount": "1", + "elements": { + "Li": "2", + "C": "1", + "O": "3" + } + } + ] + } + ], + "paragraph_string": "High surface area activated carbons were obtained <...> ce of these materials will be published elsewhere." + }, + "product": { + "synthesis_type": "solid-state", + "reaction": { + "element_substitution": {}, + "left_side": [ + { + "material": "TiO2", + "amount": "5" + }, + { + "material": "Li2CO3", + "amount": "2" + } + ], + "right_side": [ + { + "material": "Li4Ti5O12", + "amount": "1" + }, + { + "material": "CO2", + "amount": "2" + } + ] + }, + "reaction_string": "2 Li2CO3 + 5 TiO2 == 1 Li4Ti5O12 + 2 CO2", + "doi": "10.1149/1.1383553", + "operations": [ + { + "type": "StartingSynthesis", + "token": "fabricated", + "conditions": { + "heating_temperature": null, + "heating_time": null, + "heating_atmosphere": null, + "mixing_device": null, + "mixing_media": null + } + } + ], + "target": { + "material_string": "Li4Ti5O12", + "material_name": "", + "material_formula": "Li4Ti5O12", + "phase": null, + "additives": [], + "oxygen_deficiency": null, + "is_acronym": false, + "amounts_vars": {}, + "elements_vars": {}, + "composition": [ + { + "formula": "Li4Ti5O12", + "amount": "1", + "elements": { + "Li": "4", + "Ti": "5", + "O": "12" + } + } + ] + }, + "precursors": [ + { + "material_string": "TiO2", + "material_name": "", + "material_formula": "TiO2", + "phase": null, + "additives": [], + "oxygen_deficiency": null, + "is_acronym": false, + "amounts_vars": {}, + "elements_vars": {}, + "composition": [ + { + "formula": "TiO2", + "amount": "1", + "elements": { + "Ti": "1", + "O": "2" + } + } + ] + }, + { + "material_string": "Li2CO3", + "material_name": "", + "material_formula": "Li2CO3", + "phase": null, + "additives": [], + "oxygen_deficiency": null, + "is_acronym": false, + "amounts_vars": {}, + "elements_vars": {}, + "composition": [ + { + "formula": "Li2CO3", + "amount": "1", + "elements": { + "Li": "2", + "C": "1", + "O": "3" + } + } + ] + } + ], + "paragraph_string": "High surface area activated carbons were obtained <...> ce of these materials will be published elsewhere.", + "targets_formula": [ + "Li4 Ti5 O12" + ], + "targets_formula_s": [ + "Li4Ti5O12" + ], + "precursors_formula": [ + "Ti1 O2", + "Li2 C1 O3" + ], + "precursors_formula_s": [ + "TiO2", + "Li2CO3" + ] + } +} \ No newline at end of file diff --git a/test_files/synth_doc_adaptor_synpro.json b/test_files/synth_doc_adaptor_synpro.json new file mode 100644 index 00000000..568149c4 --- /dev/null +++ b/test_files/synth_doc_adaptor_synpro.json @@ -0,0 +1,477 @@ +{ + "src": { + "_id": "ID", + "synthesis_type": "solid-state", + "targets_string": [ + "Nb9PO25" + ], + "reaction": { + "left": [ + { + "material": "Nb2O5", + "amount": "4.5" + }, + { + "material": "NH4H2PO4", + "amount": "1" + } + ], + "right": [ + { + "material": "PNb9O25", + "amount": "1" + }, + { + "material": "H2O", + "amount": "1.5" + }, + { + "material": "NH3", + "amount": "1" + } + ], + "element_substitution": {}, + "thermo": [ + { + "amts_vars": null, + "reaction": "4.5 Nb2O5 + 1 H6N1O4P1 == 1.0 Nb9O25P1 + 1.5 H2O1 + 1 H3N1", + "dGrxn": { + "2000": -0.0405 + }, + "target": "Nb9O25P1" + } + ] + }, + "reaction_string": "1 NH4H2PO4 + 4.5 Nb2O5 == 1 PNb9O25 + 1.5 H2O + 1 NH3", + "doi": [ + "10.1149/1.1455647" + ], + "ext_abstract": [ + "Among the large family of Wadsley-Roth-type phases...", + "...", + "... of V5+ to V2+ (in tetrahedral coordination) takes place." + ], + "ext_paragraph": [ + "PNb9O25 was easily obtained by a solid-state reaction in air...", + "...", + "The program FullProf was used for crystal structure determination from the Rietveld method." + ], + "data_per_sentence": [ + "OTHER DATA" + ], + "other_materials": [ + { + "material_string": "ammonium phosphate", + "material_name": "ammonium phosphate", + "material_formula": "(NH4)3PO4", + "phase": "", + "additives": [], + "oxygen_deficiency": null, + "is_acronym": false, + "amounts_vars": {}, + "elements_vars": {}, + "composition": [ + { + "formula": "(NH4)3PO4", + "amount": "1", + "elements": { + "N": "3", + "H": "12", + "P": "1", + "O": "4" + }, + "species": { + "NH4": "3", + "PO4": "1" + } + } + ] + }, + { + "material_string": "water", + "material_name": "water", + "material_formula": "H2O", + "phase": null, + "additives": [], + "oxygen_deficiency": null, + "is_acronym": false, + "amounts_vars": {}, + "elements_vars": {}, + "composition": [ + { + "formula": "H2O", + "amount": "1", + "elements": { + "H": "2", + "O": "1" + }, + "species": { + "H2O": "1" + } + } + ] + } + ], + "operations": [ + { + "string": "heated", + "i": [ + 1, + 4 + ], + "type": "HeatingOperation", + "attributes": { + "temperature": [ + { + "min": null, + "max": 350.0, + "values": [ + 350.0 + ], + "tok_ids": [ + 19 + ], + "units": "°C" + } + ], + "time": [ + { + "max": 3.0, + "min": 3.0, + "values": [ + 3.0 + ], + "tok_ids": [ + 23 + ], + "units": "h" + } + ], + "environment": [ + "", + "" + ] + } + } + ], + "target": { + "material_string": "PNb9O25", + "material_name": "", + "material_formula": "PNb9O25", + "phase": "", + "additives": [], + "oxygen_deficiency": null, + "is_acronym": false, + "amounts_vars": {}, + "elements_vars": {}, + "composition": [ + { + "formula": "PNb9O25", + "amount": "1", + "elements": { + "P": "1", + "Nb": "9", + "O": "25" + }, + "species": { + "P": "1", + "Nb": "9", + "O": "25" + }, + "valence": [ + { + "valence": { + "Nb": 5.0, + "P": 5.0, + "O": -2.0 + }, + "amounts_vars": [ + {} + ], + "elements_vars": {} + } + ] + } + ], + "thermo": [ + { + "interpolation": "1.0 Nb9PO25", + "mp_ids": [ + "mp-17677" + ], + "Hf": -3.0321, + "Hd": 0.0288, + "T": { + "2000": { + "dGf": -1.8578, + "dGd": 0.0429 + } + }, + "formula": "Nb9O25P1", + "amts_vars": null + } + ] + }, + "precursors": [ + { + "material_string": "Nb2O5", + "material_name": "", + "material_formula": "Nb2O5", + "phase": "", + "additives": [], + "oxygen_deficiency": null, + "is_acronym": false, + "amounts_vars": {}, + "elements_vars": {}, + "composition": [ + { + "formula": "Nb2O5", + "amount": "1", + "elements": { + "Nb": "2", + "O": "5" + }, + "species": { + "Nb": "2", + "O": "5" + }, + "valence": [ + { + "valence": { + "Nb": 5.0, + "O": -2.0 + }, + "amounts_vars": [ + {} + ], + "elements_vars": {} + } + ] + } + ], + "thermo": [ + { + "interpolation": "1.0 Nb2O5", + "mp_ids": [ + "mp-1201852" + ], + "Hf": -3.0892, + "Hd": -0.1129, + "T": { + "2000": { + "dGf": -1.9428, + "dGd": -0.0777 + } + }, + "formula": "Nb2O5", + "amts_vars": null + } + ] + }, + { + "material_string": "NH4H2PO4", + "material_name": "", + "material_formula": "NH4H2PO4", + "phase": "", + "additives": [], + "oxygen_deficiency": null, + "is_acronym": false, + "amounts_vars": {}, + "elements_vars": {}, + "composition": [ + { + "formula": "NH4H2PO4", + "amount": "1", + "elements": { + "N": "1", + "H": "6", + "P": "1", + "O": "4" + }, + "species": { + "NH4": "1", + "H2PO4": "1" + }, + "valence": [ + { + "valence": { + "P": 5.0, + "H": 1.0, + "N": -3.0, + "O": -2.0 + }, + "amounts_vars": [ + {} + ], + "elements_vars": {} + } + ] + } + ], + "thermo": [ + { + "interpolation": "1.0 PH6NO4", + "mp_ids": [ + "mp-1220029" + ], + "Hf": -1.5617, + "Hd": -0.0097, + "T": { + "2000": { + "dGf": -0.2212, + "dGd": 0.1747 + } + }, + "formula": "H6N1O4P1", + "amts_vars": null + } + ] + } + ] + }, + "product": { + "doi": "['10.1149/1.1455647']", + "paragraph_string": "PNb9O25 was easily obtained by a solid-state reaction in air... ... The program FullProf was used for crystal structure determination from the Rietveld method.", + "synthesis_type": "solid-state", + "reaction_string": "1 NH4H2PO4 + 4.5 Nb2O5 == 1 PNb9O25 + 1.5 H2O + 1 NH3", + "reaction": { + "left_side": [ + { + "amount": "4.5", + "material": "Nb2O5" + }, + { + "amount": "1", + "material": "NH4H2PO4" + } + ], + "right_side": [ + { + "amount": "1", + "material": "PNb9O25" + }, + { + "amount": "1.5", + "material": "H2O" + }, + { + "amount": "1", + "material": "NH3" + } + ] + }, + "targets_formula": [ + "Nb9 P1 O25" + ], + "target": { + "material_string": "PNb9O25", + "material_name": "", + "material_formula": "PNb9O25", + "phase": null, + "is_acronym": false, + "composition": [ + { + "formula": "PNb9O25", + "amount": "1", + "elements": { + "P": "1", + "Nb": "9", + "O": "25" + } + } + ], + "amounts_vars": {}, + "elements_vars": {}, + "additives": [], + "oxygen_deficiency": null + }, + "targets_formula_s": [ + "Nb9PO25" + ], + "precursors_formula_s": [ + "Nb2O5", + "PH6NO4" + ], + "precursors_formula": [ + "Nb2 O5", + "P1 H6 N1 O4" + ], + "precursors": [ + { + "material_string": "Nb2O5", + "material_name": "", + "material_formula": "Nb2O5", + "phase": null, + "is_acronym": false, + "composition": [ + { + "formula": "Nb2O5", + "amount": "1", + "elements": { + "Nb": "2", + "O": "5" + } + } + ], + "amounts_vars": {}, + "elements_vars": {}, + "additives": [], + "oxygen_deficiency": null + }, + { + "material_string": "NH4H2PO4", + "material_name": "", + "material_formula": "NH4H2PO4", + "phase": null, + "is_acronym": false, + "composition": [ + { + "formula": "NH4H2PO4", + "amount": "1", + "elements": { + "N": "1", + "H": "6", + "P": "1", + "O": "4" + } + } + ], + "amounts_vars": {}, + "elements_vars": {}, + "additives": [], + "oxygen_deficiency": null + } + ], + "operations": [ + { + "type": "HeatingOperation", + "token": "heated", + "conditions": { + "heating_temperature": [ + { + "min_value": null, + "max_value": 350.0, + "values": [ + 350.0 + ], + "units": "\u00b0C" + } + ], + "heating_time": [ + { + "min_value": 3.0, + "max_value": 3.0, + "values": [ + 3.0 + ], + "units": "h" + } + ], + "heating_atmosphere": [], + "mixing_device": null, + "mixing_media": null + } + } + ] + } +} \ No newline at end of file diff --git a/tests/synthesis/test_adaptor.py b/tests/synthesis/test_adaptor.py new file mode 100644 index 00000000..67e3cb38 --- /dev/null +++ b/tests/synthesis/test_adaptor.py @@ -0,0 +1,21 @@ +import os +from json import load + +from pymatgen.core import Composition + +from mp_api import MAPISettings +from mp_api.routes.synthesis.data_adaptor import string2comp, convert_recipe + + +def test_string2comp(): + assert string2comp('BaTiO3') == Composition('BaTiO3') + assert string2comp('LiOH·H2O') == Composition('LiOH') + assert string2comp('TiO2·BaCO3') == Composition('TiO2') + + +def test_convert_recipe(): + with open(os.path.join(MAPISettings().test_files, "synth_doc_adaptor.json")) as file: + synth_doc = load(file) + + converted = convert_recipe(synth_doc['src']) + assert converted == synth_doc['product'] diff --git a/tests/synthesis/test_adaptor_synpro.py b/tests/synthesis/test_adaptor_synpro.py new file mode 100644 index 00000000..7d921081 --- /dev/null +++ b/tests/synthesis/test_adaptor_synpro.py @@ -0,0 +1,98 @@ +import os +from json import load + +from pymatgen.core import Composition + +from mp_api import MAPISettings +from mp_api.routes.synthesis.data_adaptor_synpro import convert_value, convert_conditions, convert_one, \ + get_material_formula + + +def test_convert_value(): + src = { + "min": None, + "max": 350.0, + "values": [350.0], + "tok_ids": [19], + "units": "°C" + } + product = { + 'min_value': None, + 'max_value': 350.0, + 'values': [350.0], + 'units': "°C", + } + assert convert_value(src) == product + + +def test_conditions(): + src = { + "temperature": [{"min": None, "max": 350.0, "values": [350.0], "tok_ids": [19], "units": "°C"}], + "time": [{"max": 3.0, "min": 3.0, "values": [3.0], "tok_ids": [23], "units": "h"}], + "environment": ["air", "O2"] + } + product = { + 'heating_temperature': [{'min_value': None, 'max_value': 350.0, 'values': [350.0], 'units': "°C", }], + 'heating_time': [{'min_value': 3.0, 'max_value': 3.0, 'values': [3.0], 'units': 'h'}], + 'heating_atmosphere': ["air", "O2"], + 'mixing_device': None, + 'mixing_media': None + } + assert convert_conditions(src, 'HeatingOperation') == product + + product = { + 'heating_temperature': [{'min_value': None, 'max_value': 350.0, 'values': [350.0], 'units': "°C", }], + 'heating_time': [{'min_value': 3.0, 'max_value': 3.0, 'values': [3.0], 'units': 'h'}], + 'heating_atmosphere': [], + 'mixing_device': 'O2', + 'mixing_media': 'air' + } + assert convert_conditions(src, 'MixingOperation') == product + + +def test_get_material_formula(): + assert get_material_formula({ + "material_formula": "NH4H2PO4", + "composition": [ + { + "formula": "NH4H2PO4", + "amount": "1", + "elements": { + "N": "1", + "H": "6", + "P": "1", + "O": "4" + }, + } + ], + }) == Composition('NH4H2PO4') + + assert get_material_formula({ + "material_formula": "TiO2-2BaCO3", + "composition": [ + { + "formula": "TiO2", + "amount": "1", + "elements": { + "Ti": "1", + "O": "2" + }, + }, + { + "formula": "BaCO3", + "amount": "2", + "elements": { + "Ba": "1", + "C": "1", + "O": "3" + }, + } + ], + }) == Composition('TiBa2C2O8') + + +def test_convert_one(): + with open(os.path.join(MAPISettings().test_files, "synth_doc_adaptor_synpro.json")) as file: + synth_doc = load(file) + + assert convert_one(synth_doc['src']) == synth_doc['product'] diff --git a/tests/synthesis/test_client.py b/tests/synthesis/test_client.py index 025e8687..128b6acd 100644 --- a/tests/synthesis/test_client.py +++ b/tests/synthesis/test_client.py @@ -1,8 +1,11 @@ +import inspect import os +from typing import List + import pytest -from mp_api.routes.synthesis.client import SynthesisRester -import inspect +from mp_api.routes.synthesis.client import SynthesisRester +from mp_api.routes.synthesis.models import SynthesisTypeEnum, SynthesisRecipe @pytest.fixture @@ -23,7 +26,6 @@ def test_client(rester): search_method = entry[1] if search_method is not None: - q = {"keywords": ["silicon"]} doc = search_method(**q)[0] @@ -31,3 +33,143 @@ def test_client(rester): assert doc.doi is not None assert doc.paragraph_string is not None assert doc.synthesis_type is not None + + +@pytest.mark.skipif( + os.environ.get("MP_API_KEY", None) is None, reason="No API key found." +) +def test_filters_keywords(rester): + search_method = None + for entry in inspect.getmembers(rester, predicate=inspect.ismethod): + if "search" in entry[0] and entry[0] != "search": + search_method = entry[1] + + if search_method is not None: + doc = search_method(keywords=["silicon"])[0] + + assert isinstance(doc.search_score, float) + highlighted = sum([x['texts'] for x in doc.highlights], []) + assert "silicon" in " ".join([x["value"] for x in highlighted]).lower() + + +@pytest.mark.skipif( + os.environ.get("MP_API_KEY", None) is None, reason="No API key found." +) +def test_filters_synthesis_type(rester): + search_method = None + for entry in inspect.getmembers(rester, predicate=inspect.ismethod): + if "search" in entry[0] and entry[0] != "search": + search_method = entry[1] + + if search_method is not None: + doc = search_method(synthesis_type=[SynthesisTypeEnum.solid_state]) + assert all(x.synthesis_type == SynthesisTypeEnum.solid_state for x in doc) + + doc = search_method(synthesis_type=[SynthesisTypeEnum.sol_gel]) + assert all(x.synthesis_type == SynthesisTypeEnum.sol_gel for x in doc) + + +@pytest.mark.skipif( + os.environ.get("MP_API_KEY", None) is None, reason="No API key found." +) +def test_filters_temperature_range(rester): + search_method = None + for entry in inspect.getmembers(rester, predicate=inspect.ismethod): + if "search" in entry[0] and entry[0] != "search": + search_method = entry[1] + + if search_method is not None: + docs: List[SynthesisRecipe] = search_method( + condition_heating_temperature_min=700, + condition_heating_temperature_max=1000, + ) + for doc in docs: + for op in doc.operations: + for temp in op.conditions.heating_temperature: + for val in temp.values: + assert 700 <= val <= 1000 + + +@pytest.mark.skipif( + os.environ.get("MP_API_KEY", None) is None, reason="No API key found." +) +def test_filters_time_range(rester): + search_method = None + for entry in inspect.getmembers(rester, predicate=inspect.ismethod): + if "search" in entry[0] and entry[0] != "search": + search_method = entry[1] + + if search_method is not None: + docs: List[SynthesisRecipe] = search_method( + condition_heating_time_min=7, + condition_heating_time_max=11, + ) + for doc in docs: + for op in doc.operations: + for temp in op.conditions.heating_time: + for val in temp.values: + assert 7 <= val <= 11 + + +@pytest.mark.skipif( + os.environ.get("MP_API_KEY", None) is None, reason="No API key found." +) +def test_filters_atmosphere(rester): + search_method = None + for entry in inspect.getmembers(rester, predicate=inspect.ismethod): + if "search" in entry[0] and entry[0] != "search": + search_method = entry[1] + + if search_method is not None: + docs: List[SynthesisRecipe] = search_method( + condition_heating_atmosphere=["air", "O2"], + ) + for doc in docs: + found = False + for op in doc.operations: + for atm in op.conditions.heating_atmosphere: + if atm in ["air", "O2"]: + found = True + assert found + + +@pytest.mark.skipif( + os.environ.get("MP_API_KEY", None) is None, reason="No API key found." +) +def test_filters_mixing_device(rester): + search_method = None + for entry in inspect.getmembers(rester, predicate=inspect.ismethod): + if "search" in entry[0] and entry[0] != "search": + search_method = entry[1] + + if search_method is not None: + docs: List[SynthesisRecipe] = search_method( + condition_mixing_device=["zirconia", "Al2O3"], + ) + for doc in docs: + found = False + for op in doc.operations: + if op.conditions.mixing_device in ["zirconia", "Al2O3"]: + found = True + assert found + + +@pytest.mark.skipif( + os.environ.get("MP_API_KEY", None) is None, reason="No API key found." +) +def test_filters_mixing_media(rester): + search_method = None + for entry in inspect.getmembers(rester, predicate=inspect.ismethod): + if "search" in entry[0] and entry[0] != "search": + search_method = entry[1] + + if search_method is not None: + docs: List[SynthesisRecipe] = search_method( + condition_mixing_media=["water", "alcohol"], + ) + for doc in docs: + found = False + for op in doc.operations: + if op.conditions.mixing_media in ["water", "alcohol"]: + found = True + assert found diff --git a/tests/synthesis/test_query_operators.py b/tests/synthesis/test_query_operators.py index 4a4ebd36..58322640 100644 --- a/tests/synthesis/test_query_operators.py +++ b/tests/synthesis/test_query_operators.py @@ -141,9 +141,9 @@ def test_synthesis_search_query(): condition_heating_time_max=5, condition_heating_temperature_min=0, condition_heating_temperature_max=5, - condition_heating_atmosphere="air", - condition_mixing_device="zirconia", - condition_mixing_media="water", + condition_heating_atmosphere=["air"], + condition_mixing_device=["zirconia"], + condition_mixing_media=["water"], skip=0, limit=10, ) @@ -163,9 +163,9 @@ def test_synthesis_search_query(): condition_heating_time_max=5, condition_heating_temperature_min=0, condition_heating_temperature_max=5, - condition_heating_atmosphere="air", - condition_mixing_device="zirconia", - condition_mixing_media="water", + condition_heating_atmosphere=["air"], + condition_mixing_device=["zirconia"], + condition_mixing_media=["water"], skip=0, limit=10, )