Skip to content

Commit

Permalink
Fixes the problem of weird synthesis search results when querying for…
Browse files Browse the repository at this point in the history
… a range (#324)

* Add new synthesis recipes schema.

* [WIP] add models to synthesis recipes and implement query classes

* [WIP] add query class for synthesis-type, experimental operations, and paragraph keywords (half-completed).

* [WIP] add script to convert dataset from the public repo to MP database.

* Change synthesis type and operations into enum type.

* Add experimental conditions query class.

* Only keep one API endpoint for all recipe calls.

* Fix ellipsis function for removing leading characters.

* Remove debugging print statement.

* Return total number of hits.

* Add adaptor that converts synpro collections.

* Allow min/max value to be set as None.

* Handle cases when the aggregate returns zero docs.

* Let MongoDB return all highlights and handle character limits ourselves.

* Use str for targets_formula/precursors_formula

* Fix mypy and comment ensure_index calls

* Add docstrings and comments to data adaptors.

* Fix synthesis query dict that returns weird range query results

* Add test for synthesis recipes adaptor

* Add tests for synthesis recipes rester.
  • Loading branch information
hhaoyan committed Jul 15, 2021
1 parent 20a74d6 commit 329740e
Show file tree
Hide file tree
Showing 12 changed files with 1,128 additions and 88 deletions.
59 changes: 52 additions & 7 deletions src/mp_api/routes/synthesis/client.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,71 @@
from typing import List, Optional

from mp_api.core.client import BaseRester, MPRestError
from mp_api.routes.synthesis.models import SynthesisSearchResultModel
from typing import List
from mp_api.routes.synthesis.models import SynthesisSearchResultModel, SynthesisTypeEnum, OperationTypeEnum


class SynthesisRester(BaseRester):

suffix = "synthesis"
document_model = SynthesisSearchResultModel # type: ignore

def search_synthesis_text(self, keywords: List[str]):
def search_synthesis_text(
self,
keywords: Optional[List[str]] = None,
synthesis_type: Optional[List[SynthesisTypeEnum]] = None,
target_formula: Optional[str] = None,
precursor_formula: Optional[str] = None,
operations: Optional[List[OperationTypeEnum]] = None,
condition_heating_temperature_min: Optional[float] = None,
condition_heating_temperature_max: Optional[float] = None,
condition_heating_time_min: Optional[float] = None,
condition_heating_time_max: Optional[float] = None,
condition_heating_atmosphere: Optional[List[str]] = None,
condition_mixing_device: Optional[List[str]] = None,
condition_mixing_media: Optional[List[str]] = None,
):
"""
Search synthesis recipe text.
Arguments:
keywords (List[str]): List of search keywords
keywords (Optional[List[str]]): List of string keywords to search synthesis paragraph text with
synthesis_type (Optional[List[SynthesisTypeEnum]]): Type of synthesis to include
target_formula (Optional[str]): Chemical formula of the target material
precursor_formula (Optional[str]): Chemical formula of the precursor material
operations (Optional[List[OperationTypeEnum]]): List of operations that syntheses must have
condition_heating_temperature_min (Optional[float]): Minimal heating temperature
condition_heating_temperature_max (Optional[float]): Maximal heating temperature
condition_heating_time_min (Optional[float]): Minimal heating time
condition_heating_time_max (Optional[float]): Maximal heating time
condition_heating_atmosphere (Optional[List[str]]): Required heating atmosphere, such as "air", "argon"
condition_mixing_device (Optional[List[str]]): Required mixing device, such as "zirconia", "Al2O3".
condition_mixing_media (Optional[List[str]]): Required mixing media, such as "alcohol", "water"
Returns:
synthesis_docs ([SynthesisDoc]): List of synthesis documents
"""

keyword_string = ",".join(keywords)
# Turn None and empty list into None
keywords = keywords or None
synthesis_type = synthesis_type or None
operations = operations or None
condition_heating_atmosphere = condition_heating_atmosphere or None
condition_mixing_device = condition_mixing_device or None
condition_mixing_media = condition_mixing_media or None

synthesis_docs = self._query_resource(
criteria={"keywords": keyword_string}, use_document_model=True,
criteria={
"keywords": keywords,
"synthesis_type": synthesis_type,
"target_formula": target_formula,
"precursor_formula": precursor_formula,
"operations": operations,
"condition_heating_temperature_min": condition_heating_temperature_min,
"condition_heating_temperature_max": condition_heating_temperature_max,
"condition_heating_time_min": condition_heating_time_min,
"condition_heating_time_max": condition_heating_time_max,
"condition_heating_atmosphere": condition_heating_atmosphere,
"condition_mixing_device": condition_mixing_device,
"condition_mixing_media": condition_mixing_media,
},
use_document_model=True,
).get("data", None)

if synthesis_docs is None:
Expand Down
24 changes: 18 additions & 6 deletions src/mp_api/routes/synthesis/client.pyi
Original file line number Diff line number Diff line change
@@ -1,12 +1,24 @@
from typing import List, Optional
from mp_api.routes.synthesis.models import SynthesisSearchResultModel

from mp_api.routes.synthesis.models import (
SynthesisTypeEnum, OperationTypeEnum, SynthesisSearchResultModel
)


class SynthesisRester:

def get_document_by_id(
self,
document_id: str,
fields: Optional[List[str]] = None,
monty_decode: bool = True,
def search_synthesis_text(
self,
keywords: Optional[List[str]] = None,
synthesis_type: Optional[List[SynthesisTypeEnum]] = None,
target_formula: Optional[str] = None,
precursor_formula: Optional[str] = None,
operations: Optional[List[OperationTypeEnum]] = None,
condition_heating_temperature_min: Optional[float] = None,
condition_heating_temperature_max: Optional[float] = None,
condition_heating_time_min: Optional[float] = None,
condition_heating_time_max: Optional[float] = None,
condition_heating_atmosphere: Optional[List[str]] = None,
condition_mixing_device: Optional[List[str]] = None,
condition_mixing_media: Optional[List[str]] = None,
) -> SynthesisSearchResultModel: ...
4 changes: 2 additions & 2 deletions src/mp_api/routes/synthesis/data_adaptor.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def convert_recipe(recipe):
print('Cannot process materials: ', targets_string)
raise

recipe['targets_formula'] = [json.loads(x.to_json()) for x in target_comps]
recipe['targets_formula'] = [x.formula for x in target_comps]
recipe['targets_formula_s'] = [x.reduced_formula for x in target_comps]
del recipe['targets_string']

Expand All @@ -43,7 +43,7 @@ def convert_recipe(recipe):
except (CompositionError, ValueError):
print('Cannot process precursor material: ', precursor['material_formula'])
continue
recipe['precursors_formula'].append(json.loads(comp.to_json()))
recipe['precursors_formula'].append(comp.formula)
recipe['precursors_formula_s'].append(comp.reduced_formula)

return recipe
Expand Down
17 changes: 8 additions & 9 deletions src/mp_api/routes/synthesis/data_adaptor_synpro.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@

from pymatgen.core.composition import CompositionError, Composition
from pymongo import MongoClient
from tqdm import tqdm


def convert_value(val):
Expand Down Expand Up @@ -74,7 +73,7 @@ def convert_material(mat):
'amounts_vars': {x: convert_mat_value(y) for x, y in mat['amounts_vars'].items()},
'elements_vars': {x: [str(z.strip()) for z in y if z.strip()] for x, y in mat['elements_vars'].items()},
'additives': [str(x.strip()) for x in mat['additives'] if x.strip()],
'oxygen_deficiency': str(mat['oxygen_deficiency']) or None
'oxygen_deficiency': str(mat['oxygen_deficiency']) if mat['oxygen_deficiency'] else None,
}


Expand All @@ -84,13 +83,13 @@ def get_material_formula(mat):
formula = re.sub(r'·\d*H2O', '', formula)
try:
return Composition(formula)
except CompositionError:
except (CompositionError, ValueError):
q = None
for comp in mat['composition']:
if q is None:
q = Composition({x: float(y) for x, y in comp['elements'].items()})
q = Composition({x: float(y) for x, y in comp['elements'].items()}) * float(comp['amount'])
else:
q += Composition({x: float(y) for x, y in comp['elements'].items()})
q += Composition({x: float(y) for x, y in comp['elements'].items()}) * float(comp['amount'])
return q


Expand Down Expand Up @@ -135,11 +134,11 @@ def convert_one(doc):
'material': str(x['material'])
} for x in doc['reaction']['right']],
},
'targets_formula': [json.loads(x.to_json()) for x in target_comps(doc)],
'targets_formula': [x.formula for x in target_comps(doc)],
'target': convert_material(doc['target']),
'targets_formula_s': [x.reduced_formula for x in target_comps(doc)],
'precursors_formula_s': [x.reduced_formula for x in precursor_comps(doc)],
'precursors_formula': [json.loads(x.to_json()) for x in precursor_comps(doc)],
'precursors_formula': [x.formula for x in precursor_comps(doc)],
'precursors': [convert_material(x) for x in doc['precursors']],
'operations': [convert_op(x) for x in doc.get('operations', [])]
}
Expand All @@ -154,9 +153,9 @@ def main():

synthesis_recipes = []

for item in tqdm(synpro_db.Reactions_Solid_State.find()):
for item in synpro_db.Reactions_Solid_State.find():
synthesis_recipes.append(convert_one(item))
for item in tqdm(synpro_db.Reactions_Sol_Gel.find()):
for item in synpro_db.Reactions_Sol_Gel.find():
synthesis_recipes.append(convert_one(item))

with open('synthesis_recipes.json', 'w') as f:
Expand Down
5 changes: 5 additions & 0 deletions src/mp_api/routes/synthesis/models/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,11 @@ class SynthesisRecipe(BaseModel):


class SynthesisSearchResultModel(SynthesisRecipe):
"""
Model for a document containing synthesis recipes
data and additional keyword search results
"""

search_score: Optional[float] = Field(
None, description="Search score.",
)
Expand Down
114 changes: 59 additions & 55 deletions src/mp_api/routes/synthesis/query_operators.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,49 +16,49 @@ class SynthesisSearchQuery(QueryOperator):
"""

def query(
self,
keywords: Optional[str] = Query(
None,
description="Comma delimited string keywords to search synthesis paragraph text with.",
),
synthesis_type: Optional[List[SynthesisTypeEnum]] = Query(
None, description="Type of synthesis to include."
),
target_formula: Optional[str] = Query(
None, description="Chemical formula of the target material."
),
precursor_formula: Optional[str] = Query(
None, description="Chemical formula of the precursor material."
),
operations: Optional[List[OperationTypeEnum]] = Query(
None, description="List of operations that syntheses must have."
),
condition_heating_temperature_min: Optional[float] = Query(
None, description="Minimal heating temperature."
),
condition_heating_temperature_max: Optional[float] = Query(
None, description="Maximal heating temperature."
),
condition_heating_time_min: Optional[float] = Query(
None, description="Minimal heating time."
),
condition_heating_time_max: Optional[float] = Query(
None, description="Maximal heating time."
),
condition_heating_atmosphere: Optional[List[str]] = Query(
None, description='Required heating atmosphere, such as "air", "argon".'
),
condition_mixing_device: Optional[List[str]] = Query(
None, description='Required mixing device, such as "zirconia", "Al2O3".'
),
condition_mixing_media: Optional[List[str]] = Query(
None, description='Required mixing media, such as "alcohol", "water".'
),
skip: int = Query(0, description="Number of entries to skip in the search"),
limit: int = Query(
10,
description="Max number of entries to return in a single query. Limited to 10.",
),
self,
keywords: Optional[str] = Query(
None,
description="Comma delimited string keywords to search synthesis paragraph text with.",
),
synthesis_type: Optional[List[SynthesisTypeEnum]] = Query(
None, description="Type of synthesis to include."
),
target_formula: Optional[str] = Query(
None, description="Chemical formula of the target material."
),
precursor_formula: Optional[str] = Query(
None, description="Chemical formula of the precursor material."
),
operations: Optional[List[OperationTypeEnum]] = Query(
None, description="List of operations that syntheses must have."
),
condition_heating_temperature_min: Optional[float] = Query(
None, description="Minimal heating temperature."
),
condition_heating_temperature_max: Optional[float] = Query(
None, description="Maximal heating temperature."
),
condition_heating_time_min: Optional[float] = Query(
None, description="Minimal heating time."
),
condition_heating_time_max: Optional[float] = Query(
None, description="Maximal heating time."
),
condition_heating_atmosphere: Optional[List[str]] = Query(
None, description='Required heating atmosphere, such as "air", "argon".'
),
condition_mixing_device: Optional[List[str]] = Query(
None, description='Required mixing device, such as "zirconia", "Al2O3".'
),
condition_mixing_media: Optional[List[str]] = Query(
None, description='Required mixing media, such as "alcohol", "water".'
),
skip: int = Query(0, description="Number of entries to skip in the search"),
limit: int = Query(
10,
description="Max number of entries to return in a single query. Limited to 10.",
),
):
project_dict: Dict[str, Union[Dict, int]] = {
"_id": 0,
Expand Down Expand Up @@ -131,21 +131,25 @@ def query(
if operations:
crit["operations.type"] = {"$all": operations}
if condition_heating_temperature_min is not None:
crit["operations.conditions.heating_temperature.values"] = {
"$gte": condition_heating_temperature_min
}
field = "operations.conditions.heating_temperature.values"
if field not in crit:
crit[field] = {"$elemMatch": {}}
crit[field]["$elemMatch"]["$gte"] = condition_heating_temperature_min
if condition_heating_temperature_max is not None:
crit["operations.conditions.heating_temperature.values"] = {
"$lte": condition_heating_temperature_max
}
field = "operations.conditions.heating_temperature.values"
if field not in crit:
crit[field] = {"$elemMatch": {}}
crit[field]["$elemMatch"]["$lte"] = condition_heating_temperature_max
if condition_heating_time_min is not None:
crit["operations.conditions.heating_time.values"] = {
"$gte": condition_heating_time_min
}
field = "operations.conditions.heating_time.values"
if field not in crit:
crit[field] = {"$elemMatch": {}}
crit[field]["$elemMatch"]["$gte"] = condition_heating_time_min
if condition_heating_time_max is not None:
crit["operations.conditions.heating_time.values"] = {
"$lte": condition_heating_time_max
}
field = "operations.conditions.heating_time.values"
if field not in crit:
crit[field] = {"$elemMatch": {}}
crit[field]["$elemMatch"]["$lte"] = condition_heating_time_max
if condition_heating_atmosphere:
crit["operations.conditions.heating_atmosphere"] = {
"$all": condition_heating_atmosphere
Expand Down
Loading

0 comments on commit 329740e

Please sign in to comment.