Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add new synthesis recipes to API. #257

Merged
merged 19 commits into from
Jun 3, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 2 additions & 24 deletions src/mp_api/routes/synthesis/client.py
Original file line number Diff line number Diff line change
@@ -1,30 +1,8 @@
from typing import List

from mp_api.core.client import BaseRester
from mp_api.routes.synthesis.models import SynthesisDoc
from mp_api.routes.synthesis.models import SynthesisRecipe


# REST client class for the "synthesis" endpoint (see ``suffix`` below).
# NOTE(review): this span comes from a rendered diff without +/- markers
# (hunk header "@@ -1,30 +1,8 @@", "2 additions & 24 deletions"), so lines of
# the old and the new version of the class appear to be interleaved here -
# confirm against the repository which lines actually survive.
class SynthesisRester(BaseRester):

suffix = "synthesis"
document_model = SynthesisDoc # type: ignore
primary_key = "task_id"

def search_synthesis_text(self, keywords: List[str]):
"""
Search synthesis recipe text.

The keywords are joined with commas into a single string and sent as the
``keywords`` criterion to the ``text_search`` sub-URL.

Arguments:
keywords (List[str]): List of search keywords

Returns:
synthesis_docs ([SynthesisDoc]): List of synthesis documents
"""

# The query takes one comma-separated string rather than a list.
keyword_string = ",".join(keywords)

synthesis_docs = self._query_resource(
criteria={"keywords": keyword_string}, suburl="text_search", use_document_model=True,
)

return synthesis_docs
# Second assignment to document_model in this span; if both assignments were
# kept in one class body this later one would win.
# NOTE(review): presumably this is the replacement for the SynthesisDoc line above.
document_model = SynthesisRecipe
12 changes: 3 additions & 9 deletions src/mp_api/routes/synthesis/client.pyi
Original file line number Diff line number Diff line change
@@ -1,14 +1,8 @@
from typing import List, Optional
from mp_api.routes.synthesis.models import SynthesisDoc
from typing import List
from mp_api.routes.synthesis.models import SynthesisRecipe


# Type stub for the synthesis REST client.
# NOTE(review): this span comes from a rendered diff without +/- markers
# (hunk header "@@ -1,14 +1,8 @@"), so the get_document_by_id signature and
# the query_text signature appear to be the old and new sides of the same
# stub - confirm against the repository.
class SynthesisRester:

def get_document_by_id(
self,
document_id: str,
fields: Optional[List[str]] = None,
monty_decode: bool = True,
version: Optional[str] = None,
) -> SynthesisDoc:
# NOTE(review): query_text accepts a list of keywords but is annotated as
# returning a single SynthesisRecipe - confirm whether a list is intended.
def query_text(self, keywords: List[str]) -> SynthesisRecipe:
...
77 changes: 77 additions & 0 deletions src/mp_api/routes/synthesis/data_adaptor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
"""
This script converts synthesis recipe data fetched directly
from the public repository of text-mined synthesis recipes
(https://github.com/CederGroupHub/text-mined-synthesis_public)
into an MP-compatible format.
"""
import json
import sys

from pymatgen.core import Composition
from pymatgen.core.composition import CompositionError


def string2comp(x):
    """Build a pymatgen Composition from a material formula string.

    Arguments:
        x (str): Material formula; several parts may be joined by '·'.

    Returns:
        Composition: Composition of the part before the first '·'.
    """
    # TODO: only the first '·'-separated part is used and the remaining
    # parts are dropped. This is not the optimal solution and should be
    # resolved in the future (e.g. by summing the Composition of every part).
    leading_part, _, _ = x.partition('·')
    return Composition(leading_part)


def convert_recipe(recipe):
"""Convert an entire synthesis recipe."""
targets_string = recipe['targets_string']
try:
target_comps = [string2comp(x) for x in targets_string]
except (CompositionError, ValueError):
print('Cannot process materials: ', targets_string)
raise

recipe['targets_formula'] = [json.loads(x.to_json()) for x in target_comps]
recipe['targets_formula_s'] = [x.reduced_formula for x in target_comps]
del recipe['targets_string']

recipe['precursors_formula'] = []
recipe['precursors_formula_s'] = []
for precursor in recipe['precursors']:
try:
comp = string2comp(precursor['material_formula'])
except (CompositionError, ValueError):
print('Cannot process precursor material: ', precursor['material_formula'])
continue
recipe['precursors_formula'].append(json.loads(comp.to_json()))
recipe['precursors_formula_s'].append(comp.reduced_formula)

return recipe


def convert_json_public_repo(src_json, dst_json):
    """
    Convert the public synthesis recipes dataset (in a json file)
    into a format as json file which can be imported into the MP database.

    Arguments:
        src_json (str): Path of the input file. It must hold a JSON object
            with the keys ``reactions`` (list of recipes) and
            ``release_date``.
        dst_json (str): Path of the output file; it receives the list of
            successfully converted recipes.

    Recipes for which ``convert_recipe`` raises CompositionError, ValueError
    or IndexError are left out of the output.
    """
    # Read as UTF-8 explicitly: formulas contain non-ASCII characters such as
    # '·', and the platform default encoding is not UTF-8 everywhere.
    with open(src_json, encoding='utf-8') as f:
        data = json.load(f)
    recipes = data['reactions']

    print('Loaded %s recipes, version %s' % (len(recipes), data['release_date']))

    converted = []
    for recipe in recipes:
        try:
            convert_recipe(recipe)
        except (CompositionError, ValueError, IndexError):
            # Best effort: skip recipes that cannot be converted.
            continue
        converted.append(recipe)

    print('Converted %d recipes' % (len(converted),))
    with open(dst_json, 'w', encoding='utf-8') as f:
        json.dump(converted, f)


# Command-line use: first argument is the source JSON, second the destination.
if __name__ == '__main__':
    convert_json_public_repo(sys.argv[1], sys.argv[2])
169 changes: 169 additions & 0 deletions src/mp_api/routes/synthesis/data_adaptor_synpro.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,169 @@
"""
This script converts synthesis recipe data fetched directly
from the Ceder Group Synthesis Mining team's MongoDB into an
MP-compatible format.
"""
import json
import os
import re

from pymatgen.core.composition import CompositionError, Composition
from pymongo import MongoClient
from tqdm import tqdm


def convert_value(val):
    """Normalise one value record from an operation's conditions.

    Arguments:
        val (dict): Record with the keys ``min``, ``max``, ``values``
            and ``units``.

    Returns:
        dict: ``min_value`` and ``max_value`` as floats (``None`` is passed
        through), ``values`` as a list of floats, ``units`` as a string.
    """
    def _maybe_float(number):
        return None if number is None else float(number)

    return {
        'min_value': _maybe_float(val['min']),
        'max_value': _maybe_float(val['max']),
        'values': list(map(float, val['values'])),
        'units': str(val['units']),
    }


def _environment_entry(environment, index):
    """Return the stripped entry at *index*, or None when absent or blank."""
    if index >= len(environment):
        return None
    return environment[index].strip() or None


def convert_conditions(cond, op_type):
    """Convert conditions dictionaries.

    Arguments:
        cond (dict): Conditions with the keys ``temperature`` and ``time``
            (lists of value records). ``environment`` (list of strings) is
            read only for heating and mixing operations.
        op_type (str): Operation type. ``'HeatingOperation'`` fills
            ``heating_atmosphere``; ``'MixingOperation'`` fills
            ``mixing_media`` (entry 0) and ``mixing_device`` (entry 1).

    Returns:
        dict: Keys ``heating_temperature``, ``heating_time``,
        ``heating_atmosphere``, ``mixing_device`` and ``mixing_media``.
        Fields that do not apply to ``op_type`` are ``[]`` or ``None``.
    """
    converted = {
        'heating_temperature': [convert_value(x) for x in cond['temperature']],
        'heating_time': [convert_value(x) for x in cond['time']],
        'heating_atmosphere': [],
        'mixing_device': None,
        'mixing_media': None,
    }
    if op_type == 'HeatingOperation':
        stripped = (x.strip() for x in cond['environment'])
        converted['heating_atmosphere'] = [x for x in stripped if x]
    elif op_type == 'MixingOperation':
        # A short environment list yields None rather than an IndexError.
        environment = cond['environment']
        converted['mixing_device'] = _environment_entry(environment, 1)
        converted['mixing_media'] = _environment_entry(environment, 0)
    return converted


# Every operation type seen by convert_op so far; printed at the end of main().
all_posible_ops = set()


def convert_op(op):
    """Convert one operation dictionary and record its type.

    Arguments:
        op (dict): Operation with the keys ``type``, ``string`` and
            ``attributes``.

    Returns:
        dict: Keys ``type``, ``token`` (taken from ``string``) and
        ``conditions`` (the converted ``attributes``).
    """
    op_type = op['type']
    all_posible_ops.add(op_type)
    return {
        'type': op_type,
        'token': op['string'],
        'conditions': convert_conditions(op['attributes'], op_type),
    }


def convert_mat_value(val):
    """Normalise one value record from a material's ``amounts_vars``.

    Arguments:
        val (dict): Record with the keys ``values``, ``min_value`` and
            ``max_value``.

    Returns:
        dict: ``values`` as a list of floats, then ``min_value`` and
        ``max_value`` as floats (``None`` is passed through).
    """
    converted = {'values': [float(item) for item in val['values']]}
    for bound in ('min_value', 'max_value'):
        raw = val[bound]
        converted[bound] = float(raw) if raw is not None else None
    return converted


def _str_or_none(value):
    """Return ``str(value)``; ``None`` and empty strings become ``None``."""
    if value is None:
        # str(None) is the truthy string 'None', so None must be handled first.
        return None
    return str(value) or None


def convert_material(mat):
    """Convert materials dictionaries.

    Arguments:
        mat (dict): Material with the keys ``material_string``,
            ``material_name``, ``material_formula``, ``phase``,
            ``is_acronym``, ``composition``, ``amounts_vars``,
            ``elements_vars``, ``additives`` and ``oxygen_deficiency``.

    Returns:
        dict: The same keys with values coerced to plain types. ``phase``
        and ``oxygen_deficiency`` are ``None`` when the input is ``None``
        or empty. Blank entries are dropped from ``elements_vars`` and
        ``additives``.
    """
    return {
        'material_string': str(mat['material_string']),
        'material_name': str(mat['material_name']),
        'material_formula': str(mat['material_formula']),
        'phase': _str_or_none(mat['phase']),
        'is_acronym': bool(mat['is_acronym']),
        'composition': [{
            'formula': str(x['formula']),
            'amount': str(x['amount']),
            'elements': {str(y): str(z) for y, z in x['elements'].items()}
        } for x in mat['composition']],
        'amounts_vars': {x: convert_mat_value(y) for x, y in mat['amounts_vars'].items()},
        'elements_vars': {
            x: [str(z.strip()) for z in y if z.strip()]
            for x, y in mat['elements_vars'].items()
        },
        'additives': [str(x.strip()) for x in mat['additives'] if x.strip()],
        'oxygen_deficiency': _str_or_none(mat['oxygen_deficiency']),
    }


def get_material_formula(mat):
    """Convert a material dictionary into a pymatgen Composition.

    The ``material_formula`` string is tried first, after removing any
    '·nH2O' suffix. If it cannot be parsed, the composition is rebuilt by
    summing the ``elements`` of every entry in ``mat['composition']``.

    Arguments:
        mat (dict): Material with the keys ``material_formula`` and
            ``composition``.

    Returns:
        Composition: The parsed or rebuilt composition.

    Raises:
        ValueError: If the formula cannot be parsed and the fallback fails,
            either because an element amount is not numeric or because
            ``composition`` is empty.
    """
    formula = re.sub(r'·\d*H2O', '', mat['material_formula'])
    try:
        return Composition(formula)
    except (CompositionError, ValueError):
        # Same exception pair as the other parsers in this file, so an
        # unparsable formula always reaches the fallback below.
        pass

    total = None
    for part in mat['composition']:
        part_comp = Composition({el: float(amt) for el, amt in part['elements'].items()})
        total = part_comp if total is None else total + part_comp

    if total is None:
        # Returning None here would later fail on `.reduced_formula`; raise
        # an error that callers already catch instead.
        raise ValueError('Cannot build a composition for material: %s' % (formula,))
    return total


def target_comps(doc):
    """Parse every target formula of a recipe into a Composition.

    Arguments:
        doc (dict): Recipe document with the key ``targets_string``
            (iterable of formula strings).

    Returns:
        list: One Composition per parsable formula. Blank strings and
        formulas raising CompositionError or ValueError are skipped.
    """
    parsed = []
    for formula in doc['targets_string']:
        if formula.strip():
            try:
                parsed.append(Composition(formula))
            except (CompositionError, ValueError):
                continue
    return parsed


def precursor_comps(doc):
    """Parse every precursor of a recipe into a Composition.

    Arguments:
        doc (dict): Recipe document with the key ``precursors``
            (iterable of material dictionaries).

    Returns:
        list: Results of ``get_material_formula`` for each precursor.
        Precursors raising CompositionError or ValueError are skipped.
    """
    comps = []
    for precursor in doc['precursors']:
        try:
            comp = get_material_formula(precursor)
        except (CompositionError, ValueError):
            continue
        comps.append(comp)
    return comps


def _convert_reaction_side(entries):
    """Stringify the amount and material of each entry on one reaction side."""
    return [
        {'amount': str(entry['amount']), 'material': str(entry['material'])}
        for entry in entries
    ]


def convert_one(doc):
    """Convert an entire synthesis recipe.

    Arguments:
        doc (dict): Source document with the keys ``doi``,
            ``ext_paragraph``, ``synthesis_type``, ``reaction_string``,
            ``reaction`` (with ``left`` and ``right``), ``targets_string``,
            ``target`` and ``precursors``. ``operations`` is optional.

    Returns:
        dict: The converted recipe, built from plain types and the JSON
        dictionaries of the parsed compositions.
    """
    # Parse the formulas once; each list feeds two output fields below.
    targets = target_comps(doc)
    precursors = precursor_comps(doc)

    return {
        'doi': str(doc['doi']),
        'paragraph_string': ' '.join(doc['ext_paragraph']),
        'synthesis_type': str(doc['synthesis_type']),
        'reaction_string': str(doc['reaction_string']),
        'reaction': {
            'left_side': _convert_reaction_side(doc['reaction']['left']),
            'right_side': _convert_reaction_side(doc['reaction']['right']),
        },
        'targets_formula': [json.loads(x.to_json()) for x in targets],
        'target': convert_material(doc['target']),
        'targets_formula_s': [x.reduced_formula for x in targets],
        'precursors_formula_s': [x.reduced_formula for x in precursors],
        'precursors_formula': [json.loads(x.to_json()) for x in precursors],
        'precursors': [convert_material(x) for x in doc['precursors']],
        'operations': [convert_op(x) for x in doc.get('operations', [])],
    }


def main():
    """
    Convert the Reactions_Solid_State/Reactions_Sol_Gel collection in
    Ceder Group database into a json file which can be imported into the MP database.

    The MongoDB URI is read from the ``SYNPRO_URI`` environment variable.
    The converted recipes are written to ``synthesis_recipes.json`` in the
    current working directory, and the operation types seen are printed.
    """
    synthesis_recipes = []

    # Use the client as a context manager so the connection is closed once
    # both collections have been read, even if a conversion fails.
    with MongoClient(os.environ['SYNPRO_URI']) as client:
        synpro_db = client.SynPro

        for item in tqdm(synpro_db.Reactions_Solid_State.find()):
            synthesis_recipes.append(convert_one(item))
        for item in tqdm(synpro_db.Reactions_Sol_Gel.find()):
            synthesis_recipes.append(convert_one(item))

    with open('synthesis_recipes.json', 'w') as f:
        json.dump(synthesis_recipes, f)

    print('All possible operation types', all_posible_ops)


if __name__ == '__main__':
    main()
24 changes: 0 additions & 24 deletions src/mp_api/routes/synthesis/models.py

This file was deleted.

18 changes: 18 additions & 0 deletions src/mp_api/routes/synthesis/models/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
from mp_api.routes.synthesis.models.core import (
SynthesisRecipe,
SynthesisTypeEnum,
)
from mp_api.routes.synthesis.models.materials import (
Component,
ExtractedMaterial,
)
from mp_api.routes.synthesis.models.operations import (
Value,
Conditions,
Operation,
OperationTypeEnum,
)
from mp_api.routes.synthesis.models.reaction import (
FormulaPart,
ReactionFormula,
)
Loading