In [None]:
# https://contribs.materialsproject.org/projects/mg_cathode_screening_2022
from datetime import datetime

import pandas as pd
from flatten_dict import flatten, unflatten
from mpcontribs.client import Client, Attachment
from pymatgen.core import Structure

In [None]:
# The API key can be supplied through the `apikey` argument.
project_name = "mg_cathode_screening_2022"
client = Client(project=project_name)

In [None]:
# Update the project title.
title_update = {"title": "Mg Cathode Screening"}
client.update_project(update=title_update)

In [None]:
# Add a legend entry for the project under its `other` field.
legend = {"Ef": "formation energy in eV"}
client.update_project(update={"other": legend})

In [None]:
# Display the project info returned by the client.
client.get_project()

In [None]:
from maggma.stores.advanced_stores import MongograntStore

In [None]:
# Mongogrant spec of the source database.
# NOTE(review): the `ro:` prefix presumably requests read-only credentials — confirm.
mongogrant_spec = "ro:mongodb07-ext.nersc.gov/fw_acr_mv"
rank_store = MongograntStore(mongogrant_spec, "rank_electrodes_2022", key="battery_id")
rank_store.connect()

In [None]:
# Number of documents in the store (no filter criteria are passed).
rank_store.count()

In [None]:
# `query` hands back a one-shot iterator; materialize it so `docs` can be
# iterated again when the cells below are re-run. `limit=100` caps the number
# of documents fetched.
docs = list(rank_store.query(limit=100))

In [None]:
columns = {
    "battery_id": {"name": "battery.id", "unit": None}, # None indicates string type
    "battery_formula": {"name": "battery.formula", "unit": None},
    "max_delta_volume": {"name": "ΔV|max", "unit": "Å³"},
    "average_voltage": {"name": "V|avg", "unit": "V"},
    "capacity_grav": {"name": "capacity.gravimetric", "unit": ""}, # emptry string indicates dimensionless number
    "capacity_vol": {"name": "capacity.volumetric", "unit": ""},
    "energy_grav": {"name": "energy.gravimetric", "unit": ""},
    "energy_vol": {"name": "energy.volumetric", "unit": ""},
    "fracA_discharge": {"name": "discharge.fracA", "unit": ""},
    "formula_charge": {"name": "charge.formula", "unit": None},
    "formula_discharge": {"name": "discharge.formula", "unit": None},
    "stability_charge": {"name": "charge.stability", "unit": ""},
    "stability_discharge": {"name": "discharge.stability", "unit": ""},
    "id_charge": {"name": "charge.id", "unit": ""},
    "id_discharge": {"name": "discharge.id", "unit": ""},
    "framework_formula": {"name": "formulas.framework", "unit": None},
    "formula_anonymous": {"name": "formulas.anonymous", "unit": None},
    "num_steps": {"name": "nsteps", "unit": ""},
    "nelements": {"name": "nelements", "unit": ""},
    "chemsys": {"name": "chemsys", "unit": None},
    "working_ion": {"name": "workingIon", "unit": None},
    "icsd_experimental": {"name": "ICSD|exp", "unit": None}, # convert bool to Yes/No string
    "hop_cutoff": {"name": "hop|cutoff", "unit": ""},
    "num_paths_found": {"name": "npaths", "unit": ""},
    "cost.total": {"name": "cost.total", "unit": ""},
    "cost.voltage": {"name": "cost.voltage", "unit": "V"},
    "cost.chg_stability": {"name": "cost.stability.charge", "unit": ""},
    "cost.dchg_stability": {"name": "cost.stability.discharge", "unit": ""},
}

In [None]:
# Column name -> unit, the mapping handed to `init_columns`.
columns_map = {spec["name"]: spec["unit"] for spec in columns.values()}
client.init_columns(columns_map)

In [None]:
contributions = []
structure_keys = ["host_structure", "matrix_supercell_structure"]
attachment_keys = ["electrode_object", "entries_for_generation", "migration_graph", "inserted_ion_coords"]


def format_column_value(value, unit):
    """Return `value` in the form stored under a contribution's `data`.

    Args:
        value: raw value taken from the flattened document (not None).
        unit: the column's unit; None or "" means no unit suffix is attached.

    Returns:
        "Yes"/"No" for booleans, "<value with 3 significant digits> <unit>"
        when a unit is set, and the unchanged value otherwise.
    """
    if isinstance(value, bool):
        return "Yes" if value else "No"
    if unit:
        return f"{value:.3g} {unit}"
    return value


for doc in docs:
    # Shallow copy so that popping keys below leaves the entries of `docs`
    # intact and this cell gives the same result when it is run again.
    doc = dict(doc)

    # Prefer the first host MP ID; fall back to the battery ID when there is none.
    host_mp_ids = doc.get("host_mp_ids")
    identifier = host_mp_ids[0] if host_mp_ids else doc["battery_id"]
    contrib = {"identifier": identifier, "data": {}, "structures": [], "attachments": []}

    # Structures are taken out of the document and attached as named Structure objects.
    for k in structure_keys:
        sdct = doc.pop(k, None)
        if sdct:
            structure = Structure.from_dict(sdct)
            structure.name = k
            contrib["structures"].append(structure)

    # Remaining bulky sub-documents become individual attachments.
    for k in attachment_keys:
        attm_dct = doc.pop(k, None)
        if attm_dct:
            attm = Attachment.from_data(k, attm_dct)
            contrib["attachments"].append(attm)

    # Drop underscore-prefixed keys and datetime values, then keep what is
    # left of the document as a "raw" attachment.
    clean = {k: v for k, v in doc.items() if not k.startswith("_") and not isinstance(v, datetime)}
    raw = Attachment.from_data("raw", clean)
    contrib["attachments"].append(raw)

    # Flatten nested keys to "a.b" form so they can be looked up via `columns`.
    flat_doc = flatten(clean, max_flatten_depth=2, reducer="dot")
    for col, config in columns.items():
        value = flat_doc.get(col)
        # Skip absent values and empty strings/containers, but keep 0, 0.0 and
        # False: those are real data (e.g. a stability of exactly 0).
        if value is None or (not isinstance(value, (bool, int, float)) and not value):
            continue
        contrib["data"][config["name"]] = format_column_value(value, config["unit"])

    # Turn the dotted column names back into a nested dict.
    contrib["data"] = unflatten(contrib["data"], splitter="dot")
    # Only keep the non-empty parts of the contribution.
    contributions.append({k: v for k, v in contrib.items() if v})

len(contributions)

In [None]:
# Spot-check the `data` payload of the first contribution.
contributions[0]["data"]

In [None]:
# Delete contributions through the client. No query is passed, so presumably
# every contribution of the project is removed — destructive; confirm before running.
client.delete_contributions()
# Initialize the columns again after the deletion.
client.init_columns(columns_map)

In [None]:
# Upload the contributions; `ignore_dupes=True` is forwarded to the client.
client.submit_contributions(contributions, ignore_dupes=True)
# Initialize the columns again after the submission.
# NOTE(review): presumably needed to restore column names/units — confirm.
client.init_columns(columns_map)

In [None]:
# NOTE query example from a different project: the fields used here are not
# among the columns initialized for this project.
# Material-screening: find materials with Bi element, spin-orbit spillage >= 0.5,
# OptB88vdW bandgaps > 0.01, energy above convex hull < 0.1 eV/atom, and SLME > 5%
query = {
    "formula__contains": "Bi",
    "data__spillage__value__gte": 0.5,
    "data__bandgaps__OptB88vdW__value__gt": 0.01,
    "data__energies__hull__value__lt": 0.1,
    "data__SLME__value__gt": 5
}
fields = [
    "identifier", "formula", "data.spillage.value", "data.bandgaps.OptB88vdW.value",
    "data.energies.hull.value", "data.SLME.value",
]
# `sort` has to be defined before the call, otherwise this cell raises a NameError.
# NOTE(review): a leading "-" presumably means descending order — confirm against the client docs.
sort = "-data.spillage.value"
contribs = client.query_contributions(query=query, fields=fields, sort=sort, paginate=True)
# Flatten the nested result records into a table (`pd` is imported at the top of the notebook).
pd.json_normalize(contribs["data"])