In [None]:
# https://contribs.materialsproject.org/projects/mg_cathode_screening_2022
from mpcontribs.client import Client, Attachment
from flatten_dict import flatten, unflatten
from pymatgen.core import Structure
from datetime import datetime

In [None]:
# Handle on the MPContribs project used by every cell below.
# An API key can be supplied through the `apikey` argument.
client = Client(
    project="mg_cathode_screening_2022",
)

## Initial MPContribs Page Set-Up

In [None]:
# Project info: set the title of the project.
client.update_project(
    update={
        "title": "Mg Cathode Screening",
    }
)

In [None]:
# Set the project's `description` field (summary of the screening method and data set).
# NOTE(review): the text below gives the number of empty host materials as both "5,853" and
# "5,863", and lists "Cs" twice among the extractable ions - confirm the intended values
# and correct the string before re-running this cell.
client.update_project(update={"description":"A computational screening approach to identify high-performance multivalent intercalation cathodes among materials that do not contain the working ion of interest has been developed, which greatly expands the search space that can be considered for material discovery (https://doi.org/10.1021/acsami.2c11733). This magnesium intercalation cathode data set of phase stability, energy density, & transport properties has been generated using these methods but applied to a larger set of materials than the original publication. 5,853 empty host materials of the 16,682 materials previously down selected based on their reducible species oxidation state were prioritized for Mg insertions based on excluding candidates which contained an extractable ion (H, Li, Na, K, Rb, Cs, Mg, Ca, Cs, Ag, Cu). Of these 5,863 attempted Mg insertion workflows, 83% resulted in at least one viable Mg site. This ultimately resulted in 4,872 Mg cathodes from which 229 ApproxNEB workflows were attempted. There were 193 unique structure types in these 229 candidates. All ApproxNEB images calculations successfully completed for 97 electrodes. This data set uses the following python objects: pymatgen.apps.battery.insertion_battery.InsertionElectrode and pymatgen.analysis.diffusion.neb.full_path_mapper.MigrationGraph"})

In [None]:
# Add a legend for the project in `other`: one human-readable description per
# contribution field / data column, keyed by the (dotted) field name.
client.update_project(update={"other": {
    # contribution-level fields
    "identifier": "Materials Project ID for empty host material",
    "formula": "Empty host material chemical formula",

    # host material
    "host.formulaAnonymous": "Empty host material anonymous chemical formula",
    "host.nelements": "Number of distinct elements in empty host material",
    "host.chemsys": "Empty host material chemical system of distinct elements sorted alphabetically and joined by dashes",

    # ICSD provenance
    "ICSD.exp": "Whether empty host material is an ICSD experimental structure",
    "ICSD.ids": "Identifiers for the Inorganic Crystal Structure Database",

    # electrode properties
    "battery.id": "Unique identifier for electrode where 'js-' distinguishes calculations from the screening development phase",
    "battery.formula": "Electrode chemical formula including the working ion fraction",
    "battery.workingIon": "Battery system working ion",
    "battery.voltage": "Average voltage in Volts across all voltage pairs",
    "battery.capacity": "Total gravimetric capacity in mAh/g of cathode active material",
    "battery.stability|charge": "Energy above hull in eV/atom, a metric of the phase stability of the charged (empty) state",
    "battery.stability|discharge": "Energy above hull in eV/atom, a metric of the phase stability of the discharged (intercalated) state",
    "battery.Δvolume": "Largest volume change in % across all voltage pairs",

    # migration graph
    "MigrationGraph.found": "Whether a migration graph mapping out connections between working ion sites could be successfully generated",
    "MigrationGraph.npaths": "The number of possible percolating pathways identified from the migration graph",

    # ApproxNEB workflow
    "ApproxNEB.uuid": "If available, identifier for ApproxNEB calculations for migration graph pathway energetics",
    "ApproxNEB.complete": "If ApproxNEB calculations are available, the fraction of calculations that were successfully completed",
}})

## Reset Data if Reuploading

In [None]:
# Fetch the current project record; as the last expression of the cell its value is displayed.
client.get_project()

In [None]:
#client.delete_contributions()

## Set Project Data Columns

In [None]:
# Map each key of the (flattened) contribution documents to its MPContribs data column.
#   "name": dotted column name; it must match the key used in the project legend (`other`)
#   "unit": unit string for quantities, "" (empty string) for dimensionless numbers,
#           None for the non-numeric columns (formulas, identifiers, yes/no flags)
columns = {
    # host material
    "formula_anonymous": {"name": "host.formulaAnonymous", "unit": None},
    "nelements": {"name": "host.nelements", "unit": ""},
    "chemsys": {"name": "host.chemsys", "unit": None},

    # ICSD provenance
    "icsd_experimental": {"name": "ICSD.exp", "unit": None},  # bool is converted to a yes/no string
    "icsd_ids": {"name": "ICSD.ids", "unit": None},

    # electrode properties
    "battery_id": {"name": "battery.id", "unit": None},
    "battery_formula": {"name": "battery.formula", "unit": None},
    "working_ion": {"name": "battery.workingIon", "unit": None},
    "average_voltage": {"name": "battery.voltage", "unit": "V"},
    "capacity_grav": {"name": "battery.capacity", "unit": "mAh/g"},
    "stability_charge": {"name": "battery.stability|charge", "unit": "eV/atom"},
    # was "battery.stability|discharged", which matched neither the legend key
    # "battery.stability|discharge" nor the naming of its "|charge" sibling
    "stability_discharge": {"name": "battery.stability|discharge", "unit": "eV/atom"},
    "max_delta_volume": {"name": "battery.Δvolume", "unit": "%"},

    # migration graph
    "migration_graph_found": {"name": "MigrationGraph.found", "unit": None},
    "num_paths_found": {"name": "MigrationGraph.npaths", "unit": ""},  # empty string indicates dimensionless number

    # ApproxNEB workflow
    "aneb_wf_uuid": {"name": "ApproxNEB.uuid", "unit": None},
    "aneb_wf_complete": {"name": "ApproxNEB.complete", "unit": ""},
}

In [None]:
# Reduce `columns` to the {column name: unit} mapping and register it with the project.
columns_map = dict((spec["name"], spec["unit"]) for spec in columns.values())
client.init_columns(columns_map)

## Get Project Data

In [None]:
from maggma.stores.advanced_stores import MongograntStore

In [None]:
# Rank store: one collection holding both the insertion electrode and the
# migration graph builder data. A cost function based on voltage and stability
# (specific to Mg) is applied for prioritizing electrodes.
# Created by custom MapBuilder:
# https://github.com/materialsproject/emmet/commit/692bdf5eff67fe1b0f48e1a13cee999af9136aae
rank_store = MongograntStore(
    "ro:mongodb07-ext.nersc.gov/fw_acr_mv",
    "rank_electrodes_2022",
    key="battery_id",
)
rank_store.connect()
print(rank_store.count())

# ApproxNEB store: raw ApproxNEB workflow data
# (note 2 of the 229 ApproxNEB workflows had unsuccessful host calculations).
aneb_store = MongograntStore(
    "ro:mongodb07-ext.nersc.gov/fw_acr_mv",
    "approx_neb",
    key="wf_uuid",
)
aneb_store.connect()
# total number of documents, then the number tagged "migration_graph_2022"
print(
    aneb_store.count(),
    aneb_store.count({"tags": {"$all": ["migration_graph_2022"]}}),
)

In [None]:
# Rebuild the {column name: unit} mapping from `columns` and (re-)initialize the project columns.
columns_map = {}
columns_map.update((spec["name"], spec["unit"]) for spec in columns.values())
client.init_columns(columns_map)

In [None]:
# Build one contribution document per electrode (battery id) by combining the
# rank store document with the matching ApproxNEB workflow document, if any.

N_PLACEHOLDER_IMAGES = 5  # placeholder entries used for a hop without stored image calculations


def _collect_hop_data(aneb_doc):
    """Assemble the per-hop ApproxNEB data of one workflow document.

    Returns a dict mapping each migration graph hop key (the values of
    `hop_combo_mapping`) to the list [start end point, *images, final end point].
    Hops without stored images get `N_PLACEHOLDER_IMAGES` placeholders of the
    form {"index": i} instead.

    Raises ValueError if a key of `hop_combo_mapping` is not "<start>+<end>",
    where both parts are integer indices into `end_points`.
    """
    images_by_hop = aneb_doc.get("images") or {}
    hop_data = {}
    for aneb_hop_key, hop_key in aneb_doc["hop_combo_mapping"].items():
        combo = aneb_hop_key.split("+")
        if len(combo) != 2:
            # previously the indices of the preceding hop were silently reused here
            raise ValueError(f"unexpected ApproxNEB hop key {aneb_hop_key!r}, expected '<start>+<end>'")
        start_idx, end_idx = int(combo[0]), int(combo[1])
        if aneb_hop_key in images_by_hop:
            images = list(images_by_hop[aneb_hop_key])
        else:
            images = [{"index": i} for i in range(N_PLACEHOLDER_IMAGES)]
        # the final end point is always appended (it used to be skipped when the
        # document had no "images" key at all), so every hop has the same layout
        hop_data[hop_key] = [
            aneb_doc["end_points"][start_idx],
            *images,
            aneb_doc["end_points"][end_idx],
        ]
    return hop_data


def _fraction_complete(hop_data):
    """Return the fraction of hop entries that carry an "output", or None if there are no entries."""
    entries = [entry for hop_entries in hop_data.values() for entry in hop_entries]
    if not entries:
        return None  # avoids a ZeroDivisionError for workflows without any hop
    return sum("output" in entry for entry in entries) / len(entries)


# `bids` is not defined anywhere else in this notebook; unless it was set manually
# (e.g. to upload a subset), fall back to every electrode in the rank store.
if "bids" not in globals():
    bids = rank_store.distinct("battery_id")

contrib_docs = []
for bid in bids:
    rank_doc = rank_store.query_one({"battery_id": bid})
    aneb_doc = aneb_store.query_one({"battery_id": bid})

    contrib_doc = {
        "battery_id": bid,
        # host structure properties
        "host_mp_ids": rank_doc["host_mp_ids"],
        "icsd_experimental": rank_doc["icsd_experimental"],
        "icsd_ids": rank_doc["host_icsd_ids"],
        "formula": rank_doc["framework_formula"],
        "formula_anonymous": rank_doc["formula_anonymous"],
        "nelements": rank_doc["nelements"],
        "chemsys": rank_doc["chemsys"],
        "composition": rank_doc["framework"],
        "structure": rank_doc["host_structure"],
        # electrode properties
        "working_ion": rank_doc["working_ion"],
        "electrode_object": rank_doc["electrode_object"],
        "battery_formula": rank_doc["battery_formula"],
        "average_voltage": rank_doc["average_voltage"],
        "capacity_grav": rank_doc["capacity_grav"],
        "stability_charge": rank_doc["stability_charge"],
        "stability_discharge": rank_doc["stability_discharge"],
        "max_delta_volume": 100 * rank_doc["max_delta_volume"],  # convert to percentage
        # migration graph properties
        "migration_graph_found": bool(rank_doc["migration_graph"]),
        "migration_graph": {
            "battery_id": bid,
            "migration_graph": rank_doc["migration_graph"],
            "hop_cutoff": rank_doc["hop_cutoff"],
            "entries_for_generation": rank_doc["entries_for_generation"],
            "working_ion_entry": rank_doc["working_ion_entry"],
        },
        "num_paths_found": rank_doc["num_paths_found"],
    }

    if aneb_doc is not None:
        aneb_wf_uuid = aneb_doc["wf_uuid"]
        # get aneb data for each hop
        aneb_wf_data = _collect_hop_data(aneb_doc)
        aneb_host = aneb_doc["host"]
        # determine fraction of aneb data available
        aneb_wf_complete = _fraction_complete(aneb_wf_data)
    else:
        aneb_wf_uuid = None
        aneb_host = None
        aneb_wf_data = None
        aneb_wf_complete = None

    # add aneb wf properties and data
    contrib_doc.update({
        "aneb_wf_uuid": aneb_wf_uuid,
        "aneb_wf_data": {
            "conversion_matrix": rank_doc["conversion_matrix"],
            "matrix_supercell_structure": rank_doc["matrix_supercell_structure"],
            "inserted_ion_coords": rank_doc["inserted_ion_coords"],
            "insert_coords_combo": rank_doc["insert_coords_combo"],
            "host_data": aneb_host,
            "hop_data": aneb_wf_data,
        },
        "aneb_wf_complete": aneb_wf_complete,
    })

    # clean-up formatting for MP Contribs: top-level bools become "yes"/"no" and
    # the list of ICSD ids becomes a comma-separated string ("" when empty)
    for k, v in list(contrib_doc.items()):
        if isinstance(v, bool):
            contrib_doc[k] = "yes" if v else "no"
        elif k == "icsd_ids" and isinstance(v, list):
            contrib_doc[k] = ",".join(str(i) for i in v)

    contrib_docs.append(contrib_doc)
print(len(contrib_docs), "original")

In [None]:
from copy import deepcopy

In [None]:
# Emit one entry per host mp-id: an entry listing several ids is deep-copied once
# per id (each copy keeping a single id), all other entries are passed through as-is.
split_docs = []
for entry in contrib_docs:
    mp_ids = entry["host_mp_ids"]
    if len(mp_ids) <= 1:
        split_docs.append(entry)
        continue
    for single_id in mp_ids:
        clone = deepcopy(entry)
        clone["host_mp_ids"] = [single_id]
        split_docs.append(clone)
contrib_docs = split_docs
print(len(contrib_docs), "split")

## Upload Contributions

In [None]:
# Convert every contribution document into the MPContribs submission format:
# identifier + formula, tabular `data` (columns defined in `columns`), the host
# structure, and JSON attachments for the large objects plus a "raw" dump of the rest.
contributions = []
structure_keys = ["structure"]
attachment_keys = ["electrode_object", "migration_graph", "aneb_wf_data"]

for source_doc in contrib_docs:
    # Work on a shallow copy: the pops below would otherwise remove keys from the
    # entries of `contrib_docs`, and re-running this cell would then silently
    # build contributions without structures and attachments.
    doc = dict(source_doc)
    # fall back to the battery id when no Materials Project id is available for the host
    identifier = doc["host_mp_ids"][0] if doc["host_mp_ids"] else doc["battery_id"]
    formula = doc["formula"]
    contrib = {"identifier": identifier, "formula": formula, "data": {}, "structures": [], "attachments": []}

    for k in structure_keys:
        sdct = doc.pop(k, None)
        if sdct:
            structure = Structure.from_dict(sdct)
            structure.name = k
            contrib["structures"].append(structure)

    for k in attachment_keys:
        # skip the dedicated attachment if its payload is not available; such an
        # entry stays in `doc` and is therefore still part of the "raw" attachment
        if k == "migration_graph" and doc["migration_graph_found"] == "no":
            continue
        if k == "aneb_wf_data" and doc["aneb_wf_uuid"] is None:
            continue
        attm_dct = doc.pop(k, None)
        if attm_dct:
            contrib["attachments"].append(Attachment.from_data(k, attm_dct))

    # everything that is left (minus private keys and datetimes) goes into the "raw" attachment
    clean = {k: v for k, v in doc.items() if not k.startswith("_") and not isinstance(v, datetime)}
    contrib["attachments"].append(Attachment.from_data("raw", clean))

    flat_doc = flatten(clean, max_flatten_depth=2, reducer="dot")
    for col, config in columns.items():
        value = flat_doc.get(col)
        # Skip only missing values. A plain truthiness test would also drop legitimate
        # zeros (0 eV/atom stability, 0 paths found, 0 completed ApproxNEB calculations).
        if value is None or value == "":
            continue
        name, unit = config["name"], config["unit"]
        contrib["data"][name] = f"{value:.3g} {unit}" if unit else value

    contrib["data"] = unflatten(contrib["data"], splitter="dot")
    # drop empty sections (e.g. no structures) before submission
    contributions.append({k: v for k, v in contrib.items() if v})

len(contributions)

In [None]:
# Spot check: display the tabular `data` section of the first contribution.
contributions[0]["data"]

In [None]:
# client.delete_contributions()
# client.init_columns(columns_map)

In [None]:
# client.submit_contributions(contributions, ignore_dupes=True)
# client.init_columns(columns_map)

## Query / Check Attachments

In [None]:
import pandas as pd
import json
import numpy as np
from pydash import get

from pymatgen.core import Structure
from pymatgen.apps.battery.insertion_battery import InsertionElectrode
from emmet.core.mobility.migrationgraph import MigrationGraphDoc
from pymatgen.analysis.diffusion.neb.full_path_mapper import MigrationGraph
from pymatgen.analysis.diffusion.utils.edge_data_from_sc import add_edge_data_from_sc

In [None]:
# Look up the contribution(s) of a single host material and tabulate the requested fields.
query = {"identifier": "mp-10093"}
fields = ["identifier", "ICSD.ids", "attachments"]
contribs = client.query_contributions(
    query=query,
    fields=fields,
    sort="identifier",
    paginate=True,
)
pd.json_normalize(contribs["data"])

In [None]:
# Show the attachments entry of the first query result.
contribs["data"][0]["attachments"]

In [None]:
# Attachment ids are hard-coded: manually update them for the data entry whose
# attachments should be checked (electrode, migration graph, ApproxNEB data - in that order).
ie_attm, mg_attm, aneb_attm = (
    client.get_attachment(attachment_id)
    for attachment_id in (
        "649e04a46b124d797b301add",
        "649e04a46b124d797b301ade",
        "649e04a46b124d797b301adf",
    )
)

In [None]:
def _attachment_json(attm):
    """Unpack an attachment and parse its content as JSON."""
    return json.loads(attm.unpack())


# Rebuild the python objects from the attachment payloads.
ie = InsertionElectrode.from_dict(_attachment_json(ie_attm))
# the migration graph attachment is parsed twice: once as the full document,
# once for the nested "migration_graph" entry only
mgd = MigrationGraphDoc.parse_obj(_attachment_json(mg_attm))
mg = MigrationGraph.from_dict(_attachment_json(mg_attm)["migration_graph"])
aneb_data = _attachment_json(aneb_attm)

In [None]:
# List the pathways found by the migration graph: for every path, print the
# start position, end position and `to_jimage` of each of its hops.
mg.assign_cost_to_graph()
for path_index, hops in mg.get_path():
    print("path", path_index)
    for hop in hops:
        print(hop["ipos"], hop["epos"], hop["to_jimage"])
    print()

In [None]:
# Attach the ApproxNEB results to the migration graph: for each hop, the supercell
# structures and the energies of all entries are stored as edge data under the keys
# "sc_structs" and "energies". The first and last structure identify the hop.
for hop_entries in aneb_data["hop_data"].values():
    sc_structs = []
    energies = []
    for entry in hop_entries:
        sc_structs.append(Structure.from_dict(entry["input_structure"]))
        energies.append(get(entry, "output.energy"))
    for edge_key, edge_values in (("sc_structs", sc_structs), ("energies", energies)):
        add_edge_data_from_sc(
            mg,
            i_sc=sc_structs[0],
            e_sc=sc_structs[-1],
            data_array=edge_values,
            key=edge_key,
        )

In [None]:
# Pathway energetics from the ApproxNEB data: the barrier of a path is the spread
# (max - min) over the energies of all its hops, printed in meV
# (factor 1000; the stored energies are presumably in eV - confirm).
for n, path in mg.get_path():
    hop_energies = [hop["energies"] for hop in path]
    energies = np.array(hop_energies, dtype=float)
    path_barrier = (energies.max() - energies.min()) * 1000
    print("path", n, "ApproxNEB barrier", round(path_barrier), "meV")