In [None]:
import json
from mpcontribs.client import Client, Attachment
from pathlib import Path
from flatten_dict import flatten, unflatten
from pymatgen.core import Structure

In [None]:
# MPContribs client created for the "ferroelectrics" project; later cells use it
# to delete existing contributions, (re)initialize columns and submit new data.
client = Client(project="ferroelectrics")

In [None]:
# client.update_project(update={
#     "references": [
#         {"label": "SciData", "url": "https://doi.org/10.1038/s41597-020-0407-9"},
#         {"label": "PyMatGen", "url": "https://github.com/materialsproject/pymatgen/tree/master/pymatgen/analysis/ferroelectricity"},
#         {"label": "Atomate", "url": "https://github.com/hackingmaterials/atomate/blob/master/atomate/vasp/workflows/base/ferroelectric.py"},
#         {"label": "Website", "url": "https://blondegeek.github.io/ferroelectric_search_site/"},
#         {"label": "Figshare", "url": "https://dx.doi.org/10.6084/m9.figshare.6025634"}
#     ]
# })

In [None]:
# Directory holding the two input JSON files.
# NOTE(review): the folder name matches the Figshare record id referenced in the
# project references above — presumably an unpacked download of it; confirm.
# The absolute path is machine-specific and must be adjusted when run elsewhere.
datadir = Path("/Users/patrick/Downloads/6025634")
distortions_file = datadir / "distortions.json"
workflow_data_file = datadir / "workflow_data.json"

# JSON text is UTF-8; pass the encoding explicitly instead of relying on the
# platform/locale default so the files load identically on every machine.
with distortions_file.open(encoding="utf-8") as f:
    distortions = json.load(f)

with workflow_data_file.open(encoding="utf-8") as f:
    workflow_data = json.load(f)

In [None]:
# Column configuration, keyed by the dot-flattened field name found in the
# input records. Each entry gives the target column name (where "." marks
# nesting, undone later via `unflatten`) and the unit that is appended to the
# value when it is non-empty.
# NOTE(review): `None` vs "" as unit presumably distinguishes string columns
# from dimensionless numeric ones in MPContribs — confirm against client docs.
columns = {
    source_field: {"name": column_name, "unit": unit}
    for source_field, column_name, unit in (
        ("search_id", "id|search", ""),
        ("bilbao_nonpolar_spacegroup", "bilbao.spacegroup.nonpolar", ""),
        ("bilbao_polar_spacegroup", "bilbao.spacegroup.polar", ""),
        ("distortion.dav", "distortion.dav", ""),
        ("distortion.delta", "distortion.delta", ""),
        ("distortion.dmax", "distortion.dmax", ""),
        ("distortion.s", "distortion.s", ""),
        ("nonpolar_band_gap", "nonpolar.bandgap", "eV"),
        ("nonpolar_icsd", "nonpolar.icsd", ""),
        ("nonpolar_id", "nonpolar.mpid", None),
        ("nonpolar_spacegroup", "nonpolar.spacegroup", ""),
        ("polar_band_gap", "polar.bandgap", "eV"),
        ("polar_icsd", "polar.icsd", ""),
        ("polar_id", "polar.mpid", None),
        ("polar_spacegroup", "polar.spacegroup", ""),
        ("calculated_max_distance", "distance", ""),
        ("energies_per_atom_max_spline_jumps", "energies.jumps|max", "eV/atom"),
        ("energies_per_atom_smoothness", "energies.smoothness", "eV"),
        ("polarization_change_norm", "polarization", ""),
    )
}

In [None]:
# Stage the distortion-derived parts of each contribution, keyed by
# "<nonpolar_id>_<polar_id>", so the workflow cell can look them up per pair.
contribs_distortions = {}

for distortion in distortions:
    k1, k2 = distortion["nonpolar_id"], distortion["polar_id"]
    key = f"{k1}_{k2}"
    staged = {"data": {}, "structures": [], "attachments": []}
    contribs_distortions[key] = staged

    # Flatten to dotted keys, limited to depth 2.
    flat_fields = flatten(distortion, reducer="dot", max_flatten_depth=2)
    for k, v in flat_fields.items():
        # Fields ending in "_pre" and the record's "_id" are not uploaded.
        if k.endswith("_pre") or k.startswith("_id"):
            continue

        if isinstance(v, dict):
            # Of the remaining dict values only serialized Structures are kept.
            if v.get("@class") == "Structure":
                structure = Structure.from_dict(v)
                structure.name = k
                staged["structures"].append(structure)
            continue

        if isinstance(v, list):
            continue

        # Scalar value: keep it only when a column is configured for the field,
        # appending the unit if one is set.
        conf = columns.get(k)
        if conf:
            name, unit = conf["name"], conf["unit"]
            staged["data"][name] = f"{v} {unit}" if unit else v

    # The complete raw record goes along as an attachment named "distortion".
    attm = Attachment.from_data("distortion", distortion)
    staged["attachments"].append(attm)

In [None]:
# Build one contribution per workflow record by combining the workflow's own
# fields with the distortion data staged for the same nonpolar/polar pair.
contributions = []
structure_keys = ("orig_nonpolar_structure", "orig_polar_structure")

for wf in workflow_data:
    k1, k2 = wf["nonpolar_id"], wf["polar_id"]
    key = f"{k1}_{k2}"
    # Raises KeyError if no distortion entry was staged for this pair.
    distortion = contribs_distortions[key]
    # Copy the staged containers instead of aliasing them: the appends and
    # column writes below must not leak back into `contribs_distortions`,
    # otherwise re-running this cell (or a second workflow for the same pair)
    # would accumulate duplicate structures, attachments and data.
    contrib = {
        "identifier": wf["wfid"], "formula": wf["pretty_formula"],
        "data": dict(distortion["data"]),
        "structures": list(distortion["structures"]),
        "attachments": list(distortion["attachments"]),
    }
    # Add the workflow's original structures, named after their source field.
    for k in structure_keys:
        if k in wf:
            structure = Structure.from_dict(wf[k])
            structure.name = k
            contrib["structures"].append(structure)

    for k, v in flatten(wf, reducer="dot").items():
        # Skip ids, list values and everything flattened out of the structures.
        if k.startswith(("_id", "cid")) or isinstance(v, list) or k.startswith(structure_keys):
            continue
        conf = columns.get(k)
        if conf:
            name, unit = conf["name"], conf["unit"]
            contrib["data"][name] = f"{v} {unit}" if unit else v

    # The complete raw workflow record goes along as an attachment.
    attm = Attachment.from_data("workflow", wf)
    contrib["attachments"].append(attm)
    # Column names use "." for nesting; expand them back into nested dicts.
    contrib["data"] = unflatten(contrib["data"], splitter="dot")
    contributions.append(contrib)

In [None]:
# Map each target column name to its unit, as passed to `init_columns`.
columns_map = {spec["name"]: spec["unit"] for spec in columns.values()}

# WARNING: destructive. Called without arguments —
# NOTE(review): presumably removes every existing contribution of the project; confirm before running.
client.delete_contributions()
client.init_columns(columns_map)

In [None]:
# Upload the contributions assembled above.
# NOTE(review): `ignore_dupes=True` presumably disables the client's duplicate
# check so already-known identifiers are still submitted — confirm in the docs.
client.submit_contributions(contributions, ignore_dupes=True)
# Column definitions are applied once more after the upload.
# NOTE(review): presumably to restore column order/units after submission — confirm.
client.init_columns(columns_map)

In [None]:
# client.make_public()