In [None]:
import json
import os
from pathlib import Path

from flatten_dict import flatten, unflatten
from mpcontribs.client import Client, Attachment
from pymatgen.core import Structure

In [None]:
# MPContribs client scoped to the "sandbox" project; every later cell that
# talks to the service (column setup, submit, query) goes through this object.
client = Client(project="sandbox")

In [None]:
# Location of the raw JSON export to upload. Set the MPCONTRIBS_INPUT_FILE
# environment variable to point at a different file; when it is unset the
# original default path is used, so existing runs behave as before.
input_file = Path(
    os.environ.get(
        "MPCONTRIBS_INPUT_FILE", "/Users/patrick/Downloads/sample_data.json"
    )
)

In [None]:
# Load the raw records. JSON text is UTF-8, so the encoding is pinned instead
# of relying on the platform/locale default used by a bare open().
with input_file.open(encoding="utf-8") as f:
    raw = json.load(f)

In [None]:
# Mapping from raw keys to MPContribs column names and units.
# Unit convention used below: "" marks a dimensionless number, None marks a
# string column, anything else is the physical unit.

# Sub-fields shared by every excitation group.
excitation_reorg = {
    "mu": {"field": "mu", "unit": ""},
    "spin": {"field": "spin", "unit": ""},
    "ks_diff": {"field": "ks|diff", "unit": ""},
    "hse0_ks_diff": {"field": "ks|diff", "unit": ""}, # can map to same subkey
    "shuffle": {"field": "shuffle", "unit": None},
    "in_band_transition": {"field": "transition", "unit": None},
    "missing_vbm": {"field": "VBM|missing", "unit": None},
    "initial_band": {"field": "band.initial", "unit": ""},
    "final_band": {"field": "band.final", "unit": ""},
    "inital_band_e": {"field": "band|e.initial", "unit": ""}, # typo in data!
    "final_band_e": {"field": "band|e.final", "unit": ""},
    "initial_ipr": {"field": "ipr.initial", "unit": ""},
    "final_ipr": {"field": "ipr.final", "unit": ""},
    "ipr_ratio": {"field": "ipr.ratio", "unit": ""},
}

# Top-level fields. Entries without a "unit" key are excitation groups and
# only name the root of their column; they are expanded further down.
reorg = {
    "is_complex": {"field": "complex", "unit": None}, # str
    "dopant": {"field": "dopant", "unit": None},
    "charge": {"field": "charge", "unit": ""}, # dimensionless
    "uncorrected_energy": {"field": "energy|uncorrected", "unit": "eV"},
    "chemsys": {"field": "chemsys", "unit": None},
    "space_group": {"field": "spacegroup", "unit": None},
    "point_group": {"field": "pointgroup", "unit": None},
    "relative_stability": {"field": "stability", "unit": ""},
    "spin_state": {"field": "spin", "unit": ""},
    "defect_type": {"field": "defect.type", "unit": None},
    "defect_name": {"field": "defect.name", "unit": None},
    "pbe_formability": {"field": "formability.PBE", "unit": ""},
    "hse_formability": {"field": "formability.HSE", "unit": ""},
    "kumagai_ks_correction": {"field": "correction.ks|kumagai", "unit": ""},
    "wei_ks_correction": {"field": "correction.ks|wei", "unit": ""},
    "dft_bright_up_excitation": {"field": "excitations.DFT.bright|up"},
    "dft_bright_dw_excitation": {"field": "excitations.DFT.bright|dw"},
    "dft_bright_excitation": {"field": "excitations.DFT.bright"},
    "hse0_bright_up_excitation": {"field": "excitations.HSE0.bright|up"},
    "hse0_bright_dw_excitation": {"field": "excitations.HSE0.bright|dw"},
    "hse0_bright_excitation": {"field": "excitations.HSE0.bright"},
}

# Collect the excitation groups first (snapshot), so reorg can be modified
# safely while they are replaced by one entry per excitation sub-field.
excitation_roots = {
    group_key: group_spec["field"]
    for group_key, group_spec in reorg.items()
    if "unit" not in group_spec
}

for group_key, root_field in excitation_roots.items():
    del reorg[group_key]
    for sub_key, sub_spec in excitation_reorg.items():
        reorg[f"{group_key}.{sub_key}"] = {
            "field": f"{root_field}.{sub_spec['field']}",
            "unit": sub_spec["unit"],
        }

# Column name -> unit, as passed to the client to set up the project columns.
columns = {spec["field"]: spec["unit"] for spec in reorg.values()}
client.init_columns(columns)

In [None]:
def convert(x, unit=None):
    """Format a raw value for a contribution's data section.

    Args:
        x: the raw value.
        unit: unit string for the column; a falsy value (None or "") means
            no unit is appended.

    Returns:
        "Yes"/"No" for booleans (the unit is ignored), "<x> <unit>" when a
        non-empty unit is given, otherwise ``x`` unchanged.
    """
    # Booleans are handled first so they never get a unit appended.
    if isinstance(x, bool):
        if x:
            return "Yes"
        return "No"

    if unit:
        return f"{x} {unit}"

    return x

In [None]:
# Assemble one contribution per raw record: renamed/unit-tagged data columns,
# the defect structures, and the JSON-encoded attachments.
contributions = []
structure_keys = ["initial_defect_structure", "final_defect_structure"]
attm_keys = [
    'int_eigenvalues', 'raw_eigenvalues', 'ipr', 'defect_ipr', 'raw_tdm_entry',
    'hse0_raw_eigenvalues', 'hse0_int_eigenvalues'
]
remove_keys = ["_id", "defect_dir"]
id_key = "entry_id"
formula_key = "composition"
# Top-level keys that must not end up in the "data" section.
skip_keys = structure_keys + attm_keys + remove_keys + [id_key, formula_key]


for record in raw:
    identifier = f"entry-{record[id_key]}"
    formula = record[formula_key]

    # Flatten nested dicts to dotted keys and translate each one via reorg;
    # a key is skipped when its top-level component is in skip_keys.
    data = {}
    for dotted_key, value in flatten(record, reducer="dot").items():
        top_level_key = dotted_key.split(".", 1)[0]
        if top_level_key in skip_keys:
            continue
        target = reorg[dotted_key]
        data[target["field"]] = convert(value, unit=target["unit"])

    # Each structure is named after the raw key it was read from.
    structures = []
    for structure_key in structure_keys:
        structure = Structure.from_dict(record[structure_key])
        structure.name = structure_key
        structures.append(structure)

    # Attachment payloads are stored as JSON strings in the raw record.
    attachments = [
        Attachment.from_data(attm_key, json.loads(record[attm_key]))
        for attm_key in attm_keys
    ]

    contributions.append({
        "identifier": identifier, "formula": formula,
        "data": data, "structures": structures, "attachments": attachments,
    })

len(contributions)

In [None]:
# Send the assembled contributions (data, structures, attachments) to the
# project through the MPContribs client.
client.submit_contributions(contributions)

In [None]:
# Query the project's contributions, requesting only the id, identifier and
# data fields, and display the result as this cell's output.
client.query_contributions(fields=["id", "identifier", "data"])
# Left disabled on purpose; uncomment to call delete_contributions on the client.
# client.delete_contributions()