In [None]:
from mpcontribs.client import Client, Attachment
from jarvis.db.figshare import data as jarvis_db
from pymatgen.io.jarvis import JarvisAtomsAdaptor
from jarvis.core.atoms import Atoms
from flatten_dict import flatten, unflatten
import pandas as pd

In [None]:
# MPContribs client bound to the project that receives the JARVIS-DFT contributions.
client = Client(project="jarvis_dft_2023")

In [None]:
# Name of the JARVIS dataset to load through jarvis.db.figshare.data.
name = 'dft_3d' # TODO dft_2d
# `data` is iterated entry by entry further down; each entry is accessed like a dict.
data = jarvis_db(name)

In [None]:
# Maps a JARVIS entry key to the MPContribs column `name` and `unit` used for it.
# Dots in a column name become nesting levels when the contribution data is
# unflattened with splitter="dot". A falsy unit (None or '') means the value is
# stored as-is, without a unit suffix appended.
# NOTE(review): None vs '' presumably distinguishes text columns from
# dimensionless numeric columns in init_columns - confirm against MPContribs docs.
columns = {
    'jid': {'name': 'jarvis', 'unit': None},
    'Tc_supercon': {'name': 'Tc', 'unit': 'K'},
    'avg_elec_mass': {'name': 'mass|avg.elec', 'unit': 'mₑ'},
    'avg_hole_mass': {'name': 'mass|avg.hole', 'unit': 'mₑ'},
    'bulk_modulus_kv': {'name': 'moduli.bulk|voigt', 'unit': 'GPa'},
    'shear_modulus_gv': {'name': 'moduli.shear', 'unit': 'GPa'},
    'crys': {'name': 'crystal', 'unit': None},
    'density': {'name': 'density', 'unit': 'g/cm³'},
    'dfpt_piezo_max_dielectric': {'name': 'piezo|max.dielectric.total', 'unit': 'C/m²'},
    'dfpt_piezo_max_dielectric_electronic': {'name': 'piezo|max.dielectric.electronic', 'unit': 'C/m²'},
    'dfpt_piezo_max_dielectric_ionic': {'name': 'piezo|max.dielectric.ionic', 'unit': 'C/m²'},
    'dfpt_piezo_max_dij': {'name': 'piezo|max.dij', 'unit': 'C/m²'},
    'dfpt_piezo_max_eij': {'name': 'piezo|max.eij', 'unit': 'C/m²'},
    'dimensionality': {'name': 'dimensionality', 'unit': None},
    # NOTE(review): the import loop looks entry keys up here without flattening,
    # so these two dotted keys only match if the JARVIS entries expose them as
    # top-level keys with exactly this spelling - confirm against the dataset.
    'effective_masses_300K.n': {'name': 'mass|eff.n|300K', 'unit': ''},
    'effective_masses_300K.p': {'name': 'mass|eff.p|300K', 'unit': ''},
    'spg_number': {'name': 'spacegroup.number', 'unit': ''},
    'spg_symbol': {'name': 'spacegroup.symbol', 'unit': None},
    'hse_gap': {'name': 'bandgaps.HSE', 'unit': 'eV'},
    'mbj_bandgap': {'name': 'bandgaps.TBmBJ', 'unit': 'eV'},
    'optb88vdw_bandgap': {'name': 'bandgaps.OptB88vdW', 'unit': 'eV'},
    'n-powerfact': {'name': 'powerfactor.n', 'unit': 'µW/K²/m²'},
    'p-powerfact': {'name': 'powerfactor.p', 'unit': 'µW/K²/m²'},
    'slme': {'name': 'SLME', 'unit': '%'},
    'spillage': {'name': 'spillage', 'unit': ''},
    'encut': {'name': 'ENCUT', 'unit': 'eV'},
    'magmom_oszicar': {'name': 'magmoms.oszicar', 'unit': 'µB'},
    'magmom_outcar': {'name': 'magmoms.outcar', 'unit': 'µB'},
    'n-Seebeck': {'name': 'seebeck.n', 'unit': 'µV/K'},
    'p-Seebeck': {'name': 'seebeck.p', 'unit': 'µV/K'},
    'epsx': {'name': 'refractive.x', 'unit': ''},
    'epsy': {'name': 'refractive.y', 'unit': ''},
    'epsz': {'name': 'refractive.z', 'unit': ''},
    'max_ir_mode': {'name': 'IR.max', 'unit': 'cm⁻¹'},
    'min_ir_mode': {'name': 'IR.min', 'unit': 'cm⁻¹'},
    'ncond': {'name': 'Ncond', 'unit': ''},
    'nkappa': {'name': 'kappa.n', 'unit': ''},
    'pkappa': {'name': 'kappa.p', 'unit': ''},
    'exfoliation_energy': {'name': 'energies.exfoliation', 'unit': 'eV'},
    'formation_energy_peratom': {'name': 'energies.formation', 'unit': 'eV/atom'},
    'ehull': {'name': 'energies.hull', 'unit': 'eV'},
    'optb88vdw_total_energy': {'name': 'energies.OptB88vdW', 'unit': 'eV'},    
    'max_efg': {'name': 'EFG', 'unit': 'V/m²'},
    'func': {'name': 'functional', 'unit': None},
    'kpoint_length_unit': {'name': 'kpoints', 'unit': ''},
    'typ': {'name': 'type', 'unit': None},
    'nat': {'name': 'natoms', 'unit': ''},    
    'search': {'name': 'search', 'unit': None},
    'maxdiff_bz': {'name': 'maxdiff.bz', 'unit': ''},
    'maxdiff_mesh': {'name': 'maxdiff.mesh', 'unit': ''},
    'mepsx': {'name': 'meps.x', 'unit': ''},
    'mepsy': {'name': 'meps.y', 'unit': ''},
    'mepsz': {'name': 'meps.z', 'unit': ''},
    'pcond': {'name': 'pcond', 'unit': ''},
    'poisson': {'name': 'poisson', 'unit': ''},
}

In [None]:
# Build one contribution dict per JARVIS entry whose reference is an MP identifier.
# Each contribution carries flat "dotted" data keys that are unflattened at the end,
# plus an optional "lists" attachment for the list-valued properties.
contributions = []
list_keys = ['efg', 'elastic_tensor', 'modes', 'icsd']
identifier_key = "reference"
formula_key = "formula"
prefixes = ("mp-", "mvc-")
jarvis_url = 'https://www.ctcms.nist.gov/~knc6/static/JARVIS-DFT/'
identifiers = set()


def _is_missing(value):
    """Return True for placeholder values that should not be uploaded.

    Missing means None, the string "na", or an empty string/list/dict.
    Numeric zeros are *not* missing: a plain truthiness test would discard
    legitimate values such as a zero band gap or a zero hull energy.
    """
    if value is None or value == "na":
        return True
    try:
        return len(value) == 0
    except TypeError:
        # numbers (and other unsized objects) are real values, including 0
        return False


for entry in data:
    identifier = entry[identifier_key]
    # only keep entries referencing an MP material, and only the first one per identifier
    if not isinstance(identifier, str) or not identifier.startswith(prefixes):
        continue
    if identifier in identifiers:
        continue

    identifiers.add(identifier)
    contrib = {"identifier": identifier, "formula": entry[formula_key], "data": {}}
    attm_data = {}

    for k, v in entry.items():
        # "atoms" is skipped for now.
        # TODO upload structures via
        #   JarvisAtomsAdaptor.get_structure(Atoms.from_dict(entry["atoms"]))
        if k in ("atoms", "xml_data_link") or _is_missing(v):
            continue
        if k == "jid":
            # store the JARVIS id as a link to its XML page
            contrib["data"][columns[k]["name"]] = f"{jarvis_url}{v}.xml"
        elif k == "raw_files":
            # each item is a comma-separated record; field 0 is the label, field 2 the URL
            for item in v:
                parts = item.split(",")
                if len(parts) < 3:
                    continue  # malformed record without a URL field
                file_label = parts[0].replace("-", "|")
                contrib["data"][f"files.{file_label}"] = parts[2]
        elif k in list_keys:
            if isinstance(v, str) and "," in v:
                attm_data[k] = v.split(",")
            else:
                attm_data[k] = v
        elif k in columns:
            # local names deliberately differ from the module-level dataset `name`
            column_name, column_unit = columns[k]["name"], columns[k]["unit"]
            contrib["data"][column_name] = f"{v} {column_unit}" if column_unit else v

    if attm_data:
        contrib["attachments"] = [Attachment.from_data("lists", attm_data)]

    # turn "a.b" keys into nested {"a": {"b": ...}} dicts
    contrib["data"] = unflatten(contrib["data"], splitter="dot")
    contributions.append(contrib)

len(contributions)

In [None]:
# Collect every dotted key that appears under "files" in any contribution.
files_columns = {
    file_key
    for item in contributions
    if "files" in item["data"]
    for file_key in flatten(item["data"]["files"], reducer="dot")
}

files_columns

In [None]:
# Column name -> unit for all mapped properties, plus one unit-less column per file key.
columns_map = {spec["name"]: spec["unit"] for spec in columns.values()}
columns_map.update({f"files.{file_key}": None for file_key in files_columns})

In [None]:
# Clear out earlier uploads, then register the column layout built above.
# NOTE(review): delete_contributions() is called without a query - presumably this
# removes every contribution in the project; confirm before re-running this cell.
client.delete_contributions()
client.init_columns(columns_map)

In [None]:
# Upload the contributions built above. ignore_dupes=True is forwarded to the client;
# presumably it skips entries already present in the project - confirm in the client docs.
client.submit_contributions(contributions, ignore_dupes=True)
# The column layout is initialised again after the upload.
client.init_columns(columns_map)

In [None]:
# NOTE(review): _reinit() is a private client method; presumably it refreshes the
# client's cached API state so the queries below see the new columns - confirm.
client._reinit()

In [None]:
# First element returned by get_totals() - presumably the project's contribution count;
# it is used below as the denominator for the hull-energy percentage.
ncontribs, _ = client.get_totals()
ncontribs

In [None]:
# List the query parameters the client offers for the "energies" columns.
energies_prefix = "data__energies"
[param for param in client.available_query_params() if param.startswith(energies_prefix)]

In [None]:
# Share of contributions at or below a hull-energy threshold, then the matching rows.
ehull_max = 0.05  # single source for the threshold used in the query and the message
query = {"data__energies__hull__value__lte": ehull_max}
count, _ = client.get_totals(query=query)
if ncontribs:
    print(f"materials with ehull <= {ehull_max} eV/atom: {count/ncontribs*100:.1f}%")
else:
    # an empty project would otherwise raise ZeroDivisionError
    print("no contributions in project")
fields = ["identifier", "formula", "data.energies.hull.value"]
sort = "data.energies.hull.value"
contribs = client.query_contributions(query=query, fields=fields, sort=sort, paginate=True)
pd.json_normalize(contribs["data"])

In [None]:
# Material screening: find materials containing Bi with spin-orbit spillage >= 0.5,
# OptB88vdW band gap > 0.01, energy above the convex hull < 0.1 eV/atom, and SLME > 5%.
query = {
    "formula__contains": "Bi",
    "data__spillage__value__gte": 0.5,
    "data__bandgaps__OptB88vdW__value__gt": 0.01,
    "data__energies__hull__value__lt": 0.1,
    "data__SLME__value__gt": 5
}
fields = [
    "identifier", "formula", "data.spillage.value", "data.bandgaps.OptB88vdW.value",
    "data.energies.hull.value", "data.SLME.value",
]
# defined here so the cell does not depend on a variable left over from an earlier cell
sort = "data.energies.hull.value"
contribs = client.query_contributions(query=query, fields=fields, sort=sort, paginate=True)
pd.json_normalize(contribs["data"])

In [None]:
# Find all cubic materials, ordered by energy above the convex hull.
query = {"data__crystal__exact": "cubic"}
fields = ["identifier", "formula", "data.crystal", "data.energies.hull.value"]
# defined here so the cell does not depend on a variable left over from an earlier cell
sort = "data.energies.hull.value"
contribs = client.query_contributions(query=query, fields=fields, sort=sort, paginate=True)
pd.json_normalize(contribs["data"])