In [None]:
import os
from math import isnan
from pathlib import Path

import pandas as pd
from flatten_dict import unflatten
from monty.serialization import loadfn
from mpcontribs.client import Client

In [None]:
# Directory holding the ForbiddenTransitions input files (properties pickle, spectra JSON).
# Set the FORBIDDEN_TRANSITIONS_DATA_DIR environment variable to point at your own checkout;
# the original author's local path is kept as the fallback so existing setups keep working.
data_dir = Path(
    os.environ.get(
        "FORBIDDEN_TRANSITIONS_DATA_DIR",
        "/Users/patrick/GoogleDriveLBNL/My Drive/MaterialsProject/gitrepos/mpcontribs-data/ForbiddenTransitions",
    )
)

In [None]:
# Load the per-material properties table (a pickled object read via pandas) from `data_dir`.
# NOTE(review): unpickling can execute arbitrary code — only load this file from a trusted source.
df_path = data_dir / "2022_Woods-Robinson_properties-df_mpcontribs"
df = pd.read_pickle(df_path)

In [None]:
# Preview the first rows to sanity-check the loaded columns.
df.head()

In [None]:
# Maps each source DataFrame column to its MPContribs field:
#   name        - dotted target key ("identifier"/"formula" live at the contribution root)
#   unit        - optional unit string; "" marks a dimensionless number, a missing key means no unit is set
#   description - human-readable legend text
# Keys containing LaTeX are raw strings so that `\mathrm` is not parsed as an (invalid) escape sequence.
columns_map = {
    # root level
    "Materials Project ID (mpid)": {"name": "identifier", "description": "Materials Project ID as of May 30, 2023"},
    "Formula": {"name": "formula", "description": "Chemical formula (from pretty_formula on MP)"},
    # info
    "Space group": {"name": "info.spacegroup", "description": "Space group symbol from MP"},
    "# ICSD entries": {"name": "info.numICSDs", "unit": "", "description": "Number of ICSD entries that structure-match to this compound (queried from the Materials Project)"},
    "Calculation origin": {"name": "info.origin", "description": "The source of the calculation; note that some of these calculations derive from Fabini et al. 2019 (10.1021/acs.chemmater.8b04542) and the associated MPContribs data set"},
    # chemical properties
    r"$t_\mathrm{IPR}^\mathrm{d}$": {"name": "properties.chemical.IPR", "description": "Inverse participation ratio of the direct VBM and CBM states, used as a proxy for localization of states at the band edges (a high IPR indicates strong localization), as defined by Wegner in 1980 (10.1007/BF01325284) and implemented by Xiong, et al. in 2023 (10.1126/sciadv.adh8617)  (see manuscript for details)"},
    r"$σ^\mathrm{d}$": {"name": "properties.chemical.sigma", "unit": "", "description": "Orbital similarity of the direct VBM and CBM states, derived from the dominant contributors to the density of states at the direct VBM and CBM to describe the similarity of CB-edge and VB-edge orbital contributions (see manuscript for details)"},
    # other properties
    r"$E_\mathrm{hull}$ (eV/at.)": {"name": "properties.other.hull", "unit": "eV/atom", "description": "Energy above the convex hull, computed using GGA (or GGA+U when appropriate) and MP compatibility scheme"},
    "Synthesized?": {"name": "properties.other.synthesized", "description": "Whether a given compound has been synthesized in any form (queried from the Materials Project)"},
    # optical properties
    r"$E_\mathrm{G}^\mathrm{GGA}$ (eV)": {"name": "properties.optical.bandgaps.GGA", "unit": "eV", "description": "Fundamental band gap computed using GGA (or GGA+U when appropriate)"},
    r"$E_\mathrm{G}^\mathrm{d,GGA}$ (eV)": {"name": "properties.optical.bandgaps.GGA|d", "unit": "eV", "description": "Direct band gap computed using GGA (or GGA+U when appropriate)"},
    r"$E_\mathrm{G}^\mathrm{da,GGA}$ (eV)": {"name": "properties.optical.bandgaps.GGA|da", "unit": "eV", "description": "Direct allowed band gap computed using GGA (or GGA+U when appropriate), defined as the energy at which dipole transition matrix elements become significant (see manuscript for details)"},
    r"$E_\mathrm{edge}^\mathrm{da,GGA}$ (eV)": {"name": "properties.optical.energy|edge.GGA|da", "unit": "eV", "description": "Absorption edge energy, defined as the approximate energy at which the absorption coefficient rises to 1e4 cm-1 and becomes significant (see manuscript for details)"},
    r"$Δ^\mathrm{d,GGA}$": {"name": "properties.optical.delta.GGA|d", "unit": "", "description": "Forbidden energy difference, defined as the energy difference between the direct band gap and direct allowed band gap, such that a value greater than zero indicates the presence of forbidden or weak transitions"},
    r"$Δ_\mathrm{edge}^\mathrm{d,GGA}$": {"name": "properties.optical.delta|edge.GGA|d", "unit": "", "description": "Edge energy difference, defined as the energy difference between the direct band gap and the absorption edge energy"},
    r"$α_\mathrm{avg.vis}^\mathrm{GGA}$ (cm$^{-1}$)": {"name": "properties.optical.alpha|vis", "unit": "cm⁻¹", "description": "Average GGA absorption coefficient in the visible regime, using an empirical gap correction from Morales et al. 2017 (10.1021/acs.jpcc.7b07421) (see manuscript for details; caution that this should be recalculated if using a scissor shift!)"},
    "Optical type": {"name": "properties.optical.type", "description": "Optical type categorization (OT 1\u20134), following the classification outlined by Yu and Zunger in 2012 (10.1103/PhysRevLett.108.068701)"},
    # transport properties
    r"$m^*_\mathrm{e}$": {"name": "properties.transport.effmass.electron", "unit": "mₑ", "description": "Electron effective mass, computed using the BoltzTraP2 package assuming dopings of 10^18 cm-3 (see manuscript for details)"},
    r"$m^*_\mathrm{h}$": {"name": "properties.transport.effmass.hole", "unit": "mₑ", "description": "Hole effective mass, computed using the BoltzTraP2 package assuming dopings of 10^18 cm-3 (see manuscript for details)"},
}

# Legend: target field name -> description, plus entries describing the two absorption-coefficient tables.
legend = {v["name"]: v["description"] for v in columns_map.values()}
legend["tables.corrected"] = "Absorption coefficient computed with the IPA and a GGA functional, using the empirical gap correction from Morales et al. 2017 (10.1021/acs.jpcc.7b07421) (see manuscript for details)"
legend["tables.uncorrected"] = "Absorption coefficient computed with the IPA and a GGA functional (without any empirical gap correction as in alpha; see manuscript for details)"

# Column definitions: target field name -> unit (None when the mapping declares no unit).
# The root-level identifier/formula fields are not data columns and are left out.
columns = {v["name"]: v.get("unit") for v in columns_map.values() if v["name"] not in ["identifier", "formula"]}

In [None]:
# Load the absorption spectra from the gzipped JSON file in `data_dir`.
# Entries are later looked up by contribution identifier via `spectra.get(...)`.
spectra = loadfn(data_dir / "2022_Woods-Robinson_absorption-coeff_mpcontribs.json.gz")
# Number of entries loaded.
len(spectra)

In [None]:
# Build one contribution dict per DataFrame row:
#   identifier / formula - root-level fields taken from the mapped columns
#   data                 - remaining mapped fields, nested by their dotted names
#   tables               - at most one absorption-coefficient table, when a spectrum exists for the identifier
contributions = []

# Spectrum keys that repeat contribution-level fields and must not become table columns.
spectrum_meta_keys = ("mpid", "formula")

for record in df.to_dict("records"):
    clean = {}
    for column, value in record.items():
        # Skip missing scalars. pd.isna also recognises None/pd.NA/NaT, on which math.isnan
        # would raise TypeError; the is_scalar guard keeps list-like values from producing
        # an ambiguous array truth value.
        if pd.api.types.is_scalar(value) and pd.isna(value):
            continue

        spec = columns_map[column]
        unit = spec.get("unit")
        if isinstance(value, bool):
            # Booleans are stored as readable strings.
            value = "Yes" if value else "No"
        elif unit:
            # Non-empty unit: store as "<value> <unit>"; "" or a missing unit keeps the raw value.
            value = f"{value} {unit}"

        clean[spec["name"]] = value

    contrib = {
        "identifier": clean.pop("identifier"),
        "formula": clean.pop("formula"),
        "tables": [],
    }
    # Everything left in `clean` is keyed by dotted paths; expand them into nested dicts.
    contrib["data"] = unflatten(clean, splitter="dot")

    spectrum = spectra.get(contrib["identifier"])
    if spectrum:
        # Filter into a new dict instead of popping from `spectrum`, so the loaded `spectra`
        # stay untouched and this cell can be re-run without side effects on earlier state.
        table_data = {k: v for k, v in spectrum.items() if k not in spectrum_meta_keys}
        table = (
            pd.DataFrame(data=table_data)
            .rename(columns={"energy": "energy [eV]", "alpha": "α", "alpha_uncorr": "α|uncorrected"})
            .set_index("energy [eV]")
        )
        table.attrs = {
            "name": "absorption coefficients",
            "title": "Energy-dependent Absorption Coefficients",
            "labels": {
                "value": "absorption coefficient [cm⁻¹]",
                "variable": "method",
            },
        }
        contrib["tables"].append(table)

    contributions.append(contrib)

len(contributions)

In [None]:
# MPContribs client bound to the "ForbiddenTransitions" project.
# NOTE(review): no credentials are passed here — presumably picked up from the environment; confirm.
client = Client(project="ForbiddenTransitions")

In [None]:
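# One-time project setup (run manually when the legend changes): uncomment to send
# `legend` as the project's "other" field.
# client.update_project({"other": legend})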

In [None]:
# Submit contributions, then initialize the project's columns from `columns` (field name -> unit).
# The two commented-out calls below are kept for manual resets before a fresh upload.
# client.delete_contributions()
# client.init_columns(columns)
# NOTE(review): only the slice [150:1000] is submitted — this looks like a leftover from a
# partial/chunked upload; confirm the intended range before re-running the whole notebook.
client.submit_contributions(contributions[150:1000])
client.init_columns(columns)

In [None]:
# List the query parameters whose names start with "data__properties__optical".
client.available_query_params(startswith="data__properties__optical")

In [None]:
# Example filter on the uploaded data: synthesized == "Yes", optical type containing "ia",
# and GGA band gap value greater than 3 (that field is declared with unit eV in `columns_map`).
# NOTE(review): the __exact / __contains / __gt suffixes are presumably lookup operators — confirm against the API.
query = {
    "data__properties__other__synthesized__exact": "Yes",
    "data__properties__optical__type__contains": "ia",
    "data__properties__optical__bandgaps__GGA__value__gt": 3
}
# Number of contributions matching the filter.
client.count(query=query)

In [None]:
# Fetch the identifier and the `data.properties.other` fields of the matching contributions.
# NOTE(review): paginate=True presumably retrieves all result pages — confirm.
contribs = client.query_contributions(query=query, fields=["identifier", "data.properties.other"], paginate=True)
# Show the data of the first hit.
contribs["data"][0]["data"]

In [None]:
# Download the matching contributions including their tables.
# Note: this rebinds `contribs`; here it is indexed by position rather than by the "data" key.
contribs = client.download_contributions(query=query, include=["tables"])
# First table of the first downloaded contribution.
contribs[0]["tables"][0] # DataFrame