In [None]:
from mpcontribs.client import Client

In [None]:
# Project identifier; it is also reused further down to build the data
# directory path and the local Excel file name.
name = "dilute_solute_diffusion"
# Client bound to that project. How Client authenticates is not visible in
# this notebook — presumably via environment/config; confirm before running.
client = Client(project=name)

In [None]:
import os, json, requests, sys
from pandas import read_excel, isnull, ExcelWriter, Series
from mp_api.client import MPRester
from pathlib import Path

# Root of the local mpcontribs-data checkout. Set MPCONTRIBS_DATA_DIR to run
# this notebook on another machine; the fallback is the original location.
data_dir = Path(
    os.environ.get(
        "MPCONTRIBS_DATA_DIR",
        "/Users/patrick/GoogleDriveLBNL/My Drive/MaterialsProject/gitrepos/mpcontribs-data/",
    )
)
zfile = data_dir / name / "z.json"
# z is indexed by solute element name further down to obtain the "Z" column.
with zfile.open() as fobj:
    z = json.load(fobj)

# Never commit API keys: read the key from the environment instead. MPRester
# falls back to its own configuration when it receives None.
# NOTE(review): the key that used to be hardcoded here has been published with
# the notebook and should be revoked/rotated.
mpr = MPRester(os.environ.get("MP_API_KEY"))

# Local cache of the figshare workbook; set download=True to force a refresh.
fpath = f"{name}.xlsx"
download = False

if download or not os.path.exists(fpath):
    figshare_id = 1546772
    url = "https://api.figshare.com/v2/articles/{}".format(figshare_id)
    print("get figshare article {}".format(figshare_id))
    r = requests.get(url, timeout=60)
    r.raise_for_status()  # fail loudly instead of parsing an error page
    figshare = r.json()
    print("version =", figshare["version"])  # TODO set manually in "other"?

    print("read excel from figshare into DataFrame")
    # Use the first attached file whose name mentions "xlsx".
    xlsx_file = next((d for d in figshare["files"] if "xlsx" in d["name"]), None)
    if xlsx_file is None:
        # Previously this only printed a message and then crashed on
        # len(None) below; raise a meaningful error right away.
        raise RuntimeError("no excel sheet found on figshare")

    # Dict of DataFrames is returned, with keys representing sheets
    df_dct = read_excel(xlsx_file["download_url"], sheet_name=None, engine="openpyxl")

    print("save excel to disk")
    # NOTE(review): to_excel writes the DataFrame index as an extra leading
    # column by default, so sheets re-read from fpath carry one more column
    # than the freshly downloaded ones kept in df_dct here. Confirm that the
    # later "Unnamed: 0" handling is right for both paths.
    with ExcelWriter(fpath) as writer:
        for sheet, df in df_dct.items():
            df.to_excel(writer, sheet_name=sheet)
else:
    df_dct = read_excel(fpath, sheet_name=None, engine="openpyxl")

print(len(df_dct), "sheets loaded.")

In [None]:
def search(formula=None, spacegroup_number=None, chemsys=None):
    """Query the Materials Project summary endpoint through ``mpr``.

    All three filters are forwarded as keyword arguments (``None`` included),
    and only the ``material_id`` field is requested. No sort order is passed
    (the ``sort_fields="energy_above_hull"`` option is left disabled), so the
    order of the returned documents is whatever the API yields.
    """
    filters = {
        "formula": formula,
        "chemsys": chemsys,
        "spacegroup_number": spacegroup_number,
    }
    requested_fields = ["material_id"]
    return mpr.summary.search(fields=requested_fields, **filters)

In [None]:
# Host property table: rows are keyed by the "Host element name" column,
# rows containing any missing value are discarded, and the "Unnamed: 0"
# column is removed. The remaining columns are iterated as hosts below.
host_info = (
    df_dct["Host Information"]
    .set_index("Host element name")
    .dropna()
    .drop(columns="Unnamed: 0")
)
# Optional host filter used by the contribution loop: None processes every
# host, an int keeps only the first N hosts, a list keeps the named hosts.
hosts = None
host_info

In [None]:
# Hop definitions per host crystal structure. Each hop is a triple
# (primes, index used in the sheet's column name, subscript used for display).
_BCC_HOPS = [
    ("", "2", "₂"), ("", "3", "₃"), ("", "4", "₄"),
    ("'", "3", "₃"), ("'", "4", "₄"),
    ("''", "3", "₃"), ("''", "4", "₄"),
    ("", "5", "₅"), ("", "6", "₆"),
]
_FCC_HOPS = [("", str(i), subscript) for i, subscript in enumerate("₀₁₂₃₄")]
# NOTE(review): the display subscript for hop "c" is not a real subscript
# letter — it appears to be a look-alike glyph, and "b" uses a plain "_b".
# Both are kept exactly as before because they end up as column names in the
# submitted tables; confirm before normalizing them.
_HCP_BARRIER_HOPS = [
    ("", "X", "ₓ"), ("'", "X", "ₓ"),
    ("", "a", "ₐ"), ("'", "a", "ₐ"),
    ("", "b", "_b"), ("'", "b", "_b"),
    ("", "c", "ꪱ"), ("'", "c", "ꪱ"),
]
_HCP_FREQUENCY_HOPS = [("", "a", "ₐ"), ("", "X", "ₓ")]
# structure -> (hops for activation barriers, hops for attempt frequencies)
_HOPS_BY_STRUCTURE = {
    "BCC": (_BCC_HOPS, _BCC_HOPS),
    "FCC": (_FCC_HOPS, _FCC_HOPS),
    "HCP": (_HCP_BARRIER_HOPS, _HCP_FREQUENCY_HOPS),
}


def _nest_host_properties(flat_properties):
    """Group flat host properties into ``{first word: {subkey: value}}``.

    A key such as ``"Host lattice constant [angstrom]"`` is split on
    whitespace: the first word becomes the group, a trailing ``[unit]`` token
    is taken as the unit, and the words in between are joined with ``_``
    (anything after a comma is dropped) to form the subkey. Values with a unit
    are stored as ``"<value> <unit>"`` strings, values without one unchanged.

    A fresh dict is built instead of re-keying the input in place, so a
    property key can never collide with a group that was already created.
    """
    nested = {}
    for key, value in flat_properties.items():
        words = key.split()
        unit = words[-1][1:-1] if words[-1].startswith("[") else ""
        subkey = "_".join(words[1:-1] if unit else words[1:]).split(",")[0]
        if subkey == "lattice_constant":
            # the lattice constant is always reported in Å
            unit = "Å"
        unit = unit.replace("angstrom", "Å")
        nested.setdefault(words[0], {})[subkey] = f"{value} {unit}" if unit else value
    return nested


def _hop_table(frame, quantity, symbol, unit, hops, name, title):
    """Extract one hop table (barriers or frequencies) from ``frame``.

    Source columns are named ``"<quantity>, <symbol><primes>_<index> [<unit>]"``
    and are relabelled to ``"<symbol><backticks><subscript> [<unit>]"``. The
    selection is copied so relabelling never touches (or warns about) ``frame``.
    """
    source_columns, display_columns = [], []
    for primes, index, subscript in hops:
        source_columns.append(f"{quantity}, {symbol}{primes}_{index} [{unit}]")
        display_columns.append(
            "{}{}{} [{}]".format(symbol, primes.replace("'", "`"), subscript, unit)
        )
    table = frame[["Solute element name"] + source_columns].copy()
    table.columns = ["Solute"] + display_columns
    table.attrs = {"name": name, "title": title}
    return table


contributions = []

for idx, host in enumerate(host_info.columns):
    # Optional host filter: an int keeps the first N hosts, a list keeps names.
    if isinstance(hosts, int) and idx + 1 > hosts:
        break
    if isinstance(hosts, list) and host not in hosts:
        continue

    print("get mp-id for {}".format(host))
    results = search(formula=host)
    if not results:
        print("mp-id for {} not found".format(host))
        continue

    # The search is unsorted, so this is simply the first document returned.
    mpid = str(results[0].material_id)
    contrib = {"identifier": mpid}
    print("add host info for {}".format(mpid))
    hdata = _nest_host_properties(host_info[host].to_dict())
    contrib["formula"] = host

    df = df_dct["{}-X".format(host)].drop("Unnamed: 0", axis=1)
    # Rows with missing cells are treated as free-text notes, not data.
    # `rows` holds positions; dropping them by label below relies on the
    # sheet still having its default 0..n-1 index.
    rows = list(isnull(df).any(axis=1).to_numpy().nonzero()[0])
    if rows:
        cells = df.iloc[rows].dropna(how="all").dropna(axis=1)[df.columns[0]]
        # Substitute the second note cell for the word "following" in the
        # first one and drop the final character of the result.
        note = cells.iloc[0].replace("following", cells.iloc[1])[:-1]
        hdata["note"] = note
        df = df.drop(rows)

    contrib["data"] = hdata
    structure = hdata["Host"]["crystal_structure"]

    print("add table for D₀/Q data for {}".format(mpid))
    # Reshape the sheet: index it by the "Solute element number" column, use
    # the first remaining row as column labels, then transpose so that the
    # former columns become rows.
    df = df.set_index(df["Solute element number"])
    df = df.drop("Solute element number", axis=1)
    df.columns = df.iloc[0]
    df.index.name = "index"
    df = df.drop("Solute element name")
    df = df.T.reset_index()

    # Fe and HCP hosts use differently named D0/Q columns.
    if str(host) == "Fe":
        d0_column = "Solute D0, paramagnetic [cm^2/s]"
        q_column = "Solute Q, paramagnetic [eV]"
    elif structure == "HCP":
        d0_column = "Solute D0 basal [cm^2/s]"
        q_column = "Solute Q basal [eV]"
    else:
        d0_column = "Solute D0 [cm^2/s]"
        q_column = "Solute Q [eV]"

    # .copy() so that relabelling/inserting works on an independent frame.
    df_D0_Q = df[["Solute element name", d0_column, q_column]].copy()
    df_D0_Q.columns = ["Solute", "D₀ [cm²/s]", "Q [eV]"]
    anums = [z[el] for el in df_D0_Q["Solute"]]
    df_D0_Q.insert(0, "Z", Series(anums, index=df_D0_Q.index))
    df_D0_Q = df_D0_Q.sort_values("Z").reset_index(drop=True)
    df_D0_Q.attrs = {
        "name": "D0_Q",
        "title": "D₀/Q by Solute",
        "labels": {
            "value": "D₀/Q",
            #"variable": "method"
        }
    }
    contrib["tables"] = [df_D0_Q]

    # Hop tables exist only for the structures listed in _HOPS_BY_STRUCTURE;
    # unlike the D₀/Q table they keep the sheet's row order.
    if structure in _HOPS_BY_STRUCTURE:
        barrier_hops, frequency_hops = _HOPS_BY_STRUCTURE[structure]

        print("add table for hop activation barriers for {} ({})".format(mpid, structure))
        contrib["tables"].append(
            _hop_table(
                df, "Hop activation barrier", "E", "eV", barrier_hops,
                "hop_activation_barriers", "Hop Activation Barriers",
            )
        )

        print("add table for hop attempt frequencies for {} ({})".format(mpid, structure))
        contrib["tables"].append(
            _hop_table(
                df, "Hop attempt frequency", "v", "THz", frequency_hops,
                "hop_attempt_frequencies", "Hop Attempt Frequencies",
            )
        )

    contributions.append(contrib)

len(contributions)

In [None]:
from flatten_dict import flatten, unflatten

# Translation table from the flattened (dot-joined) keys of each
# contribution's "data" dict to the project column name, with an optional
# unit and a human-readable description.
columns_map = {
    "Host.crystal_structure": {
        "name": "host.symmetry",
        "description": "host crystal structure",
    },
    "Host.melting_temperature": {
        "name": "host.temperature|melt",
        "unit": "K",
        "description": "host melting temperature",
    },
    "Host.vacancy_formation_energy": {
        "name": "host.energy|formation",
        "unit": "eV",
        "description": "host vacancy formation energy",
    },
    "Host.lattice_constant": {
        "name": "host.lattice",
        "unit": "Å",
        "description": "host lattice constant",
    },
    "Host.self-diffusion_correction_shift": {
        "name": "host.shift",
        "unit": "eV",
        "description": "host self diffusion correction shift",
    },
    "note": {
        "name": "excluded",
        "description": "solutes were calculated but either did not converge or relaxed into the neighboring vacancy, making it ineligible for the analytical multi-frequency formalism",
    },
}

# Column name -> unit (None when the column has no unit).
columns = {}
for spec in columns_map.values():
    columns[spec["name"]] = spec.get("unit")

# Fixed sentence that is stripped from every value; together with removing
# "The", this is what reduces the note text to what remains around it.
boilerplate = "solutes were calculated but either did not converge or relaxed into the neighboring vacancy, making the solute ineligible for the analytical multi-frequency formalism"

clean_contributions = []

for entry in contributions:
    cleaned_entry = {
        "identifier": entry["identifier"],
        "formula": entry["formula"],
        "tables": entry["tables"],
    }
    renamed = {}
    for flat_key, raw_value in flatten(entry["data"], reducer="dot").items():
        spec = columns_map[flat_key]
        # Every value must be a string here; the three removals are applied
        # in this order before surrounding whitespace is trimmed.
        text = raw_value.replace("The", "")
        text = text.replace(spec["description"], "")
        text = text.replace(boilerplate, "")
        renamed[spec["name"]] = text.strip()

    cleaned_entry["data"] = unflatten(renamed, splitter="dot")
    clean_contributions.append(cleaned_entry)

len(clean_contributions)

In [None]:
# description = client.get_project(fields=["description"]).get("description")
# description += " Diffusion values for Fe-X are given for the α-BCC phase, both paramagnetic and ferromagnetic. The paramagnetic D₀ and Q are given here, the full diffusivity can be obtained by: D|BCC(T) = D₀|para * exp[-Q|para*(1+αs²)/(kT)] where α=0.156 and s is the temperature dependent spontaneous magnetization of Fe relative to T=0K."
# description += " NSF award No. 1148011, version 10."
# client.update_project({"description": description})

In [None]:
# other = unflatten({col["name"]: col["description"] for col in columns_map.values()}, splitter="dot")
# #client.update_project({"other": {"funding": None, "version": None, "abbreviations": None, "FeX": None}})
# client.update_project({"other": other})

In [None]:
# Upload sequence against the project the client was created for.
# NOTE(review): delete_contributions() is called without arguments —
# presumably this removes every existing contribution of the project, so
# re-running this cell is destructive; confirm before executing.
client.delete_contributions()
# Register the column names/units built from columns_map, then submit.
client.init_columns(columns)
client.submit_contributions(clean_contributions)
# init_columns is called a second time after the submission — presumably on
# purpose, to re-apply the column definitions once data exists; TODO confirm.
client.init_columns(columns)