In [None]:
from mpcontribs.client import Client
from mp_api.client import MPRester
import pandas as pd
import os
from flatten_dict import unflatten, flatten
from math import isnan

In [None]:
# Location of the thermoelectrics survey spreadsheet. The default is the original
# author's local path; set TE_SURVEY_XLSX to point at a copy on another machine.
fn = os.environ.get(
    "TE_SURVEY_XLSX",
    "/Users/patrick/GoogleDriveLBNL/My Drive/MaterialsProject/gitrepos/mpcontribs-data/20180504_TE_survey_Kleinke.xlsx",
)
# One row per spreadsheet record; the column headers are the keys of `columns_map`.
df = pd.read_excel(fn)

In [None]:
# Sanity check: (rows, columns) of the loaded spreadsheet.
df.shape

In [None]:
# Read the API key from the environment; raises KeyError if MPCONTRIBS_API_KEY is not set.
apikey = os.environ["MPCONTRIBS_API_KEY"]

In [None]:
# Project identifier: used to create the project, to scope the MPContribs client,
# and as the prefix of the exported "<name>_columns.csv" file.
name = "experimental_thermoelectrics"

In [None]:
# Register the project on MPContribs. The API key is the one read from
# MPCONTRIBS_API_KEY into `apikey` and is passed to the client explicitly.
# NOTE(review): re-running this cell presumably fails once the project exists - confirm.
with MPRester(apikey) as mpr:
    mpr.contribs.create_project(
        name=name,
        title="Experimental Thermoelectrics",
        authors="R. Seshadri",  # fixed spelling (was "Seshradi")
        description="Data-Driven Review of Thermoelectric Materials: Performance and Resource Considerations.",
        url="https://pubs.acs.org/doi/10.1021/cm400893e"
    )

In [None]:
# MPContribs client scoped to this project; every project/contribution call below goes through it.
client = Client(project=name)

In [None]:
# Display the project as a check that it exists and is reachable with this client
# (presumably returns the project's metadata - confirm against the MPContribs client docs).
client.get_project()

In [None]:
# Maps each spreadsheet column header to the way it is stored in the project:
#   name        - target key; dots become nesting levels via unflatten(..., splitter="dot")
#   unit        - a non-empty unit is appended to the value as "<value> <unit>" when the
#                 contributions are built (presumably "" marks a dimensionless number and
#                 None a free-text column - confirm against MPContribs' init_columns)
#   description - text published under the project's "other" field
#   scale       - optional numeric factor multiplied into the raw value
# NOTE(review): descriptions still set to "TODO" are published as-is.
# NOTE(review): "space group" uses unit "" like the numeric columns - confirm it should
# not be None like the other text columns.
columns_map = {
    "T (K)": {"name": "temperature", "unit": "K", "description": "Temperature in Kelvin"},
    "Z*10^-4 reported": {"name": "Z", "unit": "", "description": "reported Z", "scale": 1e4},
    "Resist. (Ohm.cm)": {"name": "resistivity.RT", "unit": "Ω·cm", "description": "Resistivity at room temperature in Ωcm"},
    "Resist. (400K)": {"name": "resistivity.400K", "unit": "Ω·cm", "description": "Resistivity at 400K in Ωcm"},
    "Seebeck (uV/K)": {"name": "seebeck.RT", "unit": "µV/K", "description": "Seebeck coefficient at room temperature in µV/K"},
    "Seebeck (400K)": {"name": "seebeck.400K", "unit": "µV/K", "description": "Seebeck coefficient at 400K in µV/K"},
    "kappa (W/mK)": {"name": "kappa.mean", "unit": "W/mK", "description": "TODO"},
    "kappaZT": {"name": "kappa.ZT", "unit": "", "description": "TODO"},
    "Pf (W/K^2/m)": {"name": "Pf", "unit": "W/K²/m", "description": "Power Factor in W/K²/m"},
    "Power Factor*T (W/mK)": {"name": "PfT", "unit": "W/K/m", "description": "Power Factor times Temperature in W/K/m"},
    "ZT": {"name": "ZT", "unit": "", "description": "ZT"},
    "x": {"name": "x", "unit": "", "description": "TODO"},
    "series": {"name": "series", "unit": None, "description": "TODO"},
    "T Max": {"name": "Tmax", "unit": "K", "description": "TODO"},
    "family": {"name": "family", "unit": None, "description": "TODO"},
    "Conduct. (S/cm)": {"name": "conductivity", "unit": "S/cm", "description": "Conductivity in S/cm"},
    "S^2": {"name": "S2", "unit": "", "description": "S²"},
    "ke/ktotal": {"name": "ke|rel", "unit": "", "description": "ke/ktotal"},
    "space group": {"name": "spacegroup", "unit": "", "description": "space group"},
    "# symmetry elements": {"name": "nsymelems", "unit": "", "description": "number of symmetry elements"},
    "preparative route": {"name": "route", "unit": None, "description": "Preparative Route"},
    "final form": {"name": "final", "unit": None, "description": "Final Form"},
    "Authors": {"name": "authors.main", "unit": None, "description": "Authors"},
    "Author of Unit Cell": {"name": "authors.cell", "unit": None, "description": "Author of Unit Cell"},
    "DOI": {"name": "dois.main", "unit": None, "description": "Digital Object Identifier (DOI)"},
    "Unit Cell DOI": {"name": "dois.cell", "unit": None, "description": "Unit Cell DOI"},
    "ICSD of structure": {"name": "icsd.number", "unit": "", "description": "ICSD of structure"},
    "temp of ICSD (K)": {"name": "icsd.temperature", "unit": "K", "description": "temp of ICSD (K)"},
    "Cell Volume (A^3)": {"name": "volume.cell", "unit": "Å³", "description": "Cell Volume in Å³"},
    "average atomic volume": {"name": "volume.atomic", "unit": "", "description": "average atomic volume"},
    "Formula Units per Cell": {"name": "units", "unit": "", "description": "Formula Units per Cell"},
    "Atoms per formula unit": {"name": "natoms.formunit", "unit": "", "description": "Atoms per formula unit"},
    "total atoms per unit cell": {"name": "natoms.total", "unit": "", "description": "total atoms per unit cell"}
}
# Column-name prefixes to ignore; passed to str.startswith when the records are cleaned.
skip = ("Unnamed:", "Comments")
# Debug helper (disabled): list spreadsheet columns that are neither skipped nor mapped.
# for col in df.columns:
#     if not col.startswith(skip) and col not in columns_map:
#         print(col)

# Flat {name: unit} view of the mapping, handed to client.init_columns.
columns = {col["name"]: col.get("unit") for col in columns_map.values()}

In [None]:
import csv
# Export the column mapping as "<name>_columns.csv": one row per spreadsheet column.
# Entries without a "scale" are written with an empty cell (DictWriter's default restval).
field_names = ["column", "name", "unit", "scale", "description"]
csvlines = [{"column": k, **v} for k, v in columns_map.items()]

# newline='' is required by the csv module to avoid doubled line endings on some
# platforms; utf-8 is explicit because the units/descriptions contain non-ASCII
# characters that a locale-dependent default encoding may be unable to write.
with open(f'{name}_columns.csv', 'w', newline='', encoding='utf-8') as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=field_names)
    writer.writeheader()
    writer.writerows(csvlines)

In [None]:
# Publish each column's description under the project's "other" field.
# Dotted names (e.g. "resistivity.RT") are expanded into nested dictionaries.
descriptions = {}
for spec in columns_map.values():
    descriptions[spec["name"]] = spec["description"]

other = unflatten(descriptions, splitter="dot")
client.update_project({"other": other})

In [None]:
# Download the provenance documents (material id + external database ids).
# A fresh client is opened here instead of reusing the `mpr` whose `with` block
# has already exited, so the request does not rely on a closed client still working.
with MPRester(apikey) as mpr:
    provenance = mpr.materials.provenance.search(fields=["material_id", "database_IDs"])

In [None]:
from emmet.core.provenance import Database

In [None]:
# Build a lookup from the trailing part of each ICSD id (the text after the last "-")
# to the material id as a string. If several materials list the same ICSD id,
# the one seen last wins.
icsd_lookup = {}

for doc in provenance:
    icsd_ids = doc.database_IDs.get(Database.ICSD, [])
    for icsd_id in icsd_ids:
        number = icsd_id.rsplit("-", 1)[-1]
        icsd_lookup[number] = str(doc.material_id)

len(icsd_lookup)

In [None]:
# Turn off the project's "unique_identifiers" setting (presumably so that several
# contributions may share one identifier - confirm against the MPContribs docs).
client.update_project({"unique_identifiers": False})

In [None]:
def _is_missing(value):
    """Return True for an empty spreadsheet cell (NaN/None/NaT); strings are never missing.

    pd.isna is used instead of math.isnan because the latter raises TypeError on
    non-numeric cell values such as None or timestamps.
    """
    return not isinstance(value, str) and bool(pd.isna(value))


def _icsd_key(value):
    """Return ``value`` in the string form used for the keys of ``icsd_lookup``.

    Strings pass through untouched. Integral numbers are rendered without a
    fractional part: a float such as 12345.0 would otherwise never equal the
    string keys of the lookup and the row would be dropped silently.
    """
    if isinstance(value, str):
        return value
    try:
        number = float(value)
    except (TypeError, ValueError):
        return str(value)
    return str(int(number)) if number.is_integer() else str(value)


# Turn every spreadsheet row into a contribution: keep only mapped, non-empty
# columns, format the values, and attach the row to the material whose ICSD
# number matches. Rows without formula, ICSD number, or matching material are skipped.
contributions = []

for record in df.to_dict("records"):
    formula = record.pop("Formula")
    if _is_missing(formula):
        continue

    clean = {}
    for k, v in record.items():
        if k.startswith(skip) or k not in columns_map:
            continue

        # remove NaNs (tip: skip any unset/empty keys)
        if _is_missing(v):
            continue

        spec = columns_map[k]
        key = spec["name"]
        unit = spec.get("unit")
        scale = spec.get("scale")

        # Apply the optional scale factor to numeric values only; multiplying a
        # string would raise (float factor) or repeat it (int factor).
        val = v
        if isinstance(scale, (float, int)) and not isinstance(v, (str, bool)):
            val = v * scale

        # convert boolean values to Yes/No, and append units
        # (formatting works on `val` so that a scaled value is not discarded)
        if isinstance(v, bool):
            val = "Yes" if v else "No"
        elif isinstance(val, int) and not unit:
            val = str(val)
        elif unit:
            val = f"{val} {unit}"

        clean[key] = val

    icsd = clean.get("icsd.number")
    if not icsd:
        continue

    # Look up with the normalised string form; the stored value is left as is.
    identifier = icsd_lookup.get(_icsd_key(icsd))
    if not identifier:
        continue

    contrib = {"identifier": identifier, "formula": formula}
    contrib["data"] = unflatten(clean, splitter="dot")
    contributions.append(contrib)

len(contributions)

In [None]:
# Replace the project's data with the contributions built above.
# Destructive: the first call removes everything previously submitted to this project.
client.delete_contributions()  # remove all contributions from project
# Declare the columns with the name -> unit mapping derived from columns_map.
client.init_columns(columns)
client.submit_contributions(contributions)
client.init_columns(columns) # shouldn't be needed but ensures all columns appear
# Publishing is left commented out.
# client.make_public()