In [None]:
import tarfile, os
import numpy as np
from pandas import read_excel
from mpcontribs.client import Client

In [None]:
# Project name: stamped on every contribution and passed to the client calls in the
# submit cell at the bottom of the notebook.
name = "transparent_conductors"
# Client is built without arguments here.
# NOTE(review): presumably credentials/endpoint come from the environment -- confirm.
client = Client()

In [None]:
# set unique_identifiers=False for project: allow multiple contributions with same identifier (disable dupe-check)
# client.projects.update_entry(pk=name, project={"unique_identifiers": False}).result()

**prep contributions**

In [None]:
# Parse the Google Sheet (one sheet per doping type, three header rows each) into
# `raw_contributions`: a list of {"identifier": <mpid>, "data": {<key>: <value>}} dicts.
# Keys are the dotted, lower-cased header levels; numeric values whose header ends in
# "(<unit>)" are stored as "<value> <unit>" strings.
google_id = "1bgQAdSfyrPEDI4iljwWlkyUPt_mo84jWr4N_1DKQDUI"
google_sheet = f"https://docs.google.com/spreadsheets/d/{google_id}/export?format=xlsx"
raw_contributions = []

for sheet_name in ["n-type TCs", "p-type TCs"]:
    df = read_excel(google_sheet, sheet_name=sheet_name, header=[0, 1, 2])
    doping = sheet_name.split(" ")[0]  # "n-type" / "p-type"
    done = False

    for row in df.to_dict(orient="records"):
        identifier = None
        data = {"doping": doping}

        for keys, value in row.items():
            # `keys` is the tuple of header levels; drop the auto-generated
            # "Unnamed:" levels and join the rest into a dotted key
            key = ".".join(
                [
                    k.replace("TC", "").strip()
                    for k in keys
                    if not k.startswith("Unnamed:")
                ]
            )

            if key.endswith("experimental doping type"):
                key = key.replace("Transport.", "")

            # keys with more than two levels lose their top-level prefix
            key_split = key.split(".")
            if len(key_split) > 2:
                key = ".".join(key_split[1:])
            if key.endswith("MP link") or key.endswith("range"):
                continue
            if key.endswith("google scholar"):
                key = key.replace(".google scholar", "")
            if key == "Material.mpid":
                if identifier is None:
                    # the first row without an mpid marks the end of the data rows
                    if not isinstance(value, str) and np.isnan(value):
                        done = True
                        break
                    identifier = value.strip()
                    print(identifier)
            else:
                if key == "Material.p pretty formula":
                    key = "formula"
                if isinstance(value, str):
                    val = value.strip()
                else:
                    if isinstance(value, float) and np.isnan(value):
                        continue
                    # only split off a unit if the key really has a " (<unit>)"
                    # suffix; otherwise the tuple unpacking below would raise
                    if key.endswith(")") and " (" in key:
                        key, unit = key.rsplit(" (", 1)
                        unit = unit[:-1].replace("^-3", "⁻³").replace("^20", "²⁰")
                        unit = unit.replace("V2/cms", "cm²/V/s").replace("cm^2/Vs", "cm²/V/s")
                        if "," in unit:
                            # a comma means the parentheses hold conditions, not a
                            # unit: store them under a separate ".conditions" key
                            extra_key = key.rsplit(".", 1)[0].lower() + ".conditions"
                            data[extra_key] = unit
                            unit = ""
                        # strip() avoids a trailing space when the unit is empty
                        val = f"{value} {unit}".strip()
                    else:
                        val = value
                # skip empty cells only -- 0, 0.0 and False are legitimate values
                # and must not be dropped by a plain truthiness test
                if val is None or (isinstance(val, str) and not val):
                    continue
                clean_key = key.replace(" for VB:CB = 4:2", "").replace("?", "").lower()
                data[clean_key] = val

        if done:
            break

        raw_contributions.append({
            "identifier": identifier,
            "data": data
        })

len(raw_contributions)

In [None]:
# list all raw data keys (useful when maintaining keys_map below):
# set(k for contrib in raw_contributions for k in contrib["data"].keys())

In [None]:
# Maps each raw (lower-cased) data key from `raw_contributions` to its column config:
#   'rename' - column name used in the final contribution (falls back to the raw key)
#   'unit'   - unit registered for the column ('' is used for dimensionless numbers;
#              entries without 'unit' end up as None in `columns`)
# Raw keys that are not listed here are not copied into the final contributions.
keys_map = {
    'doping': {},  # don't rename, no unit
    'number of studies': {'rename': 'studies', 'unit': ''},  # dimensionless
    'quality.good or ok': {'rename': 'quality'},
    'structure and composition.common dopants': {'rename': 'dopants'},
    'structure and composition.space group symbol': {'rename': 'spacegroup'},
    
    'branch point energy.bpe min ratio': {'rename': 'BPE.ratio.min', 'unit': ''},
    'branch point energy.bpe max ratio': {'rename': 'BPE.ratio.max', 'unit': ''},
    'branch point energy.bpe ratio': {'rename': 'BPE.ratio.mean', 'unit': ''},
    'branch point energy.has degenerate bands': {'rename': 'BPE.degenerate'},
    
    'computed gap.hse06 band gap': {'rename': 'computed.gap.HSE06.band', 'unit': 'eV'},
    'computed gap.hse06 direct gap': {'rename': 'computed.gap.HSE06.direct', 'unit': 'eV'},
    'computed gap.pbe band gap': {'rename': 'computed.gap.PBE.band', 'unit': 'eV'},
    'computed gap.pbe direct gap': {'rename': 'computed.gap.PBE.direct', 'unit': 'eV'},

    # '.conditions' keys are created by the parsing cell when a header's parentheses
    # contain a comma
    'computed m*.conditions': {'rename': 'computed.m*.conditions'},
    'computed m*.m* avg': {'rename': 'computed.m*.average', 'unit': ''},
    'computed m*.m* planar': {'rename': 'computed.m*.planar', 'unit': ''},
    'computed stability.e_above_hull': {'rename': 'computed.stability.Eₕ', 'unit': 'eV'},
    'computed stability.e_above_pourbaix_hull': {'rename': 'computed.stability.Eₚₕ', 'unit': 'eV'},

    'experimental doping type': {'rename': 'experimental.doping'},
    'experimental gap.max experimental gap': {'rename': 'experimental.gap.range.max', 'unit': 'eV'},
    'experimental gap.max gap reference': {'rename': 'experimental.gap.references.max'},
    'experimental gap.min experimental gap': {'rename': 'experimental.gap.range.min', 'unit': 'eV'},
    'experimental gap.min gap reference': {'rename': 'experimental.gap.references.min'},

    'max experimental conductivity.associated carrier concentration': {
        'rename': 'experimental.conductivity.concentration', 'unit': 'cm⁻³'
    },
    'max experimental conductivity.dopant': {'rename': 'experimental.conductivity.dopant'},
    'max experimental conductivity.max conductivity': {
        'rename': 'experimental.conductivity.max', 'unit': 'S/cm'
    },
    'max experimental conductivity.reference link': {'rename': 'experimental.conductivity.reference'},
    'max experimental conductivity.synthesis method': {'rename': 'experimental.conductivity.method'},

    'max experimental mobility.dopant': {'rename': 'experimental.mobility.dopant'},
    'max experimental mobility.max mobility': {'rename': 'experimental.mobility.max', 'unit': 'cm²/V/s'},
    'max experimental mobility.reference link': {'rename': 'experimental.mobility.reference'},
    'max experimental mobility.synthesis method': {'rename': 'experimental.mobility.method'},
}

In [None]:
# Final column name -> unit for every keys_map entry: entries without "rename"
# keep their raw key, entries without "unit" map to None.
columns = dict(
    (spec.get("rename", raw_key), spec.get("unit"))
    for raw_key, spec in keys_map.items()
)

In [None]:
# Build the final payloads: one public contribution per parsed row, keeping only
# the data keys listed in keys_map and storing them under their renamed column.
contributions = []

for contrib in raw_contributions:
    raw_data = contrib["data"]
    renamed_data = {
        spec.get("rename", raw_key): raw_data[raw_key]
        for raw_key, spec in keys_map.items()
        if raw_key in raw_data
    }
    contributions.append(
        dict(
            project=name,
            identifier=contrib["identifier"],
            is_public=True,
            data=renamed_data,
        )
    )

**submit contributions**

In [None]:
# Re-upload from scratch: delete first, then (re-)initialize the columns, then submit.
# NOTE(review): delete_contributions(name) presumably removes ALL existing contributions
# of the project -- confirm against the mpcontribs client docs before re-running.
client.delete_contributions(name)
# register column names and units derived from keys_map
client.init_columns(name=name, columns=columns)
client.submit_contributions(contributions)