In [None]:
from mpcontribs.client import Client
from pymatgen.ext.matproj import MPRester
from pymatgen.core import Structure

In [None]:
# MPContribs project that every contribution built in this notebook is attached to.
name = "perovskites_diffusion"
# Both clients are constructed with default arguments; no credentials are passed here.
# NOTE(review): presumably API keys are picked up from the environment/config -- confirm.
client = Client()
mpr = MPRester()

In [None]:
# Look up the project by name and display it.
client.get_project(name).display()

In [None]:
# Set the project's `unique_identifiers` field to False.
# NOTE(review): presumably this lets several contributions share one identifier -- confirm
# against the MPContribs documentation.
client.projects.update_entry(pk=name, project={"unique_identifiers": False}).result()

In [None]:
import tarfile, os
from pandas import read_excel

# Unit string per data key. When a numeric spreadsheet value is stored under one
# of these keys, the unit is appended to it ("<value> <unit>"); keys that are not
# listed here are stored without a unit.
units = dict(
    emig="eV",
    bmag="A*m²",
    unitvol="Å³",
    Kcr="Å",
    freevol="Å",
    opband="eV",
    evf="eV",
    bob="°",
    ecoh="eV",
    bulkmod="kbar",
    efermi="eV",
    ehull="eV",
    aonn="Å",
    bonn="Å",
    aoarad="Å",
    bobrad="Å",
    kcaobo="Å",
)


# Build `contributions`: one entry per spreadsheet row that carries (or can be
# matched to) an identifier, holding the row's data and its CONTCAR structure.

# Spreadsheet export (xlsx) with the tabular data, and the tarball of bulk CONTCAR files.
google_sheet = "https://docs.google.com/spreadsheets/d/1Wep4LZjehrxu3Cl5KJFvAAhKhP92o4K5aC-kZYjGz2o/export?format=xlsx"
# NOTE(review): absolute, machine-specific path; point it at a local copy before running elsewhere.
contcars_filepath = "/Users/patrick/GoogleDriveLBNL/MaterialsProject/gitrepos/mpcontribs-data/perovskites_diffusion/bulk_CONTCARs.tar.gz"

df = read_excel(google_sheet)
# The first row of the sheet supplies, per column, the key under which values are stored.
keys = df.iloc[[0]].to_dict(orient="records")[0]
abbreviations = {}  # key -> spreadsheet column it was first seen under
contributions = []
count, skipped, update = 0, 0, 0  # not modified anywhere in this cell

# Open the archive in a context manager so its file handle is released once all
# structures have been read (`contcars` stays bound afterwards, but closed).
with tarfile.open(contcars_filepath) as contcars:
    for index, row in df[1:].iterrows():
        identifier, data, structure = None, {}, None

        # Series.iteritems() was removed in pandas 2.0; items() is the equivalent.
        for col, value in row.items():
            if col in ("level_0", "index"):
                continue

            key = keys[col]
            if isinstance(key, str):
                key = key.strip()
                abbreviations.setdefault(key, col)
            else:
                # No string key in the first row: fall back to the column header.
                key = col.strip().lower()

            if key == "pmgmatchid":
                # An empty cell arrives as NaN (a float), which has no .strip();
                # treat any non-string value like the literal "None".
                identifier = value.strip() if isinstance(value, str) else None
                if identifier == "None":
                    identifier = None

                # Relies on the "directory" value having been stored in `data`
                # already, i.e. on that column preceding "pmgmatchid" in the sheet.
                contcar_path = "bulk_CONTCARs/{}_CONTCAR".format(
                    data["directory"].replace("/", "_")
                )
                contcar = contcars.extractfile(contcar_path).read().decode("utf8")
                structure = Structure.from_str(contcar, "poscar", sort=True)

                if identifier is None:
                    # No identifier in the sheet: look the structure up and use the first match.
                    matched_mpids = mpr.find_structure(structure)
                    if matched_mpids:
                        identifier = matched_mpids[0]
            else:
                if key == "calculation type":
                    key = "type"
                if isinstance(value, str):
                    val = value.strip()
                else:
                    unit = units.get(key)
                    val = f"{value} {unit}" if unit else value
                # The string "None" marks a missing value and is not stored.
                if val != "None":
                    data[key] = val

        # Rows without an identifier (none given and no structure match) are dropped.
        if identifier:
            contributions.append({
                "project": name, "identifier": identifier, "is_public": True,
                "data": data, "structures": [structure]
            })

len(contributions)

In [None]:
# Call delete_contributions for the project first, then submit the list built above.
# NOTE(review): the delete runs unconditionally, so re-running this cell presumably
# removes whatever the project currently holds before re-uploading -- confirm.
client.delete_contributions(name)
client.submit_contributions(contributions, ignore_dupes=True, per_page=35)

In [None]:
# Query the project's contributions; as the cell's last expression the result is shown as output.
client.get_contributions(name)