In [None]:
import os
from json import loads
from pathlib import Path
from time import time

from monty.serialization import loadfn
from mpcontribs.client import Client
from pymatgen.core.structure import Molecule, Structure

In [None]:
# Project name; used below as the "project" value in client queries and in
# every contribution built by get_contribution().
name = "open_catalyst_project"
# Client instance built with default arguments.
# NOTE(review): presumably picks up credentials/host from the environment — confirm.
client = Client()

**Prepare contributions**: locate the OCP sample files and convert each one into a contribution.

In [None]:
# Directory holding the OCP sample files (*.json.gz). Set the OCP_SAMPLE_DIR
# environment variable to point the notebook at a different location; when it
# is unset or empty, the original machine-specific path is used as a fallback.
p = Path(
    os.environ.get("OCP_SAMPLE_DIR")
    or "/Users/patrick/GoogleDriveLBNL/MaterialsProject/gitrepos/mpcontribs-data/ocp-sample"
)
# glob() yields paths in arbitrary, filesystem-dependent order; sort them so
# repeated runs process the files in the same order.
jsons = sorted(p.glob("*.json.gz"))

In [None]:
def get_contribution(path):
    """Convert one OCP sample file into a contribution dictionary.

    The file is read with ``loadfn``. The last entry of its ``trajectory`` is
    annotated with the per-site ``tags`` (cast to ``int``) and then split into
    the sites tagged ``2``, which form the adsorbate molecule, and all other
    sites, which form the bulk structure.

    Args:
        path: location of the sample file; it is also listed as the
            contribution's attachment.

    Returns:
        dict with the keys ``project`` (the module-level ``name``),
        ``formula``, ``identifier``, ``data``, ``structures`` and
        ``attachments``.
    """
    record = loadfn(path)
    # Only the final trajectory entry is used.
    # NOTE(review): presumably the relaxed geometry — confirm.
    final_frame = record['trajectory'][-1]
    final_frame.add_site_property('tags', [int(tag) for tag in record['tags']])

    # Partition the sites in one pass: tag 2 goes to the adsorbate, the rest to the bulk.
    adsorbate_sites, bulk_sites = [], []
    for site in final_frame:
        if site.properties['tags'] == 2:
            adsorbate_sites.append(site)
        else:
            bulk_sites.append(site)

    adsorbate = Molecule.from_sites(adsorbate_sites)
    adsorbate_formula = adsorbate.composition.iupac_formula
    bulk = Structure.from_sites(bulk_sites)
    reduced_bulk_formula = bulk.composition.reduced_formula

    # Searchable fields, inserted in the same order as the keys are listed here.
    search_data = {
        "mpid": record['bulk_id'],
        "adsorptionEnergy": record["adsorption_energy"],
        "adsorbateSmiles": record["adsorbate_smiles"],
        "adsorbateIUPACFormula": adsorbate_formula,
        "bulkFormula": reduced_bulk_formula,
    }
    miller = record["surface_miller_indices"]
    search_data["h"] = miller[0]
    search_data["k"] = miller[1]
    search_data["l"] = miller[2]
    search_data["surfaceTop"] = record["surface_top"]
    search_data["surfaceShift"] = record["surface_shift"]

    contribution = {
        "project": name,
        "formula": final_frame.composition.reduced_formula,
        "identifier": record["id"],
        "data": search_data,
        "structures": [final_frame],
        "attachments": [path],
    }
    return contribution

In [None]:
# Query the totals for this project; as the last expression of the cell, the
# result is displayed as the cell output.
client.get_totals({"project": name})

In [None]:
# Identifiers already known for this project. Falls back to an empty set when
# the response has no entry for the project or no "identifiers" entry in it.
ids_by_project = client.get_all_ids({"project": name})
project_entry = ids_by_project.get(name, {})
all_ids = project_entry.get("identifiers", set())
len(all_ids)

In [None]:
# Build contributions only for files whose identifier is not already known.
contributions = []
for path in jsons:
    # Strip both suffixes ("<id>.json.gz" -> "<id>") to get the identifier.
    # NOTE(review): assumes the file name stem equals the record's "id" — confirm.
    identifier = Path(path.stem).stem
    if identifier not in all_ids:
        contributions.append(get_contribution(path))
len(contributions)

**Submit contributions**: upload the newly prepared contributions to the project.

In [None]:
# Send the contributions built above to the client for submission; files whose
# identifier was already in all_ids were skipped when the list was built.
client.submit_contributions(contributions)