In [None]:
# make sure to use mpcontribs-client>=5.4.0
import hashlib
import os
from pathlib import Path

import pandas as pd
from flatten_dict import unflatten
from mpcontribs.client import Client, Attachment
from pymatgen.core import Structure
from pymatgen.io.lammps.outputs import parse_lammps_dumps

In [None]:
# Client bound to the "gbdb" project; it is reused by every cell below.
# An API key can be supplied via the `apikey` keyword argument -- avoid
# committing a literal key to the notebook.
client = Client(project="gbdb")

### Add Legend to Project Info

In [None]:
# Legend for the project: one human-readable description per data field.
# The keys match the top-level field names used in `columns` further down
# (dotted columns such as `indices.h` share the description of their parent).
other = {
    "element": "element",
    "indices": "hkl indices",
    "boundary": "boundary type - tilt or twist",
    "energy": "grain boundary energy in J/m2",
    "n": "planar density parameter",
    "translation": "amount of horizontal translation of top slab in x/y direction in Å",
    "repetitions": "number of repetitions of the base structure in x/y direction",
    "temperature": "temperature of MD simulation in Kelvin",
    "steps": "number of steps of MD simulation",
    "potential": "classical potential used"
}
# Store the legend under the `other` entry of the project info.
client.update_project({"other": other})

### Submit Data

In [None]:
# Column definitions: maps each (dot-separated) column name to its unit.
#   None -> string column
#   ""   -> dimensionless number
#   else -> unit string that is appended to the value when contributions are built
# NOTE: the insertion order of this dict matters -- the prep cell derives the
# positional order of the file-name fields from it.
columns = {
    "element": None, # string
    "indices.h": "", # dimensionless
    "indices.k": "",
    "indices.l": "",
    "boundary": None,
    "energy": "J/m²",
    "n": "",
    "translation.x": "Å",
    "translation.y": "Å",
    "repetitions.x": "",
    "repetitions.y": "",
    "temperature": "K",
    "steps": "",
    "potential": None
}
client.init_columns(columns)

In [None]:
def get_structure(elem, path):
    """Convert a LAMMPS dump file into a single-element pymatgen ``Structure``.

    Only the first snapshot yielded by ``parse_lammps_dumps`` is used.

    Args:
        elem: species assigned to every site of the structure.
        path: location of the LAMMPS dump file (converted to ``str`` for parsing).

    Returns:
        ``Structure`` built from the snapshot's box lattice and the ``x``/``y``/``z``
        columns of its data table, interpreted as Cartesian coordinates.
    """
    snapshot = next(parse_lammps_dumps(str(path)))
    # NOTE(review): the last row of the data table is discarded before the
    # coordinates are read -- presumably a trailing non-atom row in these
    # dumps; confirm against the input files.
    atoms = snapshot.data.drop(snapshot.data.tail(1).index)
    return Structure(
        lattice=snapshot.box.to_lattice(),
        species=[elem] * snapshot.natoms,
        coords=atoms[["x", "y", "z"]].to_numpy(),
        coords_are_cartesian=True,
    )

In [None]:
# prep contributions
# Directory containing the `lammps_*` dump files. Set the GBDB_DATA_DIR
# environment variable to point at a local copy of the data; the original
# author's location is only used as a fallback.
indir = Path(os.environ.get(
    "GBDB_DATA_DIR",
    "/Users/patrick/GoogleDriveLBNL/My Drive/MaterialsProject/gitrepos/mpcontribs-data/gbdb",
))

# Positional order of the fields in a file name: the `columns` keys with the
# three flattened `indices.*` entries replaced by one "indices" slot at position 1.
keys = [k for k in columns if not k.startswith("indices")]
keys.insert(1, "indices")

contributions = []

# sorted() keeps the processing order -- and therefore the preview at the end of
# this cell and the submission order -- independent of the filesystem's listing order.
for path in sorted(indir.glob("lammps_*")):
    # the identifier is the MD5 hex digest of the file name
    identifier = hashlib.md5(path.name.encode("utf-8")).hexdigest()
    contrib = {"identifier": identifier, "data": {}}

    # file name is "lammps_<field>_<field>_..."; skip the leading "lammps" part
    for idx, part in enumerate(path.name.split("_")[1:]):
        if idx == 1:
            # one character per index, e.g. "110" -> {"h": 1, "k": 1, "l": 0}
            contrib["data"]["indices"] = {k: int(v) for k, v in zip("hkl", part)}
        else:
            key = keys[idx]
            unit = columns[key]
            # append the unit for columns that define one, otherwise keep the raw text
            contrib["data"][key] = f"{part} {unit}" if unit else part

    # turn dotted keys such as "translation.x" into nested dicts
    contrib["data"] = unflatten(contrib["data"], splitter="dot")
    structure = get_structure(contrib["data"]["element"], path)
    contrib["formula"] = structure.composition.reduced_formula
    contrib["structures"] = [structure]
    # attach the raw dump file alongside the parsed structure
    contrib["attachments"] = [Attachment.from_textfile(path)]
    contributions.append(contrib)


# preview the first prepared contribution
contributions[0]

In [None]:
# Optional reset: delete contributions and re-initialize the columns, if needed.
# NOTE(review): `delete_contributions()` is called without arguments, so this
# presumably removes every contribution in the project -- confirm before running.
client.delete_contributions()
client.init_columns(columns)

In [None]:
# Submit the contributions prepared above.
client.submit_contributions(contributions)
# Re-applying the column definitions after submission should not be needed but
# doesn't hurt; kept as a workaround for a possible API bug.
client.init_columns(columns)

### Query Data

In [None]:
#client._reinit() # only needed if data just uploaded
# Keep the first value returned by `get_totals()` (the second is ignored); it is
# used as the denominator for the percentage computed in the query cell below.
ncontribs, _ = client.get_totals()
ncontribs

In [None]:
# Display the query parameters reported by the client as available.
client.available_query_params()

In [None]:
# Select tilt boundaries whose planar density parameter `n` is positive, report
# their share of all contributions, then tabulate them sorted by energy value.
query = dict(data__boundary__exact="tilt", data__n__value__gt=0)
count, _ = client.get_totals(query=query)
percentage = count / ncontribs * 100
print(f"grain boundaries of type tilt and n>0: {percentage:.1f}%")

sort = "data.energy.value"
fields = ["identifier", "formula", sort, "data.potential"]
contribs = client.query_contributions(
    query=query,
    fields=fields,
    sort=sort,
    paginate=True,
)
pd.json_normalize(contribs["data"])