In [None]:
%env MPRESTER_MUTE_PROGRESS_BARS 1
import os
from pathlib import Path
from mpcontribs.client import Client
from mp_api.client import MPRester
from flatten_dict import unflatten, flatten
from pymatgen.io.cif import CifParser
from pandas import DataFrame
import numpy as np

**init client and load raw data**

In [None]:
# MPContribs client bound to the "pydatarecognition" project; every cell below goes through it.
client = Client(project="pydatarecognition")

In [None]:
# Root folder holding the local data checkout. Set the MPCONTRIBS_DRIVE_DIR environment
# variable to run the notebook on another machine; the original author's location is
# kept as the fallback so existing setups behave exactly as before.
drivedir = Path(os.environ.get("MPCONTRIBS_DRIVE_DIR", "/Users/patrick/GoogleDriveLBNL/My Drive/"))
# Project data directory inside the mpcontribs-data repository.
datadir = drivedir / "MaterialsProject/gitrepos/mpcontribs-data/pydatarecognition"
# Directory containing the CIF files (the "calculated" subfolder is read further below).
cifs = datadir / "cifs"

**prepare contributions by converting CIFs to `data`, `tables`, and `attachments`**

In [None]:
# calculated cifs (NOTE make sure to gzip all CIFs)
# Builds one contribution dict per data block found in the files under `cifs / "calculated"`.
contributions = []
columns = {"type": None, "date": None, "wavelength": "Å"} # sets fields and their units

for path in (cifs / "calculated").iterdir():
    for identifier, v in CifParser(path).as_dict().items():
        # `_publcif_pd_cifplot` must contain exactly two whitespace-separated tokens,
        # otherwise the unpacking below raises ValueError.
        typ, date = v["_publcif_pd_cifplot"].strip().split()
        wavelength = f'{v["_diffrn_radiation_wavelength"]} Å'
        # NOTE(review): values are used as parsed from the CIF (presumably strings) —
        # confirm whether they need converting to float before tables are submitted.
        intensities = v["_pd_calc_intensity_total"]
        prefix, nbins = "_pd_proc_2theta_range", len(intensities)
        inc, start, end = (float(v[f"{prefix}_{key}"]) for key in ("inc", "min", "max"))
        # One 2θ value per intensity, beginning at the declared range minimum.
        # The previous `np.arange(0, end, inc)` ignored `start` (shifting the axis), and
        # `np.arange(start, end, inc)` excludes the end point, which is presumably the
        # 1999-bin mismatch noted by the original author. Deriving the axis from `nbins`
        # guarantees the lengths agree so the DataFrame constructor cannot fail.
        two_theta = start + inc * np.arange(nbins)
        spectrum = DataFrame({"2θ": two_theta, "intensity": intensities}).set_index("2θ")
        spectrum.attrs = {"name": "powder diffraction", "title": "Powder Diffraction Pattern"}
        # The original code read `d["proc"]`, but `d` was never defined (NameError on the
        # first iteration). The processed-range values parsed above are stored instead.
        # NOTE(review): confirm this is the intended structure of the "proc" field.
        proc = {"range": {"min": start, "max": end, "inc": inc}}
        contributions.append({
            "identifier": identifier, "formula": v["_chemical_formula"],
            "data": {"type": typ, "date": date, "wavelength": wavelength, "proc": proc},
            #"tables": [spectrum], "attachments": [path]
        })

# Cell output: number of contributions prepared.
len(contributions)

**(re)submit all contributions**

In [None]:
# Remove what is currently stored via this client, then (re)declare the columns
# before uploading the freshly prepared contributions.
client.delete_contributions()
client.init_columns(columns)

client.submit_contributions(
    contributions,
    ignore_dupes=True,
    per_request=6,
)

# NOTE submit_contributions can also be used to submit partial updates (can provide example in the future)

# Re-initializing the columns after submission should not be needed; it works around
# what is likely a bug in the API server.
client.init_columns(columns)

**query contributions and retrieve `tables` and `attachments`**

In [None]:
# Show the query parameters the client reports as available (used to build `query` below).
client.available_query_params()

In [None]:
# Only these fields are requested for each matching contribution.
requested_fields = ["identifier", "data.wavelength.value", "attachments", "tables"]

# Filter: wavelength value below 1.59 and an identifier containing "III_".
query = {
    "data__wavelength__value__lt": 1.59,
    "identifier__contains": "III_",
}

result = client.query_contributions(query=query, fields=requested_fields)

In [None]:
# Fetch the first attachment listed on the first contribution returned by the query.
first_attachment_id = result["data"][0]["attachments"][0]["id"]
attm = client.get_attachment(first_attachment_id) # use attm.unpack() to get file contents

In [None]:
# Fetch the first table listed on the first contribution returned by the query.
first_table_id = result["data"][0]["tables"][0]["id"]
table = client.get_table(first_table_id) # pandas Dataframe

In [None]:
# Render the table fetched with client.get_table.
# NOTE(review): plain pandas DataFrames have no `.display()` method, so this relies on the
# object returned by the client providing one — confirm against the client version in use.
table.display()

**query `attachments` and `tables` directly**

In [None]:
# TODO can implement client.query_attachments() to make this easier
# Query attachments directly (not via a contribution), filtering on names containing
# "he5606Sr"; the value returned by `.result()` is shown as the cell output.
client.attachments.queryAttachments(name__contains="he5606Sr").result()

In [None]:
# Query tables directly, filtering on a title attribute containing "Pattern"
# (the tables built above use the title "Powder Diffraction Pattern").
client.tables.queryTables(attrs__title__contains="Pattern").result()

**use `unflatten` to convert underscore-delimited keys into a nested dict**

In [None]:
# Example: split flat keys on "_" to build a nested dictionary.
# NOTE(review): both keys start with an underscore, so the split presumably yields an
# empty-string key at the top level — confirm the output and strip the leading "_" if unwanted.
unflatten({"_pd_proc_range_min": 1, "_pd_proc_range_max": 2}, splitter="underscore")