In [None]:
%env MPRESTER_MUTE_PROGRESS_BARS 1
import os
from pathlib import Path
from mpcontribs.client import Client, Attachment
from mp_api.client import MPRester
from flatten_dict import unflatten, flatten
from pymatgen.io.cif import CifParser
from pandas import DataFrame
import numpy as np

In [None]:
# MPContribs client bound to the project all cells below read from and write to.
PROJECT_NAME = "pydatarecognition"
client = Client(project=PROJECT_NAME)

In [None]:
# Root folder that contains the MaterialsProject checkout. Set the
# MPCONTRIBS_DRIVE_DIR environment variable to run the notebook on another
# machine; when it is unset the original local default is used unchanged.
drivedir = Path(
    os.environ.get("MPCONTRIBS_DRIVE_DIR", "/Users/patrick/GoogleDriveLBNL/My Drive/")
)
# Data directory of this project inside the mpcontribs-data repository.
datadir = drivedir / "MaterialsProject/gitrepos/mpcontribs-data/pydatarecognition"
# Folder holding the CIF files; the "calculated" subfolder is read further down.
cifs = datadir / "cifs"

In [None]:
# Build one contribution per data block found in the calculated CIFs
# (NOTE make sure to gzip all CIFs).
contributions = []
columns = {"type": None, "date": None, "wavelength": "Å"} # sets fields and their units

for path in (cifs / "calculated").iterdir():
    for identifier, v in CifParser(path).as_dict().items():
        # "_publcif_pd_cifplot" is expected to hold exactly two
        # whitespace-separated tokens, which are stored as type and date.
        typ, date = v["_publcif_pd_cifplot"].strip().split()
        wavelength = f'{v["_diffrn_radiation_wavelength"]} Å'
        intensities = v["_pd_calc_intensity_total"]
        prefix, nbins = "_pd_proc_2theta_range", len(intensities)
        start, inc = float(v[f"{prefix}_min"]), float(v[f"{prefix}_inc"])
        # Derive the 2θ grid from the number of intensities rather than from
        # np.arange(start, end, inc): arange excludes the end point (and is
        # length-unstable for float steps), which gave one bin too few.
        # Starting the grid at 0 instead, as before, made the lengths match
        # but shifted every 2θ value down by one increment.
        two_theta = start + inc * np.arange(nbins)
        spectrum = DataFrame({"2θ": two_theta, "intensity": intensities}).set_index("2θ")
        spectrum.attrs = {"name": "powder diffraction", "title": "Powder Diffraction Pattern"}
        contributions.append({
            "identifier": identifier, "formula": v["_chemical_formula"],
            "data": {"type": typ, "date": date, "wavelength": wavelength},
            "tables": [spectrum], "attachments": [path]
        })

# Display how many contributions were assembled.
len(contributions)

In [None]:
# Re-upload from scratch. The calls below hit the remote service and are order-dependent.
# NOTE(review): presumably this removes every existing contribution of the
# client's project - confirm before running against production data.
client.delete_contributions()
# Register the data columns and their units defined alongside the contributions.
client.init_columns(columns)
# NOTE(review): per_request looks like the batch size per API request and
# ignore_dupes like a switch to skip duplicate checks - confirm in the client docs.
client.submit_contributions(contributions, ignore_dupes=True, per_request=6)
# This shouldn't be necessary, but the columns need to be re-initialized after
# submission, likely due to a bug in the API server.
client.init_columns(columns) 