In [None]:
import os, json, tarfile
from mpcontribs.client import Client
from urllib.request import urlretrieve
from monty.json import MontyDecoder
from unflatten import unflatten
from tqdm.notebook import tqdm

In [None]:
# Name of the MPContribs project that every cell below reads from / writes to.
name = 'jarvis_dft'
# MPContribs API client, constructed with no arguments here.
# NOTE(review): presumably host/credentials come from the client's own defaults — confirm.
client = Client()

**Retrieve and update project info**

In [None]:
# Project settings update, left commented out so that running the cell only
# reads the project; uncomment to apply these settings.
# client.projects.update_entry(pk=name, project={
#     'long_title': '2D & 3D DFT Calculations by NIST',
#     'unique_identifiers': False
# }).result()
# Fetch the project entry and display the result of its `pretty()` helper as
# the cell output.
client.get_project(name).pretty()

**Configure and download raw data**

In [None]:
# Dataset flavours to process; each one maps to its own archive.
dimensions = ["2d", "3d"]
# Archive file name template; `{}` is filled with a dimension later on.
tgz = "jdft_{}.json.tgz"

config = {
    # `tgz` is interpolated right away, which leaves its own `{}` placeholder
    # in place for a later `.format(dim)`.
    "file": f"https://www.ctcms.nist.gov/~knc6/{tgz}",
    # URL template for a record's details page; `{}` is filled with its `jid`.
    "details": "https://www.ctcms.nist.gov/~knc6/jsmol/{}.html",
    # Raw record key -> display name (and unit, where one applies).
    # 'mpid' is not listed: it is used as the contribution identifier instead.
    "columns": {
        raw_key: {"name": label, **({"unit": unit} if unit else {})}
        for raw_key, label, unit in (
            ("jid", "details", None),
            ("fin_en", "E", "meV"),
            ("exfoliation_en", "Eₓ", "eV"),
            ("form_enp", "ΔH", "eV"),
            ("op_gap", "ΔEⱽᴰᵂ", "meV"),
            ("mbj_gap", "ΔEᴹᴮᴶ", "meV"),
            ("kv", "Kᵥ", "GPa"),
            ("gv", "Gᵥ", "GPa"),
            ("magmom", "µ", "µᵇ"),
        )
    },
}

In [None]:
# Directory in which the downloaded archives are cached. Set the
# MPCONTRIBS_DATA_DIR environment variable to point it somewhere else; the
# fallback is the location this notebook was originally written against.
dbdir = os.environ.get(
    "MPCONTRIBS_DATA_DIR",
    "/Users/patrick/gitrepos/mp/MPContribs/mpcontribs-data"
)
# urlretrieve does not create missing directories, so make sure it exists.
os.makedirs(dbdir, exist_ok=True)
raw_data = {}  # as read from raw files, keyed by dimension

for dim in dimensions:
    url = config["file"].format(dim)
    dbfile = url.rsplit('/', 1)[-1]  # last path component = archive file name
    dbpath = os.path.join(dbdir, dbfile)

    if not os.path.exists(dbpath):
        print('downloading', dbpath, '...')
        # Download under a temporary name and rename only on success, so an
        # interrupted transfer is never mistaken for a complete cached archive
        # by the `os.path.exists` check on the next run.
        tmppath = dbpath + ".part"
        urlretrieve(url, tmppath)
        os.replace(tmppath, dbpath)

    with tarfile.open(dbpath, "r:gz") as tar:
        # Take the first regular file instead of blindly using member 0:
        # `extractfile` returns None for directory entries, and iterating
        # lazily avoids scanning the whole archive up front.
        member = next((m for m in tar if m.isfile()), None)
        if member is None:
            raise ValueError(f"no regular file found in {dbpath}")
        # NOTE(review): MontyDecoder may turn JSON dicts into Python objects;
        # only use it on archives from a trusted source.
        with tar.extractfile(member) as fileobj:
            raw_data[dim] = json.load(fileobj, cls=MontyDecoder)

    print(dim, len(raw_data[dim]))

**Create contributions**

In [None]:
# Build one contribution dict per raw record, across all dimensions.
contributions = []
ncontribs = sum(len(raw_data[dim]) for dim in dimensions)

with tqdm(total=ncontribs) as pbar:
    for dim in dimensions:
        for rd in raw_data[dim]:
            # Collect the display-name -> value mapping for this record.
            dct = {}
            for k, col in config['columns'].items():
                hdr, unit = col['name'], col.get('unit')

                if k == 'jid':
                    # The column name ('details') doubles as the config key
                    # holding the URL template for the details page.
                    dct[hdr] = config[hdr].format(rd[k])
                    continue

                if k not in rd:
                    continue

                value = rd[k]
                if not unit:
                    dct[hdr] = value
                    continue

                # Columns with a unit must hold a number: skip missing values
                # and anything float() rejects, so strings such as "None meV"
                # are never emitted. A plain 0 is a valid value and is kept.
                if value is None or value == '':
                    continue
                try:
                    float(value)
                except (TypeError, ValueError):
                    continue
                dct[hdr] = f'{value} {unit}'

            contrib = {
                'project': name, 'is_public': True,
                'identifier': rd["mpid"],
                'data': {'type': dim.upper()}
            }
            # Merge once per record, after all columns have been collected.
            contrib["data"].update(unflatten(dct))
            contrib["structures"] = [rd['final_str']]
            contributions.append(contrib)
            pbar.update(1)

# make sure that contributions with all columns come first
# (the sort is stable, so records with equally many columns keep their order)
contributions = sorted(
    contributions, key=lambda c: len(c["data"]), reverse=True
)

**Submit contributions**

In [None]:
# need to delete contributions first due to unique_identifiers=False
# NOTE(review): presumably re-submitting without deleting would add duplicates,
# since identifiers are not unique in this project — confirm.
# WARNING: by its name this call removes the project's existing contributions;
# run this cell only when a full re-upload is intended.
client.delete_contributions(name)
# Upload everything built above. `per_page=100` is passed to the client;
# presumably the number of contributions per request — TODO confirm.
client.submit_contributions(contributions, per_page=100)

**Query contributions**

In [None]:
# Example query against the contributions endpoint. The double-underscore
# suffixes (`__contains`, `__gte`) look like filter operators and the
# underscore-prefixed keys like query controls — TODO confirm against the
# MPContribs API documentation.
query = {
    "project": name,
    "formula__contains": "Co",
    "data__type__contains": "3D",
    "data__Kᵥ__value__gte": 0,
    "_order_by": "data__ΔEⱽᴰᵂ__value",
    "order": "desc",
    # Fields requested in the response; "structures" is indexed by the next cell.
    "_fields": [
        "id", "identifier", "formula",
        "data.type", "data.ΔEⱽᴰᵂ.value",
        "data.ΔEᴹᴮᴶ.value", "data.Kᵥ.value",
        "structures"
    ],
    "_limit": 10
}    
# Expand the dict into keyword arguments and wait for the response.
resp = client.contributions.get_entries(**query).result()

In [None]:
# ID of the first structure attached to the first entry in `resp["data"]`.
# Raises IndexError if the query returned no entries or that entry has no
# structures.
sid = resp["data"][0]["structures"][0]["id"]
# Fetch that structure by ID; the returned object is the cell output.
client.get_structure(sid)