In [None]:
import os, gzip, json
from mpcontribs.client import Client, chunks
from urllib.request import urlretrieve
from monty.json import MontyDecoder
from unflatten import unflatten
from tqdm.notebook import tqdm

In [None]:
# Project identifier that every later cell passes to the client.
name = "2dmatpedia"

# Client built with its defaults.
# NOTE(review): presumably picks up credentials from the environment — confirm.
client = Client()

**Update and retrieve project info**

In [None]:
# Switch off `unique_identifiers` on the project before contributions are built.
# NOTE(review): presumably this lets several contributions share one
# identifier — confirm against the MPContribs documentation.
client.projects.update_entry(
    pk=name,
    project={"unique_identifiers": False},
).result()

# Last expression of the cell: show the project record after the update.
client.get_project(name).pretty()

**Create contributions**

In [None]:
# Static settings for the import:
#   file    - URL of the gzipped database dump
#   details - URL template; `{}` is filled with an entry's `material_id`
#   columns - maps a raw field name to the contribution column name and,
#             optionally, the unit string attached to the value
# NOTE(review): units are attached as plain labels by the cell that builds the
# contributions; no conversion happens there — confirm that the raw values
# really are in the units declared here.
config = {
    "file": "http://www.2dmatpedia.org/static/db.json.gz",
    "details": "http://www.2dmatpedia.org/2dmaterials/doc/{}",
    "columns": {
        "material_id": {"name": "details"},
        "exfoliation_energy_per_atom": {"name": "Eₓ", "unit": "eV"},
        "energy_per_atom": {"name": "E", "unit": "meV"},
        "energy_vdw_per_atom": {"name": "ΔEⱽᴰᵂ", "unit": "meV"},
        "bandgap": {"name": "ΔE", "unit": "meV"},
    },
}

In [None]:
# Directory that caches the database dump. Set the MPCONTRIBS_DATA_DIR
# environment variable to point somewhere else; the fallback is the path that
# was previously hard-coded here.
dbdir = os.environ.get(
    "MPCONTRIBS_DATA_DIR",
    "/Users/patrick/gitrepos/mp/MPContribs/mpcontribs-data",
)
dbfile = config['file'].rsplit('/', 1)[-1]  # last path component of the URL
dbpath = os.path.join(dbdir, dbfile)
raw_data = []  # as read from raw files

if not os.path.exists(dbpath):
    print('downloading', dbpath, '...')
    os.makedirs(dbdir, exist_ok=True)
    # Download under a temporary name and rename only once the transfer has
    # finished, so an interrupted download never leaves a truncated file at
    # `dbpath` that the existence check above would accept on the next run.
    tmppath = dbpath + '.part'
    urlretrieve(config['file'], tmppath)
    os.replace(tmppath, dbpath)

# The dump is read line by line, one JSON document per line.
with gzip.open(dbpath, 'rb') as f:
    for line in f:
        if not line.strip():
            continue  # tolerate blank lines instead of failing in json.loads
        raw_data.append(json.loads(line, cls=MontyDecoder))

len(raw_data)

In [None]:
# Build the list of contribution dicts from the raw database entries.
contributions = []
id_prefixes = ('mp-', 'mvc-')  # only source IDs with these prefixes are kept
max_contributions = 1000  # stop once this many contributions are prepared

for rd in tqdm(raw_data):
    identifier = rd["source_id"]

    if not identifier.startswith(id_prefixes):
        continue

    data = {}

    for k, col in config['columns'].items():
        hdr, unit = col['name'], col.get('unit')
        if k == 'material_id':
            # The column name ('details') doubles as the config key that
            # holds the URL template for this entry.
            data[hdr] = config[hdr].format(rd[k])
        elif k in rd:
            if unit:
                # Only values that parse as numbers get a unit attached.
                # float(None) raises TypeError rather than ValueError, so both
                # are caught to skip the column instead of aborting the loop.
                try:
                    float(rd[k])
                except (TypeError, ValueError):
                    continue
                data[hdr] = f'{rd[k]} {unit}'
            else:
                data[hdr] = rd[k]

    contributions.append({
        'project': name, 'is_public': True,
        'identifier': identifier, 'data': data,
        'structures': [rd['structure']]
    })

    if len(contributions) >= max_contributions:
        break

len(contributions)

In [None]:
# Clear the project before uploading.
# NOTE(review): presumably removes every existing contribution of the
# project — confirm before running against production data.
client.delete_contributions(name)

# Submit the prepared contributions in groups produced by `chunks(..., n=100)`.
for chunk in chunks(contributions, n=100):
    client.submit_contributions(
        chunk,
        per_page=20,
        ignore_dupes=True,
        skip_dupe_check=True,
    )