In [None]:
import wget, json
from pathlib import Path
from string import capwords
from pybtex.database import parse_string
import pybtex.errors
from mpcontribs.client import Client
pybtex.errors.set_strict_mode(False)

In [None]:
# API client from mpcontribs.client, pointed at the ml-api.materialsproject.cloud host;
# used below to create one project entry per dataset.
client = Client(host='ml-api.materialsproject.cloud')

In [None]:
# Local cache location for matminer's dataset metadata file.
datadir = Path('/Users/patrick/gitrepos/mp/mpcontribs-data/')
fn = Path('dataset_metadata.json')
fp = datadir / fn

# Fetch the metadata only when it is not cached yet.
if not fp.exists():
    url = f'https://raw.githubusercontent.com/hackingmaterials/matminer/master/matminer/datasets/{fn}'
    # Make sure the target directory exists, then download straight to the
    # final path. Downloading into the current working directory and renaming
    # afterwards breaks when the directory is missing, when it lives on a
    # different filesystem, or when wget de-duplicates the filename because a
    # same-named file already exists in the working directory.
    datadir.mkdir(parents=True, exist_ok=True)
    wget.download(url, out=str(fp))

In [None]:
# Parse the dataset metadata into a dict keyed by dataset name. The context
# manager closes the file handle deterministically (the bare open() leaked it),
# and the encoding is pinned because JSON text is UTF-8 regardless of platform.
with open(fp, 'r', encoding='utf-8') as f:
    data = json.load(f)

In [None]:
# Punctuation mapping applied to raw column names before capitalization:
# '_' and '-' become '|', '(' becomes a space, and ')' is dropped.
_COLUMN_KEY_TABLE = str.maketrans({'_': '|', '-': '|', '(': ' ', ')': None})


def _column_key(col):
    """Return the key under which column `col` is stored in the project's columns dict.

    Punctuation is mapped via `_COLUMN_KEY_TABLE`, each whitespace-separated
    word is capitalized with `capwords`, and the spaces are then removed.
    """
    return capwords(col.translate(_COLUMN_KEY_TABLE)).replace(' ', '')


def _reference_label(key):
    """Return the short reference label derived from a BibTeX entry key.

    Keys starting with 'doi:' are labelled 'Zhuo2018'; any other key has its
    underscores removed and is passed through `capwords`. Labels starting with
    'C2' or 'Landolt' are replaced by fixed names. Anything longer than eight
    characters is finally shortened to its first four plus last four characters.
    """
    if key.startswith('doi:'):
        label = 'Zhuo2018'
    else:
        label = capwords(key.replace('_', ''))

    if label.startswith('C2'):
        label = 'Castelli2012'
    elif label.startswith('Landolt'):
        label = 'LB1997'

    if len(label) > 8:
        label = label[:4] + label[-4:]
    return label


def _reference_url(key, fields, label):
    """Return the link for a BibTeX entry, or None when none can be derived.

    Args:
        key: BibTeX entry key.
        fields: the entry's field mapping (supports `.get`).
        label: label computed by `_reference_label` for the same key.
    """
    if label == 'Citrine':
        return 'https://www.citrination.com'
    if key.startswith('doi:'):
        # The key itself carries the DOI after the 'doi:' prefix.
        return 'https://doi.org/' + key.split(':', 1)[-1]

    url = fields.get('url')
    if not url:
        # Entries without a `url` field previously produced a reference whose
        # url was None; fall back to the entry's DOI when it provides one.
        doi = fields.get('doi')
        if doi:
            url = doi if doi.startswith('http') else 'https://doi.org/' + doi
    return url


# Create one project per dataset whose name starts with 'matbench_'.
for name, info in data.items():
    if not name.startswith('matbench_'):
        continue

    columns = {_column_key(col): text for col, text in info['columns'].items()}

    project = {
        'name': name,
        'is_public': True,
        'owner': 'ardunn@lbl.gov',
        'title': name, # TODO update and set long_title
        'authors': 'A. Dunn, A. Jain',
        'description': info['description'],
        'other': {
            'columns': columns,
            'entries': info['num_entries']
        },
        'references': [
            {'label': 'FigShare', 'url': info['url']}
        ]
    }

    print(name)
    for ref in info['bibtex_refs']:
        if name == "matbench_phonons":
            # Swap the long author-list key (which contains a space) for a
            # short one before parsing -- presumably the original key does not
            # parse cleanly; TODO confirm.
            ref = ref.replace(
                "petretto_dwaraknath_miranda_winston_giantomassi_rignanese_van setten_gonze_persson_hautier_2018",
                "petretto2018"
            )

        bib = parse_string(ref, 'bibtex')
        for key, entry in bib.entries.items():
            label = _reference_label(key)
            url = _reference_url(key, entry.fields, label)
            if not url:
                # NOTE(review): assumes a reference without a link is not
                # useful to submit -- skip it visibly instead of sending None.
                print(f'  skipping reference {label}: no url or doi in bibtex entry')
                continue
            project['references'].append(
                {'label': label, 'url': url}
            )

    try:
        print(client.projects.create_entry(project=project).result())
    except Exception as ex:
        # Best-effort: report the failure and continue with the next dataset.
        print(ex)  # TODO should use get_entry to check existence -> use update_entry if project exists