In [None]:
import json, os
from mpcontribs.client import Client
from pymatgen import Composition, Structure, MPRester
from tqdm.notebook import tqdm

In [None]:
# MPContribs project that every cell below reads from and writes to
name = 'MnO2_phase_selection'
# Both clients are created without explicit arguments.
# NOTE(review): presumably they pick up API keys/endpoints from the
# environment or local config - confirm before running on a fresh machine.
client = Client()
mpr = MPRester()

**Retrieve and update project info**

In [None]:
# Fetch the current project record; the cell output is whatever `.pretty()` returns
# (presumably a formatted rendering of the project info - confirm against the client docs)
client.get_project(name).pretty()

In [None]:
# Lookup table from the `phase` label carried by each hull entry to the name
# under which that phase's rows are stored in the formatted-entries file.
# The same table is also uploaded to the project record.
phase_names = dict([
    ('beta', 'Pyrolusite'),
    ('gamma', 'Intergrowth'),
    ('ramsdellite', 'Ramsdellite'),
    ('alpha', 'Hollandite'),
    ('lambda', 'Spinel'),
    ('delta', 'Layered'),
    ('other', 'Other'),
])

In [None]:
# Store the phase-name table on the project record and set the project's
# `unique_identifiers` flag to True.
# NOTE(review): the key below is written with U+2212 (MINUS SIGN), not an ASCII
# hyphen - confirm this is intentional before "fixing" it, since the stored
# field name would change.
client.projects.update_entry(pk=name, project={
    'other.phase−names': phase_names, 'unique_identifiers': True
}).result()

**Create contributions**

In [None]:
# Load every JSON file of the project's data directory into `data`, keyed by
# file name. Two of them are consumed further down:
# mp_contrib_phases: data/MPContrib_formatted_entries.json
# hull_states: data/MPContrib_hull_entries.json
#
# The directory can be overridden with the MNO2_DATA_DIR environment variable;
# the default remains the original author's local checkout.
data_dir = os.environ.get(
    'MNO2_DATA_DIR',
    '/Users/patrick/gitrepos/mp/MPContribs/mpcontribs-data/MnO2_phase_selection'
)
data = {}
# the context manager releases the directory handle even if a file fails to parse
with os.scandir(data_dir) as entries:
    for fn in entries:
        # skip sub-directories and stray non-JSON files (e.g. .DS_Store),
        # which would otherwise make json.load raise
        if not fn.is_file() or not fn.name.endswith('.json'):
            continue
        # JSON is UTF-8 by specification; do not depend on the locale default
        with open(fn, 'r', encoding='utf-8') as f:
            data[fn.name] = json.load(f)

In [None]:
# Fallback rows used when the formatted-entries file has no table for a phase.
# Each row is laid out as [formula, ΔH, GS flag, ΔHʰ]; the trailing '--' is a
# string placeholder, so no ΔHʰ value is emitted for these rows.
other = [
    [formula, enthalpy, 'Y', '--']
    for formula, enthalpy in (
        ('LiMnO2', -3.064),
        ('KMnO2', -2.222),
        ('Ca0.5MnO2', -2.941),
        ('Na0.5MnO2', -1.415),
    )
]

In [None]:
# Build at most one contribution per identifier from the hull entries.
#
# For every hull entry the matching row of the formatted-entries table (or of
# the `other` fallback list) supplies the `data` columns; entries without a
# matching row are skipped. The identifier is the first Materials Project id
# returned for the structure, or the integer formula when nothing matches.
identifiers, contributions = set(), []
formatted_entries = data['MPContrib_formatted_entries.json']

for hstate in tqdm(data['MPContrib_hull_entries.json']):
    phase = hstate['phase']
    composition = Composition.from_dict(hstate['c'])

    # Cheap local checks come first so that entries without tabulated data are
    # dropped before the remote structure lookup below is issued.
    phase_name = phase_names[phase]
    phase_data = formatted_entries.get(phase_name, other)
    if not phase_data:
        # print('no data found for', composition, phase_name)
        continue

    # first row whose formula equals this entry's composition, if any
    values = next(
        (row for row in phase_data if Composition(row[0]) == composition), None
    )
    if values is None:
        # print('no data found for', composition, phase)
        continue

    structure = Structure.from_dict(hstate['s'])
    mpids = mpr.find_structure(structure)
    comp = composition.get_integer_formula_and_factor()[0]
    identifier = mpids[0] if mpids else comp

    # only the first entry seen for an identifier is kept
    if identifier in identifiers:
        continue

    contrib_data = {'GS': values[2], 'ΔH': f'{values[1]} eV/mol'}
    # a string in the last column is a placeholder (e.g. '--'), not a number
    if not isinstance(values[3], str):
        contrib_data['ΔHʰ'] = f'{values[3]} eV/mol'

    contributions.append({
        'project': name, 'is_public': True, 'structures': [structure],
        'identifier': identifier, 'data': contrib_data,
    })
    identifiers.add(identifier)

# make sure that contributions with all columns come first
# (sorted() is stable, so the original order is kept within each group)
contributions = sorted(
    contributions, key=lambda x: len(x["data"]), reverse=True
)
len(contributions)

**Submit contributions**

In [None]:
# Remove the project's existing contributions, then upload the freshly built list.
# need to delete before re-submitting due to unique_identifiers=False
# NOTE(review): the project-update cell earlier in this notebook sets
# 'unique_identifiers': True, which contradicts the comment above - confirm
# which setting is intended and align the two.
client.delete_contributions(name)
# ignore_dupes=True ignores/skips duplicate structures
client.submit_contributions(contributions, ignore_dupes=True, limit=30)

**Query contributions**

In [None]:
# Columns requested for each returned contribution
query_fields = [
    "id", "identifier", "formula",
    "data.GS", "data.ΔH.value", "data.ΔHʰ.value"
]

# Contributions of this project whose formula contains "Mg", ordered by ΔH in
# descending order. Optional extra filters, disabled by default:
#     "data__GS__contains": "Y",
#     "data__ΔH__value__lte": -2,
query = dict(
    project=name,
    formula__contains="Mg",
    _order_by="data__ΔH__value",
    order="desc",
    _fields=query_fields,
)
client.contributions.get_entries(**query).result()