In [None]:
from mpcontribs.client import load_client
from time import sleep, time
import gzip, json, os
from tqdm import tqdm
import numpy as np
from pandas import DataFrame
from collections import defaultdict

In [None]:
# Name of the MPContribs project that every query and upload in this notebook targets.
project = 'carrier_transport'
# Read the API key from the MPCONTRIBS_API_KEY environment variable when it is set,
# so a real credential never has to be saved in the notebook; otherwise fall back
# to the placeholder, which has to be replaced by hand.
client = load_client(os.environ.get('MPCONTRIBS_API_KEY', 'your-api-key-here'))

Get and update project info (see https://portal.mpcontribs.org/carrier_transport)

In [None]:
# Optional project-level calls, left commented out; uncomment one line to run it.
# client.projects.get_entry(pk=project, _fields=['_all']).result()
# client.projects.update_entry(pk=project, project={'long_title': 'Electronic Transport Properties'}).result()

Create contributions with tables

In [None]:
# Directory that is scanned for the gzipped JSON input files.
input_dir = '/project/projectdirs/matgen/fricci/transport_data/coarse'

# Quantities looked up per doping type: (key in the input file, display name, unit).
variables = [
    dict(zip(('key', 'name', 'unit'), spec))
    for spec in (
        ('cond_eff_mass', 'mₑᶜᵒⁿᵈ', 'mₑ'),
        ('seebeck_doping', 'S', 'µV/K'),
        ('cond_doping', 'σ', '1/Ω/m/s'),
    )
]

# Labels for the individual eigenvalues; the last label is used for their mean.
eigs_keys = 'ε₁ ε₂ ε₃ ε̄'.split()

# Properties turned into tables: key in the input file -> [display label, unit].
props = dict(
    seebeck_doping=['S', 'µV/K'],
    cond_doping=['σ', '1/Ω/m/s'],
    kappa_doping=['κₑ', 'W/K/m/s'],
)

# Key under which the power-factor values are stored in a contribution.
pfkey = '⟨S²σ⟩'

In [None]:
# Collect the regular files in input_dir.
# os.scandir() yields entries in arbitrary order, while the upload cell processes
# the slice files[start:start+interval]; sorting by name keeps those slices stable
# from one run to the next. The `with` block closes the directory iterator; the
# returned DirEntry objects stay usable afterwards.
with os.scandir(input_dir) as entries:
    files = sorted(
        (entry for entry in entries if entry.is_file()),
        key=lambda entry: entry.name,
    )
len(files)

In [None]:
# Build one contribution (plus its tables) for each input file in the window
# files[start:start+interval] and upload it, replacing any contribution that
# already exists for the same identifier.
start = 200
interval = 1500
for idx, obj in enumerate(files[start:start+interval]):
    # print the elapsed wall time after every batch of 25 files
    if not idx%25:
        if idx > 0:
            stop_time = time()
            duration = stop_time-start_time
            print(idx, duration)
        start_time = time()

    contrib = {'project': project, 'identifier': None, 'data': {}}
    with gzip.open(obj.path, 'rb') as input_file:
        data = json.loads(input_file.read())

    contrib['identifier'] = data['mp_id']
    # only the first entry of the `gap` and `task_id` mappings is used
    task_type = list(data['gap'].keys())[0]
    gap = list(data['gap'].values())[0]
    contrib['data']['task'] = list(data['task_id'].values())[0]
    contrib['data']['type'] = task_type
    contrib['data']['metal'] = 'Yes' if gap < 0.1 else 'No'
    contrib['data']['T'] = '300 K'
    contrib['data']['doplvl'] = '1e18 cm⁻³'
    contrib['data']['ΔE'] = ' '.join([str(gap), 'eV'])
    contrib['data']['V'] = ' '.join([str(data['volume']), 'Å³'])

    # Squared Seebeck tensor per doping type. It is keyed by doping type so that
    # the power factor of each type is built from the Seebeck tensor of the same
    # type (a single shared variable would hold only the last type processed).
    seebeck_sq = {}
    for v in variables:
        for doping_type in ['p', 'n']:
            # value at '300' / '1e+18' for this doping type; empty if absent
            d = data[task_type][v['key']].get(doping_type, {}).get('300', {}).get('1e+18', {})
            if not d:
                continue

            # the entry is either a bare list of eigenvalues or a dict holding them
            eigs = d if isinstance(d, list) else d['eigs']
            key = '|'.join([v['name'], doping_type])
            contrib['data'][key] = dict(
                (eigs_keys[neig], ' '.join([str(eig), v['unit']]))
                for neig, eig in enumerate(eigs)
            )
            contrib['data'][key][eigs_keys[-1]] = ' '.join([str(np.mean(eigs)), v['unit']])

            if v['key'] == 'seebeck_doping':
                seebeck_sq[doping_type] = np.dot(d['tensor'], d['tensor'])
            elif v['key'] == 'cond_doping' and doping_type in seebeck_sq:
                # the power factor needs the matching Seebeck tensor; it is
                # skipped when that tensor was not available for this type
                pf = np.mean(np.linalg.eigh(np.dot(seebeck_sq[doping_type], d['tensor']))[0]) * 1e-8
                contrib['data'].setdefault(pfkey, {})[doping_type] = ' '.join([str(pf), 'µW/cm/K²/s'])

    # build tables and extreme values for seebeck, conductivity and kappa
    tables = {}
    for prop_name, (label, unit) in props.items():
        for doping_type in ['p', 'n']:
            prop = data[task_type][prop_name][doping_type]
            prop_averages, dopings, columns = [], None, ['T [K]']
            temps = sorted(map(int, prop.keys()))
            for temp in temps:
                row = [temp]
                # the doping levels of the first temperature define the columns
                if dopings is None:
                    dopings = sorted(map(float, prop[str(temp)].keys()))
                for doping in dopings:
                    doping_str = f'{doping:.0e}'
                    # column headers are only collected while the first row is built
                    if len(columns) <= len(dopings):
                        columns.append(f'{doping_str} cm⁻³ [{unit}]')
                    eigs = prop[str(temp)][doping_str]['eigs']
                    row.append(np.mean(eigs))
                prop_averages.append(row)

            table_name = f'{label}({doping_type})'
            np_prop_averages = np.array(prop_averages)
            tables[table_name] = DataFrame(np_prop_averages, columns=columns)

            # drop the temperature column before searching for the extremum
            arr_prop_avg = np_prop_averages[:, 1:]
            max_v = np.max(arr_prop_avg)
            # the minimum is used instead for n-type seebeck and for kappa
            if prop_name[0] == 's' and doping_type == 'n':
                max_v = np.min(arr_prop_avg)
            if prop_name[0] == 'k':
                max_v = np.min(arr_prop_avg)
            arg_max = np.argwhere(arr_prop_avg==max_v)[0]

            elabel = label + 'ᵉ'
            edoping_type = 'ⁿ' if doping_type == 'n' else 'ᵖ'
            # merge into the existing entry so the p-type values are kept when
            # the n-type values are added under the same label
            contrib['data'].setdefault(elabel, {}).update({
                doping_type: ' '.join([str(max_v), unit]),
                f'T{edoping_type}': ' '.join([str(temps[arg_max[0]]), 'K']),
                f'c{edoping_type}': ' '.join([str(dopings[arg_max[1]]), 'cm⁻³']),
            })

    # upload with up to three attempts and a growing pause between them
    ntries = 0
    while ntries < 3:
        try:
            print(start+idx, contrib['identifier'])
            res = client.contributions.get_entries(
                project=project, identifier=contrib['identifier'], _fields=['id']
            ).result()
            cids = [d['id'] for d in res['data']]

            # remove existing contributions for this identifier before re-creating
            for cid in cids:
                client.contributions.delete_entry(pk=cid).result()
                #print('contribution', cid, 'deleted')

            cid = client.contributions.create_entry(contribution=contrib).result()['id']
            #print('contribution', cid, 'created')

            for name, table in tables.items():
                # every table cell is submitted as a string
                for col in table.columns:
                    table[col] = table[col].astype(str)
                entry = table.to_dict(orient='split')
                entry.pop('index')
                entry['contribution'] = cid
                entry['name'] = name
                tid = client.tables.create_entry(table=entry).result()['id']
                #print('table', tid, 'created')

            break
        except Exception as ex:
            ntries += 1
            print(ex)
            sleep(30*ntries)
    else:
        # all attempts failed: stop processing the remaining files
        print('I give up.')
        break

Specific contribution

In [None]:
# Retrieve a single contribution by its id with all fields, then show its data.
contrib = client.contributions.get_entry(
    pk='5e4de5a2fce4e9a91ba1324f',
    _fields=['_all'],
).result()
contrib['data']

In [None]:
# Look up the first table attached to the contribution and retrieve it in full.
first_table = contrib['tables'][0]
tid = first_table['id']
client.tables.get_entry(
    pk=tid,
    _fields=['_all'],
).result()

Query by material identifiers

In [None]:
# Request identifier, formula and one data field for a fixed set of identifiers.
identifiers = ['mp-2715', 'mp-988', 'mp-9899']
identifier_query = dict(
    project=project,
    identifier__in=identifiers,
    _fields=['identifier', 'formula', 'data.<S>.p.value'],
)
client.contributions.get_entries(**identifier_query).result()

Query by values

In [None]:
# see https://portal.mpcontribs.org/<project> and
# https://api.mpcontribs.org/#/contributions/get_entries

# number of contributions requested per page (200 is the limit of retrievable
# contributions per page)
limit = 20
# data fields to retrieve
fields = ['<S>', '<σ>', '<S²σ>']
# field mask: the selected data fields plus formula and identifier
mask = ['data.' + field for field in fields] + ['formula', 'identifier']
# query filters; further examples that can be added:
#     'data__<σ>__p__lt': 2e15,
#     'data__<σ>__n__lt': 2e15
filters = dict(formula__contains='Li3')

# first page only -> see pagination
request = client.contributions.get_entries(
    project=project,
    _fields=mask,
    _limit=limit,
    **filters
)
contribs = request.result()

print('found', contribs['total_count'], 'materials')
contribs['data'][0]

Pagination

In [None]:
# Retrieve every contribution matching `filters`, one page of `limit` entries at a
# time, using the same call signature and response layout ('data', 'total_count')
# as the "Query by values" cell.
# only run this once you've optimized masks and filters to what you actually need (see above)
# NOTE(review): `_skip` is assumed to be the offset parameter that pairs with
# `_limit` - confirm against the API documentation before relying on this cell.
all_contribs = []
while True:
    contribs = client.contributions.get_entries(
        project=project, _fields=mask, _limit=limit, _skip=len(all_contribs), **filters
    ).result()
    page_entries = contribs['data']
    all_contribs.extend(page_entries)
    # stop on an empty page or once everything reported by the server is collected
    if not page_entries or len(all_contribs) >= contribs['total_count']:
        break

print('found', len(all_contribs), 'materials')
# show the last retrieved contribution (None when nothing matched)
all_contribs[-1] if all_contribs else None