In [30]:
import json
from typing import Any, Union

In [2]:
# set up data provenance

# NOTE(review): machine-specific absolute path; it also shadows the builtin `dir`
# and is not referenced by the cells visible in this notebook -- confirm it is still needed.
dir = '/home/nathan/Documents/nomad-FAIR/dependencies/parsers/electronic/electronicparsers/fhiaims/'
# rank value per tier (ranked by descending hierarchy)
tier_ranking = dict(light=3, tight=2, really_tight=1)
# tier names ordered by ascending rank value: really_tight, tight, light
tiers = sorted(tier_ranking, key=lambda tier_name: tier_ranking[tier_name])


In [54]:
def filter_native_quantities(elements: dict[str, Any]) -> dict[str, Any]:
    """
    Keep only the application quantities, i.e. the tags starting with ``x_``.

    Values are handled as follows:
    - a list made up solely of dictionaries is filtered recursively, entry by entry
      (an empty list therefore stays an empty list);
    - any other list is dropped from the result;
    - every non-list value is copied over unchanged.
    """
    kept: dict[str, Any] = {}
    for tag, element in elements.items():
        # skip everything that is not an application quantity
        if tag[:2] != 'x_':
            continue
        if not isinstance(element, list):
            kept[tag] = element
            continue
        # lists are only retained when every entry is itself a dictionary
        if all(isinstance(entry, dict) for entry in element):
            kept[tag] = [filter_native_quantities(entry) for entry in element]
    return kept

In [59]:
def replace_indexed_quantities(elements: dict[str, Any]) -> dict[str, Any]:
    """
    Replace indexed quantities with a list of comparable entries.

    A tag ending in one or more decimal digits (e.g. ``foo1``, ``foo2``, ``foo10``)
    is treated as an indexed quantity: the trailing digits are the index and the
    remainder is the new tag. All values sharing a new tag are collected into one
    list ordered by index. Tags without a trailing digit are copied unchanged.

    This implementation assumes that all indexes are present: gaps are not
    represented, the values are simply ordered by their index.

    Args:
        elements: mapping of quantity tag to value.

    Returns:
        A new dictionary holding the non-indexed quantities first, followed by
        one list-valued entry per group of indexed quantities.
    """
    digits = '0123456789'
    filtered_elements: dict[str, Any] = {}
    # new tag -> (index, value) pairs; values are stored as payload rather than as
    # dictionary keys so that duplicates and unhashable values survive
    indexed_quantities: dict[str, list[tuple[int, Any]]] = {}
    for tag, element in elements.items():
        new_tag = tag.rstrip(digits)
        if new_tag == tag:
            # no trailing digit: a regular quantity
            filtered_elements[tag] = element
            continue
        # parse the full numeric suffix so multi-digit indexes stay in one group
        index = int(tag[len(new_tag):])
        indexed_quantities.setdefault(new_tag, []).append((index, element))
    for new_tag, entries in indexed_quantities.items():
        # sort on the index only; the sort is stable and never compares the values
        entries.sort(key=lambda entry: entry[0])
        filtered_elements[new_tag] = [value for _, value in entries]
    return filtered_elements

In [66]:
def filter_quantity_name(element: dict[str, Any], quantity_names: list[str]) -> dict[str, Any]:
    """
    Return a copy of `element` without the entries whose tag is listed in `quantity_names`.

    The input dictionary is left untouched and the order of the remaining entries is kept.
    """
    return {tag: value for tag, value in element.items() if tag not in quantity_names}

In [69]:
# read in and process all data

# identifying quantities that are stripped from every species entry before it is stored
filtered_quantities = [
    'x_fhi_aims_controlIn_nucleus',
    'x_fhi_aims_controlIn_mass',
    'x_fhi_aims_controlIn_species_name',
    'x_fhi_aims_controlIn_angular_grids_method',
]  # filter out other identifiers

# species name -> tier name -> processed quantities
all_tiers: dict[str, dict[str, Union[str, Any]]] = {}
for tier_name in tiers:
    # NOTE(review): the file is resolved relative to the current working directory;
    # the `dir` path defined in the set-up cell is not used here -- confirm this is intended.
    filename = f'{tier_name}.json'
    # only the read needs the file handle; `with` closes it on exit, so no explicit close
    with open(filename, 'r') as f:
        tier_data = json.load(f)
    data = tier_data['sections']['section_run-0']['sections']['section_method-0']['x_fhi_aims_section_controlIn_basis_set']
    for element in data:
        # the species name is read before it gets filtered out of the stored entry
        element_name = element['x_fhi_aims_controlIn_species_name']
        all_tiers.setdefault(element_name, {})[tier_name] = replace_indexed_quantities(
            filter_native_quantities(
                filter_quantity_name(element, filtered_quantities)
            )
        )

In [70]:
# sample test the final result: display the processed 'light' tier entry of species 'H'

all_tiers['H']['light']

{'x_fhi_aims_controlIn_radial_multiplier': 1,
 'x_fhi_aims_controlIn_outer_grid': 302.0,
 'x_fhi_aims_controlIn_l_hartree': 4,
 'x_fhi_aims_controlIn_basis_dep_cutoff': 0.0001,
 'x_fhi_aims_section_controlIn_basis_func': [{'x_fhi_aims_controlIn_basis_func_l': 's',
   'x_fhi_aims_controlIn_basis_func_radius': 1.0,
   'x_fhi_aims_controlIn_basis_func_type': 'valence',
   'x_fhi_aims_controlIn_basis_func_n': 1},
  {'x_fhi_aims_controlIn_basis_func_l': 's',
   'x_fhi_aims_controlIn_basis_func_radius': 0.5,
   'x_fhi_aims_controlIn_basis_func_type': 'ion_occ',
   'x_fhi_aims_controlIn_basis_func_n': 1},
  {'x_fhi_aims_controlIn_basis_func_l': 's',
   'x_fhi_aims_controlIn_basis_func_radius': 2.1,
   'x_fhi_aims_controlIn_basis_func_type': 'hydro',
   'x_fhi_aims_controlIn_basis_func_n': 2},
  {'x_fhi_aims_controlIn_basis_func_l': 'p',
   'x_fhi_aims_controlIn_basis_func_radius': 3.5,
   'x_fhi_aims_controlIn_basis_func_type': 'hydro',
   'x_fhi_aims_controlIn_basis_func_n': 2}],
 'x_fhi_aim

In [71]:
# write the data to disk
# the `with` statement closes the file on exit, so no explicit close is needed
with open('native_tier_references.json', 'w') as f:
    json.dump(all_tiers, f, indent=4)