In [1]:
import intake
import sys
import logging
from collections.abc import Iterable
import dask
# Configure the root logger and raise its threshold to ERROR, so the
# logging.info / logging.debug calls in this notebook are not emitted while
# logging.critical messages still are.
logging.basicConfig()
logging.getLogger().setLevel(logging.ERROR)

# Open the intake catalog; the path is relative, so it is resolved against the
# working directory the notebook runs in.
url = "../../catalog.yaml"
cat = intake.open_catalog(url)

In [2]:
def filter_warnings():
    """Install "ignore" filters for two chunk-related warning messages.

    Each message is registered with ``warnings.filterwarnings``, which matches
    it against the start of the warning text.
    """
    import warnings

    ignored_messages = (
        "The specified Dask chunks separate the stored chunks along",
        "Increasing number of chunks by factor of 10",
    )
    for message in ignored_messages:
        warnings.filterwarnings("ignore", message)
filter_warnings()

In [3]:
class BadDataset(Exception):
    """Raised when a catalog entry fails for one or more parameter combinations.

    The message is the newline-joined list of individual failures collected
    by ``test_all_user_parameters``.
    """

def tree(cat, level=0, stream=sys.stdout):
    """Recursively print one summary line per entry found below ``cat``.

    For every child the line contains the child's name, its user parameters
    with their allowed values, the metadata summary and the variable count.
    Each child is also opened with every user-parameter combination; failures
    are logged as critical messages and do not stop the traversal.

    Parameters
    ----------
    cat : mapping-like
        Object that can be iterated to obtain child names and indexed with a
        name to obtain the child entry.
    level : int
        Nesting depth; each level indents the output by three spaces.
    stream : file-like
        Destination of the printed lines. It is forwarded to the recursive
        calls so that nested levels are written to the same stream.

    Raises
    ------
    TypeError
        Any ``TypeError`` whose message does not contain
        "object is not iterable".
    """
    prefix = " " * (3 * level)
    try:
        for child in list(cat):
            logging.info(child)
            # Look the entry up once instead of on every use.
            entry = cat[child]
            parameters = [
                f'{p["name"]}: {p["allowed"]}'
                for p in entry.describe().get("user_parameters", [])
            ]
            parameter_str = f" ({', '.join(parameters)})" if parameters else ""
            mds = get_metadata_string(entry)
            dask_test = test_dask(entry)
            try:
                test_all_user_parameters(entry)
            except BadDataset as bd:
                logging.critical(f' Error in {child}: {bd}')
            print(prefix + str(child) + parameter_str + mds + dask_test, file=stream)
            # Pass the stream on; otherwise nested levels would always go to
            # the default stream regardless of what the caller asked for.
            tree(entry, level + 1, stream)

    except TypeError as e:  # we are probably trying to iterate a zarr file or so.
        if "object is not iterable" not in str(e):
            raise


def get_metadata_string(entry):
    """Return a one-line metadata summary for a non-iterable entry.

    Iterable entries yield an empty string. For anything else the summary is
    built from the ``project``, ``experiment_id``, ``source_id`` and
    ``simulation_id`` keys of ``entry.metadata``. If one of them is missing,
    a critical message is logged and a marker string followed by the raw
    metadata is returned instead.
    """
    if isinstance(entry, Iterable):
        return ""

    md = entry.metadata
    try:
        return f"\tP: {md['project']}, E: {md['experiment_id']}, Model: {md['source_id']}, ID: {md['simulation_id']}"
    except KeyError as e:
        logging.critical(f"Cannot process {entry}. Metadata key {str(e)} missing")
        return "### METADATA MISSING ### " + str(md)

def test_dask(entry):
    if not isinstance(entry, Iterable):
        try: 
            with dask.config.set(**{'array.slicing.split_large_chunks': True}):
                vars = list(entry.to_dask().variables.keys())
        except Exception as e:
            logging.critical(f"Cannot convert {entry} to dask.")
            raise e

        return f", {len(vars)} Variables"
    else:
        return ""
    

def iterate_user_parameters(entry, fixed_keys = None):
    """Yield every combination of allowed user-parameter values for ``entry``.

    Parameters
    ----------
    entry : object
        Must provide ``describe()`` returning a dict. Its optional
        ``user_parameters`` item is a list of dicts with a ``name`` and an
        ``allowed`` list of values.
    fixed_keys : dict, optional
        Parameter values that are already chosen. They are included unchanged
        in every yielded combination. The dict itself is never modified.

    Yields
    ------
    dict
        A new dict per combination, mapping parameter name to value. The
        first open parameter varies slowest. Nothing is yielded when no
        parameter is left open.
    """
    # Work on a copy so the caller's dict is left untouched.
    fixed = dict(fixed_keys) if fixed_keys else {}
    user_parameters = entry.describe().get('user_parameters', [])
    # Select open parameters by name, not by position, so pre-fixed keys may
    # refer to any parameter and are never overwritten.
    remaining = [p for p in user_parameters if p['name'] not in fixed]
    if not remaining:
        return
    param, *rest = remaining
    logging.debug(param)
    for value in param['allowed']:
        # A fresh dict per value: consumers may keep the yielded results
        # without seeing them change on the next iteration.
        chosen = {**fixed, param['name']: value}
        logging.debug(chosen)
        if rest:
            yield from iterate_user_parameters(entry, chosen)
        else:
            yield chosen

def test_all_user_parameters(entry):
    """Open ``entry`` with every user-parameter combination.

    Each combination from ``iterate_user_parameters`` is passed to the entry
    and converted with ``to_dask()``. A ``KeyError`` during that step is
    recorded and the loop continues with the next combination.

    Raises
    ------
    BadDataset
        If at least one combination failed; the message lists all failures,
        one per line.
    """
    errors = []
    for x in iterate_user_parameters(entry):
        try:
            ds = entry(**x).to_dask()
        except KeyError as e:
            errors.append(f"could not convert entry with params {x} to dask: KeyError: {e}")
        else:
            logging.debug(f'found {len(ds.variables)} variables with {len(ds.time)} time steps for {x}')
    if errors:
        raise BadDataset("\n".join(map(str, errors)))
    

In [4]:
# Walk the ICON sub-catalog; one summary line per entry is printed to the
# default stream (stdout).
tree(cat.ICON)

ngc4008 (time: ['PT15M', 'PT3H', 'P1D'], zoom: [10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0])	P: nextGEMS, E: nextgems_prefinal, Model: ICON-ESM, ID: ngc4008, 111 Variables
ngc4007 (time: ['PT15M', 'PT3H', 'P1D'], zoom: [9, 8, 7, 6, 5, 4, 3, 2, 1, 0])	P: nextGEMS, E: nextgems_prefinal, Model: ICON-ESM, ID: ngc4007, 111 Variables
ngc4006 (time: ['PT15M', 'PT3H', 'P1D'], zoom: [9, 8, 7, 6, 5, 4, 3, 2, 1, 0])	P: nextGEMS, E: nextgems_prefinal, Model: ICON-ESM, ID: ngc4006, 109 Variables
ngc4005 (time: ['PT15M', 'PT30M', 'PT1H', 'PT3H', 'PT6H', 'P1D'], zoom: [9, 8, 7, 6, 5, 4, 3, 2, 1, 0])	P: nextGEMS, E: nextgems_prefinal, Model: ICON-ESM, ID: ngc4005, 112 Variables
ngc3028 (time: ['PT30M', 'PT3H', 'P1D'], zoom: [10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0])	P: nextGEMS, E: nextgems_cycle3, Model: ICON-ESM, ID: ngc3028, 102 Variables
ngc3028_bc_land	P: nextGEMS, E: nextgems_cycle3, Model: ICON-ESM, ID: ngc3028, 43 Variables
ngc3026_WILL_BE_DELETED (time: ['PT30M', 'PT3H', 'P1D'], zoom: [10, 9, 8, 7, 6, 5, 4, 3