In [290]:
import collections
import logging
import re
import socket
import warnings

import yaml

# Keys that are passed to ``attach_to_config_and_remove`` for every loaded
# configuration: the YAML files listed under each key are merged into the
# configuration and the key itself is then deleted.
CONFIGS_TO_ALWAYS_ATTACH_AND_REMOVE = ['further_reading']

def yaml_file_to_dict(filepath):
    """
    Given a yaml file, returns a corresponding dictionary.

    If you do not give an extension, tries again after appending one.
    The candidates are tried in this order: the path as given, then with
    ``.yml``, ``.yaml``, ``.YML`` and ``.YAML`` appended.

    Parameters
    ----------
    filepath : str
        Where to get the YAML file from

    Returns
    -------
    dict
        A dictionary representation of the yaml file (whatever
        ``yaml.load`` produces for the first candidate that exists).

    Raises
    ------
    FileNotFoundError
        If none of the candidate paths exists.
    """
    for extension in ['', '.yml', '.yaml', '.YML', '.YAML']:
        candidate = filepath + extension
        try:
            with open(candidate) as yaml_file:
                # NOTE(review): if these files can ever come from an untrusted
                # source, prefer ``yaml.safe_load`` over ``FullLoader``.
                return yaml.load(yaml_file, Loader=yaml.FullLoader)
        except FileNotFoundError:
            # Pass the path as a logging argument so that the *full* candidate
            # (including the extension) ends up inside the message.
            logging.debug(
                "File not found with %s, trying another extension pattern.", candidate
            )
    raise FileNotFoundError("All file extensions tried and none worked for %s" % filepath)
            
    
def attach_to_config_and_remove(config, attach_key):
    """
    Merges the YAML files listed under ``attach_key`` into ``config``.

    Every entry of ``config[attach_key]`` is loaded with
    ``yaml_file_to_dict`` and its content is merged into ``config`` via
    ``dict.update``. Afterwards the ``attach_key`` entry itself is deleted.
    Nothing happens if ``attach_key`` is not present.

    Parameters
    ----------
    config : dict
        The configuration to update
    attach_key : str
        A key whose value is a list of yaml files to update ``config``
        with.

    Note
    ----
    The ``config`` is modified **in place**!
    """
    if attach_key not in config:
        return
    for yaml_path in config[attach_key]:
        config.update(yaml_file_to_dict(yaml_path))
    del config[attach_key]

        
def flatten_bottom_up(dictionary):
    """
    Flattens a nested dictionary, keeping the innermost values for repeated keys.

    Keys whose value is itself a dictionary do not appear in the result;
    only their (recursively flattened) content does. If a key occurs
    several times at the same nesting depth, the occurrence visited last
    wins.

    Parameters
    ----------
    dictionary : dict
        The dictionary to flatten

    Returns
    -------
    A new, flattened dictionary.
    """
    output = dict()
    # Depth at which the value currently stored in ``output`` was found, so
    # that a shallower occurrence seen later cannot override a deeper one.
    depth_of = dict()

    def visit(mapping, depth):
        for k, v in mapping.items():
            if isinstance(v, dict):
                visit(v, depth + 1)
            elif depth >= depth_of.get(k, -1):
                depth_of[k] = depth
                output[k] = v

    visit(dictionary, 0)
    return output

def flatten_top_down(dictionary):
    """
    Flattens a nested dictionary, keeping the outermost values for repeated keys

    Keys whose value is itself a dictionary do not appear in the result;
    only their (recursively flattened) content does. If a key occurs
    several times at the same nesting depth, the occurrence visited first
    wins.

    Parameters
    ----------
    dictionary : dict
        The dictionary to flatten

    Returns
    -------
    A new, flattened dictionary.
    """
    output = dict()
    # Depth at which the value currently stored in ``output`` was found, so
    # that a shallower occurrence seen later can still replace a deeper one.
    depth_of = dict()

    def visit(mapping, depth):
        for k, v in mapping.items():
            if isinstance(v, dict):
                visit(v, depth + 1)
            elif k not in depth_of or depth < depth_of[k]:
                depth_of[k] = depth
                output[k] = v

    visit(dictionary, 0)
    return output

def del_value_for_nested_key(config, key):
    """
    Removes ``key`` from a dict of dicts, at every nesting level.

    Parameters
    ----------
    config : dict
        The dict to delete in.
    key : str
        The key to delete.

    Note
    ----
    The ``config`` is modified **in place**!
    """
    # Drop the entry on this level (no-op when it is absent) ...
    config.pop(key, None)
    # ... then descend into whatever nested dictionaries remain.
    for nested in config.values():
        if not isinstance(nested, dict):
            continue
        del_value_for_nested_key(nested, key)


def find_value_for_nested_key(config, key):
    """
    In a dict of dicts, find a value for a given key.

    The search is depth-first: a dictionary is checked for ``key`` before
    any of its nested dictionaries, and nested dictionaries are searched in
    iteration order until one of them yields a match.

    Parameters
    ----------
    config : dict
        The (possibly nested) dictionary to search.
    key : str
        The key to look for.

    Returns
    -------
    The first value found for ``key``, or ``None`` if it occurs nowhere.
    """
    pending = [config]
    visited = set()  # ids of dictionaries already searched (guards against cycles)
    while pending:
        current = pending.pop()
        if id(current) in visited:
            continue
        visited.add(id(current))
        if key in current:
            return current[key]
        nested = [value for value in current.values() if isinstance(value, dict)]
        # Reverse so that the first nested dictionary is popped (searched) first.
        pending.extend(reversed(nested))
    return None
            

def make_choice_in_config(config, key):
    """
    Resolves the ``choose_<key>`` entry of ``config`` using the value of ``key``.

    The current value for ``key`` is looked up anywhere in the nested
    ``config`` and then removed from every nesting level. If ``config`` has
    a top-level ``choose_<key>`` entry:

    * a string choice is stored back as ``config[key]``;
    * any other choice is stored as ``config[key] = {choice: <selected entry>}``
      where the selected entry is ``config['choose_<key>'][choice]``.

    On success the ``choose_<key>`` entry is deleted. If the choice has no
    matching entry, a warning is issued; in that case the value of ``key``
    has already been removed and is not restored.

    Parameters
    ----------
    config : dict
        The configuration to resolve the choice in.
    key : str
        The name of the choice, i.e. the part after ``choose_``.

    Note
    ----
    The ``config`` is modified **in place**!
    """
    # ``collections.Hashable`` was only a deprecated alias and no longer
    # exists on Python >= 3.10; the ABC lives in ``collections.abc``.
    from collections.abc import Hashable

    print(f"Trying to make choice for {key}")
    choice = find_value_for_nested_key(config, key)
    if not isinstance(choice, Hashable):
        # Lists/dicts cannot be used as dictionary keys below, so freeze them.
        choice = freeze(choice)
    del_value_for_nested_key(config, key)
    if "choose_"+key in config:
        try:
            if not isinstance(choice, str):
                config[key] = {choice: config['choose_'+key][choice]}
            else:
                config[key] = choice
            del config["choose_"+key]
        except KeyError:
            warnings.warn("Could not find a choice for %s" % key)

            
def promote_value_to_key(config, promotable_key):
    """
    Promotes the matching branch of ``promote_<promotable_key>`` into ``config``.

    If ``config`` holds a dictionary under ``promote_<promotable_key>``, the
    entry whose key equals ``config[promotable_key]`` is merged into
    ``config`` and the ``promote_<promotable_key>`` entry is deleted. A
    warning is issued when no entry matches.

    Parameters
    ----------
    config : dict
        The configuration to promote in.
    promotable_key : str
        The key whose current value selects the branch to promote.

    Returns
    -------
    ``promotable_key`` if a promotion took place, otherwise ``None``.

    Note
    ----
    The ``config`` is modified **in place**!
    """
    promote_key = "promote_" + promotable_key
    if promote_key not in config:
        return None
    candidates = config[promote_key]
    if not isinstance(candidates, dict):
        return None

    selected = config[promotable_key]
    for option, payload in candidates.items():
        if option == selected:
            config.update(payload)
            del config[promote_key]
            return promotable_key

    warnings.warn("Couldn't find promotable key %s in %s" % (selected, candidates))
    return None
 

def promote_all(config):
    """
    Resolves every top-level ``promote_<key>`` entry of ``config``.

    For each non-``promote_`` string key, ``promote_value_to_key`` is called
    until no ``promote_*`` entries remain or ten passes have been made.
    Several passes may be needed because a promotion can introduce the key
    that another ``promote_*`` entry depends on.

    Parameters
    ----------
    config : dict
        The configuration to resolve promotions in.

    Raises
    ------
    ValueError
        If a ``promote_<key>`` entry exists but ``<key>`` occurs nowhere in
        the (flattened) configuration.

    Note
    ----
    The ``config`` is modified **in place**! Progress is reported with
    ``print``; a warning is issued if promotions remain after ten passes.
    """
    prefix = "promote_"
    max_iterations = 10

    def pending_promotions():
        # Non-string keys (e.g. integers parsed from YAML) are never promote
        # markers; only the leading prefix is stripped from the key.
        return [
            key[len(prefix):] for key in config
            if isinstance(key, str) and key.startswith(prefix)
        ]

    needed_promotions = pending_promotions()
    if not set(needed_promotions).issubset(flatten_bottom_up(config)):
        raise ValueError("This configuration does not have enough information to promote everything!")

    iterations = 0
    while needed_promotions:
        iterations += 1
        if iterations > max_iterations:
            warnings.warn(
                "Gave up after %s promotion passes; still unresolved: %s"
                % (max_iterations, needed_promotions)
            )
            break
        current_key_loop = [
            key for key in config
            if isinstance(key, str) and not key.startswith(prefix)
        ]
        print("%s: %s" % (iterations, current_key_loop))
        for key in current_key_loop:
            print("Working on:", key)
            promoted_key = promote_value_to_key(config, key)
            if promoted_key:
                print("Found %s, removing from %s" % (promoted_key, needed_promotions))
                # Re-derive from ``config`` rather than ``list.remove``: the
                # promoted branch may itself have brought in new promote_*
                # entries, which must be tracked instead of raising.
                needed_promotions = pending_promotions()

def freeze(d):
    """
    Converts lists and dicts (recursively) into hashable counterparts.

    A list becomes a tuple of frozen items, a dict becomes a frozenset of
    ``(key, frozen value)`` pairs, and anything else is returned unchanged.
    """
    if isinstance(d, list):
        return tuple(map(freeze, d))
    if not isinstance(d, dict):
        return d
    return frozenset((name, freeze(item)) for name, item in d.items())
     
    
def attach_to_config_and_reduce_keyword(config, full_keyword, reduced_keyword):
    """
    Renames ``full_keyword`` to ``reduced_keyword`` and loads the listed files.

    If ``full_keyword`` is present, its value is copied to
    ``reduced_keyword``. When that value is a list, every item is loaded
    from ``<item>.yaml`` into ``config[item]`` and the always-attached
    configurations are merged into it. Finally ``full_keyword`` is removed.
    Nothing happens if ``full_keyword`` is not present.

    Parameters
    ----------
    config : dict
        The configuration to update
    full_keyword : str
        The key to replace, e.g. ``include_models``
    reduced_keyword : str
        The key to store the value under afterwards, e.g. ``models``

    Note
    ----
    The ``config`` is modified **in place**!
    """
    if full_keyword not in config:
        # Nothing to attach; in particular do not try to delete a missing key.
        return
    config[reduced_keyword] = config[full_keyword]
    # FIXME: Does this only need to work for lists?
    if isinstance(config[full_keyword], list):
        for item in config[full_keyword]:
            loadable_item = item if item.endswith('.yaml') else item+".yaml"
            config[item] = yaml_file_to_dict(loadable_item)
            for attachment in CONFIGS_TO_ALWAYS_ATTACH_AND_REMOVE:
                attach_to_config_and_remove(config[item], attachment)
    del config[full_keyword]
 

def recursive_make_choices(config, first_choices=['jobtype']):
    """
    Resolves all ``choose_*`` entries in ``config`` and its nested dictionaries.

    Parameters
    ----------
    config : dict
        The configuration to resolve choices in.
    first_choices : list
        Currently unused by this function.

    Note
    ----
    The ``config`` is modified **in place**!
    """
    prefix = "choose_"
    # Iterate over a snapshot: making a choice adds and removes keys.
    for k in list(config):
        if k not in config:
            # An earlier choice removed this key and did not put it back.
            continue
        v = config[k]
        if isinstance(k, str) and k.startswith(prefix):
            # Strip only the leading prefix, not every occurrence of it.
            make_choice_in_config(config, k[len(prefix):])
        if isinstance(v, dict):
            recursive_make_choices(v)
            
            
def recursive_promote_all(config):
    """
    Runs ``promote_all`` on ``config`` and then on every nested dictionary.

    Note
    ----
    The ``config`` is modified **in place**!
    """
    promote_all(config)
    # Snapshot of the keys as they are after this level has been promoted.
    for name in tuple(config):
        child = config[name]
        if not isinstance(child, dict):
            continue
        recursive_promote_all(child)
                
                
def pass_down(config, key):
    """
    Hands the top-level attributes of ``config`` down to its sub-entries.

    ``config[key]`` lists the names of sub-entries of ``config``. Each of
    them receives, under ``inherited_attrs``, every top-level attribute of
    ``config`` that is neither ``key`` itself, nor one of the listed names,
    nor already a key of that sub-entry.

    Parameters
    ----------
    config : dict
        The configuration holding both the attributes and the sub-entries.
    key : str
        The key under which the names of the sub-entries are listed.

    Note
    ----
    The ``config`` is modified **in place**! Each sub-entry is removed and
    re-inserted, so it ends up at the end of ``config``'s key order.
    """
    for child_name in config[key]:
        child = config[child_name]
        inherited = child.setdefault('inherited_attrs', {})
        # Take the child out while scanning so that it is not offered to
        # itself; it is put back once the scan is done.
        config.pop(child_name)
        for attr_name, attr_value in config.items():
            if attr_name in config[key] or attr_name in child or attr_name == key:
                logging.debug("%s already has an attribute %s" % (child_name, attr_name))
            else:
                logging.debug("Passing %s=%s down to %s" % (attr_name, attr_value, child_name))
                inherited[attr_name] = attr_value
        config[child_name] = child
        

def determine_computer_from_hostname():
    """
    Picks the machine configuration file that matches the current hostname.

    ``all_machines.yaml`` is read as a mapping from computer name to a
    mapping of hostname patterns. The first computer with a pattern that
    ``re.match``-es ``socket.gethostname()`` is selected.

    Returns
    -------
    str or None
        ``<computer name>.yaml`` for the first match, or ``None`` if no
        pattern matches.
    """
    all_computers = yaml_file_to_dict('all_machines.yaml')
    for computer_name, node_patterns in all_computers.items():
        # Only the patterns matter here; the node type they belong to is unused.
        for hostname_pattern in node_patterns.values():
            if re.match(hostname_pattern, socket.gethostname()):
                return computer_name + ".yaml"

            
class GeneralConfig(dict):
    """
    A dictionary populated from a YAML configuration file.

    Subclasses must provide ``_config_init``, which is called once the file
    has been loaded and may modify or replace ``self.config`` before its
    content is copied into the dictionary itself.
    """

    def __init__(self, path):
        """
        Loads ``path``, applies the subclass hook and fills the dictionary.

        Parameters
        ----------
        path : str
            The YAML file to load (see ``yaml_file_to_dict``).
        """
        # ``self.config`` is only a scratch attribute during construction.
        self.config = yaml_file_to_dict(path)
        for attach_key in CONFIGS_TO_ALWAYS_ATTACH_AND_REMOVE:
            attach_to_config_and_remove(self.config, attach_key)
        self._config_init()
        # Read ``self.config`` again here: the hook may have rebound it.
        for name in self.config:
            self[name] = self.config[name]
        del self.config
        
        
class ConfigSetup(GeneralConfig):
    """
    Configuration of a whole setup: a standalone model or a set of models.
    """

    def _config_init(self):
        """
        Assembles the setup configuration in ``self.config``.

        For a standalone model (``standalone_model`` is truthy) the computer
        configuration and the model's ``ConfigComponent`` are merged into a
        new ``self.config``. Otherwise every model listed under
        ``include_models`` is attached as a ``ConfigComponent``.
        """
        computer_file = determine_computer_from_hostname()
        setup_relevant_configs = {'computer': yaml_file_to_dict(computer_file)}
        recursive_promote_all(setup_relevant_configs)

        if not self.config['standalone_model']:
            attach_to_config_and_reduce_keyword(self.config, 'include_models', 'models')
            for model in self.config['models']:
                self.config[model] = ConfigComponent(model)
            return

        # Component entries take precedence over the setup-level ones.
        merged = dict(setup_relevant_configs)
        merged.update(ConfigComponent(self.config['model']))
        self.config = merged
        recursive_make_choices(self.config)
        print("Unordered DICT, try again!")
        # Choices are resolved in dictionary order, so some of them may still
        # be unresolved at this point. Pass the attributes down once more ...
        pass_down(self.config, 'submodels')
        # ... and resolve the choices a second time.
        recursive_make_choices(self.config)
        
        
class ConfigComponent(GeneralConfig):
    """
    Configuration of a single component, including its submodels.
    """
    def _config_init(self):
        """
        Attaches the submodels, passes attributes down and resolves choices.
        """
        # Rename ``include_submodels`` to ``submodels`` and load each listed
        # submodel from its YAML file into ``self.config``.
        attach_to_config_and_reduce_keyword(self.config, 'include_submodels', 'submodels')
        # Give every submodel the component-level attributes it lacks.
        pass_down(self.config, 'submodels')
        # Resolve the ``choose_*`` entries, including those in nested dicts.
        recursive_make_choices(self.config)

In [291]:
# Build the setup configuration from the 'echam' YAML description.
config = ConfigSetup('echam')

1: ['hostnames', 'nodetypes', 'batch_system', 'operating_system', 'jobtype', 'logical_cpus_per_core', 'threads_per_core', 'pool_directories', 'cores_per_node']
Working on: hostnames
Working on: nodetypes
Working on: batch_system
Working on: operating_system
Working on: jobtype
Found jobtype, removing from ['jobtype', 'partition']
Working on: logical_cpus_per_core
Working on: threads_per_core
Working on: pool_directories
Working on: cores_per_node
2: ['hostnames', 'nodetypes', 'batch_system', 'operating_system', 'jobtype', 'logical_cpus_per_core', 'threads_per_core', 'pool_directories', 'cores_per_node', 'partition']
Working on: hostnames
Working on: nodetypes
Working on: batch_system
Working on: operating_system
Working on: jobtype
Working on: logical_cpus_per_core
Working on: threads_per_core
Working on: pool_directories
Working on: cores_per_node
Working on: partition
Found partition, removing from ['partition']
Trying to make choice for resolution
Trying to make choice for cores_per



In [292]:
# Display the assembled configuration.
config

{'computer': {'hostnames': {'login': 'ollie[01]',
   'compute': 'prod-[0-9]{3}',
   'mini': 'mini'},
  'nodetypes': ['login', 'compute', 'fat', 'mini'],
  'batch_system': 'slurm',
  'operating_system': {'linux': 'centos'},
  'jobtype': 'compute',
  'logical_cpus_per_core': 2,
  'threads_per_core': 1,
  'pool_directories': {'pool': '/work/ollie/pool',
   'projects': '/work/ollie/projects'},
  'cores_per_node': 36,
  'partition': 'mpp'},
 'model': 'echam',
 'repository': 'https://gitlab.dkrz.de/modular_esm/echam6',
 'type': 'atmosphere',
 'standalone_model': True,
 'description': 'The ECHAM atmosphere model, major version 6\n(someone from Hamburg)\npaper citation\n',
 'license_text': 'Please make sure you have a licence to use echam.In case you are\nunsure, please contact redmine....\n',
 'pool_dir': '@MACHINE_POOL_DIR@/ECHAM6',
 'executable': 'echam6',
 'version': '6.304p1',
 'versions': {'6.304p1': {'repo_tag': '6.304p1', 'dataset': 'r0007'}},
 'INI_PARENT_DATE': 22941231,
 'INI_PARENT