In [1]:
from myml.nbinit import *

In [2]:
import re
from etl.files import yaml_dump

def move_field_to_top(data, field):
    """
    Build a copy of ``data`` whose first key is ``field``.

    Every other key follows in its original relative order. When ``field`` is
    not a key of ``data``, the input object itself is returned (no copy).
    """
    if field in data:
        # Dict literals keep insertion order: ``field`` is written first, then
        # the unpacked comprehension appends the rest as originally ordered.
        return {
            field: data[field],
            **{k: v for k, v in data.items() if k != field},
        }
    return data


def dump_yaml_with_anchors(data):
    """
    Serialize ``data`` with ``yaml_dump`` and post-process the text so that
    ``def_*`` keys carry YAML anchors and quoted ``*def_*`` strings become aliases.

    Args:
        data (dict): The dictionary to dump.

    Returns:
        str: The YAML text with anchors and aliases inserted.
    """
    # An indented "def_<id>:" key at the start of a line; the key name is
    # reused as the anchor name.
    anchor_key = re.compile(r"^(\s+)(def_[^:]+):(.*)$", flags=re.MULTILINE)
    # A quoted string that starts with *def_, e.g. '*def_2329260084214905053'
    # (same quote character on both sides).
    quoted_alias = re.compile(r"""(['"])(\*def_[^'"]+)\1""")

    text = yaml_dump(data)
    # "  def_x:<rest>"  ->  "  def_x: &def_x<rest>"
    text = anchor_key.sub(r"\1\2: &\2\3", text)
    # Drop the surrounding quotes so the value is read as an alias.
    text = quoted_alias.sub(r"\2", text)
    return text



In [4]:
import yaml
from collections import defaultdict
from etl.collections.explorer_migration import migrate_csv_explorer
from etl.files import yaml_dump
from etl.paths import EXPLORERS_DIR, STEP_DIR

# Build the explorer config by calling migrate_csv_explorer for "influenza".
# NOTE(review): the next cell begins with the same imports and the same call,
# so this cell looks like a leftover duplicate — confirm before removing it.
config = migrate_csv_explorer("influenza")

In [None]:
import hashlib
import yaml
from collections import defaultdict
from etl.collections.explorer_migration import migrate_csv_explorer
from etl.files import yaml_dump
from etl.paths import EXPLORERS_DIR, STEP_DIR

# Start from a freshly migrated config so re-running this cell is repeatable.
config = migrate_csv_explorer("influenza")

# definitions[field][anchor_name] -> text. Kept as a plain dict (not a
# defaultdict) so the object handed to the YAML dump is a built-in mapping.
definitions = {}

for view in config["views"]:
    # Removed per view here and re-added once under common_views below.
    # pop() with a default tolerates views that never had the key.
    view['config'].pop('timelineMinTime', None)

    # Create shared definitions
    for indicator in view["indicators"]['y']:
        # Named display_cfg (not `display`) so IPython's display() helper is
        # not shadowed in the notebook namespace.
        display_cfg = indicator['display']
        for key in ('additionalInfo', 'sourceLink', 'dataPublishedBy', 'sourceName'):
            if key not in display_cfg:
                # Nothing to share for this indicator/field.
                continue
            raw_text = display_cfg[key]

            # Content-derived anchor name. The builtin hash() of a str is
            # salted per interpreter process, so it would yield different
            # names on every kernel restart; a hashlib digest is stable.
            # The field name is mixed in so identical text under two different
            # fields never yields the same anchor name twice in one document.
            digest = hashlib.sha256(f"{key}:{raw_text}".encode("utf-8")).hexdigest()[:16]
            h = "def_" + digest

            # Turn literal backslash-n sequences into real newlines in the
            # stored definition text.
            definitions.setdefault(key, {})[h] = raw_text.replace('\\n', '\n')
            # Placeholder string; dump_yaml_with_anchors later unquotes it
            # into a YAML alias.
            display_cfg[key] = '*' + h

# NOTE(review): assumes every view carried this same timelineMinTime — confirm.
definitions['common_views'] = [
    {
        "config": {
            "timelineMinTime": "-4043",
        },
    }
]

config["definitions"] = definitions

# Put the definitions block first in the config.
config = move_field_to_top(config, "definitions")

In [19]:
# Collect per-table variable metadata from the indicators. The pops below move
# unit / name / shortUnit out of each indicator's display settings, so `config`
# is modified in place.

tables = defaultdict(dict)

for view in config['views']:
    for indicator in view['indicators']['y']:
        # catalogPath is split on '#' into exactly two parts: table and column.
        table_name, column = indicator['catalogPath'].split('#')
        variables = tables[table_name].setdefault('variables', {})
        display_cfg = indicator['display']

        entry = {"title": column, 'unit': display_cfg.pop('unit')}
        entry["display"] = {"name": display_cfg.pop('name')}
        # shortUnit is the only one of the three treated as optional.
        if 'shortUnit' in display_cfg:
            entry['short_unit'] = display_cfg.pop('shortUnit')

        variables[column] = entry

In [20]:
# dump explorer config
path_new = STEP_DIR / "export/explorers/who/latest/influenza.config.yml"
# Render before opening: mode "w" truncates the target immediately, so a
# failure while dumping must happen before the existing file is touched.
config_yaml = dump_yaml_with_anchors(config)
# Explicit encoding so the file is written identically regardless of the
# platform's default text encoding.
with open(path_new, "w", encoding="utf-8") as f:
    f.write(config_yaml)

# dump metadata for the grapher step
# path_new = STEP_DIR / "data/grapher/who/latest/flu.meta.yml"
# with open(path_new, "w") as f:
#     f.write(yaml_dump({"tables": tables}))