In [1]:
import yaml
import pandas as pd
from pathlib import Path
# Decode explicitly as UTF-8 so the result does not depend on the platform's
# default locale encoding.
foo_yaml = Path('foo.yml').read_text(encoding='utf-8')
# safe_load only builds plain Python objects (dicts, lists, scalars).
dag_config = yaml.safe_load(foo_yaml)
# Quick look at the raw task table:
# pd.DataFrame(dag_config['dag_factory_dp']['tasks']).T

In [None]:
# Task definitions of the `dag_factory_dp` entry, keyed by task name.
tasks_all = dag_config['dag_factory_dp']['tasks']

# Transpose so each task becomes a row, then expose the index (the task name)
# as a regular `task_name` column as well.
tasks_df = (
    pd.DataFrame(tasks_all)
    .transpose()
    .assign(task_name=lambda frame: frame.index)
)

# Preview only: `tasks_df` itself keeps the task-name index.
tasks_df.reset_index(drop=True).head()

In [4]:
# Lookup from every task name to the `task_group_name` declared in its config.
tasks_all_mappings = {
    task_name: tasks_all[task_name]['task_group_name']
    for task_name in tasks_all
}

In [54]:
from copy import deepcopy
def write_dag_yml(dag_name = 'views_content'):
    """Write ``<dag_name>.yml`` for the tasks whose ``task_group_name`` is ``dag_name``.

    Reads three notebook globals at call time, so their cells must have been
    run first: ``tasks_all`` (task name -> task config), ``tasks_all_mappings``
    (task name -> original ``task_group_name``) and ``out_1`` (DAG-level
    settings merged into the output).

    For every selected task:

    * dependencies whose name contains ``'src'`` are dropped,
    * dependencies on tasks outside this DAG are rewritten to
      ``wait_for_<task>`` and an ``ExternalTaskSensor`` task of that name is
      added, pointing at the DAG that owns the task,
    * ``task_group_name`` is replaced by ``<first "_"-separated token>_g``.

    Sensors and task groups are emitted in first-seen order, so repeated runs
    on the same input produce identical files.

    Args:
        dag_name: Value of ``task_group_name`` to select; also used as the
            top-level key of the document and as the output file stem.

    Raises:
        KeyError: If a task depends on a name that is not in
            ``tasks_all_mappings``.
    """
    # Deep-copy so the rewrites below never leak back into the shared `tasks_all`.
    tasks = deepcopy({
        task_name: task_config
        for task_name, task_config in tasks_all.items()
        if task_config['task_group_name'] == dag_name
    })
    local_names = set(tasks)

    # sensor task name -> external task it waits for, in first-seen order.
    # Recording this here (rather than re-discovering sensors by matching
    # 'wait' in dependency names afterwards) keeps in-DAG tasks whose names
    # happen to contain 'wait' from being overwritten by a sensor.
    external_deps = {}

    for task_name, task_config in tasks.items():
        resolved = []
        # `or []` also covers an explicit `dependencies: null` in the YAML.
        for dep in task_config.get('dependencies') or []:
            if 'src' in dep:
                # NOTE(review): presumably source references that are not
                # runnable tasks -- confirm against the input config.
                continue
            if dep in local_names:
                resolved.append(dep)
            else:
                sensor_name = f'wait_for_{dep}'
                external_deps[sensor_name] = dep
                resolved.append(sensor_name)

        task_config['task_group_name'] = f'{task_name.split("_")[0]}_g'
        task_config['dependencies'] = resolved

    for sensor_name, external_task in external_deps.items():
        if external_task not in tasks_all_mappings:
            raise KeyError(
                f'dependency {external_task!r} used in DAG {dag_name!r} '
                'is not a known task'
            )
        external_dag = tasks_all_mappings[external_task]
        tasks[sensor_name] = dict(
            operator = "airflow.sensors.external_task.ExternalTaskSensor",
            external_dag_id = external_dag,
            external_task_id = external_task,
            task_group_name = external_dag,
        )

    # dict.fromkeys de-duplicates while preserving order (a set would not).
    group_names = dict.fromkeys(
        task_config.get('task_group_name')
        for task_config in tasks.values()
        if task_config.get('task_group_name') is not None
    )
    task_groups = {group_name: {} for group_name in group_names}

    # DAG-level settings first, then the generated sections.
    out = {dag_name: {**out_1, 'task_groups': task_groups, 'tasks': tasks}}
    with open(f'{dag_name}.yml', 'w') as file:
        yaml.dump(out, sort_keys=False, stream=file)


In [40]:
# DAG-level settings merged into every file produced by `write_dag_yml`.
# The function reads `out_1` as a global when it is called, so this cell must
# be run before calling it.
# Note: `schedule_interval: None` is parsed by YAML as the string 'None', not
# as a null value (YAML spells null as `null` or `~`).
# NOTE(review): `dir` is a hard-coded absolute path -- confirm it is valid in
# the environment where the generated DAGs run.
out_1 = yaml.safe_load("""
default_args:
    owner: example_owner
    start_date: '2022-02-12'
    retries: 1
    retry_delay_sec: 300
    dir: '/workspace/test-dbt-airflow/dbt-greenery'
schedule_interval: None
concurrency: 1
max_active_runs: 1
dagrun_timeout_sec: 300
default_view: tree
orientation: LR
description: this is an example dag!
""")
