In [24]:
import json
import shlex
import subprocess
from pathlib import Path

import yaml

## Utility Functions

In [13]:
def dbt_run_operation(operation, **kwargs):
    """Run a dbt macro through ``dbt run-operation`` and return the captured text.

    Args:
        operation: Name of the dbt macro to run.
        **kwargs: Macro arguments; serialized to JSON and passed via ``--args``.

    Returns:
        The text captured by ``subprocess.getoutput``. The command pipes dbt's
        stdout through ``tail -n +2``, which drops its first line
        (presumably a dbt log/header line -- TODO confirm).
    """
    # Quote both values for the shell: wrapping the JSON in bare single quotes
    # breaks the command (and allows shell injection) as soon as a value
    # contains a single quote.
    quoted_operation = shlex.quote(str(operation))
    quoted_args = shlex.quote(json.dumps(kwargs))
    cmd = f"dbt run-operation {quoted_operation} --args {quoted_args} | tail -n +2"
    return subprocess.getoutput(cmd)


# Example usage:
# print(dbt_run_operation('generate_source', database_name='dbt', schema_name='public'))


version: 2

sources:
  - name: public
    tables:
      - name: addresses
      - name: events
      - name: order_items
      - name: orders
      - name: products
      - name: promos
      - name: superheroes
      - name: users


In [20]:
def dbt_generate_source(database, schema, name):
    """Build a source definition dict from dbt's ``generate_source`` output.

    Args:
        database: Passed to the macro as ``database_name`` and stored under
            the ``database`` key of the result.
        schema: Passed to the macro as ``schema_name`` and stored under the
            ``schema`` key of the result.
        name: Name stored under the ``name`` key of the resulting source.

    Returns:
        A dict with the generated ``version`` and a single-entry ``sources``
        list carrying ``name``, ``database``, ``schema`` and the ``tables`` of
        the first generated source.
    """
    generated = yaml.safe_load(
        dbt_run_operation('generate_source', database_name=database, schema_name=schema)
    )
    version = generated['version']
    # Only the first generated source is used; its tables are re-homed under `name`.
    tables = generated['sources'][0]['tables']
    source_entry = {
        "name": name,
        "database": database,
        "schema": schema,
        "tables": tables,
    }
    return {"version": version, "sources": [source_entry]}


# Build the `greenery` source definition from the `public` schema of the `dbt` database.
source_greenery = dbt_generate_source('dbt', 'public', 'greenery')



In [35]:
def write_as_yaml(x, file=None):
    """Dump ``x`` as YAML and either print it or write it to ``file``.

    Args:
        x: Object to serialize with ``yaml.dump`` (called with
            ``sort_keys=False``, so keys are not sorted).
        file: Destination path. When ``None`` (the default) the YAML text is
            printed instead of being written.
    """
    rendered = yaml.dump(x, sort_keys=False)
    if file is not None:
        Path(file).write_text(rendered)
        return
    print(rendered)

def dbt_write_source(source):
    """Write a source definition to ``models/staging/<name>/src_<name>.yml``.

    The target directory is created if it does not exist yet.

    Args:
        source: Source definition dict; the source name is read from
            ``source['sources'][0]['name']``.
    """
    source_name = source['sources'][0]['name']
    source_dir = Path(f"models/staging/{source_name}")
    source_dir.mkdir(parents=True, exist_ok=True)
    source_file = source_dir / f"src_{source_name}.yml"
    print(f"Writing source yaml for {source_name} to {source_file}")
    # Write the argument to the computed path; relying on notebook-level
    # names here only works while they happen to exist in the kernel.
    write_as_yaml(source, source_file)

# Write the greenery source definition to its YAML file under models/staging/.
dbt_write_source(source_greenery)



Writing source yaml for greenery to models/staging/greenery/src_greenery.yml


In [None]:
def dbt_generate_staging_models(source):
    """Generate base-model SQL for every table of a source.

    Runs the ``generate_base_model`` operation once per table.

    Args:
        source: Source definition dict. Only ``source['sources'][0]`` is used:
            its ``name`` and the ``name`` of every entry in ``tables``.

    Returns:
        ``{"name": <source name>, "models": {<table name>: <operation output>}}``.
    """
    # Only the keys that are actually needed are read, so a source dict
    # without `database`/`schema` entries is accepted as well.
    source_entry = source['sources'][0]
    source_name = source_entry['name']
    table_names = [table['name'] for table in source_entry['tables']]
    staging_models = {"name": source_name, "models": {}}
    for table_name in table_names:
        print(table_name)
        staging_models['models'][table_name] = dbt_run_operation(
            'generate_base_model', source_name=source_name, table_name=table_name
        )
    return staging_models

# Generate base-model SQL for every table in the greenery source (one dbt call per table).
staging_models_greenery = dbt_generate_staging_models(source_greenery)


In [57]:
def dbt_write_staging_models(staging_models):
    """Write each generated staging model to its own ``.sql`` file.

    Files are named ``models/staging/<source>/stg_<source>__<model>.sql``.

    Args:
        staging_models: Dict with a ``name`` (the source name) and a
            ``models`` mapping of model name to SQL text.
    """
    source_name = staging_models['name']
    target_dir = Path(f"models/staging/{source_name}")
    for model_name, model_sql in staging_models['models'].items():
        # Created per iteration so that an empty model mapping creates nothing.
        target_dir.mkdir(parents=True, exist_ok=True)
        target_file = target_dir / f"stg_{source_name}__{model_name}.sql"
        print(f"Writing staging model for {model_name} to {target_file}")
        target_file.write_text(model_sql)

# Write one stg_greenery__<table>.sql file per generated staging model.
dbt_write_staging_models(staging_models_greenery)


Writing staging model for addresses to models/staging/greenery/stg_greenery__addresses.sql
Writing staging model for events to models/staging/greenery/stg_greenery__events.sql
Writing staging model for order_items to models/staging/greenery/stg_greenery__order_items.sql
Writing staging model for orders to models/staging/greenery/stg_greenery__orders.sql
Writing staging model for products to models/staging/greenery/stg_greenery__products.sql
Writing staging model for promos to models/staging/greenery/stg_greenery__promos.sql
Writing staging model for superheroes to models/staging/greenery/stg_greenery__superheroes.sql
Writing staging model for users to models/staging/greenery/stg_greenery__users.sql


In [93]:
def dbt_generate_staging_models_yaml(staging_models):
    """Collect the ``generate_model_yaml`` entries for every staging model.

    Args:
        staging_models: Dict with a ``name`` (the source name) and a
            ``models`` mapping whose keys are the model names; each is
            prefixed with ``stg_<source>__`` before being passed to dbt.

    Returns:
        ``{'name': <source name>, 'models': [...]}`` where ``models`` is the
        concatenation of the ``models`` list parsed from each dbt output.
    """
    source_name = staging_models['name']
    model_entries = []
    for table_name in tuple(staging_models['models'].keys()):
        qualified_name = f"stg_{source_name}__{table_name}"
        print(f"Generating yaml for staging model {qualified_name}")
        parsed = yaml.safe_load(
            dbt_run_operation('generate_model_yaml', model_name=qualified_name)
        )
        model_entries = model_entries + parsed['models']

    return {'name': source_name, 'models': model_entries}

# Generate the model YAML entries for every greenery staging model (one dbt call per model).
staging_models_greenery_yaml = dbt_generate_staging_models_yaml(staging_models_greenery)

    

Generating yaml for staging model stg_greenery__addresses
Generating yaml for staging model stg_greenery__events
Generating yaml for staging model stg_greenery__order_items
Generating yaml for staging model stg_greenery__orders
Generating yaml for staging model stg_greenery__products
Generating yaml for staging model stg_greenery__promos
Generating yaml for staging model stg_greenery__superheroes
Generating yaml for staging model stg_greenery__users


In [97]:
def dbt_write_staging_models_yaml(staging_models_yaml):
    """Write the collected model entries to ``models/staging/<name>/stg_<name>.yml``.

    The written document has the form ``{'version': 2, 'models': [...]}``.

    Args:
        staging_models_yaml: Dict with a ``name`` (the source name) and the
            ``models`` entries to write.
    """
    source_name = staging_models_yaml['name']
    staging_model_yaml_file = Path(f"models/staging/{source_name}/stg_{source_name}.yml")
    # Create the target directory, as the other writers in this notebook do,
    # so this step does not depend on an earlier cell having created it.
    staging_model_yaml_file.parent.mkdir(parents=True, exist_ok=True)
    out = {'version': 2, 'models': staging_models_yaml['models']}
    write_as_yaml(out, staging_model_yaml_file)

# Write the combined model YAML to models/staging/greenery/stg_greenery.yml.
dbt_write_staging_models_yaml(staging_models_greenery_yaml)


In [63]:
# Preview the first two generated model names. Uses the notebook-level
# `staging_models_greenery`; `staging_models` only exists as a function
# parameter, so referencing it here fails on a fresh kernel.
list(staging_models_greenery['models'])[:2]

['addresses', 'events']