In [1]:
import subprocess
import os
from parsons import Redshift, Table
import json
# Shared Redshift connector; the query and table-metadata cells below go through `rs`.
rs = Redshift()

In [3]:
# Load every row of the table-level data dictionary in the tmc_van schema.
dd_tables = rs.query(
    'select * from tmc_van.data_dictionary_tables'
)

In [4]:
# Load every row of the column-level data dictionary in the tmc_van schema.
dd_columns = rs.query(
    'select * from tmc_van.data_dictionary_columns'
)

In [176]:
# List the tmc_van tables and keep only those whose name contains 'tsm_tmc_'.
all_tables = (
    rs.get_tables('tmc_van')
    .select_rows(lambda row: 'tsm_tmc_' in row.tablename)
)

rs_table_utilities INFO Retrieving tables info.


In [73]:
# Extra tables (full 'tsm_tmc_' names) that are added to the vf_tables names
# when looking up committee columns.
other_tables = [
    'tsm_tmc_' + suffix
    for suffix in (
        'users',
        'usersusergroups',
        'committees',
        'activistcodes',
        'surveyquestions',
        'surveyresponses',
    )
]

In [87]:
# Collect every column whose name contains 'type' from the '_vf' tables, then
# derive the matching "types" table name from each such column.
vf_types = []
for vf_table in [name for name in vf_tables if '_vf' in name]:
    table_columns = rs.get_columns_list("tmc_van", vf_table)
    vf_types.extend(col for col in table_columns if 'type' in col)

# De-duplicate in sorted order so the result is identical on every run
# (iterating a bare set yields an arbitrary order).
# Only a trailing 'id' is swapped for 's' (e.g. 'codetypeid' -> 'codetypes');
# a blanket str.replace('id', 's') would also rewrite an 'id' that occurs in
# the middle of a column name.
type_tables = [
    col[:-2] + 's' if col.endswith('id') else col
    for col in sorted(set(vf_types))
]

type_tables = ['tsm_tmc_' + name for name in type_tables]

In [179]:
# Keep only the rows whose cleaned table name ('tsm_tmc_' and '_vf' removed)
# appears in `tables`.
all_tables = all_tables.select_rows(
    lambda row: row.tablename.replace('tsm_tmc_', '').replace('_vf', '') in tables
)

In [196]:
# Table names with the 'tsm_tmc_' prefix and the '_vf' marker removed.
clean_tables = [
    name.replace('tsm_tmc_', '').replace('_vf', '')
    for name in tables
]

In [216]:
# One YAML merge-key line per table, each pointing at the anchor named after
# the cleaned table name.
template = "".join(
    "\n    - <<: *" + name.replace('tsm_tmc_', '').replace('_vf', '')
    for name in tables
)
print(template)


    - <<: *activistcodes
    - <<: *codes
    - <<: *codetypes
    - <<: *committees
    - <<: *contactsactivistcodes
    - <<: *contactscodes
    - <<: *contactscontacts
    - <<: *contactsnotes
    - <<: *contactsnotes
    - <<: *contactssurveyresponses
    - <<: *contacttypes
    - <<: *inputtypes
    - <<: *surveyquestions
    - <<: *surveyresponses
    - <<: *users
    - <<: *usersusergroups


In [202]:
schema = 'tmc_van'
# Serialise the macro arguments in one go so quoting and escaping of the
# schema and table names are always valid JSON.
args = json.dumps({
    "schema_name": schema,
    "generate_columns": "true",
    "table_names": tables,
})
# Run dbt from an argument list (no shell) so quoting inside the JSON payload
# cannot be misparsed by the shell.
cmd = ["dbt", "run-operation", "generate_source", "--args", args]
result = subprocess.run(cmd, capture_output=True, text=True)
# Fail here, with dbt's own output, instead of letting an empty stdout
# surface later as an unrelated parsing error.
if result.returncode != 0:
    raise RuntimeError(
        f"dbt generate_source failed with exit code {result.returncode}:\n"
        f"{result.stdout}\n{result.stderr}"
    )
code = result.stdout

In [203]:
# Parse from result.stdout rather than from `code` itself so this cell can be
# re-run: once `code` has been trimmed it no longer contains 'sources:' and a
# second pass would raise IndexError.
stdout_parts = result.stdout.split("\n", 2)
# Drop the first two lines of dbt output, then keep what follows 'sources:'.
if len(stdout_parts) < 3 or 'sources:' not in stdout_parts[2]:
    raise ValueError(
        "dbt output does not contain a 'sources:' section after its first two lines:\n"
        + result.stdout
    )
code = stdout_parts[2].split('sources:')[1].replace('tsm_tmc_','').replace('_vf','')
print(code)


  - name: tmc_van
    tables:
      - name: codetypes
        columns:
          - name: codetypeid
          - name: codetypename

      - name: codes
        columns:
          - name: statecode
          - name: codeid
          - name: codename
          - name: committeeid
          - name: isactive
          - name: parentcodeid
          - name: createdby
          - name: datecreated
          - name: staticfullname
          - name: codetypeid
          - name: datemodified

      - name: contacttypes
        columns:
          - name: contacttypeid
          - name: contacttypename

      - name: inputtypes
        columns:
          - name: inputtypeid
          - name: inputtypename

      - name: contactsnotes
        columns:
          - name: statecode
          - name: contactsnoteid
          - name: vanid
          - name: createdby
          - name: datecreated
          - name: committeeid
          - name: suppressedby
          - name: datesuppressed
          - 

In [207]:
# Rewrite each table entry of the generated YAML into an anchored template
# entry. Only lines that are exactly "- name: <table>" are rewritten: a
# substring replace over the whole document would also hit longer names that
# share the prefix (e.g. "- name: users" inside "- name: usersusergroups").
table_names = set(clean_tables)
entry_prefix = "- name: "
rewritten_lines = []
for line in code.split("\n"):
    entry = line.strip()
    if entry.startswith(entry_prefix) and entry[len(entry_prefix):] in table_names:
        x = entry[len(entry_prefix):]
        # NOTE(review): the replacement text nests `name:` under the anchored
        # key while `columns:` stays outside it — confirm this is the intended
        # YAML shape.
        line = line.replace(
            entry,
            f"- {x}_table_template: &{x}\n            name: {x}\n    ",
        )
    rewritten_lines.append(line)
code = "\n".join(rewritten_lines)

In [208]:
# Show the YAML text currently held in `code` for a visual check.
print(code)


  - name: tmc_van
    tables:
      - codetypes_table_template: &codetypes
            name: codetypes
    
        columns:
          - name: codetypeid
          - name: codetypename

      - codes_table_template: &codes
            name: codes
    
        columns:
          - name: statecode
          - name: codeid
          - name: codename
          - name: committeeid
          - name: isactive
          - name: parentcodeid
          - name: createdby
          - name: datecreated
          - name: staticfullname
          - name: codetypeid
          - name: datemodified

      - contacttypes_table_template: &contacttypes
            name: contacttypes
    
        columns:
          - name: contacttypeid
          - name: contacttypename

      - inputtypes_table_template: &inputtypes
            name: inputtypes
    
        columns:
          - name: inputtypeid
          - name: inputtypename

      - contactsnotes_table_template: &contactsnotes
            name: contact

In [183]:
# Put the version header and the top-level key back in front of the YAML body,
# then relabel the top-level key and the tmc_van entry as reusable definitions.
yml = "\n".join(["version: 2", "", "sources:" + code])
yml = (
    yml
    .replace('sources:', 'definitions:')
    .replace('name: tmc_van', 'van_source_template: &van_source_template')
)

IndexError: list index out of range

In [121]:
# Write the template document; the context manager closes the file even if
# the write raises.
# NOTE(review): mode "a" appends, so re-running this cell adds another copy of
# the document to the file — confirm that appending (not overwriting) is intended.
with open("models/base/src_template.yml", "a") as f:
    f.write(yml)

In [143]:
def get_id_column_name(schema, tables, search_columns):
    """
    Description:
        Given a list of Redshift tables, figures out the ID column for each table.
        The first entry of ``search_columns`` that exists in a table wins.
    Args:
        schema: str
            Schema that contains the tables
        tables: list
            List of table names in ``schema``
        search_columns: list
            List of possible ID column names, in order of preference
    Returns:
        Dict that maps cleaned table names ('tsm_tmc_' and '_vf' removed) to the
        matching ID column, or to None when no candidate column is present
        (including when ``search_columns`` is empty). If two tables clean to the
        same name, the later one in ``tables`` overwrites the earlier.
    """
    rs = Redshift()
    tbl_and_id_col = {}
    for table in tables:
        table_cols = rs.get_columns_list(schema, table)
        clean_table = table.replace('tsm_tmc_', '').replace('_vf', '')
        # Every table gets an entry, even when there are no candidates to try.
        tbl_and_id_col[clean_table] = next(
            (col for col in search_columns if col in table_cols),
            None,
        )

    return tbl_and_id_col

In [144]:
# Candidate committee column names; the lookup stops at the first one a table has.
COMMITTEE_COLS = [
    'committeeid',
    'committeecreatedby',
    'createdbycommitteeid',
    'createdcommitteeid',
    'personcommitteeid',
]
# Map each cleaned table name to its committee column (or None).
tbl_and_cmte_col = get_id_column_name(
    'tmc_van',
    vf_tables['tablename'] + other_tables,
    COMMITTEE_COLS,
)

In [156]:
# SQL skeleton for one base model. `brackets_open` / `brackets_close` are
# passed in as values so that str.format emits literal '{{' and '}}' around
# the macro calls.
template = """
WITH base AS (

    {brackets_open}
        union_all_by_var(
            source_variable='van',
            default_source_table='{table}',
            source_tables_variable='{table}'
        )
    {brackets_close}

)

, segment_by AS (

    SELECT
        *,
        {committee}

    FROM base
)


SELECT 
    *,
    {brackets_open} 
    staging_metadata_fields(
        vendor='van',
        segment_by_column='committeeid',
        segment_primary_keys=['{key}']
    ) 
    {brackets_close}
FROM segment_by
"""

brackets_open = "{{"
brackets_close = "}}"
vendor = 'van'
for table in tables:
    # Tables with 'types' in their name are never looked up and always get a
    # NULL committee column.
    segment_by = None if 'types' in table else tbl_and_cmte_col[table]
    if segment_by is None:
        committee = "NULL::int as committeeid"
    elif segment_by == 'committeeid':
        # Already has the target name; select it as-is.
        committee = segment_by
    else:
        committee = f"""{segment_by} as committeeid"""
    # NOTE(review): the key is the table name with its last character swapped
    # for 'id' — presumably table names are simple plurals; verify per table.
    key = table[:-1]+'id'
    sql = template.format(table=table,
                          brackets_open=brackets_open, brackets_close=brackets_close,
                          committee=committee, key=key)
    # The context manager closes the file even if the write raises.
    # NOTE(review): mode "a" appends, so re-running this cell adds a second
    # copy of the query to an existing model file — confirm this is intended.
    with open(f"models/base/{vendor}/base_{vendor}__{table}.sql", "a") as f:
        f.write(sql)

In [162]:
# Per-table YAML snippet: cleaned table name -> the full table name.
template = """
    {clean_table}:
        - name: {full_table}
"""
yaml = """
tables:
"""
for table in tables:
    # Compare cleaned names for equality. A substring test ('codes' in
    # 'tsm_tmc_activistcodes') can select an unrelated table.
    wanted = table.replace('tsm_tmc_', '').replace('_vf', '')
    matches = [
        x for x in all_tables['tablename']
        if x.replace('tsm_tmc_', '').replace('_vf', '') == wanted
    ]
    if not matches:
        raise IndexError(f"no table in all_tables matches {table!r}")
    # Prefer an exact full-name match when `table` is already a full name.
    exact = [x for x in matches if x == table]
    full_table = (exact or matches)[0]
    yaml += template.format(clean_table=table, full_table=full_table)

In [170]:
# For each table: a merge-key line referencing its anchor, followed by a line
# naming the full table.
template = ""
for table in tables:
    # Compare cleaned names for equality. A substring test ('codes' in
    # 'tsm_tmc_activistcodes') can select an unrelated table.
    wanted = table.replace('tsm_tmc_', '').replace('_vf', '')
    matches = [
        x for x in all_tables['tablename']
        if x.replace('tsm_tmc_', '').replace('_vf', '') == wanted
    ]
    if not matches:
        raise IndexError(f"no table in all_tables matches {table!r}")
    # Prefer an exact full-name match when `table` is already a full name.
    exact = [x for x in matches if x == table]
    template += f"""
                - <<: *{table}
                - name: {(exact or matches)[0]}"""

In [174]:
# One anchor-declaration line per table: "<table>_table_template: &<table>".
template = "".join(
    "\n                - " + name + "_table_template: &" + name
    for name in tables
)
print(template)


                - activistcodes_table_template: &activistcodes
                - codes_table_template: &codes
                - codetypes_table_template: &codetypes
                - committees_table_template: &committees
                - contactsactivistcodes_table_template: &contactsactivistcodes
                - contactscodes_table_template: &contactscodes
                - contactscontacts_table_template: &contactscontacts
                - contactsnotes_table_template: &contactsnotes
                - contactssurveyresponses_table_template: &contactssurveyresponses
                - contacttypes_table_template: &contacttypes
                - inputtypes_table_template: &inputtypes
                - surveyquestions_table_template: &surveyquestions
                - surveyresponses_table_template: &surveyresponses
                - users_table_template: &users
                - usersusergroups_table_template: &usersusergroups
