In [1]:
from QueryParser import QueryParser

In [2]:
# Example 1: a query with a CTE (`recent`) joined to a lookup table, where the
# left-hand side of the ON condition is an expression (`r.balance + 10`)
# rather than a bare column.
#
# The SQL is a plain triple-quoted string: nothing is interpolated into it, so
# it carries no f-prefix (with one, any literal `{` or `}` added to the query
# later would be parsed as a replacement field).
sql = """WITH recent AS (
    SELECT account_id, balance
    FROM core.accounts
    WHERE updated_at >= DATEADD('day', -7, CURRENT_DATE)
)
SELECT
    r.account_id,
    b.balance_bucket
FROM recent r
JOIN analytics.balance_lookup b
    ON r.balance + 10 = b.bucket_start
"""
parser_a = QueryParser(sql)

In [3]:
# Show, for each CTE in the query, the source table(s) behind each of its
# columns. The only CTE here is `recent`, which reads from core.accounts.
parser_a.column_lineage()

{'recent': {'account_id': ['core.accounts'], 'balance': ['core.accounts']}}

In [4]:
# List the columns the query references, each with the table(s) it may belong
# to. In the output below, columns that appear only in the WHERE clause
# (updated_at) or the JOIN condition (bucket_start) are listed alongside the
# selected ones, and CTE columns are reported against core.accounts rather
# than against the CTE name `recent`.
parser_a.feature_columns()

[{'name': 'account_id', 'potential_tables': ['core.accounts']},
 {'name': 'balance_bucket', 'potential_tables': ['analytics.balance_lookup']},
 {'name': 'bucket_start', 'potential_tables': ['analytics.balance_lookup']},
 {'name': 'balance', 'potential_tables': ['core.accounts']},
 {'name': 'updated_at', 'potential_tables': ['core.accounts']}]

In [5]:
# Describe each JOIN: its type and the column on either side of the ON
# condition. The left operand here is an expression, so the output reports the
# underlying column (balance) and also carries the expression text under
# 'complex_left'. The bare JOIN keyword is reported as 'INNER JOIN'.
parser_a.joins()

[{'join_type': 'INNER JOIN',
  'column_left': Column(name='balance', potential_tables=['core.accounts'], lineage=None),
  'column_right': Column(name='bucket_start', potential_tables=['analytics.balance_lookup'], lineage=None),
  'complex_left': 'r.balance + 10'}]

In [6]:
# Example 2: two fully qualified tables (DB.SCHEMA.TABLE) joined on plain
# columns. AMBIGUOUS_COL is selected without a table alias, so the query text
# alone does not say which of the two tables it comes from.
sql = """
SELECT
    a.COLUMN_A,
    b.COLUMN_B,
    AMBIGUOUS_COL
FROM DB.SCHEMA.TABLE_A a
JOIN DB.SCHEMA.TABLE_B b
    ON a.COLUMN_A = b.COLUMN_B
"""
parser_b = QueryParser(sql)

In [7]:
# List the referenced columns with their candidate tables. Alias-qualified
# columns resolve to a single table; the unqualified AMBIGUOUS_COL is reported
# with both joined tables in 'potential_tables'.
parser_b.feature_columns()

[{'name': 'COLUMN_A', 'potential_tables': ['DB.SCHEMA.TABLE_A']},
 {'name': 'COLUMN_B', 'potential_tables': ['DB.SCHEMA.TABLE_B']},
 {'name': 'AMBIGUOUS_COL',
  'potential_tables': ['DB.SCHEMA.TABLE_A', 'DB.SCHEMA.TABLE_B']}]

In [8]:
# Describe the JOIN. Both sides of the ON condition are plain columns, and the
# output below contains only join_type / column_left / column_right, with no
# 'complex_left' entry.
parser_b.joins()

[{'join_type': 'INNER JOIN',
  'column_left': Column(name='COLUMN_A', potential_tables=['DB.SCHEMA.TABLE_A'], lineage=None),
  'column_right': Column(name='COLUMN_B', potential_tables=['DB.SCHEMA.TABLE_B'], lineage=None)}]