In [4]:
import sqlglot
from sqlglot import parse_one, exp
from sqlglot.lineage import lineage
from sqlglot.schema import MappingSchema
from sqlglot.dialects import Snowflake
from sqlglot.optimizer import optimize
from sqlglot.optimizer.scope import traverse_scope
import timeout_decorator



def filter_predicates(sql, debug=False):
    """Collect every WHERE clause of a query and the columns it references.

    The SQL text is parsed with the Snowflake dialect and handed to
    ``optimize_within_time`` (not defined in this cell). Each scope of the
    resulting tree is then scanned for ``WHERE`` clauses.

    Args:
        sql: SQL text to analyse.
        debug: When true, print the scopes, aliases, clauses and columns as
            they are visited.

    Returns:
        A list with one dict per ``WHERE`` clause::

            {"where_clause": <clause rendered as Snowflake SQL>,
             "filter_columns": [{"column": ..., "table_alias": ..., "table": ...}]}

        ``table`` is ``None`` when the column's qualifier is not a table alias
        found in the same scope. If parsing or optimizing raises, the
        exception object is *returned* rather than raised, so callers must
        check the type of the result.
    """
    try:
        optimized = optimize_within_time(parse_one(sql, dialect=Snowflake), dialect=Snowflake)
    except Exception as e:
        # Existing contract: hand the error back to the caller instead of raising.
        return e

    # Traverse once; the same list serves the debug count and the scan below.
    scopes = traverse_scope(optimized)
    if debug:
        print(f"number of scopes: {len(scopes)}")

    preds = []
    for scope in scopes:
        if debug:
            print(f"scope: {scope}")

        # Map each table alias name to the name of the node it is attached to.
        aliases = {}
        for alias in scope.find_all(exp.TableAlias):
            aliases[alias.name] = alias.parent.name
            if debug:
                print(f"alias: {alias.name}, parent: {alias.parent.name}")

        for clause in scope.find_all(exp.Where):
            if debug:
                print(f"where: {clause}")
            filter_columns = []
            for col in clause.find_all(exp.Column):
                # Look the alias up with .get() once, so that debug mode no
                # longer raises KeyError for a qualifier that is not a known
                # alias (the non-debug path already tolerated that case).
                table = aliases.get(col.table)
                if debug:
                    print(f"col: {col.name}, table_alias: {col.table}, table: {table}")
                filter_columns.append({
                    "column": col.name,
                    "table_alias": col.table,
                    "table": table
                })
            preds.append(
                {
                    "where_clause": clause.sql(dialect=Snowflake),
                    "filter_columns": filter_columns
                }
            )
    return preds


In [22]:
# Print the installed sqlglot version so the outputs in this notebook can be
# tied to a specific release.
print(sqlglot.__version__)

17.10.2


In [10]:
from sqlglot import parse_one
from sqlglot.optimizer import optimize
from sqlglot.dialects import Snowflake

# Sample query: nested subqueries in which one level is wrapped in three pairs
# of parentheses (marked by the inline SQL comment).
sql = """
SELECT 
  ("SUBQUERY_0"."KEY") AS "SUBQUERY_1_COL_0"
FROM 
  (
    SELECT 
      * 
    FROM 
      ((( -- <-------------- ***note multiple parens****
          select 
            * 
          from 
            (
              select 
                event_name as key, 
                insert_ts 
              from 
                (
                  select 
                    insert_ts, 
                    event_name 
                  from 
                    sales
                  where 
                    insert_ts > '2023-08-07 21:03:35.590 -0700'
                )
        )
      ))) AS "SF_CONNECTOR_QUERY_ALIAS"
  ) AS "SUBQUERY_0"

"""

# Optimize the query as written, then again with the doubled parentheses
# stripped, and pretty-print both results so they can be compared.
print("========= with parens ==========")
parsed_with_parens = parse_one(sql, dialect=Snowflake)
optimized = optimize(parsed_with_parens, dialect=Snowflake)
print(optimized.sql(pretty=True))

print("========= without parens ==========")
# Dropping every "((" and "))" reduces "(((" / ")))" to a single "(" / ")".
without_double_parens = sql.replace("((", "")
without_double_parens = without_double_parens.replace("))", "")
parsed_without_parens = parse_one(without_double_parens, dialect=Snowflake)
optimized = optimize(parsed_without_parens, dialect=Snowflake)
print(optimized.sql(pretty=True))

SELECT
  "SF_CONNECTOR_QUERY_ALIAS"."KEY" AS "SUBQUERY_1_COL_0"
FROM (
  (
    "cte" AS "cte"
  )
) AS "SF_CONNECTOR_QUERY_ALIAS"
SELECT
  "SALES"."EVENT_NAME" AS "SUBQUERY_1_COL_0"
FROM "SALES" AS "SALES"
WHERE
  "SALES"."INSERT_TS" > '2023-08-07 21:03:35.590 -0700'


In [9]:
# Display the parsed (not optimized) expression tree of the query with the
# doubled parentheses removed.
parse_one(without_double_parens, dialect=Snowflake)

(SELECT expressions: 
  (ALIAS this: 
    (PAREN this: 
      (COLUMN this: 
        (IDENTIFIER this: KEY, quoted: True), table: 
        (IDENTIFIER this: SUBQUERY_0, quoted: True))), alias: 
    (IDENTIFIER this: SUBQUERY_1_COL_0, quoted: True)), from: 
  (FROM this: 
    (SUBQUERY this: 
      (SELECT expressions: 
        (STAR ), from: 
        (FROM this: 
          (SUBQUERY this: 
            (SELECT expressions: 
              (STAR ), from: 
              (FROM this: 
                (SUBQUERY this: 
                  (SELECT expressions: 
                    (ALIAS this: 
                      (COLUMN this: 
                        (IDENTIFIER this: event_name, quoted: False)), alias: 
                      (IDENTIFIER this: key, quoted: False)), 
                    (COLUMN this: 
                      (IDENTIFIER this: insert_ts, quoted: False)), from: 
                    (FROM this: 
                      (SUBQUERY this: 
                        (SELECT expressions: 
  

In [21]:
# Unused scratch query, kept commented out for reference only (the subquery's
# closing parenthesis is missing):
#
# sql = """

# select * from (
#     SELECT 
#       MEMBER_GUID as USER_ID, 
#       CONCAT(
#         '| ', 
#         ARRAY_TO_STRING(GROUP_IDS, ' | '), 
#         ' |'
#       ) AS "Active Member Groups" 
#     FROM 
#       EBATES_PROD.CAMPAIGN_ASSET.MEMBER_DATA_MODEL_GROUP 


# """

# Each sample query has its own name. Previously all three were assigned to
# `sql` in turn, so the first two were overwritten before they could be used.

# Minimal CTE with no filter.
simple_cte_sql = """
with a as ( select x,y,z from b) 
select z from a
"""

# CTE with a renamed column, filtered in the outer query.
filtered_cte_sql = """
with a as (
    select 
       id,
       original as col1, 
       col2
    from 
       t1
)
select
    col2 as col2_renamed
from
  a
where a.col1 = 3902
and id = 123
"""

# Nested subqueries with single parentheses; the filter is in the innermost one.
nested_subquery_sql = """
SELECT 
  ("SUBQUERY_0"."KEY") AS "SUBQUERY_1_COL_0"
FROM 
  (
    SELECT 
      * 
    FROM 
      (
          select 
            * 
          from 
            (
              select 
                event_name as key, 
                insert_ts 
              from 
                (
                  select 
                    insert_ts, 
                    event_name 
                  from 
                    sales
                  where 
                    insert_ts > '2023-08-07 21:03:35.590 -0700'
                )
        )
      ) AS "SF_CONNECTOR_QUERY_ALIAS"
  ) AS "SUBQUERY_0"

"""

# `sql` ends up with the same value as before, so cells that read it still work.
sql = nested_subquery_sql
filter_predicates(sql)

[{'where_clause': 'WHERE "SALES"."INSERT_TS" > \'2023-08-07 21:03:35.590 -0700\'',
  'filter_columns': [{'column': 'INSERT_TS',
    'table_alias': 'SALES',
    'table': 'SALES'}]}]

In [12]:
# Pretty-print whatever `optimized` currently holds. It is assigned in other
# cells, so the result depends on which of them ran last.
print(optimized.sql(pretty=True))

SELECT
  "SF_CONNECTOR_QUERY_ALIAS"."KEY" AS "SUBQUERY_1_COL_0",
  "SF_CONNECTOR_QUERY_ALIAS"."VALUE" AS "SUBQUERY_1_COL_1"
FROM (
  "cte" AS "cte"
) AS "SF_CONNECTOR_QUERY_ALIAS"


In [8]:
# Always-verbose, cell-level version of the predicate scan. It leaves
# `optimized`, `scopes` and `preds` in the namespace for inspection.
try:
    optimized = optimize(parse_one(sql, dialect=Snowflake), dialect=Snowflake)
except Exception as e:
    print(e)
    # No fresh tree was produced. Scan nothing instead of falling through to a
    # stale (or undefined) `optimized` left over from another cell.
    scopes = []
else:
    # Traverse once and reuse the list for both the count and the loop.
    scopes = traverse_scope(optimized)
print(f"number of scopes: {len(scopes)}")
preds = []
for scope in scopes:
    print(f"scope: {scope}")
    # Map each table alias name to the name of the node it is attached to.
    aliases = {}
    for alias in scope.find_all(exp.TableAlias):
        aliases[alias.name] = alias.parent.name
        print(f"alias: {alias.name}, parent: {alias.parent.name}")
    for clause in scope.find_all(exp.Where):
        print(f"where: {clause}")
        filter_columns = []
        for col in clause.find_all(exp.Column):
            # .get() avoids a KeyError when the column's qualifier is not one
            # of the aliases collected for this scope.
            table = aliases.get(col.table)
            print(f"col: {col.name}, table_alias: {col.table}, table: {table}")
            filter_columns.append({
                "column": col.name,
                "table_alias": col.table,
                "table": table
            })
        preds.append(
            {
                "where_clause": clause.sql(dialect=Snowflake),
                "filter_columns": filter_columns
            }
        )



number of scopes: 2
scope: Scope<"cte" AS "cte">
scope: Scope<SELECT "SF_CONNECTOR_QUERY_ALIAS"."KEY" AS "SUBQUERY_1_COL_0", "SF_CONNECTOR_QUERY_ALIAS"."VALUE" AS "SUBQUERY_1_COL_1" FROM ("cte" AS "cte") AS "SF_CONNECTOR_QUERY_ALIAS">


In [28]:
# Rebuild the scope list for the current `optimized` tree (assigned in another cell).
scopes = traverse_scope(optimized)

In [13]:
# Show how many scopes `scopes` (built in another cell) contains.
len(scopes)

0