Skip to content

Commit

Permalink
fix: handle lineage of subqueries (#3075)
Browse files (browse the repository at this point in the history)
  • Loading branch information
tobymao committed Mar 3, 2024
1 parent 2770ddc commit 4173ea2
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 0 deletions.
10 changes: 10 additions & 0 deletions sqlglot/lineage.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,16 @@ def to_node(
)
)

if isinstance(scope.expression, exp.Subquery):
for source in scope.subquery_scopes:
return to_node(
column,
scope=source,
dialect=dialect,
upstream=upstream,
source_name=source_name,
reference_node_name=reference_node_name,
)
if isinstance(scope.expression, exp.Union):
upstream = upstream or Node(name="UNION", source=scope.expression, expression=select)

Expand Down
35 changes: 35 additions & 0 deletions tests/test_lineage.py
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,41 @@ def test_subquery(self) -> None:
node = node.downstream[0]
self.assertEqual(node.name, "z.a")

node = lineage(
"a",
"""
WITH foo AS (
SELECT
1 AS a
), bar AS (
(
SELECT
a + 1 AS a
FROM foo
)
)
(
SELECT
a + b AS a
FROM bar
CROSS JOIN (
SELECT
2 AS b
) AS baz
)
""",
)
self.assertEqual(node.name, "a")
self.assertEqual(len(node.downstream), 2)
a, b = sorted(node.downstream, key=lambda n: n.name)
self.assertEqual(a.name, "bar.a")
self.assertEqual(len(a.downstream), 1)
self.assertEqual(b.name, "baz.b")
self.assertEqual(b.downstream, [])

node = a.downstream[0]
self.assertEqual(node.name, "foo.a")

def test_lineage_cte_union(self) -> None:
query = """
WITH dataset AS (
Expand Down

0 comments on commit 4173ea2

Please sign in to comment.