Skip to content

Commit

Permalink
fix: select from subquery result in alias as table name (#17)
Browse files Browse the repository at this point in the history
* fix: select from subquery result in alias as table name

* refactor: remove unnecessary tokenList checking
  • Loading branch information
reata committed Aug 11, 2019
1 parent b613ec4 commit 2901dd1
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 5 deletions.
14 changes: 9 additions & 5 deletions sqllineage/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from typing import List, Set

import sqlparse
from sqlparse.sql import Function, Identifier, IdentifierList, Statement, TokenList
from sqlparse.sql import Function, Identifier, Parenthesis, Statement, TokenList
from sqlparse.tokens import Keyword, Token, Whitespace

SOURCE_TABLE_TOKENS = ('FROM', 'JOIN', 'INNER JOIN', 'LEFT JOIN', 'RIGHT JOIN', 'LEFT OUTER JOIN', 'RIGHT OUTER JOIN',
Expand Down Expand Up @@ -51,11 +51,9 @@ def target_tables(self) -> Set[str]:
return self._target_tables

def _extract_from_token(self, token: Token):
if not isinstance(token, TokenList):
return
source_table_token_flag = target_table_token_flag = temp_table_token_flag = False
for sub_token in token.tokens:
if isinstance(token, TokenList) and not isinstance(sub_token, (Identifier, IdentifierList)):
if isinstance(sub_token, TokenList):
self._extract_from_token(sub_token)
if sub_token.ttype in Keyword:
if sub_token.normalized in SOURCE_TABLE_TOKENS:
Expand All @@ -75,7 +73,13 @@ def _extract_from_token(self, token: Token):
continue
else:
assert isinstance(sub_token, Identifier)
self._source_tables.add(sub_token.get_real_name())
if isinstance(sub_token.token_first(), Parenthesis):
# For "SELECT col1 FROM (SELECT col2 FROM tab1) dt", sqlparse parses the subquery as an Identifier,
# and that Identifier's get_real_name() returns the alias name dt, so it is not added as a source table.
# See https://github.com/andialbrecht/sqlparse/issues/218 for further information.
pass
else:
self._source_tables.add(sub_token.get_real_name())
source_table_token_flag = False
elif target_table_token_flag:
if sub_token.ttype == Whitespace:
Expand Down
4 changes: 4 additions & 0 deletions tests/test_select.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,10 @@ def test_select_count():
helper("SELECT COUNT(*) FROM tab1", {"tab1"})


# Subquery in FROM with an alias: the expected set contains only the inner table tab1,
# not the alias dt. NOTE(review): helper is defined outside this view — presumably it
# compares the parsed source tables against the given set; confirm.
def test_select_subquery():
helper("SELECT col1 FROM (SELECT col2 FROM tab1) dt", {"tab1"})


# INNER JOIN of two tables: the expected set contains both tab1 and tab2.
# NOTE(review): helper is defined outside this view — presumably it compares the
# parsed source tables against the given set; confirm.
def test_select_inner_join():
helper("SELECT * FROM tab1 INNER JOIN tab2", {"tab1", "tab2"})

Expand Down

0 comments on commit 2901dd1

Please sign in to comment.