Skip to content

Commit

Permalink
fix: assertion error when a comment is placed before table name (#38)
Browse files: browse the repository at this point in the history
  • Loading branch information
reata committed Mar 23, 2020
1 parent d79057f commit d80f93b
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 6 deletions.
16 changes: 10 additions & 6 deletions sqllineage/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,13 @@
from typing import List, Set

import sqlparse
from sqlparse.sql import Function, Identifier, Parenthesis, Statement, TokenList
from sqlparse.sql import Comment, Function, Identifier, Parenthesis, Statement, TokenList
from sqlparse.tokens import Keyword, Token

SOURCE_TABLE_TOKENS = ('FROM', 'JOIN', 'INNER JOIN', 'LEFT JOIN', 'RIGHT JOIN', 'LEFT OUTER JOIN', 'RIGHT OUTER JOIN',
'FULL OUTER JOIN', 'CROSS JOIN')
TARGET_TABLE_TOKENS = ('INTO', 'OVERWRITE', 'TABLE')
TEMP_TABLE_TOKENS = ('WITH', )
TEMP_TABLE_TOKENS = ('WITH',)


class LineageParser(object):
Expand Down Expand Up @@ -53,7 +53,7 @@ def source_tables(self) -> Set[str]:
def target_tables(self) -> Set[str]:
return self._target_tables

def _extract_from_token(self, token: Token):
def _extract_from_token(self, token: Token) -> None:
source_table_token_flag = target_table_token_flag = temp_table_token_flag = False
for sub_token in token.tokens:
if isinstance(sub_token, TokenList):
Expand All @@ -72,7 +72,7 @@ def _extract_from_token(self, token: Token):
self._target_tables.add(sub_token.get_alias())
continue
if source_table_token_flag:
if sub_token.is_whitespace:
if self.__token_negligible_before_tablename(sub_token):
continue
else:
assert isinstance(sub_token, Identifier)
Expand All @@ -85,7 +85,7 @@ def _extract_from_token(self, token: Token):
self._source_tables.add(sub_token.get_real_name())
source_table_token_flag = False
elif target_table_token_flag:
if sub_token.is_whitespace:
if self.__token_negligible_before_tablename(sub_token):
continue
elif isinstance(sub_token, Function):
# insert into tab (col1, col2), tab (col1, col2) will be parsed as Function
Expand All @@ -98,7 +98,7 @@ def _extract_from_token(self, token: Token):
self._target_tables.add(sub_token.get_real_name())
target_table_token_flag = False
elif temp_table_token_flag:
if sub_token.is_whitespace:
if self.__token_negligible_before_tablename(sub_token):
continue
else:
assert isinstance(sub_token, Identifier)
Expand All @@ -107,6 +107,10 @@ def _extract_from_token(self, token: Token):
self._extract_from_token(sub_token)
temp_table_token_flag = False

@classmethod
def __token_negligible_before_tablename(cls, token: Token) -> bool:
    """Tell whether ``token`` may be skipped while looking for a table name.

    A token counts as negligible when its ``is_whitespace`` attribute is
    truthy or when it is a ``Comment`` instance.

    :param token: the token to inspect
    :return: the truthy ``is_whitespace`` value when set, otherwise the
        result of the ``Comment`` instance check
    """
    # Read the attribute once and hand it back unchanged when truthy, so the
    # result matches a short-circuiting ``or`` exactly.
    whitespace = token.is_whitespace
    return whitespace if whitespace else isinstance(token, Comment)


def main():
parser = argparse.ArgumentParser(prog='sqllineage', description='SQL Lineage Parser.')
Expand Down
8 changes: 8 additions & 0 deletions tests/test_select.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,14 @@ def test_select_with_comment():
helper("SELECT -- comment1\n col1 FROM tab1", {"tab1"})


def test_select_with_comment_after_from():
    """Run ``helper`` on a query with a ``--`` comment between FROM and the table.

    NOTE(review): ``helper`` is defined elsewhere in this module; presumably it
    compares the parsed source tables with the given set -- confirm there.
    """
    sql = "SELECT col1\nFROM -- comment\ntab1"
    expected_tables = {"tab1"}
    helper(sql, expected_tables)


def test_select_with_comment_after_join():
    """Run ``helper`` on a query with a ``--`` comment between JOIN and the table.

    NOTE(review): ``helper`` is defined elsewhere in this module; presumably it
    compares the parsed source tables with the given set -- confirm there.
    """
    sql = "select * from tab1 join --comment\ntab2 on tab1.x = tab2.x"
    expected_tables = {"tab1", "tab2"}
    helper(sql, expected_tables)


def test_select_keyword_as_column_alias():
# here `as` is the column alias
helper("SELECT 1 `as` FROM tab1", {"tab1"})
Expand Down

0 comments on commit d80f93b

Please sign in to comment.