Skip to content

Commit

Permalink
feat: make ansi the default dialect (#519)
Browse files Browse the repository at this point in the history
  • Loading branch information
reata committed Dec 31, 2023
1 parent e5c1300 commit 1c716ec
Show file tree
Hide file tree
Showing 33 changed files with 286 additions and 332 deletions.
2 changes: 1 addition & 1 deletion sqllineage/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,4 +31,4 @@
DEFAULT_HOST = "localhost"
DEFAULT_PORT = 5000
SQLPARSE_DIALECT = "non-validating"
DEFAULT_DIALECT = SQLPARSE_DIALECT
DEFAULT_DIALECT = "ansi"
3 changes: 2 additions & 1 deletion sqllineage/core/parser/sqlparse/analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
Where,
)

from sqllineage import SQLPARSE_DIALECT
from sqllineage.core.analyzer import LineageAnalyzer
from sqllineage.core.holders import StatementLineageHolder, SubQueryLineageHolder
from sqllineage.core.metadata_provider import MetaDataProvider
Expand All @@ -37,7 +38,7 @@ class SqlParseLineageAnalyzer(LineageAnalyzer):
"""SQL Statement Level Lineage Analyzer."""

PARSER_NAME = "sqlparse"
SUPPORTED_DIALECTS = ["non-validating"]
SUPPORTED_DIALECTS = [SQLPARSE_DIALECT]

def analyze(
self, sql: str, metadata_provider: MetaDataProvider
Expand Down
7 changes: 4 additions & 3 deletions sqllineage/core/parser/sqlparse/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
)
from sqlparse.utils import imt

from sqllineage import SQLPARSE_DIALECT
from sqllineage.core.models import Column, Schema, SubQuery, Table
from sqllineage.core.parser.sqlparse.utils import get_parameters, is_subquery
from sqllineage.utils.entities import ColumnQualifierTuple
Expand Down Expand Up @@ -107,9 +108,9 @@ def _extract_source_columns(token: Token) -> List[ColumnQualifierTuple]:
# (SELECT avg(col1) AS col1 FROM tab3), used after WHEN or THEN in CASE clause
src_cols = [
lineage[0]
for lineage in LineageRunner(token.value).get_column_lineage(
exclude_subquery=False
)
for lineage in LineageRunner(
token.value, dialect=SQLPARSE_DIALECT
).get_column_lineage(exclude_subquery=False)
]
source_columns = [
ColumnQualifierTuple(
Expand Down
13 changes: 6 additions & 7 deletions sqllineage/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,9 +55,8 @@ def __init__(
"""
if dialect == SQLPARSE_DIALECT:
warnings.warn(
"dialect `non-validating` is deprecated, use `ansi` or dialect of your SQL instead. "
"`non-validating` will stop being the default dialect in v1.5.x release "
"and be completely removed in v1.6.x",
f"dialect `{SQLPARSE_DIALECT}` is deprecated, use `ansi` or dialect of your SQL instead. "
f"`{SQLPARSE_DIALECT}` will be completely removed in v1.6.x",
DeprecationWarning,
stacklevel=2,
)
Expand Down Expand Up @@ -217,14 +216,14 @@ def supported_dialects() -> Dict[str, List[str]]:
"""
dialects = OrderedDict(
[
(
SqlParseLineageAnalyzer.PARSER_NAME,
SqlParseLineageAnalyzer.SUPPORTED_DIALECTS,
),
(
SqlFluffLineageAnalyzer.PARSER_NAME,
SqlFluffLineageAnalyzer.SUPPORTED_DIALECTS,
),
(
SqlParseLineageAnalyzer.PARSER_NAME,
SqlParseLineageAnalyzer.SUPPORTED_DIALECTS,
),
]
)
return dialects
16 changes: 7 additions & 9 deletions tests/core/test_exception.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import pytest

from sqllineage import SQLPARSE_DIALECT
from sqllineage.exceptions import (
InvalidSyntaxException,
SQLLineageException,
Expand All @@ -13,18 +14,18 @@
def test_select_without_table():
    """A FROM clause that names no table must raise SQLLineageException.

    The statement is evaluated twice: once with the default dialect and once
    with the sqlparse dialect explicitly requested.
    """
    sql = "select * from where foo='bar'"
    for runner_kwargs in ({}, {"dialect": SQLPARSE_DIALECT}):
        with pytest.raises(SQLLineageException):
            LineageRunner(sql, **runner_kwargs)._eval()


def test_full_unparsable_query_in_sqlfluff():
with pytest.raises(InvalidSyntaxException):
LineageRunner("WRONG SELECT FROM tab1", dialect="ansi")._eval()
LineageRunner("WRONG SELECT FROM tab1")._eval()


def test_partial_unparsable_query_in_sqlfluff():
with pytest.raises(InvalidSyntaxException):
LineageRunner(
"SELECT * FROM tab1 AS FULL FULL OUTER JOIN tab2", dialect="ansi"
)._eval()
LineageRunner("SELECT * FROM tab1 AS FULL FULL OUTER JOIN tab2")._eval()


def test_partial_unparsable_query_in_sqlfluff_with_tsql_batch():
Expand All @@ -38,14 +39,12 @@ def test_partial_unparsable_query_in_sqlfluff_with_tsql_batch():

def test_unsupported_query_type_in_sqlfluff():
with pytest.raises(UnsupportedStatementException):
LineageRunner(
"CREATE UNIQUE INDEX title_idx ON films (title)", dialect="ansi"
)._eval()
LineageRunner("CREATE UNIQUE INDEX title_idx ON films (title)")._eval()


def test_deprecation_warning_in_sqlparse():
with pytest.warns(DeprecationWarning):
LineageRunner("SELECT * FROM DUAL", dialect="non-validating")._eval()
LineageRunner("SELECT * FROM DUAL", dialect=SQLPARSE_DIALECT)._eval()


def test_syntax_warning_no_semicolon_in_tsql():
Expand All @@ -63,5 +62,4 @@ def test_user_warning_enable_tsql_no_semicolon_with_other_dialect():
LineageRunner(
"""SELECT * FROM foo;
SELECT * FROM bar""",
dialect="ansi",
)._eval()
26 changes: 6 additions & 20 deletions tests/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
from pathlib import Path
from typing import Optional

import networkx as nx
from sqlalchemy import (
Column as SQLAlchemyColumn,
Integer,
Expand All @@ -20,7 +19,7 @@
from sqllineage.runner import LineageRunner


def assert_table_lineage(lr: LineageRunner, source_tables=None, target_tables=None):
def _assert_table_lineage(lr: LineageRunner, source_tables=None, target_tables=None):
for _type, actual, expected in zip(
["Source", "Target"],
[lr.source_tables, lr.target_tables],
Expand All @@ -37,7 +36,7 @@ def assert_table_lineage(lr: LineageRunner, source_tables=None, target_tables=No
), f"\n\tExpected {_type} Table: {expected}\n\tActual {_type} Table: {actual}"


def assert_column_lineage(lr: LineageRunner, column_lineages=None):
def _assert_column_lineage(lr: LineageRunner, column_lineages=None):
expected = set()
if column_lineages:
for src, tgt in column_lineages:
Expand All @@ -61,16 +60,13 @@ def assert_table_lineage_equal(
dialect: str = "ansi",
test_sqlfluff: bool = True,
test_sqlparse: bool = True,
skip_graph_check: bool = False,
):
lr = LineageRunner(sql, dialect=SQLPARSE_DIALECT)
lr_sqlfluff = LineageRunner(sql, dialect=dialect)
if test_sqlparse:
assert_table_lineage(lr, source_tables, target_tables)
_assert_table_lineage(lr, source_tables, target_tables)
if test_sqlfluff:
assert_table_lineage(lr_sqlfluff, source_tables, target_tables)
if test_sqlparse and test_sqlfluff and not skip_graph_check:
assert_lr_graphs_match(lr, lr_sqlfluff)
_assert_table_lineage(lr_sqlfluff, source_tables, target_tables)


def assert_column_lineage_equal(
Expand All @@ -80,7 +76,6 @@ def assert_column_lineage_equal(
metadata_provider: Optional[MetaDataProvider] = None,
test_sqlfluff: bool = True,
test_sqlparse: bool = True,
skip_graph_check: bool = False,
):
metadata_provider = (
DummyMetaDataProvider() if metadata_provider is None else metadata_provider
Expand All @@ -92,18 +87,9 @@ def assert_column_lineage_equal(
sql, dialect=dialect, metadata_provider=metadata_provider
)
if test_sqlparse:
assert_column_lineage(lr, column_lineages)
_assert_column_lineage(lr, column_lineages)
if test_sqlfluff:
assert_column_lineage(lr_sqlfluff, column_lineages)
if test_sqlparse and test_sqlfluff and not skip_graph_check:
assert_lr_graphs_match(lr, lr_sqlfluff)


def assert_lr_graphs_match(lr: LineageRunner, lr_sqlfluff: LineageRunner) -> None:
assert nx.is_isomorphic(lr._sql_holder.graph, lr_sqlfluff._sql_holder.graph), (
f"\n\tGraph with sqlparse: {lr._sql_holder.graph}\n\t"
f"Graph with sqlfluff: {lr_sqlfluff._sql_holder.graph}"
)
_assert_column_lineage(lr_sqlfluff, column_lineages)


def generate_metadata_providers(test_schemas):
Expand Down
42 changes: 42 additions & 0 deletions tests/sql/column/test_column_select_column.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,3 +189,45 @@ def test_select_column_name_case_insensitive():
)
],
)


def test_non_reserved_keyword_as_column_name():
    """A column named ``host`` must be traced from tab2 to tab1 like any other column.

    The sqlparse-based check is skipped (``test_sqlparse=False``).
    """
    sql = """INSERT INTO tab1
SELECT host
FROM tab2 a"""
    source_column = ColumnQualifierTuple("host", "tab2")
    target_column = ColumnQualifierTuple("host", "tab1")
    assert_column_lineage_equal(
        sql,
        [(source_column, target_column)],
        test_sqlparse=False,
    )


def test_current_timestamp():
    """
    current_timestamp has been a keyword since ANSI SQL-2016, so it is not a source column:
    only col2 and col3 are expected to carry lineage from tab2 to tab1.
    sqlparse cannot produce the correct AST for this case, hence test_sqlparse=False.
    """
    sql = """INSERT INTO tab1
SELECT current_timestamp as col1,
col2,
col3
FROM tab2"""
    expected_lineage = [
        (ColumnQualifierTuple(name, "tab2"), ColumnQualifierTuple(name, "tab1"))
        for name in ("col2", "col3")
    ]
    assert_column_lineage_equal(sql, expected_lineage, test_sqlparse=False)
27 changes: 27 additions & 0 deletions tests/sql/column/test_column_select_column_dialect_specific.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import pytest

from sqllineage.utils.entities import ColumnQualifierTuple
from ...helpers import assert_column_lineage_equal


@pytest.mark.parametrize("dialect", ["tsql"])
def test_tsql_assignment_operator(dialect: str):
    """
    The assignment operator is Transact-SQL specific and can be used interchangeably with a column alias.
    Only the column-to-column assignment yields lineage; the string literal assignment does not.
    https://learn.microsoft.com/en-us/sql/t-sql/language-elements/assignment-operator-transact-sql?view=sql-server-ver15
    """
    sql = """INSERT INTO foo
SELECT FirstColumnHeading = 'xyz',
SecondColumnHeading = ProductID
FROM Production.Product"""
    source_column = ColumnQualifierTuple("ProductID", "Production.Product")
    target_column = ColumnQualifierTuple("SecondColumnHeading", "foo")
    assert_column_lineage_equal(
        sql,
        [(source_column, target_column)],
        dialect=dialect,
        test_sqlparse=False,
    )
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""
specify columns in CREATE/INSERT statement, sqlparse would parse table/view name as function call
specify columns in CREATE/INSERT statement.
This feature is NOT supported with the non-validating dialect, because sqlparse would parse the table/view name as a function call
"""

from sqllineage.utils.entities import ColumnQualifierTuple
Expand Down
18 changes: 18 additions & 0 deletions tests/sql/column/test_column_select_expression.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,3 +131,21 @@ def test_select_column_using_expression_with_table_qualifier_without_column_alia
),
],
)


def test_subquery_expression_without_source_table():
    """A scalar subquery with no FROM clause of its own takes its columns from the outer table.

    Both col1 and col2 of bar are expected to feed foo.sum_result.
    """
    sql = """INSERT INTO foo
SELECT (SELECT col1 + col2 AS result) AS sum_result
FROM bar"""
    expected_lineage = [
        (
            ColumnQualifierTuple(source_name, "bar"),
            ColumnQualifierTuple("sum_result", "foo"),
        )
        for source_name in ("col1", "col2")
    ]
    assert_column_lineage_equal(sql, expected_lineage)
14 changes: 14 additions & 0 deletions tests/sql/column/test_column_select_from_cte.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,3 +87,17 @@ def test_smarter_column_resolution_using_query_context():
(ColumnQualifierTuple("d", "tab2"), ColumnQualifierTuple("d", "tab3")),
],
)


def test_cte_inside_insert_in_parenthesis():
    """INSERT INTO whose query is a parenthesised WITH ... SELECT.

    The wildcard is expected to flow from tab2, through the CTE tab1, into tab3.
    The sqlparse-based check is skipped (``test_sqlparse=False``).
    """
    wildcard_lineage = (
        ColumnQualifierTuple("*", "tab2"),
        ColumnQualifierTuple("*", "tab3"),
    )
    assert_column_lineage_equal(
        """INSERT INTO tab3 (WITH tab1 AS (SELECT * FROM tab2) SELECT * FROM tab1)""",
        [wildcard_lineage],
        test_sqlparse=False,
    )
25 changes: 25 additions & 0 deletions tests/sql/column/test_column_select_function.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,3 +125,28 @@ def test_column_with_ctas_and_func():
),
],
)


def test_coalesce_with_whitespace():
    """
    coalesce has been a keyword since ANSI SQL-2023 and is usually parsed as a function.
    Whitespace between the name and the opening parenthesis is valid syntax, but in that
    case sqlparse cannot produce the correct AST, hence test_sqlparse=False.
    """
    sql = """INSERT INTO tab1
SELECT coalesce (col1, col2) as col3
FROM tab2"""
    expected_lineage = [
        (ColumnQualifierTuple(source_name, "tab2"), ColumnQualifierTuple("col3", "tab1"))
        for source_name in ("col1", "col2")
    ]
    assert_column_lineage_equal(sql, expected_lineage, test_sqlparse=False)
4 changes: 2 additions & 2 deletions tests/sql/column/test_metadata_unqualified_column.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
import pytest
from tests.helpers import assert_column_lineage_equal, generate_metadata_providers

from sqllineage.core.metadata_provider import MetaDataProvider
from sqllineage.exceptions import InvalidSyntaxException
from sqllineage.runner import LineageRunner
from sqllineage.utils.entities import ColumnQualifierTuple
from ...helpers import assert_column_lineage_equal, generate_metadata_providers


providers = generate_metadata_providers(
Expand Down Expand Up @@ -278,7 +278,7 @@ def test_select_column_from_tempview_view_subquery(provider: MetaDataProvider):

@pytest.mark.parametrize("provider", providers)
def test_sqlparse_exception(provider: MetaDataProvider):
sql = """insert into table db.tbl
sql = """insert into db.tbl
select id
from db1.table1 t1
join db2.table2 t2 on t1.id = t2.id
Expand Down
2 changes: 1 addition & 1 deletion tests/sql/column/test_metadata_wildcard.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import pytest
from tests.helpers import assert_column_lineage_equal, generate_metadata_providers

from sqllineage.core.metadata_provider import MetaDataProvider
from sqllineage.utils.entities import ColumnQualifierTuple
from ...helpers import assert_column_lineage_equal, generate_metadata_providers


providers = generate_metadata_providers(
Expand Down
4 changes: 0 additions & 4 deletions tests/sql/sqlfluff_only/__init__.py

This file was deleted.

0 comments on commit 1c716ec

Please sign in to comment.