Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

TSQL: fix statement delimitation #1612

Merged
merged 20 commits into from
Oct 12, 2021
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
12 changes: 12 additions & 0 deletions src/sqlfluff/core/parser/segments/meta.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

from sqlfluff.core.parser.match_wrapper import match_wrapper
from sqlfluff.core.parser.segments.raw import RawSegment
from sqlfluff.core.parser.context import ParseContext
from typing import Optional, List


class MetaSegment(RawSegment):
Expand Down Expand Up @@ -31,6 +33,16 @@ def match(cls, segments, parse_context): # pragma: no cover
)
)

@classmethod
def simple(cls, parse_context: ParseContext) -> Optional[List[str]]:
    """Report the raw strings usable for the uppercase hash matching route.

    Args:
        parse_context: The current parse context. It is not consulted by
            this implementation.

    Returns:
        Always ``None``: this segment advertises no strings for the
        simple matching route. A segment which wants to be considered
        simple is expected to override this method.
    """
    return None


class Indent(MetaSegment):
"""A segment which is empty but indicates where an indent should be.
Expand Down
218 changes: 195 additions & 23 deletions src/sqlfluff/dialects/dialect_tsql.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@
https://docs.microsoft.com/en-us/sql/t-sql/language-elements/language-elements-transact-sql
"""

from typing import List, Tuple


from sqlfluff.core.parser import (
BaseSegment,
Sequence,
Expand All @@ -27,6 +30,8 @@
)

from sqlfluff.core.dialects import load_raw_dialect
from sqlfluff.core.dialects.common import AliasInfo

from sqlfluff.dialects.dialect_tsql_keywords import (
RESERVED_KEYWORDS,
UNRESERVED_KEYWORDS,
Expand Down Expand Up @@ -120,6 +125,22 @@
PrimaryKeyGrammar=Sequence(
"PRIMARY", "KEY", OneOf("CLUSTERED", "NONCLUSTERED", optional=True)
),
# Overriding SelectClauseSegmentGrammar to remove Delimited logic which assumes statements have been delimited
SelectClauseSegmentGrammar=Sequence(
"SELECT",
Ref("SelectClauseModifierSegment", optional=True),
Indent,
AnyNumberOf(
Sequence(
Ref("SelectClauseElementSegment"),
Ref("CommaSegment"),
),
),
Ref("SelectClauseElementSegment"),
# NB: The Dedent for the indent above lives in the
# SelectStatementSegment so that it sits in the right
# place corresponding to the whitespace.
),
FromClauseTerminatorGrammar=OneOf(
"WHERE",
"LIMIT",
Expand Down Expand Up @@ -155,6 +176,25 @@ class StatementSegment(ansi_dialect.get_segment("StatementSegment")): # type: i
parse_grammar = match_grammar


@tsql_dialect.segment(replace=True)
class SelectClauseElementSegment(BaseSegment):
    """A single target (wildcard or expression) inside a `SELECT` clause.

    This replaces the ANSI definition in order to drop its GreedyUntil
    logic, which assumes that statements have already been delimited.
    """

    type = "select_clause_element"
    # Only a match grammar is declared: each element is split out at match
    # time, which keeps parse trees much easier to debug.
    match_grammar = OneOf(
        # Wildcards: *, blah.*, blah.blah.*, etc.
        Ref("WildcardExpressionSegment"),
        # Any other element: an expression with an optional alias.
        Sequence(
            Ref("BaseExpressionElementGrammar"),
            Ref("AliasExpressionSegment", optional=True),
        ),
    )


@tsql_dialect.segment(replace=True)
class SelectClauseModifierSegment(BaseSegment):
"""Things that come after SELECT but before the columns."""
Expand All @@ -172,6 +212,17 @@ class SelectClauseModifierSegment(BaseSegment):
)


@tsql_dialect.segment(replace=True)
class SelectClauseSegment(BaseSegment):
    """The `SELECT` keyword together with its group of target elements.

    This replaces the ANSI definition in order to drop its StartsWith
    logic, which assumes that statements have already been delimited.
    """

    type = "select_clause"
    # The grammar itself is looked up by name via Ref.
    match_grammar = Ref("SelectClauseSegmentGrammar")


@tsql_dialect.segment(replace=True)
class UnorderedSelectStatementSegment(BaseSegment):
"""A `SELECT` statement without any ORDER clauses or later.
Expand Down Expand Up @@ -215,6 +266,7 @@ class SelectStatementSegment(BaseSegment):
match_grammar = UnorderedSelectStatementSegment.match_grammar.copy(
insert=[
Ref("OrderByClauseSegment", optional=True),
Ref("DelimiterSegment", optional=True),
]
)

Expand Down Expand Up @@ -273,6 +325,7 @@ class CreateIndexStatementSegment(BaseSegment):
),
optional=True,
),
Ref("DelimiterSegment", optional=True),
)


Expand Down Expand Up @@ -310,12 +363,7 @@ class PivotUnpivotStatementSegment(BaseSegment):
"""

type = "from_pivot_expression"
match_grammar = StartsWith(
OneOf("PIVOT", "UNPIVOT"),
terminator=Ref("FromClauseTerminatorGrammar"),
enforce_whitespace_preceding_terminator=True,
)
parse_grammar = Sequence(
match_grammar = Sequence(
OneOf(
Sequence(
"PIVOT",
Expand Down Expand Up @@ -655,12 +703,7 @@ class CreateProcedureStatementSegment(BaseSegment):

type = "create_procedure_statement"

match_grammar = StartsWith(
Sequence(
"CREATE", Sequence("OR", "ALTER", optional=True), OneOf("PROCEDURE", "PROC")
)
)
parse_grammar = Sequence(
match_grammar = Sequence(
"CREATE",
Sequence("OR", "ALTER", optional=True),
OneOf("PROCEDURE", "PROC"),
Expand Down Expand Up @@ -700,6 +743,7 @@ class CreateViewStatementSegment(BaseSegment):
Ref("ObjectReferenceSegment"),
"AS",
Ref("SelectableGrammar"),
Ref("DelimiterSegment", optional=True),
)


Expand Down Expand Up @@ -949,6 +993,7 @@ class CreateTableStatementSegment(BaseSegment):
Ref(
"TableDistributionIndexClause", optional=True
), # Azure Synapse Analytics specific
Ref("DelimiterSegment", optional=True),
)

parse_grammar = match_grammar
Expand Down Expand Up @@ -1048,6 +1093,7 @@ class AlterTableSwitchStatementSegment(BaseSegment):
Bracketed("TRUNCATE_TARGET", Ref("EqualsSegment"), OneOf("ON", "OFF")),
optional=True,
),
Ref("DelimiterSegment", optional=True),
)


Expand Down Expand Up @@ -1131,11 +1177,14 @@ class TransactionStatementSegment(BaseSegment):
"TRANSACTION",
Ref("SingleIdentifierGrammar", optional=True),
Sequence("WITH", "MARK", Ref("QuotedIdentifierSegment"), optional=True),
Ref("DelimiterSegment", optional=True),
),
Sequence(
OneOf("COMMIT", "ROLLBACK"), OneOf("TRANSACTION", "WORK", optional=True)
OneOf("COMMIT", "ROLLBACK"),
OneOf("TRANSACTION", "WORK", optional=True),
Ref("DelimiterSegment", optional=True),
),
Sequence("SAVE", "TRANSACTION"),
Sequence("SAVE", "TRANSACTION", Ref("DelimiterSegment", optional=True)),
)


Expand All @@ -1151,7 +1200,13 @@ class BeginEndSegment(BaseSegment):
match_grammar = Sequence(
"BEGIN",
Indent,
Ref("BatchSegment"),
AnyNumberOf(
OneOf(
Ref("BeginEndSegment"),
Ref("StatementSegment"),
),
min_times=1,
),
Dedent,
"END",
)
Expand All @@ -1163,18 +1218,16 @@ class BatchSegment(BaseSegment):

type = "batch"
match_grammar = OneOf(
# Things that can be bundled
AnyNumberOf(
Ref("BeginEndSegment"),
OneOf(
Ref("BeginEndSegment"),
Ref("StatementSegment"),
),
min_times=1,
),
# Things that can't be bundled
Ref("CreateProcedureStatementSegment"),
Ref("IfExpressionStatement"),
Delimited(
Ref("StatementSegment"),
delimiter=Ref("DelimiterSegment"),
allow_gaps=True,
allow_trailing=True,
),
)


Expand All @@ -1199,3 +1252,122 @@ class FileSegment(BaseFileSegment):
allow_gaps=True,
allow_trailing=True,
)


@tsql_dialect.segment(replace=True)
class DeleteStatementSegment(BaseSegment):
    """A `DELETE` statement.

    Shape: DELETE FROM <table name> [ WHERE <search condition> ]

    This replaces the ANSI definition in order to drop its StartsWith
    logic, which assumes that statements have already been delimited.
    """

    type = "delete_statement"
    # The statement is matched element by element; both the WHERE clause
    # and the closing delimiter may be absent.
    match_grammar = Sequence(
        "DELETE",
        Ref("FromClauseSegment"),
        Ref("WhereClauseSegment", optional=True),
        Ref("DelimiterSegment", optional=True),
    )


@tsql_dialect.segment(replace=True)
class FromClauseSegment(BaseSegment):
    """A `FROM` clause like in `SELECT`.

    NOTE: this is a delimited set of table expressions, with a variable
    number of optional join clauses with those table expressions. The
    delimited aspect is the higher of the two such that the following is
    valid (albeit unusual):

    ```
    SELECT *
    FROM a JOIN b, c JOIN d
    ```

    Overriding ANSI to remove Delimited logic which assumes statements have been delimited
    """

    type = "from_clause"
    match_grammar = Sequence(
        "FROM",
        # Zero or more "<from expression> ," pairs ...
        AnyNumberOf(
            Sequence(
                Ref("FromExpressionSegment"),
                Ref("CommaSegment"),
            ),
        ),
        # ... followed by the final (or only) from expression.
        Ref("FromExpressionSegment"),
        Ref("DelimiterSegment", optional=True),
    )

    def get_eventual_aliases(self) -> List[Tuple[BaseSegment, AliasInfo]]:
        """List the eventual aliases of this from clause.

        Returns:
            A list of ``(table expression, alias info)`` tuples. Entries
            for the directly referenced table expressions come first, in
            order, followed by entries for the join clauses. Sources whose
            alias is falsy are left out.
        """
        buff: List[Tuple[BaseSegment, AliasInfo]] = []
        direct_table_children = []
        join_clauses = []

        for from_expression in self.get_children("from_expression"):
            direct_table_children += from_expression.get_children(
                "from_expression_element"
            )
            join_clauses += from_expression.get_children("join_clause")

        # The two kinds of alias source are walked separately, so no
        # per-clause membership test against the list of direct children
        # is needed to tell them apart.

        # A direct table expression is its own table expression.
        for table_element in direct_table_children:
            element_alias: AliasInfo = table_element.get_eventual_alias()
            # Only append if non null. A None reference, may
            # indicate a generator expression or similar.
            if element_alias:
                buff.append((table_element, element_alias))

        # For a join clause, the table expression is its child element,
        # which is only looked up once an alias is known to exist.
        for join_clause in join_clauses:
            join_alias: AliasInfo = join_clause.get_eventual_alias()
            if join_alias:
                buff.append(
                    (join_clause.get_child("from_expression_element"), join_alias)
                )
        return buff
jpers36 marked this conversation as resolved.
Show resolved Hide resolved


@tsql_dialect.segment(replace=True)
class OrderByClauseSegment(BaseSegment):
    """A `ORDER BY` clause like in `SELECT`.

    Overriding ANSI to remove StartsWith logic which assumes statements have been delimited

    The clause is one ordering element followed by any number of
    ", <ordering element>" pairs. Each further element must be preceded
    by a comma, so input such as `ORDER BY a b` or `ORDER BY a,, b` is
    not matched as a complete clause.
    """

    type = "orderby_clause"
    match_grammar = Sequence(
        "ORDER",
        "BY",
        Indent,
        # First (mandatory) ordering element.
        Sequence(
            OneOf(
                Ref("ColumnReferenceSegment"),
                # Can `ORDER BY 1`
                Ref("NumericLiteralSegment"),
                # Can order by an expression
                Ref("ExpressionSegment"),
            ),
            OneOf("ASC", "DESC", optional=True),
        ),
        # Further elements. The comma and the element are wrapped in one
        # Sequence so that they must occur together; passing them to
        # AnyNumberOf as separate arguments would make them alternatives.
        AnyNumberOf(
            Sequence(
                Ref("CommaSegment"),
                Sequence(
                    OneOf(
                        Ref("ColumnReferenceSegment"),
                        # Can `ORDER BY 1`
                        Ref("NumericLiteralSegment"),
                        # Can order by an expression
                        Ref("ExpressionSegment"),
                    ),
                    OneOf("ASC", "DESC", optional=True),
                ),
            ),
        ),
        Dedent,
        Ref("DelimiterSegment", optional=True),
    )