Prevent rules incorrectly returning conflicting fixes to same position #2830

Merged

Commits (29 total; the diff below shows changes from 21 commits)
4de2882
Fix L052 bug deleting space after Snowflake SET statement
Mar 9, 2022
65a95b0
Make a similar fix elsewhere in L052, DRY up the code
Mar 9, 2022
e51aa4b
Update tests to raise error on fixes with duplicate anchors
Mar 9, 2022
9e8a5f0
Merge branch 'main' into bhart-issue_2827_l052_duplicate_anchors
barrywhart Mar 9, 2022
89dbc95
Fix L036 bug: Deleting the same segment multiple times
Mar 9, 2022
bf87ff9
Rework the fixes to use object *identity*, not *equality*
Mar 9, 2022
12a9383
Comments
Mar 9, 2022
119400d
Update L052 to use IdentitySet
Mar 9, 2022
c75c1bd
Coverage checker
Mar 9, 2022
b5548b4
Update src/sqlfluff/core/linter/linter.py
barrywhart Mar 9, 2022
2e5561e
Update test/fixtures/rules/std_rule_cases/L052.yml
barrywhart Mar 9, 2022
a2b5424
PR review: Raise log level for these messages to CRITICAL
Mar 9, 2022
c248845
Add comment per code review
Mar 9, 2022
b43748e
Update test/fixtures/rules/std_rule_cases/L052.yml
tunetheweb Mar 9, 2022
255a5d0
Bug fix to IdentitySet
Mar 10, 2022
6c4b0fa
Update L039 to avoid returning multiple fixes with same anchors
Mar 10, 2022
dfd16b0
Add test case for fixed L039 issue
Mar 10, 2022
bc2b727
Fix one of the post-fix parse error bugs in L053
Mar 10, 2022
b6435e7
Fix L053 bug, allow create_before + create_after for same anchor
Mar 10, 2022
49db007
Tweaks
Mar 10, 2022
d736c60
Fix type annotation
Mar 10, 2022
4fc8f8d
Comments, tweaks
Mar 10, 2022
8aa9170
Fix bug in L050, fix missing space in message
Mar 10, 2022
76f8ba1
Remove commented-out code
Mar 10, 2022
6548269
Add "pragma: no cover"
Mar 10, 2022
dfdd131
Add "pragma: no cover"
Mar 10, 2022
ffd746c
PR review
Mar 10, 2022
38a0b0e
More L039 comments
Mar 10, 2022
808d92d
Discard fixes from lint results if there are conflicts
Mar 10, 2022
20 changes: 18 additions & 2 deletions src/sqlfluff/core/linter/linter.py
@@ -329,6 +329,12 @@ def remove_templated_errors(
result.append(e)
return result

@staticmethod
def _report_duplicate_anchors_error(message: str): # pragma: no cover
# This function exists primarily in order to let us monkeypatch it at
# runtime (replacing it with a function that raises an exception).
linter_logger.critical(message)

@staticmethod
def _warn_unfixable(code: str):
linter_logger.warning(
@@ -514,12 +520,22 @@ def lint_fix_parsed(
if fix and fixes:
linter_logger.info(f"Applying Fixes [{crawler.code}]: {fixes}")
# Do some sanity checks on the fixes before applying.
if fixes == last_fixes: # pragma: no cover
anchor_info = BaseSegment.compute_anchor_edit_info(fixes)
if any(
not info.is_valid for info in anchor_info.values()
): # pragma: no cover
message = (
f"Rule {crawler.code} returned multiple fixes with the same"
f"anchor. This is only supported for create_before+"
f"create_after, so the fixes will not be applied. {fixes!r}"
)
cls._report_duplicate_anchors_error(message)
elif fixes == last_fixes: # pragma: no cover
cls._warn_unfixable(crawler.code)
else:
last_fixes = fixes
new_tree, _ = tree.apply_fixes(
config.get("dialect_obj"), crawler.code, fixes
config.get("dialect_obj"), crawler.code, anchor_info
)
# Check for infinite loops
if new_tree.raw not in previous_versions:
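The comment on _report_duplicate_anchors_error notes that it exists mainly so it can be monkeypatched at runtime with a function that raises. A minimal sketch of that idea using pytest's monkeypatch fixture; the fixture name and exception type here are hypothetical and the project's actual test hook may differ:

import pytest

from sqlfluff.core.linter import Linter


@pytest.fixture
def raise_on_duplicate_anchors(monkeypatch):
    """Turn the critical log for conflicting fixes into a hard failure."""

    def _raise(message: str):
        raise ValueError(message)

    # The hook is called as cls._report_duplicate_anchors_error(...), so a plain
    # function (wrapped as a staticmethod to mirror the original) works as a stand-in.
    monkeypatch.setattr(Linter, "_report_duplicate_anchors_error", staticmethod(_raise))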
137 changes: 114 additions & 23 deletions src/sqlfluff/core/parser/segments/base.py
@@ -8,10 +8,12 @@
analysis.
"""

from collections import defaultdict
from collections.abc import MutableSet
from copy import deepcopy
from dataclasses import replace
from dataclasses import dataclass, field, replace
from io import StringIO
from typing import Any, Callable, Optional, List, Tuple, NamedTuple, Iterator
from typing import Any, Callable, Dict, Optional, List, Tuple, NamedTuple, Iterator
import logging

from tqdm import tqdm
@@ -50,6 +52,44 @@ class FixPatch(NamedTuple):
patch_category: str


@dataclass
class AnchorEditInfo:
"""For a given fix anchor, count of the fix edit types and fixes for it."""

delete: int = field(default=0)
replace: int = field(default=0)
create_before: int = field(default=0)
create_after: int = field(default=0)
fixes: List = field(default_factory=list)

def add(self, fix):
"""Adds the fix and updates stats."""
self.fixes.append(fix)
setattr(self, fix.edit_type, getattr(self, fix.edit_type) + 1)

@property
def total(self):
"""Returns total count of fixes."""
return len(self.fixes)

@property
def is_valid(self):
"""Returns True if valid combination of fixes for anchor.

Cases:
* 1 fix of any type: Valid
* 2 fixes: Valid if and only if types are create_before and create_after
"""
if self.total <= 1:
# Definitely no duplicates if <= 1.
return True
if self.total != 2:
# Definitely duplicates if > 2.
return False
# Special case: Ok to create before and after same segment.
return self.create_before == 1 and self.create_after == 1


class BaseSegment:
"""The base segment element.

@@ -1008,13 +1048,21 @@ def apply_fixes(self, dialect, rule_code, fixes):
else:
seg = todo_buffer.pop(0)

fix_buff = fixes.copy()
unused_fixes = []
while fix_buff:
f = fix_buff.pop()
# Look for identity not just equality.
# This handles potential positioning ambiguity.
if f.anchor is seg:
Member Author:

Below, I reworked the core apply_fixes() logic:

  • Consumes a dictionary of AnchorEditInfo rather than a list of fixes
  • Adds the ability to handle create_before and create_after on the same anchor (a new requirement for L053)

Bonus: The new logic is more straightforward (dictionary lookup/removal versus making copies of lists and moving things between them). It's probably more efficient as well -- the old logic scanned all the unused fixes each time, trying to match them against the current segment.
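To make the new flow concrete, here is a minimal sketch of how the grouping and validation fit together. The anchor is a standalone NewlineSegment purely for illustration, and LintFix.create_before is assumed to mirror the LintFix.create_after call used in the L036 diff below:

from sqlfluff.core.parser import BaseSegment, NewlineSegment
from sqlfluff.core.rules.base import LintFix

# Hypothetical anchor; in a real rule this would come from the parsed tree.
anchor = NewlineSegment()
fixes = [
    LintFix.create_before(anchor, [NewlineSegment()]),
    LintFix.create_after(anchor, [NewlineSegment()]),
]

# Fixes are grouped by id(anchor), so equal-but-distinct segments stay separate.
anchor_info = BaseSegment.compute_anchor_edit_info(fixes)
info = anchor_info[id(anchor)]

# Two fixes on one anchor are only valid as a create_before/create_after pair;
# any other duplicate combination is reported by the linter and not applied.
assert info.total == 2 and info.is_valid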

# Look for identity not just equality.
# This handles potential positioning ambiguity.
anchor_info: Optional[AnchorEditInfo] = fixes.pop(id(seg), None)
if anchor_info is not None:
seg_fixes = anchor_info.fixes
if (
len(seg_fixes) == 2
and seg_fixes[0].edit_type == "create_after"
): # pragma: no cover
# Must be create_before & create_after. Swap so the
# "before" comes first.
seg_fixes.reverse()

for f in anchor_info.fixes:
assert f.anchor is seg
fixes_applied.append(f)
linter_logger.debug(
"Matched fix against segment: %s -> %s", f, seg
@@ -1027,9 +1075,13 @@
"create_before",
"create_after",
):
if f.edit_type == "create_after":
# in the case of a creation after, also add this
# segment before the edit.
if (
f.edit_type == "create_after"
and len(anchor_info.fixes) == 1
):
# in the case of a creation after that is not part
# of a create_before/create_after pair, also add
# this segment before the edit.
seg_buffer.append(seg)

# We're doing a replacement (it could be a single
Expand All @@ -1051,18 +1103,8 @@ def apply_fixes(self, dialect, rule_code, fixes):
f.edit_type, f
)
)
# We've applied a fix here. Move on, this also consumes the
# fix
# TODO: Maybe deal with overlapping fixes later.
Member Author:

Deleted this TODO comment now that we're dealing with overlapping fixes (by warning and discarding them).

break
else:
# We've not used the fix so we should keep it in the list
# for later.
unused_fixes.append(f)
else:
seg_buffer.append(seg)
# Switch over the the unused list
fixes = unused_fixes + fix_buff
Member Author:

Note how this tricky "end of loop" logic all goes away now that we're using a dictionary instead.

# Invalidate any caches
self.invalidate_caches()

@@ -1090,6 +1132,17 @@ def apply_fixes(self, dialect, rule_code, fixes):
else:
return self, fixes

@classmethod
def compute_anchor_edit_info(cls, fixes) -> Dict[int, AnchorEditInfo]:
"""Group and count fixes by anchor, return dictionary."""
anchor_info = defaultdict(AnchorEditInfo) # type: ignore
for fix in fixes:
# :TRICKY: Use segment id() as the dictionary key since
# different segments may compare as equal.
anchor_id = id(fix.anchor)
anchor_info[anchor_id].add(fix)
return dict(anchor_info)

def _validate_segment_after_fixes(self, rule_code, dialect, fixes_applied, segment):
"""Checks correctness of new segment against match or parse grammar."""
root_parse_context = RootParseContext(dialect=dialect)
@@ -1117,7 +1170,7 @@ def _validate_segment_after_fixes(self, rule_code, dialect, fixes_applied, segment):

@staticmethod
def _log_apply_fixes_check_issue(message, *args): # pragma: no cover
linter_logger.warning(message, *args)
linter_logger.critical(message, *args)

def iter_patches(self, templated_str: str) -> Iterator[FixPatch]:
"""Iterate through the segments generating fix patches.
@@ -1336,3 +1389,41 @@ def get_table_references(self):
for stmt in self.get_children("statement"):
references |= stmt.get_table_references()
return references


class IdentitySet(MutableSet):
"""Similar to built-in set(), but based on object IDENTITY.

This is often important when working with BaseSegment and other types,
where different object instances may compare as equal.

Copied from: https://stackoverflow.com/questions/16994307/identityset-in-python
"""

key = id # should return a hashable object

def __init__(self, iterable=()):
self.map = {} # id -> object
self |= iterable # add elements from iterable to the set (union)

def __len__(self): # Sized
return len(self.map)

def __iter__(self): # Iterable
return self.map.values().__iter__() # pragma: no cover

def __contains__(self, x): # Container
return self.key(x) in self.map

def add(self, value): # MutableSet
"""Add an element."""
self.map[self.key(value)] = value

def discard(self, value): # MutableSet
"""Remove an element. Do not raise an exception if absent."""
self.map.pop(self.key(value), None) # pragma: no cover

def __repr__(self): # pragma: no cover
if not self:
return "%s()" % (self.__class__.__name__,)
return "%s(%r)" % (self.__class__.__name__, list(self))
35 changes: 24 additions & 11 deletions src/sqlfluff/rules/L036.py
@@ -5,6 +5,7 @@
from sqlfluff.core.parser import WhitespaceSegment

from sqlfluff.core.parser import BaseSegment, NewlineSegment
from sqlfluff.core.parser.segments.base import IdentitySet
from sqlfluff.core.rules.base import BaseRule, LintFix, LintResult, RuleContext
from sqlfluff.core.rules.doc_decorators import document_fix_compatible
from sqlfluff.core.rules.functional import Segments
@@ -254,6 +255,7 @@ def _eval_single_select_target_element(

def _fixes_for_move_after_select_clause(
stop_seg: BaseSegment,
delete_segments: Optional[Segments] = None,
add_newline: bool = True,
) -> List[LintFix]:
"""Cleans up by moving leftover select_clause segments.
@@ -277,10 +279,25 @@
start_seg=start_seg,
stop_seg=stop_seg,
)
fixes = [
LintFix.delete(seg) for seg in move_after_select_clause
# :TRICKY: Below, we have a couple places where we
# filter to guard against deleting the same segment
# multiple times -- this is illegal.
# :TRICKY: Use IdentitySet rather than set() since
# different segments may compare as equal.
all_deletes = IdentitySet(
fix.anchor for fix in fixes if fix.edit_type == "delete"
)
fixes_ = []
for seg in delete_segments or []:
if seg not in all_deletes:
fixes.append(LintFix.delete(seg))
all_deletes.add(seg)
fixes_ += [
LintFix.delete(seg)
for seg in move_after_select_clause
if seg not in all_deletes
Member Author:

@tunetheweb: I'd like your thoughts on a question. This PR adds a new feature that prohibits fixes with the same anchor with one exception: It's okay to have 2 fixes, one create_before and one create_after.

Should we also allow multiple deletes of the same segment? There's no ambiguity there, and it would avoid needing to "fix" this rule as well as L053.

It's kind of a philosophical question. If we decide to allow multiple deletes, we make it easier on rule writers. On the other hand, we're letting them be a bit sloppy. 🤷‍♂️ I could be convinced either way.

Member:

Hmmm... I think it's safer not to allow that. In theory it shouldn't be needed, so developer ease isn't a strong enough argument in my mind. And I'm curious why L036 currently does it? I looked at the code, but it wasn't immediately apparent to me.

Member Author:

There's not a "good" reason, and I am not that familiar with L036 details, but basically, there are a couple places that delete unnecessary whitespace, and there was no bookkeeping, so in some cases, the same whitespace gets deleted twice.

Happy to leave the fix checker "as is" for now (i.e. not allow multiple deletes).

]
fixes.append(
fixes_.append(
LintFix.create_after(
self._choose_anchor_segment(
context,
@@ -292,7 +309,7 @@ def _fixes_for_move_after_select_clause(
+ list(move_after_select_clause),
)
)
return fixes
return fixes_

if select_stmt.segments[after_select_clause_idx].is_type("newline"):
# Since we're deleting the newline, we should also delete all
@@ -304,10 +321,7 @@ def _fixes_for_move_after_select_clause(
loop_while=sp.is_type("whitespace"),
start_seg=select_children[start_idx],
)
fixes += [LintFix.delete(seg) for seg in to_delete]

if to_delete:

# The select_clause is immediately followed by a
# newline. Delete the newline in order to avoid leaving
# behind an empty line after fix, *unless* we stopped
@@ -325,15 +339,15 @@ def _fixes_for_move_after_select_clause(
)

fixes += _fixes_for_move_after_select_clause(
to_delete[-1],
to_delete[-1], to_delete
)
elif select_stmt.segments[after_select_clause_idx].is_type(
"whitespace"
):
# The select_clause has stuff after (most likely a comment)
# Delete the whitespace immediately after the select clause
# so the other stuff aligns nicely based on where the select
# clause started
# clause started.
fixes += [
LintFix.delete(
select_stmt.segments[after_select_clause_idx],
@@ -354,11 +368,10 @@ def _fixes_for_move_after_select_clause(
loop_while=sp.is_type("whitespace"),
start_seg=select_children[select_clause_idx - 1],
)
fixes += [LintFix.delete(seg) for seg in to_delete]

if to_delete:
fixes += _fixes_for_move_after_select_clause(
to_delete[-1],
to_delete,
# If we deleted a newline, create a newline.
any(seg for seg in to_delete if seg.is_type("newline")),
)
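Pulled out of context, the duplicate-delete guard used in the L036 changes above looks roughly like this; existing_fixes and candidate_segments are hypothetical parameters standing in for the rule's fixes, delete_segments and move_after_select_clause:

from sqlfluff.core.parser.segments.base import IdentitySet
from sqlfluff.core.rules.base import LintFix


def dedupe_delete_fixes(existing_fixes, candidate_segments):
    """Create delete fixes, skipping segments already scheduled for deletion."""
    # Track anchors already being deleted, by object identity rather than equality.
    all_deletes = IdentitySet(
        fix.anchor for fix in existing_fixes if fix.edit_type == "delete"
    )
    new_fixes = []
    for seg in candidate_segments:
        if seg not in all_deletes:  # never delete the same segment twice
            new_fixes.append(LintFix.delete(seg))
            all_deletes.add(seg)
    return new_fixes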
27 changes: 25 additions & 2 deletions src/sqlfluff/rules/L039.py
@@ -2,7 +2,7 @@
from typing import List, Optional

from sqlfluff.core.parser import WhitespaceSegment

from sqlfluff.core.parser.segments.base import IdentitySet
from sqlfluff.core.rules.base import BaseRule, LintFix, LintResult, RuleContext
from sqlfluff.core.rules.doc_decorators import document_fix_compatible
from sqlfluff.core.rules.functional import sp
@@ -38,6 +38,11 @@ def _eval(self, context: RuleContext) -> Optional[List[LintResult]]:
prev_newline = True
prev_whitespace = None
violations = []
memory = context.memory
if not memory:
# Use memory to avoid returning multiple fixes with the same anchor.
# (This is illegal.)
memory = dict(fix_anchors=IdentitySet())
for seg in context.segment.segments:
if seg.is_meta:
continue
@@ -119,4 +124,22 @@ def _eval(self, context: RuleContext) -> Optional[List[LintResult]]:
)
)

return violations or None
final_violations = []
if violations:
# Check each violation. If any of its fixes reuses a past anchor,
# discard it.
for violation in violations:
if not any(
[
fix
for fix in violation.fixes
if fix.anchor in memory["fix_anchors"]
]
):
final_violations.append(violation)
for fix in violation.fixes:
memory["fix_anchors"].add(fix.anchor)
if not final_violations:
final_violations.append(LintResult())
final_violations[-1].memory = memory
return final_violations