New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Prevent rules incorrectly returning conflicting fixes to same position #2830
Changes from 21 commits
4de2882
65a95b0
e51aa4b
9e8a5f0
89dbc95
bf87ff9
12a9383
119400d
c75c1bd
b5548b4
2e5561e
a2b5424
c248845
b43748e
255a5d0
6c4b0fa
dfd16b0
bc2b727
b6435e7
49db007
d736c60
4fc8f8d
8aa9170
76f8ba1
6548269
dfdd131
ffd746c
38a0b0e
808d92d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,10 +8,12 @@ | |
analysis. | ||
""" | ||
|
||
from collections import defaultdict | ||
from collections.abc import MutableSet | ||
from copy import deepcopy | ||
from dataclasses import replace | ||
from dataclasses import dataclass, field, replace | ||
from io import StringIO | ||
from typing import Any, Callable, Optional, List, Tuple, NamedTuple, Iterator | ||
from typing import Any, Callable, Dict, Optional, List, Tuple, NamedTuple, Iterator | ||
import logging | ||
|
||
from tqdm import tqdm | ||
|
@@ -50,6 +52,44 @@ class FixPatch(NamedTuple): | |
patch_category: str | ||
|
||
|
||
@dataclass | ||
class AnchorEditInfo: | ||
"""For a given fix anchor, count of the fix edit types and fixes for it.""" | ||
|
||
delete: int = field(default=0) | ||
replace: int = field(default=0) | ||
create_before: int = field(default=0) | ||
create_after: int = field(default=0) | ||
fixes: List = field(default_factory=list) | ||
|
||
def add(self, fix): | ||
"""Adds the fix and updates stats.""" | ||
self.fixes.append(fix) | ||
setattr(self, fix.edit_type, getattr(self, fix.edit_type) + 1) | ||
|
||
@property | ||
def total(self): | ||
"""Returns total count of fixes.""" | ||
return len(self.fixes) | ||
|
||
@property | ||
def is_valid(self): | ||
"""Returns True if valid combination of fixes for anchor. | ||
|
||
Cases: | ||
* 1 fix of any type: Valid | ||
* 2 fixes: Valid if and only if types are create_before and create_after | ||
""" | ||
if self.total <= 1: | ||
# Definitely no duplicates if <= 1. | ||
return True | ||
if self.total != 2: | ||
# Definitely duplicates if > 2. | ||
return False | ||
# Special case: Ok to create before and after same segment. | ||
return self.create_before == 1 and self.create_after == 1 | ||
|
||
|
||
class BaseSegment: | ||
"""The base segment element. | ||
|
||
|
@@ -1008,13 +1048,21 @@ def apply_fixes(self, dialect, rule_code, fixes): | |
else: | ||
seg = todo_buffer.pop(0) | ||
|
||
fix_buff = fixes.copy() | ||
unused_fixes = [] | ||
while fix_buff: | ||
f = fix_buff.pop() | ||
# Look for identity not just equality. | ||
# This handles potential positioning ambiguity. | ||
if f.anchor is seg: | ||
# Look for identity not just equality. | ||
# This handles potential positioning ambiguity. | ||
anchor_info: Optional[AnchorEditInfo] = fixes.pop(id(seg), None) | ||
if anchor_info is not None: | ||
seg_fixes = anchor_info.fixes | ||
if ( | ||
len(seg_fixes) == 2 | ||
and seg_fixes[0].edit_type == "create_after" | ||
): # pragma: no cover | ||
# Must be create_before & create_after. Swap so the | ||
# "before" comes first. | ||
seg_fixes.reverse() | ||
|
||
for f in anchor_info.fixes: | ||
assert f.anchor is seg | ||
fixes_applied.append(f) | ||
linter_logger.debug( | ||
"Matched fix against segment: %s -> %s", f, seg | ||
|
@@ -1027,9 +1075,13 @@ def apply_fixes(self, dialect, rule_code, fixes): | |
"create_before", | ||
"create_after", | ||
): | ||
if f.edit_type == "create_after": | ||
# in the case of a creation after, also add this | ||
# segment before the edit. | ||
if ( | ||
f.edit_type == "create_after" | ||
and len(anchor_info.fixes) == 1 | ||
): | ||
# in the case of a creation after that is not part | ||
# of a create_before/create_after pair, also add | ||
# this segment before the edit. | ||
seg_buffer.append(seg) | ||
|
||
# We're doing a replacement (it could be a single | ||
|
@@ -1051,18 +1103,8 @@ def apply_fixes(self, dialect, rule_code, fixes): | |
f.edit_type, f | ||
) | ||
) | ||
# We've applied a fix here. Move on, this also consumes the | ||
# fix | ||
# TODO: Maybe deal with overlapping fixes later. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Delete this comment now that we're dealing with them (by warning and discarding) |
||
break | ||
else: | ||
# We've not used the fix so we should keep it in the list | ||
# for later. | ||
unused_fixes.append(f) | ||
else: | ||
seg_buffer.append(seg) | ||
# Switch over the the unused list | ||
fixes = unused_fixes + fix_buff | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Note how this tricky "end of loop" logic all goes away now that we're using a dictionary instead. |
||
# Invalidate any caches | ||
self.invalidate_caches() | ||
|
||
|
@@ -1090,6 +1132,17 @@ def apply_fixes(self, dialect, rule_code, fixes): | |
else: | ||
return self, fixes | ||
|
||
@classmethod | ||
def compute_anchor_edit_info(cls, fixes) -> Dict[int, AnchorEditInfo]: | ||
"""Group and count fixes by anchor, return dictionary.""" | ||
anchor_info = defaultdict(AnchorEditInfo) # type: ignore | ||
for fix in fixes: | ||
# :TRICKY: Use segment id() as the dictionary key since | ||
# different segments may compare as equal. | ||
anchor_id = id(fix.anchor) | ||
anchor_info[anchor_id].add(fix) | ||
return dict(anchor_info) | ||
|
||
def _validate_segment_after_fixes(self, rule_code, dialect, fixes_applied, segment): | ||
"""Checks correctness of new segment against match or parse grammar.""" | ||
root_parse_context = RootParseContext(dialect=dialect) | ||
|
@@ -1117,7 +1170,7 @@ def _validate_segment_after_fixes(self, rule_code, dialect, fixes_applied, segme | |
|
||
@staticmethod | ||
def _log_apply_fixes_check_issue(message, *args): # pragma: no cover | ||
linter_logger.warning(message, *args) | ||
linter_logger.critical(message, *args) | ||
|
||
def iter_patches(self, templated_str: str) -> Iterator[FixPatch]: | ||
"""Iterate through the segments generating fix patches. | ||
|
@@ -1336,3 +1389,41 @@ def get_table_references(self): | |
for stmt in self.get_children("statement"): | ||
references |= stmt.get_table_references() | ||
return references | ||
|
||
|
||
class IdentitySet(MutableSet): | ||
"""Similar to built-in set(), but based on object IDENTITY. | ||
|
||
This is often important when working with BaseSegment and other types, | ||
where different object instances may compare as equal. | ||
|
||
Copied from: https://stackoverflow.com/questions/16994307/identityset-in-python | ||
""" | ||
|
||
key = id # should return a hashable object | ||
|
||
def __init__(self, iterable=()): | ||
self.map = {} # id -> object | ||
self |= iterable # add elements from iterable to the set (union) | ||
|
||
def __len__(self): # Sized | ||
return len(self.map) | ||
|
||
def __iter__(self): # Iterable | ||
return self.map.values().__iter__() # pragma: no cover | ||
|
||
def __contains__(self, x): # Container | ||
return self.key(x) in self.map | ||
|
||
def add(self, value): # MutableSet | ||
"""Add an element.""" | ||
self.map[self.key(value)] = value | ||
|
||
def discard(self, value): # MutableSet | ||
"""Remove an element. Do not raise an exception if absent.""" | ||
self.map.pop(self.key(value), None) # pragma: no cover | ||
|
||
def __repr__(self): # pragma: no cover | ||
if not self: | ||
return "%s()" % (self.__class__.__name__,) | ||
return "%s(%r)" % (self.__class__.__name__, list(self)) |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,6 +5,7 @@ | |
from sqlfluff.core.parser import WhitespaceSegment | ||
|
||
from sqlfluff.core.parser import BaseSegment, NewlineSegment | ||
from sqlfluff.core.parser.segments.base import IdentitySet | ||
from sqlfluff.core.rules.base import BaseRule, LintFix, LintResult, RuleContext | ||
from sqlfluff.core.rules.doc_decorators import document_fix_compatible | ||
from sqlfluff.core.rules.functional import Segments | ||
|
@@ -254,6 +255,7 @@ def _eval_single_select_target_element( | |
|
||
def _fixes_for_move_after_select_clause( | ||
stop_seg: BaseSegment, | ||
delete_segments: Optional[Segments] = None, | ||
add_newline: bool = True, | ||
) -> List[LintFix]: | ||
"""Cleans up by moving leftover select_clause segments. | ||
|
@@ -277,10 +279,25 @@ def _fixes_for_move_after_select_clause( | |
start_seg=start_seg, | ||
stop_seg=stop_seg, | ||
) | ||
fixes = [ | ||
LintFix.delete(seg) for seg in move_after_select_clause | ||
# :TRICKY: Below, we have a couple places where we | ||
# filter to guard against deleting the same segment | ||
# multiple times -- this is illegal. | ||
# :TRICKY: Use IdentitySet rather than set() since | ||
# different segments may compare as equal. | ||
all_deletes = IdentitySet( | ||
fix.anchor for fix in fixes if fix.edit_type == "delete" | ||
) | ||
fixes_ = [] | ||
for seg in delete_segments or []: | ||
if seg not in all_deletes: | ||
fixes.append(LintFix.delete(seg)) | ||
all_deletes.add(seg) | ||
fixes_ += [ | ||
LintFix.delete(seg) | ||
for seg in move_after_select_clause | ||
if seg not in all_deletes | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @tunetheweb: I'd like your thoughts on a question. This PR adds a new feature that prohibits fixes with the same anchor with one exception: It's okay to have 2 fixes, one Should we also allow multiple deletes of the same segment? There's no ambiguity there, and it would avoid needing to "fix" this rule as well as L053. It's kind of a philosophical question. If we decide to allow multiple deletes, we make it easier on rule writers. On the other hand, we're letting them be a bit sloppy. 🤷♂️ I could be convinced either way. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hmmm.... I think safer to not allow that. In theory it shouldn't be needed so the developer ease is not a strong enough argument in my mind. And curious why L036 currently does it? Looked at the code but wasn't immediately apparent to me. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There's not a "good" reason, and I am not that familiar with L036 details, but basically, there are a couple places that delete unnecessary whitespace, and there was no bookkeeping, so in some cases, the same whitespace gets deleted twice. Happy to leave the fix checker "as is" for now (i.e. not allow multiple deletes). |
||
] | ||
fixes.append( | ||
fixes_.append( | ||
LintFix.create_after( | ||
self._choose_anchor_segment( | ||
context, | ||
|
@@ -292,7 +309,7 @@ def _fixes_for_move_after_select_clause( | |
+ list(move_after_select_clause), | ||
) | ||
) | ||
return fixes | ||
return fixes_ | ||
|
||
if select_stmt.segments[after_select_clause_idx].is_type("newline"): | ||
# Since we're deleting the newline, we should also delete all | ||
|
@@ -304,10 +321,7 @@ def _fixes_for_move_after_select_clause( | |
loop_while=sp.is_type("whitespace"), | ||
start_seg=select_children[start_idx], | ||
) | ||
fixes += [LintFix.delete(seg) for seg in to_delete] | ||
|
||
if to_delete: | ||
|
||
# The select_clause is immediately followed by a | ||
# newline. Delete the newline in order to avoid leaving | ||
# behind an empty line after fix, *unless* we stopped | ||
|
@@ -325,15 +339,15 @@ def _fixes_for_move_after_select_clause( | |
) | ||
|
||
fixes += _fixes_for_move_after_select_clause( | ||
to_delete[-1], | ||
to_delete[-1], to_delete | ||
) | ||
elif select_stmt.segments[after_select_clause_idx].is_type( | ||
"whitespace" | ||
): | ||
# The select_clause has stuff after (most likely a comment) | ||
# Delete the whitespace immediately after the select clause | ||
# so the other stuff aligns nicely based on where the select | ||
# clause started | ||
# clause started. | ||
fixes += [ | ||
LintFix.delete( | ||
select_stmt.segments[after_select_clause_idx], | ||
|
@@ -354,11 +368,10 @@ def _fixes_for_move_after_select_clause( | |
loop_while=sp.is_type("whitespace"), | ||
start_seg=select_children[select_clause_idx - 1], | ||
) | ||
fixes += [LintFix.delete(seg) for seg in to_delete] | ||
|
||
if to_delete: | ||
fixes += _fixes_for_move_after_select_clause( | ||
to_delete[-1], | ||
to_delete, | ||
# If we deleted a newline, create a newline. | ||
any(seg for seg in to_delete if seg.is_type("newline")), | ||
) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Below, I reworked the core
apply_fixes()
logic:AnchorEditInfo
rather than a list of fixescreate_before
andcreate_after
the same anchor (new requirement for L053)Bonus: The new logic is more straightforward (dictionary lookup/removal versus making copies of lists and moving things between them). It's probably more efficient as well -- the old logic was scanning all the unused fixes each time, to try and match it against the current segment.