Prevent rules incorrectly returning conflicting fixes to same position #2830

Merged

29 commits
4de2882
Fix L052 bug deleting space after Snowflake SET statement
Mar 9, 2022
65a95b0
Make a similar fix elsewhere in L052, DRY up the code
Mar 9, 2022
e51aa4b
Update tests to raise error on fixes with duplicate anchors
Mar 9, 2022
9e8a5f0
Merge branch 'main' into bhart-issue_2827_l052_duplicate_anchors
barrywhart Mar 9, 2022
89dbc95
Fix L036 bug: Deleting the same segment multiple times
Mar 9, 2022
bf87ff9
Rework the fixes to use object *identity*, not *equality*
Mar 9, 2022
12a9383
Comments
Mar 9, 2022
119400d
Update L052 to use IdentitySet
Mar 9, 2022
c75c1bd
Coverage checker
Mar 9, 2022
b5548b4
Update src/sqlfluff/core/linter/linter.py
barrywhart Mar 9, 2022
2e5561e
Update test/fixtures/rules/std_rule_cases/L052.yml
barrywhart Mar 9, 2022
a2b5424
PR review: Raise log level for these messages to CRITICAL
Mar 9, 2022
c248845
Add comment per code review
Mar 9, 2022
b43748e
Update test/fixtures/rules/std_rule_cases/L052.yml
tunetheweb Mar 9, 2022
255a5d0
Bug fix to IdentitySet
Mar 10, 2022
6c4b0fa
Update L039 to avoid returning multiple fixes with same anchors
Mar 10, 2022
dfd16b0
Add test case for fixed L039 issue
Mar 10, 2022
bc2b727
Fix one of the post-fix parse error bugs in L053
Mar 10, 2022
b6435e7
Fix L053 bug, allow create_before + create_after for same anchor
Mar 10, 2022
49db007
Tweaks
Mar 10, 2022
d736c60
Fix type annotation
Mar 10, 2022
4fc8f8d
Comments, tweaks
Mar 10, 2022
8aa9170
Fix bug in L050, fix missing space in message
Mar 10, 2022
76f8ba1
Remove commented-out code
Mar 10, 2022
6548269
Add "pragma: no cover"
Mar 10, 2022
dfdd131
Add "pragma: no cover"
Mar 10, 2022
ffd746c
PR review
Mar 10, 2022
38a0b0e
More L039 comments
Mar 10, 2022
808d92d
Discard fixes from lint results if there are conflicts
Mar 10, 2022
20 changes: 18 additions & 2 deletions src/sqlfluff/core/linter/linter.py
@@ -329,6 +329,12 @@ def remove_templated_errors(
result.append(e)
return result

@staticmethod
def _report_duplicate_anchors_error(message: str): # pragma: no cover
# This function exists primarily in order to let us monkeypatch it at
# runtime (replacing it with a function that raises an exception).
linter_logger.critical(message)

@staticmethod
def _warn_unfixable(code: str):
linter_logger.warning(
@@ -514,12 +520,22 @@ def lint_fix_parsed(
if fix and fixes:
linter_logger.info(f"Applying Fixes [{crawler.code}]: {fixes}")
# Do some sanity checks on the fixes before applying.
if fixes == last_fixes: # pragma: no cover
anchor_info = BaseSegment.compute_anchor_edit_info(fixes)
if any(
not info.is_valid for info in anchor_info.values()
): # pragma: no cover
message = (
f"Rule {crawler.code} returned multiple fixes with the "
f"same anchor. This is only supported for create_before+"
f"create_after, so the fixes will not be applied. {fixes!r}"
)
cls._report_duplicate_anchors_error(message)
elif fixes == last_fixes: # pragma: no cover
cls._warn_unfixable(crawler.code)
else:
Review comment (Member):

Can we add a comment explaining what each of these three parts are for?

The first one I think is covered by the error message.
Second is what? When there are errors but no fixes?
Third is what the good case? We have fixes and they look valid?

Author reply:

Will do.

last_fixes = fixes
new_tree, _ = tree.apply_fixes(
config.get("dialect_obj"), crawler.code, fixes
config.get("dialect_obj"), crawler.code, anchor_info
)
# Check for infinite loops
if new_tree.raw not in previous_versions:
137 changes: 114 additions & 23 deletions src/sqlfluff/core/parser/segments/base.py
@@ -8,10 +8,12 @@
analysis.
"""

from collections import defaultdict
from collections.abc import MutableSet
from copy import deepcopy
from dataclasses import replace
from dataclasses import dataclass, field, replace
from io import StringIO
from typing import Any, Callable, Optional, List, Tuple, NamedTuple, Iterator
from typing import Any, Callable, Dict, Optional, List, Tuple, NamedTuple, Iterator
import logging

from tqdm import tqdm
@@ -50,6 +52,44 @@ class FixPatch(NamedTuple):
patch_category: str


@dataclass
class AnchorEditInfo:
"""For a given fix anchor, count of the fix edit types and fixes for it."""

delete: int = field(default=0)
replace: int = field(default=0)
create_before: int = field(default=0)
create_after: int = field(default=0)
fixes: List = field(default_factory=list)

def add(self, fix):
"""Adds the fix and updates stats."""
self.fixes.append(fix)
setattr(self, fix.edit_type, getattr(self, fix.edit_type) + 1)

@property
def total(self):
"""Returns total count of fixes."""
return len(self.fixes)

@property
def is_valid(self):
"""Returns True if valid combination of fixes for anchor.

Cases:
* 1 fix of any type: Valid
* 2 fixes: Valid if and only if types are create_before and create_after
"""
if self.total <= 1:
# Definitely no duplicates if <= 1.
return True
if self.total != 2:
# Definitely duplicates if > 2.
return False
# Special case: Ok to create before and after same segment.
return self.create_before == 1 and self.create_after == 1
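As an aside for reviewers, the validity rules above are easy to exercise standalone. The sketch below re-implements the dataclass with a minimal, hypothetical Fix stand-in (the real fixes are LintFix objects); it is an illustration, not part of the diff:

```python
from dataclasses import dataclass, field
from typing import List


@dataclass
class AnchorEditInfo:
    """Per-anchor counts of fix edit types (simplified sketch)."""

    delete: int = 0
    replace: int = 0
    create_before: int = 0
    create_after: int = 0
    fixes: List = field(default_factory=list)

    def add(self, fix):
        """Record the fix and bump the counter for its edit type."""
        self.fixes.append(fix)
        setattr(self, fix.edit_type, getattr(self, fix.edit_type) + 1)

    @property
    def total(self):
        return len(self.fixes)

    @property
    def is_valid(self):
        if self.total <= 1:
            return True  # A single fix is always fine.
        if self.total != 2:
            return False  # Three or more fixes on one anchor: conflict.
        # The only legal pair: create_before + create_after on one anchor.
        return self.create_before == 1 and self.create_after == 1


@dataclass
class Fix:
    """Hypothetical stand-in for sqlfluff's LintFix."""

    edit_type: str


pair = AnchorEditInfo()
pair.add(Fix("create_before"))
pair.add(Fix("create_after"))
print(pair.is_valid)  # True: the one permitted two-fix combination

dupes = AnchorEditInfo()
dupes.add(Fix("delete"))
dupes.add(Fix("delete"))
print(dupes.is_valid)  # False: two deletes of the same anchor
```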


class BaseSegment:
"""The base segment element.

@@ -1008,13 +1048,21 @@ def apply_fixes(self, dialect, rule_code, fixes):
else:
seg = todo_buffer.pop(0)

fix_buff = fixes.copy()
unused_fixes = []
while fix_buff:
f = fix_buff.pop()
# Look for identity not just equality.
# This handles potential positioning ambiguity.
if f.anchor is seg:
Author comment:

Below, I reworked the core apply_fixes() logic:

  • Consumes a dictionary of AnchorEditInfo rather than a list of fixes
  • Add the ability to handle create_before and create_after the same anchor (new requirement for L053)

Bonus: The new logic is more straightforward (dictionary lookup/removal versus making copies of lists and moving things between them). It's probably more efficient as well -- the old logic was scanning all the unused fixes each time, to try and match it against the current segment.

# Look for identity not just equality.
# This handles potential positioning ambiguity.
anchor_info: Optional[AnchorEditInfo] = fixes.pop(id(seg), None)
if anchor_info is not None:
seg_fixes = anchor_info.fixes
if (
len(seg_fixes) == 2
and seg_fixes[0].edit_type == "create_after"
): # pragma: no cover
# Must be create_before & create_after. Swap so the
# "before" comes first.
seg_fixes.reverse()

for f in anchor_info.fixes:
assert f.anchor is seg
fixes_applied.append(f)
linter_logger.debug(
"Matched fix against segment: %s -> %s", f, seg
@@ -1027,9 +1075,13 @@
"create_before",
"create_after",
):
if f.edit_type == "create_after":
# in the case of a creation after, also add this
# segment before the edit.
if (
f.edit_type == "create_after"
and len(anchor_info.fixes) == 1
):
# in the case of a creation after that is not part
# of a create_before/create_after pair, also add
# this segment before the edit.
seg_buffer.append(seg)

# We're doing a replacement (it could be a single
@@ -1051,18 +1103,8 @@
f.edit_type, f
)
)
# We've applied a fix here. Move on, this also consumes the
# fix
# TODO: Maybe deal with overlapping fixes later.
Author comment:

Delete this comment now that we're dealing with them (by warning and discarding)

break
else:
# We've not used the fix so we should keep it in the list
# for later.
unused_fixes.append(f)
else:
seg_buffer.append(seg)
# Switch over the the unused list
fixes = unused_fixes + fix_buff
Author comment:

Note how this tricky "end of loop" logic all goes away now that we're using a dictionary instead.

# Invalidate any caches
self.invalidate_caches()

@@ -1090,6 +1132,17 @@ def apply_fixes(self, dialect, rule_code, fixes):
else:
return self, fixes

@classmethod
def compute_anchor_edit_info(cls, fixes) -> Dict[int, AnchorEditInfo]:
"""Group and count fixes by anchor, return dictionary."""
anchor_info = defaultdict(AnchorEditInfo) # type: ignore
for fix in fixes:
# :TRICKY: Use segment id() as the dictionary key since
# different segments may compare as equal.
anchor_id = id(fix.anchor)
anchor_info[anchor_id].add(fix)
return dict(anchor_info)
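The :TRICKY: note matters because equal-but-distinct segments are common. A tiny sketch (hypothetical Seg type, plain tuples for fixes, an editorial illustration rather than sqlfluff code) shows how an equality-keyed dict would mis-bucket fixes while id()-keying keeps them apart:

```python
from collections import defaultdict


class Seg:
    """Hypothetical segment: distinct instances compare (and hash) as equal."""

    def __eq__(self, other):
        return isinstance(other, Seg)

    def __hash__(self):
        return 0


seg_a, seg_b = Seg(), Seg()
fixes = [("delete", seg_a), ("replace", seg_b)]

by_identity = defaultdict(list)  # keyed on id(anchor), as in the diff
by_equality = defaultdict(list)  # what a naive grouping would do
for edit_type, anchor in fixes:
    by_identity[id(anchor)].append(edit_type)
    by_equality[anchor].append(edit_type)

print(len(by_equality))  # 1 -- equal segments wrongly share one bucket
print(len(by_identity))  # 2 -- id() keeps the two anchors separate
```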

def _validate_segment_after_fixes(self, rule_code, dialect, fixes_applied, segment):
"""Checks correctness of new segment against match or parse grammar."""
root_parse_context = RootParseContext(dialect=dialect)
@@ -1117,7 +1170,7 @@ def _validate_segment_after_fixes(self, rule_code, dialect, fixes_applied, segme

@staticmethod
def _log_apply_fixes_check_issue(message, *args): # pragma: no cover
linter_logger.warning(message, *args)
linter_logger.critical(message, *args)

def iter_patches(self, templated_str: str) -> Iterator[FixPatch]:
"""Iterate through the segments generating fix patches.
@@ -1336,3 +1389,41 @@ def get_table_references(self):
for stmt in self.get_children("statement"):
references |= stmt.get_table_references()
return references


class IdentitySet(MutableSet):
"""Similar to built-in set(), but based on object IDENTITY.

This is often important when working with BaseSegment and other types,
where different object instances may compare as equal.

Copied from: https://stackoverflow.com/questions/16994307/identityset-in-python
"""

key = id # should return a hashable object

def __init__(self, iterable=()):
self.map = {} # id -> object
self |= iterable # add elements from iterable to the set (union)

def __len__(self): # Sized
return len(self.map)

def __iter__(self): # Iterable
return self.map.values().__iter__() # pragma: no cover

def __contains__(self, x): # Container
return self.key(x) in self.map

def add(self, value): # MutableSet
"""Add an element."""
self.map[self.key(value)] = value

def discard(self, value): # MutableSet
"""Remove an element. Do not raise an exception if absent."""
self.map.pop(self.key(value), None) # pragma: no cover

def __repr__(self): # pragma: no cover
if not self:
return "%s()" % (self.__class__.__name__,)
return "%s(%r)" % (self.__class__.__name__, list(self))
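A standalone demonstration of the behavior (the class as copied above, plus a hypothetical always-equal Seg type standing in for BaseSegment):

```python
from collections.abc import MutableSet


class IdentitySet(MutableSet):
    """set() variant keyed on object identity rather than equality."""

    key = id  # should return a hashable object

    def __init__(self, iterable=()):
        self.map = {}  # id -> object
        self |= iterable  # MutableSet.__ior__ adds each element

    def __len__(self):
        return len(self.map)

    def __iter__(self):
        return iter(self.map.values())

    def __contains__(self, x):
        return self.key(x) in self.map

    def add(self, value):
        self.map[self.key(value)] = value

    def discard(self, value):
        self.map.pop(self.key(value), None)


class Seg:
    """Hypothetical segment type: all instances compare (and hash) as equal."""

    def __eq__(self, other):
        return isinstance(other, Seg)

    def __hash__(self):
        return 0


a, b = Seg(), Seg()
print(len({a, b}))     # 1 -- a plain set() collapses the equal segments
ident = IdentitySet([a, b])
print(len(ident))      # 2 -- IdentitySet keeps both instances
print(Seg() in ident)  # False -- membership is by identity, not equality
```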
35 changes: 24 additions & 11 deletions src/sqlfluff/rules/L036.py
@@ -5,6 +5,7 @@
from sqlfluff.core.parser import WhitespaceSegment

from sqlfluff.core.parser import BaseSegment, NewlineSegment
from sqlfluff.core.parser.segments.base import IdentitySet
from sqlfluff.core.rules.base import BaseRule, LintFix, LintResult, RuleContext
from sqlfluff.core.rules.doc_decorators import document_fix_compatible
from sqlfluff.core.rules.functional import Segments
@@ -254,6 +255,7 @@ def _eval_single_select_target_element(

def _fixes_for_move_after_select_clause(
stop_seg: BaseSegment,
delete_segments: Optional[Segments] = None,
add_newline: bool = True,
) -> List[LintFix]:
"""Cleans up by moving leftover select_clause segments.
@@ -277,10 +279,25 @@
start_seg=start_seg,
stop_seg=stop_seg,
)
fixes = [
LintFix.delete(seg) for seg in move_after_select_clause
# :TRICKY: Below, we have a couple places where we
# filter to guard against deleting the same segment
# multiple times -- this is illegal.
# :TRICKY: Use IdentitySet rather than set() since
# different segments may compare as equal.
all_deletes = IdentitySet(
fix.anchor for fix in fixes if fix.edit_type == "delete"
)
fixes_ = []
for seg in delete_segments or []:
if seg not in all_deletes:
fixes_.append(LintFix.delete(seg))
all_deletes.add(seg)
fixes_ += [
LintFix.delete(seg)
for seg in move_after_select_clause
if seg not in all_deletes
Author comment:

@tunetheweb: I'd like your thoughts on a question. This PR adds a new feature that prohibits fixes with the same anchor with one exception: It's okay to have 2 fixes, one create_before and one create_after.

Should we also allow multiple deletes of the same segment? There's no ambiguity there, and it would avoid needing to "fix" this rule as well as L053.

It's kind of a philosophical question. If we decide to allow multiple deletes, we make it easier on rule writers. On the other hand, we're letting them be a bit sloppy. 🤷‍♂️ I could be convinced either way.

Review comment (Member):

Hmmm... I think it's safer not to allow that. In theory it shouldn't be needed, so developer ease is not a strong enough argument in my mind. And I'm curious why L036 currently does it? I looked at the code, but it wasn't immediately apparent to me.

Author reply:

There's not a "good" reason, and I am not that familiar with L036 details, but basically, there are a couple places that delete unnecessary whitespace, and there was no bookkeeping, so in some cases, the same whitespace gets deleted twice.

Happy to leave the fix checker "as is" for now (i.e. not allow multiple deletes).

]
fixes.append(
fixes_.append(
LintFix.create_after(
self._choose_anchor_segment(
context,
@@ -292,7 +309,7 @@
+ list(move_after_select_clause),
)
)
return fixes
return fixes_

if select_stmt.segments[after_select_clause_idx].is_type("newline"):
# Since we're deleting the newline, we should also delete all
@@ -304,10 +321,7 @@
loop_while=sp.is_type("whitespace"),
start_seg=select_children[start_idx],
)
fixes += [LintFix.delete(seg) for seg in to_delete]

if to_delete:

# The select_clause is immediately followed by a
# newline. Delete the newline in order to avoid leaving
# behind an empty line after fix, *unless* we stopped
Expand All @@ -325,15 +339,15 @@ def _fixes_for_move_after_select_clause(
)

fixes += _fixes_for_move_after_select_clause(
to_delete[-1],
to_delete[-1], to_delete
)
elif select_stmt.segments[after_select_clause_idx].is_type(
"whitespace"
):
# The select_clause has stuff after (most likely a comment)
# Delete the whitespace immediately after the select clause
# so the other stuff aligns nicely based on where the select
# clause started
# clause started.
fixes += [
LintFix.delete(
select_stmt.segments[after_select_clause_idx],
@@ -354,11 +368,10 @@
loop_while=sp.is_type("whitespace"),
start_seg=select_children[select_clause_idx - 1],
)
fixes += [LintFix.delete(seg) for seg in to_delete]

if to_delete:
fixes += _fixes_for_move_after_select_clause(
to_delete[-1],
to_delete,
# If we deleted a newline, create a newline.
any(seg for seg in to_delete if seg.is_type("newline")),
)
29 changes: 27 additions & 2 deletions src/sqlfluff/rules/L039.py
@@ -2,7 +2,7 @@
from typing import List, Optional

from sqlfluff.core.parser import WhitespaceSegment

from sqlfluff.core.parser.segments.base import IdentitySet
from sqlfluff.core.rules.base import BaseRule, LintFix, LintResult, RuleContext
from sqlfluff.core.rules.doc_decorators import document_fix_compatible
from sqlfluff.core.rules.functional import sp
@@ -38,6 +38,11 @@ def _eval(self, context: RuleContext) -> Optional[List[LintResult]]:
prev_newline = True
prev_whitespace = None
violations = []
memory = context.memory
if not memory:
# Use memory to avoid returning multiple fixes with the same anchor.
# (This is illegal.)
memory = dict(fix_anchors=IdentitySet())
for seg in context.segment.segments:
if seg.is_meta:
continue
@@ -119,4 +124,24 @@ def _eval(self, context: RuleContext) -> Optional[List[LintResult]]:
)
)

return violations or None
final_violations = []
if violations:
# Check each violation. If any of its fixes uses the same anchor
# as a previously returned fix, discard it. The linter can't handle
# applying fixes like this. Skipping this issue is okay because it
# will be detected and fixed during the next linter pass.
Review comment (Member):

Why is this code in L039? Feels like core code that should be in BaseRule.

Author reply:

We don't want BaseRule to discard when multiple fixes have the same anchor, because we consider it an error (the is_valid check).

L039 had duplicate anchors on this new test case (extracted from one of the .sql fixtures):

test_excess_space_cast:
  fail_str: |
    select
        '1'    ::   INT as id1,
        '2'::int as id2
    from table_a
  fix_str: |
    select
        '1'::INT as id1,
        '2'::int as id2
    from table_a

L039 wants to make two fixes to the line '1'    ::   INT as id1,:

  1. Replace each run of excess whitespace with a single whitespace (2 places)
  2. Entirely remove the whitespace around the ::.

Thus, it's trying to both replace and delete the same whitespace. If we return both fixes, the core linter will (appropriately) complain and discard both fixes. This bookkeeping ensures that both get fixed, but it's split across two passes through the linter loop. There may be a smarter way to do this, but this approach seems reasonable. I'm trying really hard not to do big rewrites of existing rule code during these PRs -- the goal is to eliminate the critical errors but try and avoid going down a 🐰 hole.

Review comment (Member):

Thanks, makes sense. Maybe add a comment saying something like:

This rule works in two steps to remove unnecessary whitespace:

  1. Replace each run of duplicate whitespace with a single whitespace.
  2. Remove single whitespaces if needed.

This can result in two deletes being applied to the same segment area, so check for that and replace them with a single delete.

for violation in violations:
if not any(
[
fix
for fix in violation.fixes
if fix.anchor in memory["fix_anchors"]
]
):
final_violations.append(violation)
for fix in violation.fixes:
memory["fix_anchors"].add(fix.anchor)
if not final_violations:
final_violations.append(LintResult())
final_violations[-1].memory = memory
return final_violations
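The memory bookkeeping above boils down to: keep a violation only if none of its fix anchors was already claimed by a kept violation, and let deferred ones get picked up on the next linter pass. A simplified sketch, with (description, anchors) tuples standing in for sqlfluff's LintResult/LintFix:

```python
def filter_conflicting(violations, seen_anchor_ids):
    """Keep each violation unless one of its anchors is already taken."""
    kept = []
    for description, anchors in violations:
        if any(id(a) in seen_anchor_ids for a in anchors):
            continue  # conflicting anchor; defer to the next linter pass
        kept.append((description, anchors))
        seen_anchor_ids.update(id(a) for a in anchors)
    return kept


ws = object()  # one whitespace segment targeted by two separate violations
out = filter_conflicting(
    [("replace long whitespace", [ws]), ("delete whitespace", [ws])],
    set(),
)
print([desc for desc, _ in out])  # ['replace long whitespace']
```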
6 changes: 6 additions & 0 deletions src/sqlfluff/rules/L050.py
@@ -69,6 +69,12 @@ class Rule_L050(BaseRule):

def _eval(self, context: RuleContext) -> Optional[LintResult]:
"""Files must not begin with newlines or whitespace."""
# Only check raw segments. This ensures we don't try and delete the same
# whitespace multiple times (i.e. for non-raw segments higher in the
# tree).
if not context.segment.is_raw():
return None
Author comment:

If we allowed multiple deletions, we wouldn't need this change.


# If parent_stack is empty we are currently at FileSegment.
if len(context.parent_stack) == 0:
return None