Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add source fixing capability and fix routines for L046 #3578

Merged
merged 34 commits into from Jul 11, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
671e5a2
Simplify lint fixing (prep for source fixes)
alanmcruickshank Jul 7, 2022
d2ab174
Split apart fix_string into smaller functions
alanmcruickshank Jul 7, 2022
cac4446
linting
alanmcruickshank Jul 7, 2022
bf68033
Bring back in some checks which *are*used
alanmcruickshank Jul 7, 2022
3902286
Merge branch 'ac/fix_refactor' into ac/fix_refactor_2
alanmcruickshank Jul 7, 2022
b0c6249
linting
alanmcruickshank Jul 7, 2022
8e5068c
Add some no cover elements
alanmcruickshank Jul 7, 2022
e21003e
Merge branch 'ac/fix_refactor' into ac/fix_refactor_2
alanmcruickshank Jul 7, 2022
65d70e7
linting
alanmcruickshank Jul 7, 2022
18449b9
Merge branch 'ac/fix_refactor' into ac/fix_refactor_2
alanmcruickshank Jul 7, 2022
50c5113
Merge branch 'main' into ac/fix_refactor_2
alanmcruickshank Jul 7, 2022
89ebb95
fix incorrect references
alanmcruickshank Jul 7, 2022
b196381
linting
alanmcruickshank Jul 7, 2022
793a650
Fixing test harness
alanmcruickshank Jul 8, 2022
590f122
More complicated tests
alanmcruickshank Jul 8, 2022
af072d5
Mechanics and tests for source fixes
alanmcruickshank Jul 9, 2022
fc0a2fe
Add test cases and get most to pass.
alanmcruickshank Jul 9, 2022
683db58
Source fixes - working for rule test cases
alanmcruickshank Jul 9, 2022
0c5934e
Merge remote-tracking branch 'origin/main' into ac/fix_refactor_3
alanmcruickshank Jul 9, 2022
8ac55d8
linting
alanmcruickshank Jul 9, 2022
6aec80e
coverage
alanmcruickshank Jul 9, 2022
f500d71
mypy fixes
alanmcruickshank Jul 9, 2022
8eb4976
Merge branch 'main' into ac/fix_refactor_3
alanmcruickshank Jul 9, 2022
e52ab92
Update src/sqlfluff/core/parser/segments/base.py
alanmcruickshank Jul 10, 2022
4453142
Update src/sqlfluff/core/parser/segments/base.py
alanmcruickshank Jul 10, 2022
443efc6
Update src/sqlfluff/core/parser/segments/base.py
alanmcruickshank Jul 10, 2022
6d59ddb
Update src/sqlfluff/core/parser/segments/raw.py
alanmcruickshank Jul 10, 2022
5c4c1ec
Merge branch 'main' into ac/fix_refactor_3
alanmcruickshank Jul 10, 2022
c6ecfbe
More comments and test an empty tag fix
alanmcruickshank Jul 10, 2022
d44aa30
Linting
alanmcruickshank Jul 10, 2022
b8db360
Merge branch 'main' into ac/fix_refactor_3
alanmcruickshank Jul 10, 2022
553b068
test fixes
alanmcruickshank Jul 10, 2022
8fe26f6
Merge branch 'ac/fix_refactor_3' of https://github.com/sqlfluff/sqlfl…
alanmcruickshank Jul 10, 2022
faa1c68
Merge branch 'main' into ac/fix_refactor_3
barrywhart Jul 10, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
24 changes: 23 additions & 1 deletion src/sqlfluff/core/linter/linted_file.py
Expand Up @@ -31,7 +31,7 @@
from sqlfluff.core.templaters import TemplatedFile, RawFileSlice

# Classes needed only for type checking
from sqlfluff.core.parser.segments.base import BaseSegment, FixPatch
from sqlfluff.core.parser.segments import BaseSegment, FixPatch

from sqlfluff.core.linter.common import NoQaDirective

Expand Down Expand Up @@ -348,6 +348,14 @@ def _generate_source_patches(
)
filtered_source_patches.append(patch)
dedupe_buffer.append(patch.dedupe_tuple())
# Handle the easy case of an explicit source fix
elif patch.patch_category == "source":
linter_logger.info(
" * Keeping explicit source fix patch: %s",
patch,
)
filtered_source_patches.append(patch)
dedupe_buffer.append(patch.dedupe_tuple())
# Is it a zero length patch.
elif (
patch.source_slice.start == patch.source_slice.stop
Expand Down Expand Up @@ -439,6 +447,20 @@ def _slice_source_file_using_patches(
slice_buff.append(next_so_slice)
source_idx = next_so_slice.stop

# Does this patch cover the next source-only slice directly?
if (
source_only_slices
and patch.source_slice == source_only_slices[0].source_slice()
):
linter_logger.info(
"Removing next source only slice from the stack because it "
"covers the same area of source file as the current patch: %s %s",
source_only_slices[0],
patch,
)
# If it does, remove it so that we don't duplicate it.
source_only_slices.pop(0)

# Is there a gap between current position and this patch?
if patch.source_slice.start > source_idx:
# Add a slice up to this patch.
Expand Down
27 changes: 12 additions & 15 deletions src/sqlfluff/core/linter/linter.py
Expand Up @@ -4,16 +4,7 @@
import os
import time
import logging
from typing import (
Any,
List,
Sequence,
Optional,
Tuple,
cast,
Iterable,
Iterator,
)
from typing import Any, List, Sequence, Optional, Tuple, cast, Iterable, Iterator, Set

import pathspec
import regex
Expand All @@ -34,7 +25,7 @@
from sqlfluff.core.config import FluffConfig, ConfigLoader, progress_bar_configuration

# Classes needed only for type checking
from sqlfluff.core.parser.segments.base import BaseSegment
from sqlfluff.core.parser.segments.base import BaseSegment, SourceFix
from sqlfluff.core.parser.segments.meta import MetaSegment
from sqlfluff.core.parser.segments.raw import RawSegment
from sqlfluff.core.rules.base import BaseRule
Expand Down Expand Up @@ -479,7 +470,7 @@ def lint_fix_parsed(
# A placeholder for the fixes we had on the previous loop
last_fixes = None
# Keep a set of previous versions to catch infinite loops.
previous_versions = {tree.raw}
previous_versions: Set[Tuple[str, Tuple[SourceFix, ...]]] = {(tree.raw, ())}

# If we are fixing then we want to loop up to the runaway_limit, otherwise just
# once for linting.
Expand Down Expand Up @@ -595,12 +586,18 @@ def is_first_linter_pass():
new_tree, _, _ = tree.apply_fixes(
config.get("dialect_obj"), crawler.code, anchor_info
)
# Check for infinite loops
if new_tree.raw not in previous_versions:
# Check for infinite loops. We use a combination of the
# fixed templated file and the list of source fixes to
# apply.
loop_check_tuple = (
new_tree.raw,
tuple(new_tree.source_fixes),
)
if loop_check_tuple not in previous_versions:
# We've not seen this version of the file so
# far. Continue.
tree = new_tree
previous_versions.add(tree.raw)
previous_versions.add(loop_check_tuple)
changed = True
continue
else:
Expand Down
4 changes: 4 additions & 0 deletions src/sqlfluff/core/parser/segments/__init__.py
Expand Up @@ -6,6 +6,8 @@
UnparsableSegment,
BracketedSegment,
IdentitySet,
FixPatch,
SourceFix,
)
from sqlfluff.core.parser.segments.generator import SegmentGenerator
from sqlfluff.core.parser.segments.raw import (
Expand Down Expand Up @@ -47,4 +49,6 @@
"Dedent",
"TemplateSegment",
"IdentitySet",
"FixPatch",
"SourceFix",
)
107 changes: 98 additions & 9 deletions src/sqlfluff/core/parser/segments/base.py
Expand Up @@ -13,7 +13,7 @@
from copy import deepcopy
from dataclasses import dataclass, field, replace
from io import StringIO
from itertools import takewhile
from itertools import takewhile, chain
from typing import (
Any,
Callable,
Expand All @@ -22,6 +22,7 @@
List,
Tuple,
Iterator,
TYPE_CHECKING,
)
import logging

Expand All @@ -47,10 +48,32 @@
from sqlfluff.core.parser.context import ParseContext
from sqlfluff.core.templaters.base import TemplatedFile

if TYPE_CHECKING:
from sqlfluff.core.rules.base import LintFix # pragma: no cover

# Instantiate the linter logger (only for use in methods involved with fixing.)
linter_logger = logging.getLogger("sqlfluff.linter")


@dataclass(frozen=True)
class SourceFix:
"""A stored reference to a fix in the non-templated file."""

edit: str
source_slice: slice
# TODO: It might be possible to refactor this to not require
# a templated_slice (because in theory it's unnecessary).
# However much of the fix handling code assumes we need
# a position in the templated file to interpret it.
# More work required to achieve that if desired.
templated_slice: slice

def __hash__(self):
# Only hash based on the source slice, not the
# templated slice (which might change)
return hash((self.edit, self.source_slice.start, self.source_slice.stop))


@dataclass
class FixPatch:
"""An edit patch for a source file."""
Expand Down Expand Up @@ -108,10 +131,42 @@ class AnchorEditInfo:
create_before: int = field(default=0)
create_after: int = field(default=0)
fixes: List = field(default_factory=list)
source_fixes: List = field(default_factory=list)
# First fix of edit_type "replace" in "fixes"
_first_replace: Optional["LintFix"] = field(default=None)
alanmcruickshank marked this conversation as resolved.
Show resolved Hide resolved

def add(self, fix: "LintFix"):
"""Adds the fix and updates stats.

We also allow potentially multiple source fixes on the same
anchor by condensing them together here.
"""
if fix.is_just_source_edit():
assert fix.edit
# is_just_source_edit confirms there will be a list
# so we can hint that to mypy.
self.source_fixes += fix.edit[0].source_fixes
# is there already a replace?
if self._first_replace:
assert self._first_replace.edit
# is_just_source_edit confirms there will be a list
# and that's the only way to get into _first_replace
# if it's populated so we can hint that to mypy.
linter_logger.info(
"Multiple edits detected, condensing %s onto %s",
fix,
self._first_replace,
)
self._first_replace.edit[0] = self._first_replace.edit[0].edit(
source_fixes=self.source_fixes
)
linter_logger.info("Condensed fix: %s", self._first_replace)
# Return without otherwise adding in this fix.
return

def add(self, fix):
"""Adds the fix and updates stats."""
self.fixes.append(fix)
if fix.edit_type == "replace" and not self._first_replace:
self._first_replace = fix
setattr(self, fix.edit_type, getattr(self, fix.edit_type) + 1)

@property
Expand Down Expand Up @@ -347,6 +402,11 @@ def raw_segments(self) -> List["BaseSegment"]:
"""Returns a list of raw segments in this segment."""
return self.get_raw_segments()

@cached_property
def source_fixes(self) -> List[SourceFix]:
"""Return any source fixes as list."""
return list(chain.from_iterable(s.source_fixes for s in self.segments))

@cached_property
def first_non_whitespace_segment_raw_upper(self) -> Optional[str]:
"""Returns the first non-whitespace subsegment of this segment."""
Expand Down Expand Up @@ -669,6 +729,7 @@ def _recalculate_caches(self):
"matched_length",
"raw_segments",
"first_non_whitespace_segment_raw_upper",
"source_fixes",
]:
self.__dict__.pop(key, None)

Expand Down Expand Up @@ -1260,7 +1321,9 @@ def apply_fixes(
return self, [], []

@classmethod
def compute_anchor_edit_info(cls, fixes) -> Dict[int, AnchorEditInfo]:
def compute_anchor_edit_info(
cls, fixes: List["LintFix"]
) -> Dict[int, AnchorEditInfo]:
"""Group and count fixes by anchor, return dictionary."""
anchor_info = defaultdict(AnchorEditInfo) # type: ignore
for fix in fixes:
Expand Down Expand Up @@ -1299,6 +1362,25 @@ def _validate_segment_after_fixes(self, rule_code, dialect, fixes_applied, segme
def _log_apply_fixes_check_issue(message, *args): # pragma: no cover
linter_logger.critical(message, exc_info=True, *args)

def _iter_source_fix_patches(
self, templated_file: TemplatedFile
) -> Iterator[FixPatch]:
"""Yield any source patches as fixes now.

NOTE: This yields source fixes for the segment and any of its
children, so it's important to call it at the right point in
the recursion to avoid yielding duplicates.
"""
for source_fix in self.source_fixes:
yield FixPatch(
source_fix.templated_slice,
source_fix.edit,
patch_category="source",
source_slice=source_fix.source_slice,
templated_str=templated_file.templated_str[source_fix.templated_slice],
source_str=templated_file.source_str[source_fix.source_slice],
)

def iter_patches(self, templated_file: TemplatedFile) -> Iterator[FixPatch]:
"""Iterate through the segments generating fix patches.

Expand All @@ -1311,23 +1393,28 @@ def iter_patches(self, templated_file: TemplatedFile) -> Iterator[FixPatch]:
"""
# Does it match? If so we can ignore it.
assert self.pos_marker
templated_str = templated_file.templated_str
matches = self.raw == templated_str[self.pos_marker.templated_slice]
templated_raw = templated_file.templated_str[self.pos_marker.templated_slice]
matches = self.raw == templated_raw
if matches:
# First yield any source fixes
yield from self._iter_source_fix_patches(templated_file)
# Then return.
return

# If we're here, the segment doesn't match the original.
linter_logger.debug(
"%s at %s: Original: [%r] Fixed: [%r]",
type(self).__name__,
self.pos_marker.templated_slice,
templated_str[self.pos_marker.templated_slice],
templated_raw,
self.raw,
)

# If it's all literal, then we don't need to recurse.
if self.pos_marker.is_literal():
# Yield the position in the source file and the patch
# First yield any source fixes
yield from self._iter_source_fix_patches(templated_file)
# Then yield the position in the source file and the patch
yield FixPatch.infer_from_template(
self.pos_marker.templated_slice,
self.raw,
Expand Down Expand Up @@ -1419,7 +1506,9 @@ def iter_patches(self, templated_file: TemplatedFile) -> Iterator[FixPatch]:
source_str=templated_file.source_str[source_slice],
)

def edit(self, raw):
def edit(
self, raw: Optional[str] = None, source_fixes: Optional[List[SourceFix]] = None
):
"""Stub."""
raise NotImplementedError()

Expand Down
35 changes: 32 additions & 3 deletions src/sqlfluff/core/parser/segments/meta.py
@@ -1,7 +1,8 @@
"""Indent and Dedent classes."""

from sqlfluff.core.parser.markers import PositionMarker
from sqlfluff.core.parser.match_wrapper import match_wrapper
from sqlfluff.core.parser.segments.raw import RawSegment
from sqlfluff.core.parser.segments.raw import RawSegment, SourceFix
from sqlfluff.core.parser.context import ParseContext
from typing import Optional, List

Expand Down Expand Up @@ -93,14 +94,20 @@ class TemplateSegment(MetaSegment):

type = "placeholder"

def __init__(self, pos_marker=None, source_str="", block_type=""):
def __init__(
self,
pos_marker: Optional[PositionMarker] = None,
source_str: str = "",
block_type: str = "",
source_fixes: Optional[List[SourceFix]] = None,
):
"""Initialise a placeholder with the source code embedded."""
if not source_str: # pragma: no cover
raise ValueError("Cannot instantiate TemplateSegment without a source_str.")
self.source_str = source_str
self.block_type = block_type
# Call the super of the pos_marker.
super().__init__(pos_marker=pos_marker)
super().__init__(pos_marker=pos_marker, source_fixes=source_fixes)

def _suffix(self):
"""Also output what it's a placeholder for."""
Expand All @@ -117,3 +124,25 @@ def to_tuple(self, code_only=False, show_raw=False, include_meta=False):
return (self.get_type(), self.source_str)
else: # pragma: no cover TODO?
return (self.get_type(), self.raw)

def edit(
self, raw: Optional[str] = None, source_fixes: Optional[List[SourceFix]] = None
):
"""Create a new segment, with exactly the same position but different content.

Returns:
A copy of this object with new contents.

Used mostly by fixes.

"""
if raw:
raise ValueError(
"Cannot set raw of a template placeholder!"
) # pragma: no cover
return self.__class__(
pos_marker=self.pos_marker,
source_str=self.source_str,
block_type=self.block_type,
source_fixes=source_fixes or self.source_fixes,
)