From 671e5a2709a103a5549f8fffd2c6c0ab58ee24ab Mon Sep 17 00:00:00 2001 From: Alan Date: Thu, 7 Jul 2022 09:46:06 +0100 Subject: [PATCH 01/24] Simplify lint fixing (prep for source fixes) --- src/sqlfluff/core/linter/linted_file.py | 101 ++++------------------ src/sqlfluff/core/parser/segments/base.py | 72 +++++++-------- 2 files changed, 56 insertions(+), 117 deletions(-) diff --git a/src/sqlfluff/core/linter/linted_file.py b/src/sqlfluff/core/linter/linted_file.py index cd626db99d3..33a5e6192bd 100644 --- a/src/sqlfluff/core/linter/linted_file.py +++ b/src/sqlfluff/core/linter/linted_file.py @@ -31,7 +31,7 @@ from sqlfluff.core.templaters import TemplatedFile # Classes needed only for type checking -from sqlfluff.core.parser.segments.base import BaseSegment, FixPatch, EnrichedFixPatch +from sqlfluff.core.parser.segments.base import BaseSegment, FixPatch from sqlfluff.core.linter.common import NoQaDirective @@ -278,30 +278,17 @@ def fix_string(self) -> Tuple[Any, bool]: dedupe_buffer = [] # We use enumerate so that we get an index for each patch. This is entirely # so when debugging logs we can find a given patch again! - patch: FixPatch # Could be FixPatch or its subclass, EnrichedFixPatch for idx, patch in enumerate( self.tree.iter_patches(templated_file=self.templated_file) ): linter_logger.debug(" %s Yielded patch: %s", idx, patch) self._log_hints(patch, self.templated_file) - # Get source_slice. - try: - enriched_patch = patch.enrich(self.templated_file) - except ValueError: # pragma: no cover - linter_logger.info( - " - Skipping. Source space Value Error. i.e. attempted " - "insertion within templated section." - ) - # If we try and slice within a templated section, then we may fail - # in which case, we should skip this patch. - continue - # Check for duplicates - if enriched_patch.dedupe_tuple() in dedupe_buffer: + if patch.dedupe_tuple() in dedupe_buffer: linter_logger.info( " - Skipping. Source space Duplicate: %s", - enriched_patch.dedupe_tuple(), + patch.dedupe_tuple(), ) continue @@ -315,7 +302,7 @@ def fix_string(self) -> Tuple[Any, bool]: # Get the affected raw slices. local_raw_slices = self.templated_file.raw_slices_spanning_source_slice( - enriched_patch.source_slice + patch.source_slice ) local_type_list = [slc.slice_type for slc in local_raw_slices] @@ -323,80 +310,28 @@ def fix_string(self) -> Tuple[Any, bool]: if not local_type_list or set(local_type_list) == {"literal"}: linter_logger.info( " * Keeping patch on new or literal-only section: %s", - enriched_patch, + patch, ) - filtered_source_patches.append(enriched_patch) - dedupe_buffer.append(enriched_patch.dedupe_tuple()) + filtered_source_patches.append(patch) + dedupe_buffer.append(patch.dedupe_tuple()) # Is it a zero length patch. elif ( - enriched_patch.source_slice.start == enriched_patch.source_slice.stop - and enriched_patch.source_slice.start == local_raw_slices[0].source_idx + patch.source_slice.start == patch.source_slice.stop + and patch.source_slice.start == local_raw_slices[0].source_idx ): linter_logger.info( " * Keeping insertion patch on slice boundary: %s", - enriched_patch, - ) - filtered_source_patches.append(enriched_patch) - dedupe_buffer.append(enriched_patch.dedupe_tuple()) - # If it's ONLY templated then we should skip it. - elif "literal" not in local_type_list: # pragma: no cover - linter_logger.info( - " - Skipping patch over templated section: %s", enriched_patch - ) - # If it's an insertion (i.e. the string in the pre-fix template is '') then - # we won't be able to place it, so skip. - elif not enriched_patch.templated_str: # pragma: no cover TODO? - linter_logger.info( - " - Skipping insertion patch in templated section: %s", - enriched_patch, - ) - # If the string from the templated version isn't in the source, then we - # can't fix it. - elif ( - enriched_patch.templated_str not in enriched_patch.source_str - ): # pragma: no cover TODO? - linter_logger.info( - " - Skipping edit patch on templated content: %s", - enriched_patch, - ) - else: # pragma: no cover - # Identify all the places the string appears in the source content. - positions = list( - findall(enriched_patch.templated_str, enriched_patch.source_str) - ) - if len(positions) != 1: - linter_logger.debug( - " - Skipping edit patch on non-unique templated " - "content: %s", - enriched_patch, - ) - continue - # We have a single occurrence of the thing we want to patch. This - # means we can use its position to place our patch. - new_source_slice = slice( # pragma: no cover - enriched_patch.source_slice.start + positions[0], - enriched_patch.source_slice.start - + positions[0] - + len(enriched_patch.templated_str), - ) - enriched_patch = EnrichedFixPatch( # pragma: no cover - source_slice=new_source_slice, - templated_slice=enriched_patch.templated_slice, - patch_category=enriched_patch.patch_category, - fixed_raw=enriched_patch.fixed_raw, - templated_str=enriched_patch.templated_str, - source_str=enriched_patch.source_str, + patch, ) - linter_logger.debug( # pragma: no cover - " * Keeping Tricky Case. Positions: %s, New Slice: %s, " - "Patch: %s", - positions, - new_source_slice, - enriched_patch, + filtered_source_patches.append(patch) + dedupe_buffer.append(patch.dedupe_tuple()) + else: + NotImplementedError( # pragma: no cover + "Template tracing logic means that this situation should " + "never occur. Please report this as an issue on GitHub " + "with a version of your query which triggers this " + "error" ) - filtered_source_patches.append(enriched_patch) # pragma: no cover - dedupe_buffer.append(enriched_patch.dedupe_tuple()) # pragma: no cover - continue # pragma: no cover # Sort the patches before building up the file. filtered_source_patches = sorted( diff --git a/src/sqlfluff/core/parser/segments/base.py b/src/sqlfluff/core/parser/segments/base.py index aa571d99b06..649edde4b78 100644 --- a/src/sqlfluff/core/parser/segments/base.py +++ b/src/sqlfluff/core/parser/segments/base.py @@ -54,7 +54,7 @@ @dataclass class FixPatch: - """An edit patch for a templated file.""" + """An edit patch for a source file.""" templated_slice: slice fixed_raw: str @@ -62,37 +62,35 @@ class FixPatch: # than for function. It allows traceability of *why* this patch was # generated. It has no significance for processing. patch_category: str - - def enrich(self, templated_file: TemplatedFile) -> "EnrichedFixPatch": - """Convert patch to source space.""" - source_slice = templated_file.templated_slice_to_source_slice( - self.templated_slice, - ) - return EnrichedFixPatch( - source_slice=source_slice, - templated_slice=self.templated_slice, - patch_category=self.patch_category, - fixed_raw=self.fixed_raw, - templated_str=templated_file.templated_str[self.templated_slice], - source_str=templated_file.source_str[source_slice], - ) - - -@dataclass -class EnrichedFixPatch(FixPatch): - """An edit patch for a source file.""" - source_slice: slice templated_str: str source_str: str - def enrich(self, templated_file: TemplatedFile) -> "EnrichedFixPatch": - """No-op override of base class function.""" - return self - def dedupe_tuple(self): """Generate a tuple of this fix for deduping.""" return (self.source_slice, self.fixed_raw) + + @classmethod + def infer_from_template( + cls, + templated_slice: slice, + fixed_raw:str, + patch_category:str, + templated_file: TemplatedFile, + ): + # NOTE: There used to be error handling here to catch ValueErrors. + # Removed in July 2022 because untestable. + source_slice = templated_file.templated_slice_to_source_slice( + templated_slice, + ) + return cls( + source_slice=source_slice, + templated_slice=templated_slice, + patch_category=patch_category, + fixed_raw=fixed_raw, + templated_str=templated_file.templated_str[templated_slice], + source_str=templated_file.source_str[source_slice], + ) @dataclass @@ -1297,7 +1295,7 @@ def _log_apply_fixes_check_issue(message, *args): # pragma: no cover def iter_patches( self, templated_file: TemplatedFile - ) -> Iterator[Union[EnrichedFixPatch, FixPatch]]: + ) -> Iterator[FixPatch]: """Iterate through the segments generating fix patches. The patches are generated in TEMPLATED space. This is important @@ -1326,8 +1324,11 @@ def iter_patches( # If it's all literal, then we don't need to recurse. if self.pos_marker.is_literal(): # Yield the position in the source file and the patch - yield FixPatch( - self.pos_marker.templated_slice, self.raw, patch_category="literal" + yield FixPatch.infer_from_template( + self.pos_marker.templated_slice, + self.raw, + patch_category="literal", + templated_file=templated_file ) # Can we go deeper? elif not self.segments: @@ -1367,7 +1368,7 @@ def iter_patches( if start_diff > 0 or insert_buff: # If we have an insert buffer, then it's an edit, otherwise a # deletion. - yield FixPatch( + yield FixPatch.infer_from_template( slice( segment.pos_marker.templated_slice.start - max(start_diff, 0), @@ -1375,7 +1376,9 @@ def iter_patches( ), insert_buff, patch_category="mid_point", + templated_file=templated_file, ) + insert_buff = "" # Now we deal with any changes *within* the segment itself. @@ -1398,11 +1401,12 @@ def iter_patches( templated_idx, self.pos_marker.templated_slice.stop, ) - # By returning an EnrichedFixPatch (rather than FixPatch), which - # includes a source_slice field, we ensure that fixes adjacent - # to source-only slices (e.g. {% endif %}) are placed - # appropriately relative to source-only slices. - yield EnrichedFixPatch( + # We determine the source_slice directly rather than + # infering it so that we can be very specific that + # we ensure that fixes adjacent to source-only slices + # (e.g. {% endif %}) are placed appropriately relative + # to source-only slices. + yield FixPatch( source_slice=source_slice, templated_slice=templated_slice, patch_category="end_point", From d2ab174a7ec52f195da1c2a7d963771da69b221d Mon Sep 17 00:00:00 2001 From: Alan Date: Thu, 7 Jul 2022 10:59:48 +0100 Subject: [PATCH 02/24] Split apart fix_string into smaller functions --- src/sqlfluff/core/linter/linted_file.py | 121 ++++++++++++++++++----- src/sqlfluff/core/templaters/__init__.py | 3 +- 2 files changed, 96 insertions(+), 28 deletions(-) diff --git a/src/sqlfluff/core/linter/linted_file.py b/src/sqlfluff/core/linter/linted_file.py index 33a5e6192bd..78de945b110 100644 --- a/src/sqlfluff/core/linter/linted_file.py +++ b/src/sqlfluff/core/linter/linted_file.py @@ -20,6 +20,7 @@ Union, cast, Type, + List, ) from sqlfluff.core.errors import ( @@ -28,7 +29,7 @@ CheckTuple, ) from sqlfluff.core.string_helpers import findall -from sqlfluff.core.templaters import TemplatedFile +from sqlfluff.core.templaters import TemplatedFile, RawFileSlice # Classes needed only for type checking from sqlfluff.core.parser.segments.base import BaseSegment, FixPatch @@ -266,23 +267,56 @@ def fix_string(self) -> Tuple[Any, bool]: original_source = self.templated_file.source_str - # Make sure no patches overlap and divide up the source file into slices. - # Any Template tags in the source file are off limits. - source_only_slices = self.templated_file.source_only_slices() + # Generate patches from the fixed tree. In the process we sort + # and deduplicate them so that the resultant list is in the + # the right order for the source file without any duplicates. + # TODO: Requires a mechanism for generating patches for source only + # fixes. + filtered_source_patches = self._generate_source_patches( + self.tree, self.templated_file + ) + # Any Template tags in the source file are off limits, unless + # we're explicitly fixing the source file. + source_only_slices = self.templated_file.source_only_slices() linter_logger.debug("Source-only slices: %s", source_only_slices) + # We now slice up the file using the patches and any source only slices. + # This gives us regions to apply changes to. + # TODO: This is the last hurdle for source only fixes. + slice_buff = self._slice_source_file_using_patches( + filtered_source_patches, source_only_slices, self.templated_file.source_str + ) + + linter_logger.debug("Final slice buffer: %s", slice_buff) + + # Iterate through the patches, building up the new string. + fixed_source_string = self._build_up_fixed_source_string( + slice_buff, filtered_source_patches, self.templated_file.source_str + ) + + # The success metric here is whether anything ACTUALLY changed. + return fixed_source_string, fixed_source_string != original_source + + @classmethod + def _generate_source_patches( + cls, tree: BaseSegment, templated_file: TemplatedFile + ) -> List[FixPatch]: + """Use the fixed tree to generate source patches. + + Importantly here we deduplicate and sort the patches + from their position in the templated file into their + intended order in the source file. + """ # Iterate patches, filtering and translating as we go: linter_logger.debug("### Beginning Patch Iteration.") filtered_source_patches = [] dedupe_buffer = [] # We use enumerate so that we get an index for each patch. This is entirely # so when debugging logs we can find a given patch again! - for idx, patch in enumerate( - self.tree.iter_patches(templated_file=self.templated_file) - ): + for idx, patch in enumerate(tree.iter_patches(templated_file=templated_file)): linter_logger.debug(" %s Yielded patch: %s", idx, patch) - self._log_hints(patch, self.templated_file) + cls._log_hints(patch, templated_file) # Check for duplicates if patch.dedupe_tuple() in dedupe_buffer: @@ -296,12 +330,13 @@ def fix_string(self) -> Tuple[Any, bool]: # or disrupt any templated sections. # The intent here is that unless explicitly stated, a fix should never # disrupt a templated section. - # NOTE: We rely here on the patches being sorted. - # TODO: Implement a mechanism for doing templated section fixes. For - # now it's just not allowed. + # NOTE: We rely here on the patches being generated in order. + # TODO: Implement a mechanism for doing templated section fixes. Given + # these patches are currently generated from fixed segments, there will + # likely need to be an entirely different mechanism # Get the affected raw slices. - local_raw_slices = self.templated_file.raw_slices_spanning_source_slice( + local_raw_slices = templated_file.raw_slices_spanning_source_slice( patch.source_slice ) local_type_list = [slc.slice_type for slc in local_raw_slices] @@ -326,7 +361,7 @@ def fix_string(self) -> Tuple[Any, bool]: filtered_source_patches.append(patch) dedupe_buffer.append(patch.dedupe_tuple()) else: - NotImplementedError( # pragma: no cover + NotImplementedError( # pragma: no cover "Template tracing logic means that this situation should " "never occur. Please report this as an issue on GitHub " "with a version of your query which triggers this " @@ -334,15 +369,35 @@ def fix_string(self) -> Tuple[Any, bool]: ) # Sort the patches before building up the file. - filtered_source_patches = sorted( - filtered_source_patches, key=lambda x: x.source_slice.start - ) + return sorted(filtered_source_patches, key=lambda x: x.source_slice.start) + + @staticmethod + def _slice_source_file_using_patches( + source_patches: List[FixPatch], + source_only_slices: List[RawFileSlice], + raw_source_string: str, + ) -> List[slice]: + """Use patches to safely slice up the file before fixing. + + This uses source only slices to avoid overwriting sections + of templated code in the source file (when we don't want to). + + We assume that the source patches have already been + sorted and deduplicated. Sorting is important. If the slices + aren't sorted then this function will miss chunks. + If there are overlaps or duplicates then this function + may produce strange results. + """ # We now slice up the file using the patches and any source only slices. # This gives us regions to apply changes to. slice_buff = [] source_idx = 0 - for patch in filtered_source_patches: + for patch in source_patches: # Are there templated slices at or before the start of this patch? + # TODO: We'll need to explicit handling for template fixes here, because + # they ARE source only slices. If we can get handling to work properly + # here then this is the last hurdle and it will flow through + # smoothly from here. while ( source_only_slices and source_only_slices[0].source_idx < patch.source_slice.start @@ -374,16 +429,30 @@ def fix_string(self) -> Tuple[Any, bool]: slice_buff.append(patch.source_slice) source_idx = patch.source_slice.stop # Add a tail slice. - if source_idx < len(self.templated_file.source_str): - slice_buff.append(slice(source_idx, len(self.templated_file.source_str))) + if source_idx < len(raw_source_string): + slice_buff.append(slice(source_idx, len(raw_source_string))) - linter_logger.debug("Final slice buffer: %s", slice_buff) + return slice_buff + @staticmethod + def _build_up_fixed_source_string( + source_file_slices: List[slice], + source_patches: List[FixPatch], + raw_source_string: str, + ) -> str: + """Use patches and raw file to fix the source file. + + This assumes that patches and slices have already + been coordinated. If they haven't then this will + fail because we rely on patches having a corresponding + slice of exactly the right file in the list of file + slices. + """ # Iterate through the patches, building up the new string. str_buff = "" - for source_slice in slice_buff: + for source_slice in source_file_slices: # Is it one in the patch buffer: - for patch in filtered_source_patches: + for patch in source_patches: if patch.source_slice == source_slice: # Use the patched version linter_logger.debug( @@ -400,12 +469,10 @@ def fix_string(self) -> Tuple[Any, bool]: linter_logger.debug( "Appending Raw: %s %r", source_slice, - self.templated_file.source_str[source_slice], + raw_source_string[source_slice], ) - str_buff += self.templated_file.source_str[source_slice] - - # The success metric here is whether anything ACTUALLY changed. - return str_buff, str_buff != original_source + str_buff += raw_source_string[source_slice] + return str_buff def persist_tree(self, suffix: str = "") -> bool: """Persist changes to the given path.""" diff --git a/src/sqlfluff/core/templaters/__init__.py b/src/sqlfluff/core/templaters/__init__.py index 42dc6a49f33..d79069207f5 100644 --- a/src/sqlfluff/core/templaters/__init__.py +++ b/src/sqlfluff/core/templaters/__init__.py @@ -4,7 +4,7 @@ # Although these shouldn't usually be instantiated from here # we import them to make sure they get registered. -from sqlfluff.core.templaters.base import RawTemplater +from sqlfluff.core.templaters.base import RawTemplater, RawFileSlice from sqlfluff.core.templaters.jinja import JinjaTemplater from sqlfluff.core.templaters.python import PythonTemplater from sqlfluff.core.templaters.placeholder import PlaceholderTemplater @@ -16,6 +16,7 @@ def core_templaters(): __all__ = ( + "RawFileSlice", "TemplatedFile", "RawTemplater", "JinjaTemplater", From cac4446a88fe7a3881e86f490982f5dcdd32de66 Mon Sep 17 00:00:00 2001 From: Alan Date: Thu, 7 Jul 2022 11:05:20 +0100 Subject: [PATCH 03/24] linting --- src/sqlfluff/core/linter/linted_file.py | 3 +-- src/sqlfluff/core/parser/segments/base.py | 22 +++++++++++++--------- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/src/sqlfluff/core/linter/linted_file.py b/src/sqlfluff/core/linter/linted_file.py index 33a5e6192bd..0db514252f3 100644 --- a/src/sqlfluff/core/linter/linted_file.py +++ b/src/sqlfluff/core/linter/linted_file.py @@ -27,7 +27,6 @@ SQLLintError, CheckTuple, ) -from sqlfluff.core.string_helpers import findall from sqlfluff.core.templaters import TemplatedFile # Classes needed only for type checking @@ -326,7 +325,7 @@ def fix_string(self) -> Tuple[Any, bool]: filtered_source_patches.append(patch) dedupe_buffer.append(patch.dedupe_tuple()) else: - NotImplementedError( # pragma: no cover + NotImplementedError( # pragma: no cover "Template tracing logic means that this situation should " "never occur. Please report this as an issue on GitHub " "with a version of your query which triggers this " diff --git a/src/sqlfluff/core/parser/segments/base.py b/src/sqlfluff/core/parser/segments/base.py index 649edde4b78..7c5ae4f3209 100644 --- a/src/sqlfluff/core/parser/segments/base.py +++ b/src/sqlfluff/core/parser/segments/base.py @@ -22,7 +22,6 @@ List, Tuple, Iterator, - Union, ) import logging @@ -69,15 +68,22 @@ class FixPatch: def dedupe_tuple(self): """Generate a tuple of this fix for deduping.""" return (self.source_slice, self.fixed_raw) - + @classmethod def infer_from_template( cls, templated_slice: slice, - fixed_raw:str, - patch_category:str, + fixed_raw: str, + patch_category: str, templated_file: TemplatedFile, ): + """Infer source position from just templated position. + + In cases where we expect it to be uncontroversial it + is sometimes more straightforward to just leverage + the existing mapping functions to auto-generate the + source position rather than calculating it explicitly. + """ # NOTE: There used to be error handling here to catch ValueErrors. # Removed in July 2022 because untestable. source_slice = templated_file.templated_slice_to_source_slice( @@ -1293,9 +1299,7 @@ def _validate_segment_after_fixes(self, rule_code, dialect, fixes_applied, segme def _log_apply_fixes_check_issue(message, *args): # pragma: no cover linter_logger.critical(message, exc_info=True, *args) - def iter_patches( - self, templated_file: TemplatedFile - ) -> Iterator[FixPatch]: + def iter_patches(self, templated_file: TemplatedFile) -> Iterator[FixPatch]: """Iterate through the segments generating fix patches. The patches are generated in TEMPLATED space. This is important @@ -1328,7 +1332,7 @@ def iter_patches( self.pos_marker.templated_slice, self.raw, patch_category="literal", - templated_file=templated_file + templated_file=templated_file, ) # Can we go deeper? elif not self.segments: @@ -1402,7 +1406,7 @@ def iter_patches( self.pos_marker.templated_slice.stop, ) # We determine the source_slice directly rather than - # infering it so that we can be very specific that + # infering it so that we can be very specific that # we ensure that fixes adjacent to source-only slices # (e.g. {% endif %}) are placed appropriately relative # to source-only slices. From bf680330470271d03c9757b14619166f0dbf68ef Mon Sep 17 00:00:00 2001 From: Alan Date: Thu, 7 Jul 2022 11:25:47 +0100 Subject: [PATCH 04/24] Bring back in some checks which *are*used --- src/sqlfluff/core/linter/linted_file.py | 37 +++++++++++++++++++++---- 1 file changed, 32 insertions(+), 5 deletions(-) diff --git a/src/sqlfluff/core/linter/linted_file.py b/src/sqlfluff/core/linter/linted_file.py index 0db514252f3..ed51f188fb8 100644 --- a/src/sqlfluff/core/linter/linted_file.py +++ b/src/sqlfluff/core/linter/linted_file.py @@ -27,6 +27,7 @@ SQLLintError, CheckTuple, ) +from sqlfluff.core.string_helpers import findall from sqlfluff.core.templaters import TemplatedFile # Classes needed only for type checking @@ -325,12 +326,38 @@ def fix_string(self) -> Tuple[Any, bool]: filtered_source_patches.append(patch) dedupe_buffer.append(patch.dedupe_tuple()) else: - NotImplementedError( # pragma: no cover - "Template tracing logic means that this situation should " - "never occur. Please report this as an issue on GitHub " - "with a version of your query which triggers this " - "error" + # We've got a situation where the ends of our patch need to be + # more carefully mapped. Likely because we're greedily including + # a section of source templating with our fix and we need to work + # around it gracefully. + + # Identify all the places the string appears in the source content. + positions = list(findall(patch.templated_str, patch.source_str)) + if len(positions) != 1: + linter_logger.debug( + " - Skipping edit patch on non-unique templated " + "content: %s", + patch, + ) + continue + + # We have a single occurrence of the thing we want to patch. This + # means we can use its position to place our patch. + new_source_slice = slice( + patch.source_slice.start + positions[0], + patch.source_slice.start + positions[0] + len(patch.templated_str), ) + linter_logger.debug( + " * Keeping Tricky Case. Positions: %s, New Slice: %s, " + "Patch: %s", + positions, + new_source_slice, + patch, + ) + patch.source_slice = new_source_slice + filtered_source_patches.append(patch) + dedupe_buffer.append(patch.dedupe_tuple()) + continue # Sort the patches before building up the file. filtered_source_patches = sorted( From b0c62496894d77e2e10f25f2eaf0780c4512124d Mon Sep 17 00:00:00 2001 From: Alan Date: Thu, 7 Jul 2022 11:39:00 +0100 Subject: [PATCH 05/24] linting --- src/sqlfluff/core/linter/linted_file.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/sqlfluff/core/linter/linted_file.py b/src/sqlfluff/core/linter/linted_file.py index ae33b277b89..b7e2d817ff0 100644 --- a/src/sqlfluff/core/linter/linted_file.py +++ b/src/sqlfluff/core/linter/linted_file.py @@ -20,7 +20,6 @@ Union, cast, Type, - List, ) from sqlfluff.core.errors import ( From 8e5068c0a404a97b9098c2bd2743b92a0c359dae Mon Sep 17 00:00:00 2001 From: Alan Date: Thu, 7 Jul 2022 15:08:57 +0100 Subject: [PATCH 06/24] Add some no cover elements --- src/sqlfluff/core/linter/linted_file.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/sqlfluff/core/linter/linted_file.py b/src/sqlfluff/core/linter/linted_file.py index ed51f188fb8..3405b7e8740 100644 --- a/src/sqlfluff/core/linter/linted_file.py +++ b/src/sqlfluff/core/linter/linted_file.py @@ -334,12 +334,16 @@ def fix_string(self) -> Tuple[Any, bool]: # Identify all the places the string appears in the source content. positions = list(findall(patch.templated_str, patch.source_str)) if len(positions) != 1: - linter_logger.debug( + # NOTE: This section is not covered in tests. While we + # don't have an example of it's use (we should), the + # code after this relies on there being only one + # instance found - so the safety check remains. + linter_logger.debug( # pragma: no cover " - Skipping edit patch on non-unique templated " "content: %s", patch, ) - continue + continue # pragma: no cover # We have a single occurrence of the thing we want to patch. This # means we can use its position to place our patch. From 65d70e73ff64b4fcf053365056acfd4863a3ab1c Mon Sep 17 00:00:00 2001 From: Alan Date: Thu, 7 Jul 2022 15:11:51 +0100 Subject: [PATCH 07/24] linting --- src/sqlfluff/core/linter/linted_file.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sqlfluff/core/linter/linted_file.py b/src/sqlfluff/core/linter/linted_file.py index 3405b7e8740..0bb5fe3fe4a 100644 --- a/src/sqlfluff/core/linter/linted_file.py +++ b/src/sqlfluff/core/linter/linted_file.py @@ -335,7 +335,7 @@ def fix_string(self) -> Tuple[Any, bool]: positions = list(findall(patch.templated_str, patch.source_str)) if len(positions) != 1: # NOTE: This section is not covered in tests. While we - # don't have an example of it's use (we should), the + # don't have an example of it's use (we should), the # code after this relies on there being only one # instance found - so the safety check remains. linter_logger.debug( # pragma: no cover From 89ebb957da5b62670dba398f0fc1ad6f9fcc6e96 Mon Sep 17 00:00:00 2001 From: Alan Date: Thu, 7 Jul 2022 20:08:50 +0100 Subject: [PATCH 08/24] fix incorrect references --- src/sqlfluff/core/linter/linted_file.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/sqlfluff/core/linter/linted_file.py b/src/sqlfluff/core/linter/linted_file.py index 885b25dd1a4..9ebddbda918 100644 --- a/src/sqlfluff/core/linter/linted_file.py +++ b/src/sqlfluff/core/linter/linted_file.py @@ -314,10 +314,10 @@ def _generate_source_patches( # We use enumerate so that we get an index for each patch. This is entirely # so when debugging logs we can find a given patch again! for idx, patch in enumerate( - self.tree.iter_patches(templated_file=templated_file) + tree.iter_patches(templated_file=templated_file) ): linter_logger.debug(" %s Yielded patch: %s", idx, patch) - self._log_hints(patch, self.templated_file) + cls._log_hints(patch, templated_file) # Check for duplicates if patch.dedupe_tuple() in dedupe_buffer: From b196381d9b4ed7f5ca11f3f95f2b5590a445a96e Mon Sep 17 00:00:00 2001 From: Alan Date: Thu, 7 Jul 2022 20:14:43 +0100 Subject: [PATCH 09/24] linting --- src/sqlfluff/core/linter/linted_file.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/sqlfluff/core/linter/linted_file.py b/src/sqlfluff/core/linter/linted_file.py index 9ebddbda918..b145aa112c2 100644 --- a/src/sqlfluff/core/linter/linted_file.py +++ b/src/sqlfluff/core/linter/linted_file.py @@ -313,9 +313,7 @@ def _generate_source_patches( dedupe_buffer = [] # We use enumerate so that we get an index for each patch. This is entirely # so when debugging logs we can find a given patch again! - for idx, patch in enumerate( - tree.iter_patches(templated_file=templated_file) - ): + for idx, patch in enumerate(tree.iter_patches(templated_file=templated_file)): linter_logger.debug(" %s Yielded patch: %s", idx, patch) cls._log_hints(patch, templated_file) From 793a650a0c226686bd9a46814af64e889d842fc9 Mon Sep 17 00:00:00 2001 From: Alan Date: Fri, 8 Jul 2022 10:16:51 +0100 Subject: [PATCH 10/24] Fixing test harness --- src/sqlfluff/core/linter/linted_file.py | 2 +- src/sqlfluff/core/parser/segments/__init__.py | 2 + test/core/linted_file_test.py | 168 ++++++++++++++++++ test/core/templaters/base_test.py | 47 +++-- 4 files changed, 206 insertions(+), 13 deletions(-) create mode 100644 test/core/linted_file_test.py diff --git a/src/sqlfluff/core/linter/linted_file.py b/src/sqlfluff/core/linter/linted_file.py index b145aa112c2..d574c4a71c9 100644 --- a/src/sqlfluff/core/linter/linted_file.py +++ b/src/sqlfluff/core/linter/linted_file.py @@ -31,7 +31,7 @@ from sqlfluff.core.templaters import TemplatedFile, RawFileSlice # Classes needed only for type checking -from sqlfluff.core.parser.segments.base import BaseSegment, FixPatch +from sqlfluff.core.parser.segments import BaseSegment, FixPatch from sqlfluff.core.linter.common import NoQaDirective diff --git a/src/sqlfluff/core/parser/segments/__init__.py b/src/sqlfluff/core/parser/segments/__init__.py index 6b819710374..78b6cbe7bc1 100644 --- a/src/sqlfluff/core/parser/segments/__init__.py +++ b/src/sqlfluff/core/parser/segments/__init__.py @@ -6,6 +6,7 @@ UnparsableSegment, BracketedSegment, IdentitySet, + FixPatch, ) from sqlfluff.core.parser.segments.generator import SegmentGenerator from sqlfluff.core.parser.segments.raw import ( @@ -47,4 +48,5 @@ "Dedent", "TemplateSegment", "IdentitySet", + "FixPatch", ) diff --git a/test/core/linted_file_test.py b/test/core/linted_file_test.py new file mode 100644 index 00000000000..f9403f51e11 --- /dev/null +++ b/test/core/linted_file_test.py @@ -0,0 +1,168 @@ +"""The Test file for the linter class.""" + +import pytest +import logging + +from sqlfluff.core.linter import LintedFile +from sqlfluff.core.parser.markers import PositionMarker +from sqlfluff.core.parser.segments import FixPatch, RawSegment +from sqlfluff.core.templaters import RawFileSlice, TemplatedFile + + +@pytest.mark.parametrize( + "source_slices,source_patches,raw_source_string,expected_result", + # NOTE: For all of these examples we're not setting the patch_category + # of the fix patches. They're not used at this step so irrelevant for + # testing. + [ + # Trivial example + ([slice(0, 1)], [], "a", "a"), + # Simple replacement + ( + [slice(0, 1), slice(1, 2), slice(2, 3)], + [FixPatch(slice(1, 2), "d", "", slice(1, 2), "b", "b")], + "abc", + "adc", + ), + # Simple insertion + ( + [slice(0, 1), slice(1, 1), slice(1, 2)], + [FixPatch(slice(1, 1), "b", "", slice(1, 1), "", "")], + "ac", + "abc", + ), + # Simple deletion + ( + [slice(0, 1), slice(1, 2), slice(2, 3)], + [FixPatch(slice(1, 2), "", "", slice(1, 2), "b", "b")], + "abc", + "ac", + ), + # Illustrative templated example (although practically at + # this step, the routine shouldn't care if it's templated). + ( + [slice(0, 2), slice(2, 7), slice(7, 9)], + [FixPatch(slice(2, 3), "{{ b }}", "", slice(2, 7), "b", "{{b}}")], + "a {{b}} c", + "a {{ b }} c", + ), + ], +) +def test__linted_file__build_up_fixed_source_string( + source_slices, source_patches, raw_source_string, expected_result, caplog +): + """Test _build_up_fixed_source_string. + + This is part of fix_string(). + """ + with caplog.at_level(logging.DEBUG, logger="sqlfluff.linter"): + result = LintedFile._build_up_fixed_source_string( + source_slices, source_patches, raw_source_string + ) + assert result == expected_result + + +@pytest.mark.parametrize( + "source_patches,source_only_slices,raw_source_string,expected_result", + # NOTE: For all of these examples we're not setting the patch_category + # of the fix patches. They're not used at this step so irrelevant for + # testing. + [ + # Trivial example + ([], [], "a", [slice(0, 1)]), + # Simple replacement + ( + [FixPatch(slice(1, 2), "d", "", slice(1, 2), "b", "b")], + [], + "abc", + [slice(0, 1), slice(1, 2), slice(2, 3)], + ), + # Basic templated example + # NOTE: No fixes so just one slice. + ( + [], + [], + "a {{ b }} c", + [slice(0, 11)], + ), + # Templated example with a source-only slice. + # NOTE: No fixes so just one slice. + ( + [], + [RawFileSlice("{# b #}", "comment", 2)], + "a {# b #} c", + [slice(0, 11)], + ), + # Templated fix example with a source-only slice. + # NOTE: Correct slicing example + ( + [FixPatch(slice(0, 1), "a ", "", slice(0, 1), "a", "a")], + [RawFileSlice("{# b #}", "comment", 1)], + "a{# b #}c", + [slice(0, 1), slice(1, 9)], + ), + # Templated fix example with a source-only slice. + # NOTE: We insert a slice for the source only slice. + # TODO: given that the logic is based on the _type_ + # of the slice (e.g. comment), would we handle a + # template tag which returns an empty string correctly? + ( + [FixPatch(slice(1, 2), " c", "", slice(8, 9), "c", "c")], + [RawFileSlice("{# b #}", "comment", 1)], + "a{# b #}cc", + [slice(0, 1), slice(1, 8), slice(8, 9), slice(9, 10)], + ), + ], +) +def test__linted_file__slice_source_file_using_patches( + source_patches, source_only_slices, raw_source_string, expected_result, caplog +): + """Test _slice_source_file_using_patches. + + This is part of fix_string(). + """ + with caplog.at_level(logging.DEBUG, logger="sqlfluff.linter"): + result = LintedFile._slice_source_file_using_patches( + source_patches, source_only_slices, raw_source_string + ) + assert result == expected_result + + +templated_file_1 = TemplatedFile.from_string("abc") + + +@pytest.mark.parametrize( + "tree,templated_file,expected_result", + [ + # Trivial example + ( + RawSegment( + "abc", + PositionMarker(slice(0, 3), slice(0, 3), templated_file_1), + "code", + ), + templated_file_1, + [], + ), + # Simple literal edit example + ( + RawSegment( + "abz", + PositionMarker(slice(0, 3), slice(0, 3), templated_file_1), + "code", + ), + templated_file_1, + [FixPatch(slice(0, 3), "abz", "literal", slice(0, 3), "abc", "abc")], + ), + ], +) +def test__linted_file__generate_source_patches( + tree, templated_file, expected_result, caplog +): + """Test _generate_source_patches. + + This is part of fix_string(). + """ + with caplog.at_level(logging.DEBUG, logger="sqlfluff.linter"): + result = LintedFile._generate_source_patches(tree, templated_file) + assert result == expected_result diff --git a/test/core/templaters/base_test.py b/test/core/templaters/base_test.py index 632ac281011..0ae54d87267 100644 --- a/test/core/templaters/base_test.py +++ b/test/core/templaters/base_test.py @@ -295,16 +295,39 @@ def test__templated_file_templated_slice_to_source_slice( assert (is_literal, source_slice) == (literal_test, out_slice) -def test__templated_file_source_only_slices(): +@pytest.mark.parametrize( + "file,expected_result", + [ + # Comment example + ( + TemplatedFile( + source_str="a" * 20, + fname="test", + raw_sliced=[ + RawFileSlice("a" * 10, "literal", 0), + RawFileSlice("{# b #}", "comment", 10), + RawFileSlice("a" * 10, "literal", 17), + ], + check_consistency=False, + ), + [RawFileSlice("{# b #}", "comment", 10)], + ), + # Template tags aren't source only. + ( + TemplatedFile( + source_str="aaabbbaaa", + fname="test", + raw_sliced=[ + RawFileSlice("aaa", "literal", 0), + RawFileSlice("{{ b }}", "templated", 3), + RawFileSlice("aaa", "literal", 6), + ], + check_consistency=False, + ), + [], + ), + ], +) +def test__templated_file_source_only_slices(file, expected_result): """Test TemplatedFile.source_only_slices.""" - file = TemplatedFile( - source_str=" Dummy String again ", # NB: has length 20 - fname="test", - raw_sliced=[ - RawFileSlice("a" * 10, "literal", 0), - RawFileSlice("b" * 7, "comment", 10), - RawFileSlice("a" * 10, "literal", 17), - ], - check_consistency=False, - ) - assert file.source_only_slices() == [RawFileSlice("b" * 7, "comment", 10)] + assert file.source_only_slices() == expected_result From 590f1222e3b2e3f66af4da0502aedfea6b7423e2 Mon Sep 17 00:00:00 2001 From: Alan Date: Fri, 8 Jul 2022 10:38:28 +0100 Subject: [PATCH 11/24] More complicated tests --- test/core/linted_file_test.py | 66 ++++++++++++++++++++++++++++++++++- 1 file changed, 65 insertions(+), 1 deletion(-) diff --git a/test/core/linted_file_test.py b/test/core/linted_file_test.py index f9403f51e11..46c48aa695d 100644 --- a/test/core/linted_file_test.py +++ b/test/core/linted_file_test.py @@ -5,8 +5,9 @@ from sqlfluff.core.linter import LintedFile from sqlfluff.core.parser.markers import PositionMarker -from sqlfluff.core.parser.segments import FixPatch, RawSegment +from sqlfluff.core.parser.segments import FixPatch, RawSegment, BaseSegment from sqlfluff.core.templaters import RawFileSlice, TemplatedFile +from sqlfluff.core.templaters.base import TemplatedFileSlice @pytest.mark.parametrize( @@ -129,6 +130,21 @@ def test__linted_file__slice_source_file_using_patches( templated_file_1 = TemplatedFile.from_string("abc") +templated_file_2 = TemplatedFile( + "{# blah #}{{ foo }}bc", + "", + "abc", + [ + TemplatedFileSlice("comment", slice(0, 10), slice(0, 0)), + TemplatedFileSlice("templated", slice(10, 19), slice(0, 1)), + TemplatedFileSlice("literal", slice(19, 21), slice(1, 3)), + ], + [ + RawFileSlice("{# blah #}", "comment", 0), + RawFileSlice("{{ foo }}", "templated", 10), + RawFileSlice("bc", "literal", 19), + ], +) @pytest.mark.parametrize( @@ -154,6 +170,54 @@ def test__linted_file__slice_source_file_using_patches( templated_file_1, [FixPatch(slice(0, 3), "abz", "literal", slice(0, 3), "abc", "abc")], ), + # Nested literal edit example + ( + BaseSegment( + [ + RawSegment( + "a", + PositionMarker(slice(0, 1), slice(0, 1), templated_file_1), + "code", + ), + RawSegment( + "b", + PositionMarker(slice(1, 2), slice(1, 2), templated_file_1), + "code", + ), + RawSegment( + "z", + PositionMarker(slice(2, 3), slice(2, 3), templated_file_1), + "code", + ), + ] + ), + templated_file_1, + [FixPatch(slice(0, 3), "abz", "literal", slice(0, 3), "abc", "abc")], + ), + # More complicated templating example + ( + BaseSegment( + [ + RawSegment( + "a", + PositionMarker(slice(0, 20), slice(0, 1), templated_file_2), + "code", + ), + RawSegment( + "b", + PositionMarker(slice(19, 20), slice(1, 2), templated_file_2), + "code", + ), + RawSegment( + "z", + PositionMarker(slice(20, 21), slice(2, 3), templated_file_2), + "code", + ), + ] + ), + templated_file_2, + [FixPatch(slice(2, 3), "z", "literal", slice(20, 21), "c", "c")], + ), ], ) def test__linted_file__generate_source_patches( From af072d52047f2da7d125ae0c2ee152b1db8c91f9 Mon Sep 17 00:00:00 2001 From: Alan Date: Sat, 9 Jul 2022 12:30:33 +0100 Subject: [PATCH 12/24] Mechanics and tests for source fixes --- src/sqlfluff/core/linter/linted_file.py | 8 +++ src/sqlfluff/core/parser/segments/base.py | 54 ++++++++++++++++-- src/sqlfluff/core/parser/segments/meta.py | 33 ++++++++++- src/sqlfluff/core/parser/segments/raw.py | 32 ++++++++--- test/core/linted_file_test.py | 68 ++++++++++++++++++++++- 5 files changed, 178 insertions(+), 17 deletions(-) diff --git a/src/sqlfluff/core/linter/linted_file.py b/src/sqlfluff/core/linter/linted_file.py index d574c4a71c9..73551e86dfa 100644 --- a/src/sqlfluff/core/linter/linted_file.py +++ b/src/sqlfluff/core/linter/linted_file.py @@ -348,6 +348,14 @@ def _generate_source_patches( ) filtered_source_patches.append(patch) dedupe_buffer.append(patch.dedupe_tuple()) + # Handle the easy case of an explicit source fix + elif patch.patch_category == "source": + linter_logger.info( + " * Keeping explicit source fix patch: %s", + patch, + ) + filtered_source_patches.append(patch) + dedupe_buffer.append(patch.dedupe_tuple()) # Is it a zero length patch. elif ( patch.source_slice.start == patch.source_slice.stop diff --git a/src/sqlfluff/core/parser/segments/base.py b/src/sqlfluff/core/parser/segments/base.py index 7c5ae4f3209..7827c5c461d 100644 --- a/src/sqlfluff/core/parser/segments/base.py +++ b/src/sqlfluff/core/parser/segments/base.py @@ -13,7 +13,7 @@ from copy import deepcopy from dataclasses import dataclass, field, replace from io import StringIO -from itertools import takewhile +from itertools import takewhile, chain from typing import ( Any, Callable, @@ -51,6 +51,20 @@ linter_logger = logging.getLogger("sqlfluff.linter") +@dataclass +class SourceFix: + """A stored reference to a fix in the non-templated file.""" + + edit: str + source_slice: slice + # TODO: It might be possible to refactor this to not require + # a templated_slice (because in theory it's unnecessary). + # However much of the fix handling code assumes we need + # a position in the templated file to interpret it. + # More work required to achieve that if desired. + templated_slice: slice + + @dataclass class FixPatch: """An edit patch for a source file.""" @@ -347,6 +361,11 @@ def raw_segments(self) -> List["BaseSegment"]: """Returns a list of raw segments in this segment.""" return self.get_raw_segments() + @cached_property + def source_fixes(self) -> List[SourceFix]: + """Return an source fixes as list.""" + return list(chain.from_iterable(s.source_fixes for s in self.segments)) + @cached_property def first_non_whitespace_segment_raw_upper(self) -> Optional[str]: """Returns the first non-whitespace subsegment of this segment.""" @@ -669,6 +688,7 @@ def _recalculate_caches(self): "matched_length", "raw_segments", "first_non_whitespace_segment_raw_upper", + "source_fixes", ]: self.__dict__.pop(key, None) @@ -1299,6 +1319,25 @@ def _validate_segment_after_fixes(self, rule_code, dialect, fixes_applied, segme def _log_apply_fixes_check_issue(message, *args): # pragma: no cover linter_logger.critical(message, exc_info=True, *args) + def _iter_source_fix_patches( + self, templated_file: TemplatedFile + ) -> Iterator[FixPatch]: + """Yield any source patches as fixes now. + + NOTE: This yields source fixes for the segment and any of it's + children, so it's important to call it at the right point in + the recursion to avoid yielding duplicates. + """ + for source_fix in self.source_fixes: + yield FixPatch( + source_fix.templated_slice, + source_fix.edit, + patch_category="source", + source_slice=source_fix.source_slice, + templated_str=templated_file.templated_str[source_fix.templated_slice], + source_str=templated_file.source_str[source_fix.source_slice], + ) + def iter_patches(self, templated_file: TemplatedFile) -> Iterator[FixPatch]: """Iterate through the segments generating fix patches. @@ -1311,9 +1350,12 @@ def iter_patches(self, templated_file: TemplatedFile) -> Iterator[FixPatch]: """ # Does it match? If so we can ignore it. assert self.pos_marker - templated_str = templated_file.templated_str - matches = self.raw == templated_str[self.pos_marker.templated_slice] + templated_raw = templated_file.templated_str[self.pos_marker.templated_slice] + matches = self.raw == templated_raw if matches: + # First yield any source fixes + yield from self._iter_source_fix_patches(templated_file) + # Then return. return # If we're here, the segment doesn't match the original. @@ -1321,13 +1363,15 @@ def iter_patches(self, templated_file: TemplatedFile) -> Iterator[FixPatch]: "%s at %s: Original: [%r] Fixed: [%r]", type(self).__name__, self.pos_marker.templated_slice, - templated_str[self.pos_marker.templated_slice], + templated_raw, self.raw, ) # If it's all literal, then we don't need to recurse. if self.pos_marker.is_literal(): - # Yield the position in the source file and the patch + # First yield any source fixes + yield from self._iter_source_fix_patches(templated_file) + # Then yield the position in the source file and the patch yield FixPatch.infer_from_template( self.pos_marker.templated_slice, self.raw, diff --git a/src/sqlfluff/core/parser/segments/meta.py b/src/sqlfluff/core/parser/segments/meta.py index 2fc608a392d..e5db9f84445 100644 --- a/src/sqlfluff/core/parser/segments/meta.py +++ b/src/sqlfluff/core/parser/segments/meta.py @@ -1,7 +1,8 @@ """Indent and Dedent classes.""" +from sqlfluff.core.parser.markers import PositionMarker from sqlfluff.core.parser.match_wrapper import match_wrapper -from sqlfluff.core.parser.segments.raw import RawSegment +from sqlfluff.core.parser.segments.raw import RawSegment, SourceFix from sqlfluff.core.parser.context import ParseContext from typing import Optional, List @@ -93,14 +94,20 @@ class TemplateSegment(MetaSegment): type = "placeholder" - def __init__(self, pos_marker=None, source_str="", block_type=""): + def __init__( + self, + pos_marker: Optional[PositionMarker] = None, + source_str: str = "", + block_type: str = "", + source_fixes: Optional[List[SourceFix]] = None, + ): """Initialise a placeholder with the source code embedded.""" if not source_str: # pragma: no cover raise ValueError("Cannot instantiate TemplateSegment without a source_str.") self.source_str = source_str self.block_type = block_type # Call the super of the pos_marker. - super().__init__(pos_marker=pos_marker) + super().__init__(pos_marker=pos_marker, source_fixes=source_fixes) def _suffix(self): """Also output what it's a placeholder for.""" @@ -117,3 +124,23 @@ def to_tuple(self, code_only=False, show_raw=False, include_meta=False): return (self.get_type(), self.source_str) else: # pragma: no cover TODO? return (self.get_type(), self.raw) + + def edit(self, raw=None, source_fixes=None): + """Create a new segment, with exactly the same position but different content. + + Returns: + A copy of this object with new contents. + + Used mostly by fixes. + + """ + if raw: + raise ValueError( + "Cannot set raw of a template placeholder!" + ) # pragma: no cover + return self.__class__( + pos_marker=self.pos_marker, + source_str=self.source_str, + block_type=self.block_type, + source_fixes=source_fixes or self.source_fixes, + ) diff --git a/src/sqlfluff/core/parser/segments/raw.py b/src/sqlfluff/core/parser/segments/raw.py index b0dac3bd40b..2492af7ff33 100644 --- a/src/sqlfluff/core/parser/segments/raw.py +++ b/src/sqlfluff/core/parser/segments/raw.py @@ -4,9 +4,10 @@ any children, and the output of the lexer. """ -from typing import Optional, Tuple +from dataclasses import dataclass +from typing import List, Optional, Tuple -from sqlfluff.core.parser.segments.base import BaseSegment +from sqlfluff.core.parser.segments.base import BaseSegment, SourceFix from sqlfluff.core.parser.markers import PositionMarker @@ -29,6 +30,7 @@ def __init__( name: Optional[str] = None, trim_start: Optional[Tuple[str, ...]] = None, trim_chars: Optional[Tuple[str, ...]] = None, + source_fixes: Optional[List[SourceFix]] = None, ): """Initialise raw segment. @@ -53,6 +55,8 @@ def __init__( self.trim_chars = trim_chars # A cache variable for expandable self._is_expandable = None + # Keep track of any source fixes + self._source_fixes = source_fixes def __repr__(self): return "<{}: ({}) {!r}>".format( @@ -113,6 +117,11 @@ def segments(self): """ return [] + @property + def source_fixes(self) -> List[SourceFix]: + """Return an source fixes as list.""" + return self._source_fixes or [] + # ################ INSTANCE METHODS def invalidate_caches(self): @@ -169,7 +178,7 @@ def _suffix(self): """ return f"{self.raw!r}" - def edit(self, raw): + def edit(self, raw=None, source_fixes=None): """Create a new segment, with exactly the same position but different content. Returns: @@ -179,12 +188,13 @@ def edit(self, raw): """ return self.__class__( - raw=raw, + raw=raw or self.raw, pos_marker=self.pos_marker, type=self._surrogate_type, name=self._surrogate_name, trim_start=self.trim_start, trim_chars=self.trim_chars, + source_fixes=source_fixes or self.source_fixes, ) @@ -260,14 +270,21 @@ def __init__( pos_marker: Optional[PositionMarker] = None, type: Optional[str] = None, name: Optional[str] = None, + source_fixes: Optional[List[SourceFix]] = None, ): """If no other name is provided we extrapolate it from the raw.""" if raw and not name: # names are all lowercase by convention. name = raw.lower() - super().__init__(raw=raw, pos_marker=pos_marker, type=type, name=name) + super().__init__( + raw=raw, + pos_marker=pos_marker, + type=type, + name=name, + source_fixes=source_fixes, + ) - def edit(self, raw): + def edit(self, raw=None, source_fixes=None): """Create a new segment, with exactly the same position but different content. Returns: @@ -277,10 +294,11 @@ def edit(self, raw): """ return self.__class__( - raw=raw, + raw=raw or self.raw, pos_marker=self.pos_marker, type=self._surrogate_type, name=self._surrogate_name, + source_fixes=source_fixes or self.source_fixes, ) diff --git a/test/core/linted_file_test.py b/test/core/linted_file_test.py index 46c48aa695d..01ba019d975 100644 --- a/test/core/linted_file_test.py +++ b/test/core/linted_file_test.py @@ -5,7 +5,13 @@ from sqlfluff.core.linter import LintedFile from sqlfluff.core.parser.markers import PositionMarker -from sqlfluff.core.parser.segments import FixPatch, RawSegment, BaseSegment +from sqlfluff.core.parser.segments import ( + FixPatch, + RawSegment, + BaseSegment, + TemplateSegment, +) +from sqlfluff.core.parser.segments.raw import SourceFix from sqlfluff.core.templaters import RawFileSlice, TemplatedFile from sqlfluff.core.templaters.base import TemplatedFileSlice @@ -113,6 +119,16 @@ def test__linted_file__build_up_fixed_source_string( "a{# b #}cc", [slice(0, 1), slice(1, 8), slice(8, 9), slice(9, 10)], ), + # Templated example with a source-only slice. + # NOTE: We're fixing the soure only slice. + # TODO: Should we be using the fix type (e.g. "source") + # to somehow determine whether the fix is "safe"? + ( + [FixPatch(slice(2, 2), "{# fixed #}", "", slice(2, 9), "", "")], + [RawFileSlice("{# b #}", "comment", 2)], + "a {# b #} c", + [slice(0, 2), slice(2, 9), slice(9, 11)], + ), ], ) def test__linted_file__slice_source_file_using_patches( @@ -198,9 +214,14 @@ def test__linted_file__slice_source_file_using_patches( ( BaseSegment( [ + TemplateSegment( + PositionMarker(slice(0, 10), slice(0, 0), templated_file_2), + "{# blah #}", + "comment", + ), RawSegment( "a", - PositionMarker(slice(0, 20), slice(0, 1), templated_file_2), + PositionMarker(slice(10, 20), slice(0, 1), templated_file_2), "code", ), RawSegment( @@ -218,6 +239,49 @@ def test__linted_file__slice_source_file_using_patches( templated_file_2, [FixPatch(slice(2, 3), "z", "literal", slice(20, 21), "c", "c")], ), + # Templating example with fixes + ( + BaseSegment( + [ + TemplateSegment( + PositionMarker(slice(0, 10), slice(0, 0), templated_file_2), + "{# blah #}", + "comment", + source_fixes=[ + SourceFix("{# fixed #}", slice(0, 10), slice(0, 0)) + ], + ), + RawSegment( + "a", + PositionMarker(slice(10, 19), slice(0, 1), templated_file_2), + "code", + source_fixes=[ + SourceFix("{{ bar }}", slice(10, 19), slice(0, 1)) + ], + ), + RawSegment( + "b", + PositionMarker(slice(19, 20), slice(1, 2), templated_file_2), + "code", + ), + RawSegment( + "z", + PositionMarker(slice(20, 21), slice(2, 3), templated_file_2), + "code", + ), + ] + ), + templated_file_2, + [ + FixPatch( + slice(0, 0), "{# fixed #}", "source", slice(0, 10), "", "{# blah #}" + ), + FixPatch( + slice(0, 1), "{{ bar }}", "source", slice(10, 19), "a", "{{ foo }}" + ), + FixPatch(slice(2, 3), "z", "literal", slice(20, 21), "c", "c"), + ], + ), ], ) def test__linted_file__generate_source_patches( From fc0a2fe279638164d3b2dc09f9c2311101590a85 Mon Sep 17 00:00:00 2001 From: Alan Date: Sat, 9 Jul 2022 13:31:00 +0100 Subject: [PATCH 13/24] Add test cases and get most to pass. --- src/sqlfluff/core/linter/linter.py | 26 +++++------ src/sqlfluff/core/parser/segments/__init__.py | 2 + src/sqlfluff/core/parser/segments/base.py | 12 +++-- src/sqlfluff/core/parser/segments/meta.py | 4 +- src/sqlfluff/core/parser/segments/raw.py | 4 +- src/sqlfluff/core/rules/base.py | 8 ++++ src/sqlfluff/rules/L046.py | 46 +++++++++++++++---- test/fixtures/rules/std_rule_cases/L046.yml | 12 ++++- 8 files changed, 83 insertions(+), 31 deletions(-) diff --git a/src/sqlfluff/core/linter/linter.py b/src/sqlfluff/core/linter/linter.py index bef4a06b93d..ad06126e5ae 100644 --- a/src/sqlfluff/core/linter/linter.py +++ b/src/sqlfluff/core/linter/linter.py @@ -4,16 +4,7 @@ import os import time import logging -from typing import ( - Any, - List, - Sequence, - Optional, - Tuple, - cast, - Iterable, - Iterator, -) +from typing import Any, List, Sequence, Optional, Tuple, cast, Iterable, Iterator, Set import pathspec import regex @@ -34,7 +25,7 @@ from sqlfluff.core.config import FluffConfig, ConfigLoader, progress_bar_configuration # Classes needed only for type checking -from sqlfluff.core.parser.segments.base import BaseSegment +from sqlfluff.core.parser.segments.base import BaseSegment, SourceFix from sqlfluff.core.parser.segments.meta import MetaSegment from sqlfluff.core.parser.segments.raw import RawSegment from sqlfluff.core.rules.base import BaseRule @@ -479,7 +470,7 @@ def lint_fix_parsed( # A placeholder for the fixes we had on the previous loop last_fixes = None # Keep a set of previous versions to catch infinite loops. - previous_versions = {tree.raw} + previous_versions: Set[Tuple[str, Tuple[SourceFix, ...]]] = {(tree.raw, ())} # If we are fixing then we want to loop up to the runaway_limit, otherwise just # once for linting. @@ -595,12 +586,17 @@ def is_first_linter_pass(): new_tree, _, _ = tree.apply_fixes( config.get("dialect_obj"), crawler.code, anchor_info ) - # Check for infinite loops - if new_tree.raw not in previous_versions: + # Check for infinite loops. We use a combination of the fixed + # templated file and the list of source fixes to apply. + loop_check_tuple = ( + new_tree.raw, + tuple(new_tree.source_fixes), + ) + if loop_check_tuple not in previous_versions: # We've not seen this version of the file so # far. Continue. tree = new_tree - previous_versions.add(tree.raw) + previous_versions.add(loop_check_tuple) changed = True continue else: diff --git a/src/sqlfluff/core/parser/segments/__init__.py b/src/sqlfluff/core/parser/segments/__init__.py index 78b6cbe7bc1..ce32820b75e 100644 --- a/src/sqlfluff/core/parser/segments/__init__.py +++ b/src/sqlfluff/core/parser/segments/__init__.py @@ -7,6 +7,7 @@ BracketedSegment, IdentitySet, FixPatch, + SourceFix, ) from sqlfluff.core.parser.segments.generator import SegmentGenerator from sqlfluff.core.parser.segments.raw import ( @@ -49,4 +50,5 @@ "TemplateSegment", "IdentitySet", "FixPatch", + "SourceFix", ) diff --git a/src/sqlfluff/core/parser/segments/base.py b/src/sqlfluff/core/parser/segments/base.py index 7827c5c461d..af635be0836 100644 --- a/src/sqlfluff/core/parser/segments/base.py +++ b/src/sqlfluff/core/parser/segments/base.py @@ -51,7 +51,7 @@ linter_logger = logging.getLogger("sqlfluff.linter") -@dataclass +@dataclass(frozen=True) class SourceFix: """A stored reference to a fix in the non-templated file.""" @@ -64,6 +64,10 @@ class SourceFix: # More work required to achieve that if desired. templated_slice: slice + def __hash__(self): + # Only hash based on the source slice, not the templated slice (which might change) + return hash((self.edit, self.source_slice.start, self.source_slice.stop)) + @dataclass class FixPatch: @@ -1323,7 +1327,7 @@ def _iter_source_fix_patches( self, templated_file: TemplatedFile ) -> Iterator[FixPatch]: """Yield any source patches as fixes now. - + NOTE: This yields source fixes for the segment and any of it's children, so it's important to call it at the right point in the recursion to avoid yielding duplicates. @@ -1463,7 +1467,9 @@ def iter_patches(self, templated_file: TemplatedFile) -> Iterator[FixPatch]: source_str=templated_file.source_str[source_slice], ) - def edit(self, raw): + def edit( + self, raw: Optional[str] = None, source_fixes: Optional[List[SourceFix]] = None + ): """Stub.""" raise NotImplementedError() diff --git a/src/sqlfluff/core/parser/segments/meta.py b/src/sqlfluff/core/parser/segments/meta.py index e5db9f84445..4817d1f5652 100644 --- a/src/sqlfluff/core/parser/segments/meta.py +++ b/src/sqlfluff/core/parser/segments/meta.py @@ -125,7 +125,9 @@ def to_tuple(self, code_only=False, show_raw=False, include_meta=False): else: # pragma: no cover TODO? return (self.get_type(), self.raw) - def edit(self, raw=None, source_fixes=None): + def edit( + self, raw: Optional[str] = None, source_fixes: Optional[List[SourceFix]] = None + ): """Create a new segment, with exactly the same position but different content. Returns: diff --git a/src/sqlfluff/core/parser/segments/raw.py b/src/sqlfluff/core/parser/segments/raw.py index 2492af7ff33..c0d08605b9b 100644 --- a/src/sqlfluff/core/parser/segments/raw.py +++ b/src/sqlfluff/core/parser/segments/raw.py @@ -178,7 +178,9 @@ def _suffix(self): """ return f"{self.raw!r}" - def edit(self, raw=None, source_fixes=None): + def edit( + self, raw: Optional[str] = None, source_fixes: Optional[List[SourceFix]] = None + ): """Create a new segment, with exactly the same position but different content. Returns: diff --git a/src/sqlfluff/core/rules/base.py b/src/sqlfluff/core/rules/base.py index 1f343097981..45a9103f8d6 100644 --- a/src/sqlfluff/core/rules/base.py +++ b/src/sqlfluff/core/rules/base.py @@ -313,6 +313,14 @@ def get_fix_slices( def has_template_conflicts(self, templated_file: TemplatedFile) -> bool: """Based on the fix slices, should we discard the fix?""" + # Check for explicit source fixes. + # TODO: This doesn't account for potentially more complicated source fixes. + # If we're replacing a single segment with many *and* doing source fixes + # then they will be discarded here as unsafe. + if self.edit_type == "replace" and self.edit and len(self.edit) == 1: + edit: BaseSegment = self.edit[0] + if edit.raw == self.anchor.raw and edit.source_fixes: + return False # Given fix slices, check for conflicts. check_fn = all if self.edit_type in ("create_before", "create_after") else any fix_slices = self.get_fix_slices(templated_file, within_only=False) diff --git a/src/sqlfluff/rules/L046.py b/src/sqlfluff/rules/L046.py index 97ce12c92c7..3ef28453f3c 100644 --- a/src/sqlfluff/rules/L046.py +++ b/src/sqlfluff/rules/L046.py @@ -1,10 +1,12 @@ """Implementation of Rule L046.""" from typing import Tuple +from sqlfluff.core.parser.segments import SourceFix from sqlfluff.core.rules.base import ( BaseRule, EvalResultType, LintResult, + LintFix, RuleContext, ) from sqlfluff.core.rules.functional import rsp @@ -126,12 +128,13 @@ def _eval(self, context: RuleContext) -> EvalResultType: position = raw_slice.raw.find(stripped[0]) self.logger.debug( - "Tag string segments: %r | %r | %r | %r | %r @ %s", + "Tag string segments: %r | %r | %r | %r | %r @ %s + %s", tag_pre, ws_pre, inner, ws_post, tag_post, + src_idx, position, ) @@ -144,24 +147,47 @@ def _eval(self, context: RuleContext) -> EvalResultType: if not ws_pre or (ws_pre != " " and "\n" not in ws_pre): pre_fix = " " # Check latter whitespace. - elif not ws_post or (ws_post != " " and "\n" not in ws_post): + if not ws_post or (ws_post != " " and "\n" not in ws_post): post_fix = " " if pre_fix is not None or post_fix is not None: - # Precalculate the fix even though we don't have the - # framework to use it yet. - # fixed = ( - # tag_pre - # + (pre_fix or ws_pre) - # + inner - # + (post_fix or ws_post) - # + tag_post + fixed = ( + tag_pre + + (pre_fix or ws_pre) + + inner + + (post_fix or ws_post) + + tag_post + ) result.append( LintResult( memory=memory, anchor=context.segment, description=f"Jinja tags should have a single " f"whitespace on either side: {stripped}", + fixes=[ + LintFix.replace( + context.segment, + [ + context.segment.edit( + source_fixes=[ + SourceFix( + fixed, + slice( + src_idx + position, + src_idx + + position + + len(stripped), + ), + # NOTE: The templated slice here is going to be a little + # imprecise, but the one that really matters is the source + # slice. + context.segment.pos_marker.templated_slice, + ) + ] + ) + ], + ) + ], ) ) if result: diff --git a/test/fixtures/rules/std_rule_cases/L046.yml b/test/fixtures/rules/std_rule_cases/L046.yml index 96335417070..73cd7a42f75 100644 --- a/test/fixtures/rules/std_rule_cases/L046.yml +++ b/test/fixtures/rules/std_rule_cases/L046.yml @@ -8,16 +8,21 @@ test_simple_modified: pass_str: SELECT 1 from {%+ if true -%} foo {%- endif %} test_simple_modified_fail: - fail_str: SELECT 1 from {%+if true-%} foo {%-endif%} + # Test that the plus/minus notation works fine. + fail_str: SELECT 1 from {%+if true-%} {{ref('foo')}} {%-endif%} + fix_str: SELECT 1 from {%+ if true -%} {{ ref('foo') }} {%- endif %} test_fail_jinja_tags_no_space: fail_str: SELECT 1 from {{ref('foo')}} + fix_str: SELECT 1 from {{ ref('foo') }} test_fail_jinja_tags_multiple_spaces: fail_str: SELECT 1 from {{ ref('foo') }} + fix_str: SELECT 1 from {{ ref('foo') }} test_fail_jinja_tags_no_space_2: fail_str: SELECT 1 from {{+ref('foo')-}} + fix_str: SELECT 1 from {{+ ref('foo') -}} test_pass_newlines: # It's ok if there are newlines. @@ -31,12 +36,17 @@ test_fail_templated_segment_contains_leading_literal: SELECT user_id FROM `{{"gcp_project"}}.{{"dataset"}}.campaign_performance` + fix_str: | + SELECT user_id + FROM + `{{ "gcp_project" }}.{{ "dataset" }}.campaign_performance` configs: core: dialect: bigquery test_fail_segment_contains_multiple_templated_slices_last_one_bad: fail_str: CREATE TABLE `{{ "project" }}.{{ "dataset" }}.{{"table"}}` + fix_str: CREATE TABLE `{{ "project" }}.{{ "dataset" }}.{{ "table" }}` configs: core: dialect: bigquery From 683db58e2b8a9d8582ae110d2425e99c95a67c34 Mon Sep 17 00:00:00 2001 From: Alan Date: Sat, 9 Jul 2022 16:46:08 +0100 Subject: [PATCH 14/24] Source fixes - working for rule test cases --- src/sqlfluff/core/linter/linted_file.py | 14 ++++++ src/sqlfluff/core/parser/segments/base.py | 37 ++++++++++++++-- src/sqlfluff/core/parser/segments/raw.py | 1 - src/sqlfluff/core/rules/base.py | 17 +++++-- test/core/linted_file_test.py | 54 +++++++++++++++++++++++ 5 files changed, 116 insertions(+), 7 deletions(-) diff --git a/src/sqlfluff/core/linter/linted_file.py b/src/sqlfluff/core/linter/linted_file.py index 73551e86dfa..a0b855c7294 100644 --- a/src/sqlfluff/core/linter/linted_file.py +++ b/src/sqlfluff/core/linter/linted_file.py @@ -447,6 +447,20 @@ def _slice_source_file_using_patches( slice_buff.append(next_so_slice) source_idx = next_so_slice.stop + # Does this patch cover the next source-only slice directly? + if ( + source_only_slices + and patch.source_slice == source_only_slices[0].source_slice() + ): + linter_logger.info( + "Removing next source only slice from the stack because it " + "covers the same area of source file as the current patch: %s %s", + source_only_slices[0], + patch, + ) + # If it does, remove it so that we don't duplicate it. + source_only_slices.pop(0) + # Is there a gap between current position and this patch? if patch.source_slice.start > source_idx: # Add a slice up to this patch. diff --git a/src/sqlfluff/core/parser/segments/base.py b/src/sqlfluff/core/parser/segments/base.py index af635be0836..cfcbb56ae67 100644 --- a/src/sqlfluff/core/parser/segments/base.py +++ b/src/sqlfluff/core/parser/segments/base.py @@ -22,6 +22,7 @@ List, Tuple, Iterator, + TYPE_CHECKING, ) import logging @@ -47,6 +48,9 @@ from sqlfluff.core.parser.context import ParseContext from sqlfluff.core.templaters.base import TemplatedFile +if TYPE_CHECKING: + from sqlfluff.core.rules.base import LintFix + # Instantiate the linter logger (only for use in methods involved with fixing.) linter_logger = logging.getLogger("sqlfluff.linter") @@ -126,10 +130,35 @@ class AnchorEditInfo: create_before: int = field(default=0) create_after: int = field(default=0) fixes: List = field(default_factory=list) + source_fixes: List = field(default_factory=list) + _first_replace: Optional["LintFix"] = field(default=None) + + def add(self, fix: "LintFix"): + """Adds the fix and updates stats. + + We also allow potentially multiple source fixes on the same + anchor by condensing them together here. + """ + if fix.is_just_source_edit(): + self.source_fixes += fix.edit[0].source_fixes + # is there already a replace? + if self._first_replace: + # Condence this fix onto that one + linter_logger.info( + "Multiple edits detected, condensing %s onto %s", + fix, + self._first_replace, + ) + self._first_replace.edit[0] = self._first_replace.edit[0].edit( + source_fixes=self.source_fixes + ) + linter_logger.info("Condensed fix: %s", self._first_replace) + # Return without otherwise adding in this fix. + return - def add(self, fix): - """Adds the fix and updates stats.""" self.fixes.append(fix) + if fix.edit_type == "replace" and not self._first_replace: + self._first_replace = fix setattr(self, fix.edit_type, getattr(self, fix.edit_type) + 1) @property @@ -1284,7 +1313,9 @@ def apply_fixes( return self, [], [] @classmethod - def compute_anchor_edit_info(cls, fixes) -> Dict[int, AnchorEditInfo]: + def compute_anchor_edit_info( + cls, fixes: List["LintFix"] + ) -> Dict[int, AnchorEditInfo]: """Group and count fixes by anchor, return dictionary.""" anchor_info = defaultdict(AnchorEditInfo) # type: ignore for fix in fixes: diff --git a/src/sqlfluff/core/parser/segments/raw.py b/src/sqlfluff/core/parser/segments/raw.py index c0d08605b9b..7d8da148bbe 100644 --- a/src/sqlfluff/core/parser/segments/raw.py +++ b/src/sqlfluff/core/parser/segments/raw.py @@ -4,7 +4,6 @@ any children, and the output of the lexer. """ -from dataclasses import dataclass from typing import List, Optional, Tuple from sqlfluff.core.parser.segments.base import BaseSegment, SourceFix diff --git a/src/sqlfluff/core/rules/base.py b/src/sqlfluff/core/rules/base.py index 45a9103f8d6..cd92563b8e8 100644 --- a/src/sqlfluff/core/rules/base.py +++ b/src/sqlfluff/core/rules/base.py @@ -126,10 +126,10 @@ class LintFix: position to create at (with the existing element at this position to be moved *after* the edit), for a `replace` it implies the segment to be replaced. - edit (:obj:`BaseSegment`, optional): For `replace` and `create` fixes, + edit (iterable of :obj:`BaseSegment`, optional): For `replace` and `create` fixes, this holds the iterable of segments to create or replace at the given `anchor` point. - source (:obj:`BaseSegment`, optional): For `replace` and `create` fixes, + source (iterable of :obj:`BaseSegment`, optional): For `replace` and `create` fixes, this holds iterable of segments that provided code. IMPORTANT: The linter uses this to prevent copying material from templated areas. @@ -199,6 +199,14 @@ def is_trivial(self): return True # pragma: no cover TODO? return False + def is_just_source_edit(self): + return ( + self.edit_type == "replace" + and self.edit + and len(self.edit) == 1 + and self.edit[0].raw == self.anchor.raw + ) + def __repr__(self): if self.edit_type == "delete": detail = f"delete:{self.anchor.raw!r}" @@ -209,7 +217,10 @@ def __repr__(self): new_detail = "".join(s.raw for s in self.edit) if self.edit_type == "replace": - detail = f"edt:{self.anchor.raw!r}->{new_detail!r}" + if self.is_just_source_edit(): + detail = f"src-edt:{self.edit[0].source_fixes!r}" + else: + detail = f"edt:{self.anchor.raw!r}->{new_detail!r}" else: detail = f"create:{new_detail!r}" else: diff --git a/test/core/linted_file_test.py b/test/core/linted_file_test.py index 01ba019d975..7a4e2f1ef12 100644 --- a/test/core/linted_file_test.py +++ b/test/core/linted_file_test.py @@ -129,6 +129,60 @@ def test__linted_file__build_up_fixed_source_string( "a {# b #} c", [slice(0, 2), slice(2, 9), slice(9, 11)], ), + # Illustrate potential templating bug (case from L046) + ( + [ + FixPatch( + templated_slice=slice(14, 14), + fixed_raw="{%+ if true -%}", + patch_category="source", + source_slice=slice(14, 27), + templated_str="", + source_str="{%+if true-%}", + ), + FixPatch( + templated_slice=slice(14, 14), + fixed_raw="{{ ref('foo') }}", + patch_category="source", + source_slice=slice(28, 42), + templated_str="", + source_str="{{ref('foo')}}", + ), + FixPatch( + templated_slice=slice(17, 17), + fixed_raw="{%- endif %}", + patch_category="source", + source_slice=slice(43, 53), + templated_str="", + source_str="{%-endif%}", + ), + ], + [ + RawFileSlice( + raw="{%+if true-%}", + slice_type="block_start", + source_idx=14, + slice_subtype=None, + block_idx=0, + ), + RawFileSlice( + raw="{%-endif%}", + slice_type="block_end", + source_idx=43, + slice_subtype=None, + block_idx=1, + ), + ], + "SELECT 1 from {%+if true-%} {{ref('foo')}} {%-endif%}", + [ + slice(0, 14), + slice(14, 27), + slice(27, 28), + slice(28, 42), + slice(42, 43), + slice(43, 53), + ], + ), ], ) def test__linted_file__slice_source_file_using_patches( From 8ac55d8e5859d6c58e09d8b412fbec5fd4b59d36 Mon Sep 17 00:00:00 2001 From: Alan Date: Sat, 9 Jul 2022 23:12:17 +0100 Subject: [PATCH 15/24] linting --- src/sqlfluff/core/linter/linter.py | 5 ++-- src/sqlfluff/core/parser/segments/base.py | 3 +- src/sqlfluff/core/rules/base.py | 16 ++++++----- src/sqlfluff/rules/L046.py | 34 ++++++++++------------- 4 files changed, 29 insertions(+), 29 deletions(-) diff --git a/src/sqlfluff/core/linter/linter.py b/src/sqlfluff/core/linter/linter.py index ad06126e5ae..3845fc2b311 100644 --- a/src/sqlfluff/core/linter/linter.py +++ b/src/sqlfluff/core/linter/linter.py @@ -586,8 +586,9 @@ def is_first_linter_pass(): new_tree, _, _ = tree.apply_fixes( config.get("dialect_obj"), crawler.code, anchor_info ) - # Check for infinite loops. We use a combination of the fixed - # templated file and the list of source fixes to apply. + # Check for infinite loops. We use a combination of the + # fixed templated file and the list of source fixes to + # apply. loop_check_tuple = ( new_tree.raw, tuple(new_tree.source_fixes), diff --git a/src/sqlfluff/core/parser/segments/base.py b/src/sqlfluff/core/parser/segments/base.py index cfcbb56ae67..2d5318d8c23 100644 --- a/src/sqlfluff/core/parser/segments/base.py +++ b/src/sqlfluff/core/parser/segments/base.py @@ -69,7 +69,8 @@ class SourceFix: templated_slice: slice def __hash__(self): - # Only hash based on the source slice, not the templated slice (which might change) + # Only hash based on the source slice, not the + # templated slice (which might change) return hash((self.edit, self.source_slice.start, self.source_slice.stop)) diff --git a/src/sqlfluff/core/rules/base.py b/src/sqlfluff/core/rules/base.py index cd92563b8e8..cfbbae733e3 100644 --- a/src/sqlfluff/core/rules/base.py +++ b/src/sqlfluff/core/rules/base.py @@ -126,12 +126,13 @@ class LintFix: position to create at (with the existing element at this position to be moved *after* the edit), for a `replace` it implies the segment to be replaced. - edit (iterable of :obj:`BaseSegment`, optional): For `replace` and `create` fixes, - this holds the iterable of segments to create or replace at the - given `anchor` point. - source (iterable of :obj:`BaseSegment`, optional): For `replace` and `create` fixes, - this holds iterable of segments that provided code. IMPORTANT: The - linter uses this to prevent copying material from templated areas. + edit (iterable of :obj:`BaseSegment`, optional): For `replace` and + `create` fixes, this holds the iterable of segments to create + or replace at the given `anchor` point. + source (iterable of :obj:`BaseSegment`, optional): For `replace` and + `create` fixes, this holds iterable of segments that provided + code. IMPORTANT: The linter uses this to prevent copying material + from templated areas. """ @@ -199,7 +200,8 @@ def is_trivial(self): return True # pragma: no cover TODO? return False - def is_just_source_edit(self): + def is_just_source_edit(self) -> bool: + """Return whether this a valid source only edit.""" return ( self.edit_type == "replace" and self.edit diff --git a/src/sqlfluff/rules/L046.py b/src/sqlfluff/rules/L046.py index 3ef28453f3c..11d97e4b4aa 100644 --- a/src/sqlfluff/rules/L046.py +++ b/src/sqlfluff/rules/L046.py @@ -158,6 +158,20 @@ def _eval(self, context: RuleContext) -> EvalResultType: + (post_fix or ws_post) + tag_post ) + src_fix = [ + SourceFix( + fixed, + slice( + src_idx + position, + src_idx + position + len(stripped), + ), + # NOTE: The templated slice here is + # going to be a little imprecise, but + # the one that really matters is the + # source slice. + context.segment.pos_marker.templated_slice, + ) + ] result.append( LintResult( memory=memory, @@ -167,25 +181,7 @@ def _eval(self, context: RuleContext) -> EvalResultType: fixes=[ LintFix.replace( context.segment, - [ - context.segment.edit( - source_fixes=[ - SourceFix( - fixed, - slice( - src_idx + position, - src_idx - + position - + len(stripped), - ), - # NOTE: The templated slice here is going to be a little - # imprecise, but the one that really matters is the source - # slice. - context.segment.pos_marker.templated_slice, - ) - ] - ) - ], + [context.segment.edit(source_fixes=src_fix)], ) ], ) From 6aec80e18f0e1624cf735a9b431b65f65442009c Mon Sep 17 00:00:00 2001 From: Alan Date: Sat, 9 Jul 2022 23:14:29 +0100 Subject: [PATCH 16/24] coverage --- src/sqlfluff/core/parser/segments/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sqlfluff/core/parser/segments/base.py b/src/sqlfluff/core/parser/segments/base.py index 2d5318d8c23..d35a9aa21cd 100644 --- a/src/sqlfluff/core/parser/segments/base.py +++ b/src/sqlfluff/core/parser/segments/base.py @@ -49,7 +49,7 @@ from sqlfluff.core.templaters.base import TemplatedFile if TYPE_CHECKING: - from sqlfluff.core.rules.base import LintFix + from sqlfluff.core.rules.base import LintFix # pragma: no cover # Instantiate the linter logger (only for use in methods involved with fixing.) linter_logger = logging.getLogger("sqlfluff.linter") From f500d71470dc9b6aeb44ad56c9c4fd7b90deb8a3 Mon Sep 17 00:00:00 2001 From: Alan Date: Sat, 9 Jul 2022 23:21:03 +0100 Subject: [PATCH 17/24] mypy fixes --- src/sqlfluff/core/parser/segments/base.py | 8 +++++++- src/sqlfluff/core/rules/base.py | 2 +- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/src/sqlfluff/core/parser/segments/base.py b/src/sqlfluff/core/parser/segments/base.py index d35a9aa21cd..7f1562caaaf 100644 --- a/src/sqlfluff/core/parser/segments/base.py +++ b/src/sqlfluff/core/parser/segments/base.py @@ -141,10 +141,16 @@ def add(self, fix: "LintFix"): anchor by condensing them together here. """ if fix.is_just_source_edit(): + assert fix.edit + # is_just_source_edit confirms there will be a list + # so we can hint that to mypy. self.source_fixes += fix.edit[0].source_fixes # is there already a replace? if self._first_replace: - # Condence this fix onto that one + assert self._first_replace.edit + # is_just_source_edit confirms there will be a list + # and that's the only way to get into _first_replace + # if it's populated so we can hint that to mypy. linter_logger.info( "Multiple edits detected, condensing %s onto %s", fix, diff --git a/src/sqlfluff/core/rules/base.py b/src/sqlfluff/core/rules/base.py index cfbbae733e3..e1538d9dc43 100644 --- a/src/sqlfluff/core/rules/base.py +++ b/src/sqlfluff/core/rules/base.py @@ -204,7 +204,7 @@ def is_just_source_edit(self) -> bool: """Return whether this a valid source only edit.""" return ( self.edit_type == "replace" - and self.edit + and self.edit is not None and len(self.edit) == 1 and self.edit[0].raw == self.anchor.raw ) From e52ab92580f9aca760d9e6f74682ceee063037d1 Mon Sep 17 00:00:00 2001 From: Alan Cruickshank Date: Sun, 10 Jul 2022 09:23:07 +0100 Subject: [PATCH 18/24] Update src/sqlfluff/core/parser/segments/base.py Co-authored-by: Barry Hart --- src/sqlfluff/core/parser/segments/base.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/sqlfluff/core/parser/segments/base.py b/src/sqlfluff/core/parser/segments/base.py index 7f1562caaaf..4ddd7cba6ca 100644 --- a/src/sqlfluff/core/parser/segments/base.py +++ b/src/sqlfluff/core/parser/segments/base.py @@ -132,6 +132,7 @@ class AnchorEditInfo: create_after: int = field(default=0) fixes: List = field(default_factory=list) source_fixes: List = field(default_factory=list) + # First fix of edit_type "replace" in "fixes" _first_replace: Optional["LintFix"] = field(default=None) def add(self, fix: "LintFix"): From 4453142cc768d8c75ff2a54ec993df8221d546e6 Mon Sep 17 00:00:00 2001 From: Alan Cruickshank Date: Sun, 10 Jul 2022 09:23:24 +0100 Subject: [PATCH 19/24] Update src/sqlfluff/core/parser/segments/base.py Co-authored-by: Barry Hart --- src/sqlfluff/core/parser/segments/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sqlfluff/core/parser/segments/base.py b/src/sqlfluff/core/parser/segments/base.py index 4ddd7cba6ca..0641d802c3d 100644 --- a/src/sqlfluff/core/parser/segments/base.py +++ b/src/sqlfluff/core/parser/segments/base.py @@ -404,7 +404,7 @@ def raw_segments(self) -> List["BaseSegment"]: @cached_property def source_fixes(self) -> List[SourceFix]: - """Return an source fixes as list.""" + """Return any source fixes as list.""" return list(chain.from_iterable(s.source_fixes for s in self.segments)) @cached_property From 443efc69bb919b09d0617ecc6b61045897854972 Mon Sep 17 00:00:00 2001 From: Alan Cruickshank Date: Sun, 10 Jul 2022 09:23:35 +0100 Subject: [PATCH 20/24] Update src/sqlfluff/core/parser/segments/base.py Co-authored-by: Barry Hart --- src/sqlfluff/core/parser/segments/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sqlfluff/core/parser/segments/base.py b/src/sqlfluff/core/parser/segments/base.py index 0641d802c3d..d84c887e24e 100644 --- a/src/sqlfluff/core/parser/segments/base.py +++ b/src/sqlfluff/core/parser/segments/base.py @@ -1367,7 +1367,7 @@ def _iter_source_fix_patches( ) -> Iterator[FixPatch]: """Yield any source patches as fixes now. - NOTE: This yields source fixes for the segment and any of it's + NOTE: This yields source fixes for the segment and any of its children, so it's important to call it at the right point in the recursion to avoid yielding duplicates. """ From 6d59ddb9de2cf1c4bbf6c5b1c0b5fe596beaabff Mon Sep 17 00:00:00 2001 From: Alan Cruickshank Date: Sun, 10 Jul 2022 09:23:44 +0100 Subject: [PATCH 21/24] Update src/sqlfluff/core/parser/segments/raw.py Co-authored-by: Barry Hart --- src/sqlfluff/core/parser/segments/raw.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sqlfluff/core/parser/segments/raw.py b/src/sqlfluff/core/parser/segments/raw.py index 7d8da148bbe..7751b20250b 100644 --- a/src/sqlfluff/core/parser/segments/raw.py +++ b/src/sqlfluff/core/parser/segments/raw.py @@ -118,7 +118,7 @@ def segments(self): @property def source_fixes(self) -> List[SourceFix]: - """Return an source fixes as list.""" + """Return any source fixes as list.""" return self._source_fixes or [] # ################ INSTANCE METHODS From c6ecfbeb2c572238fee90b9108ea991b534a0f34 Mon Sep 17 00:00:00 2001 From: Alan Date: Sun, 10 Jul 2022 09:40:20 +0100 Subject: [PATCH 22/24] More comments and test an empty tag fix --- test/core/linted_file_test.py | 41 +++++++++++++++------ test/fixtures/rules/std_rule_cases/L046.yml | 4 ++ 2 files changed, 34 insertions(+), 11 deletions(-) diff --git a/test/core/linted_file_test.py b/test/core/linted_file_test.py index 7a4e2f1ef12..3666b08e7ec 100644 --- a/test/core/linted_file_test.py +++ b/test/core/linted_file_test.py @@ -75,17 +75,23 @@ def test__linted_file__build_up_fixed_source_string( # of the fix patches. They're not used at this step so irrelevant for # testing. [ - # Trivial example + # Trivial example. + # No edits in a single character file. Slice should be one + # character long. ([], [], "a", [slice(0, 1)]), - # Simple replacement + # Simple replacement. + # We've yielded a patch to change a single character. This means + # we should get only slices for that character, and for the + # unchanged file around it. ( [FixPatch(slice(1, 2), "d", "", slice(1, 2), "b", "b")], [], "abc", [slice(0, 1), slice(1, 2), slice(2, 3)], ), - # Basic templated example - # NOTE: No fixes so just one slice. + # Templated no fixes. + # A templated file, but with no fixes, so no subdivision of the + # file is required and we should just get a single slice. ( [], [], @@ -93,7 +99,11 @@ def test__linted_file__build_up_fixed_source_string( [slice(0, 11)], ), # Templated example with a source-only slice. - # NOTE: No fixes so just one slice. + # A templated file, but with no fixes, so no subdivision of the + # file is required and we should just get a single slice. While + # there is handling for "source only" slices like template + # comments, in this case no additional slicing is required + # because no edits have been made. ( [], [RawFileSlice("{# b #}", "comment", 2)], @@ -101,7 +111,9 @@ def test__linted_file__build_up_fixed_source_string( [slice(0, 11)], ), # Templated fix example with a source-only slice. - # NOTE: Correct slicing example + # We're making an edit adjacent to a source only slice. Edits + # _before_ source only slices currently don't trigger additional + # slicing. This is fine. ( [FixPatch(slice(0, 1), "a ", "", slice(0, 1), "a", "a")], [RawFileSlice("{# b #}", "comment", 1)], @@ -109,10 +121,14 @@ def test__linted_file__build_up_fixed_source_string( [slice(0, 1), slice(1, 9)], ), # Templated fix example with a source-only slice. - # NOTE: We insert a slice for the source only slice. + # We've made an edit directly _after_ a source only slice + # which should trigger the logic to ensure that the source + # only slice isn't included in the source mapping of the + # edit. # TODO: given that the logic is based on the _type_ # of the slice (e.g. comment), would we handle a # template tag which returns an empty string correctly? + ( [FixPatch(slice(1, 2), " c", "", slice(8, 9), "c", "c")], [RawFileSlice("{# b #}", "comment", 1)], @@ -120,7 +136,9 @@ def test__linted_file__build_up_fixed_source_string( [slice(0, 1), slice(1, 8), slice(8, 9), slice(9, 10)], ), # Templated example with a source-only slice. - # NOTE: We're fixing the soure only slice. + # Here we're making the fix to the templated slice. This + # checks that we don't duplicate or fumble the slice + # generation when we're explicitly trying to edit the source. # TODO: Should we be using the fix type (e.g. "source") # to somehow determine whether the fix is "safe"? ( @@ -129,7 +147,10 @@ def test__linted_file__build_up_fixed_source_string( "a {# b #} c", [slice(0, 2), slice(2, 9), slice(9, 11)], ), - # Illustrate potential templating bug (case from L046) + # Illustrate potential templating bug (case from L046). + # In this case we have fixes for all our tempolated sections + # and they are all close to eachother and so may be either + # skipped or duplicated if the logic is not precise. ( [ FixPatch( @@ -162,14 +183,12 @@ def test__linted_file__build_up_fixed_source_string( raw="{%+if true-%}", slice_type="block_start", source_idx=14, - slice_subtype=None, block_idx=0, ), RawFileSlice( raw="{%-endif%}", slice_type="block_end", source_idx=43, - slice_subtype=None, block_idx=1, ), ], diff --git a/test/fixtures/rules/std_rule_cases/L046.yml b/test/fixtures/rules/std_rule_cases/L046.yml index 73cd7a42f75..3e8193baa32 100644 --- a/test/fixtures/rules/std_rule_cases/L046.yml +++ b/test/fixtures/rules/std_rule_cases/L046.yml @@ -50,3 +50,7 @@ test_fail_segment_contains_multiple_templated_slices_last_one_bad: configs: core: dialect: bigquery + +test_fail_jinja_tags_no_space_no_content: + fail_str: SELECT {{-""-}}1 + fix_str: SELECT {{- "" -}}1 From d44aa307126c4c0cee61ff066affea6f8e3f5d00 Mon Sep 17 00:00:00 2001 From: Alan Date: Sun, 10 Jul 2022 10:05:56 +0100 Subject: [PATCH 23/24] Linting --- test/core/linted_file_test.py | 1 - 1 file changed, 1 deletion(-) diff --git a/test/core/linted_file_test.py b/test/core/linted_file_test.py index 3666b08e7ec..53d4e1083a0 100644 --- a/test/core/linted_file_test.py +++ b/test/core/linted_file_test.py @@ -128,7 +128,6 @@ def test__linted_file__build_up_fixed_source_string( # TODO: given that the logic is based on the _type_ # of the slice (e.g. comment), would we handle a # template tag which returns an empty string correctly? - ( [FixPatch(slice(1, 2), " c", "", slice(8, 9), "c", "c")], [RawFileSlice("{# b #}", "comment", 1)], From 553b068d7f0ee65bcc74af2e3f1af9cf5b255d69 Mon Sep 17 00:00:00 2001 From: Alan Date: Sun, 10 Jul 2022 20:04:07 +0100 Subject: [PATCH 24/24] test fixes --- test/fixtures/rules/std_rule_cases/L046.yml | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/test/fixtures/rules/std_rule_cases/L046.yml b/test/fixtures/rules/std_rule_cases/L046.yml index 3e8193baa32..119a0766bee 100644 --- a/test/fixtures/rules/std_rule_cases/L046.yml +++ b/test/fixtures/rules/std_rule_cases/L046.yml @@ -52,5 +52,9 @@ test_fail_segment_contains_multiple_templated_slices_last_one_bad: dialect: bigquery test_fail_jinja_tags_no_space_no_content: - fail_str: SELECT {{-""-}}1 - fix_str: SELECT {{- "" -}}1 + fail_str: SELECT {{""-}}1 + fix_str: SELECT {{ "" -}}1 + +test_fail_jinja_tags_across_segment_boundaries: + fail_str: SELECT a{{-"1 + b"}}2 + fix_str: SELECT a{{- "1 + b" }}2