New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Improve Jinja whitespace handling in rules #1647
Changes from 36 commits
019b68d
925dfe8
2ca72e5
d212a7f
542a1c0
66c1141
ebc9e29
b2b3f42
5d00b0d
a19d393
b38e7fa
f0a6d83
c31b4cd
8abfda4
352e419
6f18a25
c8f63c9
6212dcf
42de56b
abe5e2a
651450c
52c88e5
b370d08
39cedc9
996d19b
332aba7
168b7d3
1928ad6
07b0532
c353999
72917dd
d1cd2bc
9845311
16371cc
d5c92c8
2709aab
e06b50e
31ea23e
7ad47e2
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
{% macro echo(colname) %} | ||
{{colname}} | ||
{% endmacro %} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
{{ config(materialized='view') }} | ||
|
||
with cte_example as ( | ||
select 1 as col_name | ||
), | ||
|
||
final as | ||
( | ||
select | ||
col_name, | ||
{{- echo('col_name') -}} as col_name2 | ||
from | ||
cte_example | ||
) | ||
|
||
select * from final |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
{{ config(materialized='view') }} | ||
|
||
with cte_example as ( | ||
select 1 as col_name | ||
), | ||
|
||
final as | ||
( | ||
select | ||
col_name, | ||
{{- echo('col_name') -}} as col_name2 | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This line looks strange after fixing because the templated file itself is pretty strange -- here's the file after rendering, before any fixes. Note that future issues & PRs may improve the appearance when fixing templated files that use whitespace control.
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Interestingly, the extra whitespace is EXACTLY the length of the stripped token. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I looked into this. The amount of whitespace is not affected by changing
This results in |
||
from | ||
cte_example | ||
) | ||
|
||
select * from final |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -382,31 +382,31 @@ def fix_string(self) -> Tuple[Any, bool]: | |
" - Skipping edit patch on non-unique templated content: %s", | ||
enriched_patch, | ||
) | ||
continue | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. After my changes, this section of code was no longer covered by tests. I am not familiar with this code and had not touched it in this PR, so for now I added `# pragma: no cover`. I ran the full test suite in |
||
# We have a single occurrence of the thing we want to patch. This | ||
# means we can use its position to place our patch. | ||
new_source_slice = slice( | ||
enriched_patch.source_slice.start + positions[0], | ||
enriched_patch.source_slice.start | ||
+ positions[0] | ||
+ len(enriched_patch.templated_str), | ||
) | ||
enriched_patch = EnrichedFixPatch( | ||
source_slice=new_source_slice, | ||
templated_slice=enriched_patch.templated_slice, | ||
patch_category=enriched_patch.patch_category, | ||
fixed_raw=enriched_patch.fixed_raw, | ||
templated_str=enriched_patch.templated_str, | ||
source_str=enriched_patch.source_str, | ||
) | ||
linter_logger.debug( | ||
" * Keeping Tricky Case. Positions: %s, New Slice: %s, Patch: %s", | ||
positions, | ||
new_source_slice, | ||
enriched_patch, | ||
) | ||
filtered_source_patches.append(enriched_patch) | ||
dedupe_buffer.append(enriched_patch.dedupe_tuple()) | ||
else: # pragma: no cover | ||
# We have a single occurrence of the thing we want to patch. This | ||
# means we can use its position to place our patch. | ||
new_source_slice = slice( | ||
enriched_patch.source_slice.start + positions[0], | ||
enriched_patch.source_slice.start | ||
+ positions[0] | ||
+ len(enriched_patch.templated_str), | ||
) | ||
enriched_patch = EnrichedFixPatch( | ||
source_slice=new_source_slice, | ||
templated_slice=enriched_patch.templated_slice, | ||
patch_category=enriched_patch.patch_category, | ||
fixed_raw=enriched_patch.fixed_raw, | ||
templated_str=enriched_patch.templated_str, | ||
source_str=enriched_patch.source_str, | ||
) | ||
linter_logger.debug( | ||
" * Keeping Tricky Case. Positions: %s, New Slice: %s, Patch: %s", | ||
positions, | ||
new_source_slice, | ||
enriched_patch, | ||
) | ||
filtered_source_patches.append(enriched_patch) | ||
dedupe_buffer.append(enriched_patch.dedupe_tuple()) | ||
continue | ||
|
||
# Sort the patches before building up the file. | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -310,7 +310,7 @@ def templated_slice_to_source_slice( | |
if ts_start_sf_start < len(self.sliced_file): | ||
return self.sliced_file[1].source_slice | ||
else: | ||
return self.sliced_file[-1].source_slice | ||
return self.sliced_file[-1].source_slice # pragma: no cover | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why has this gone out of cover? Is this still the right return value? Or should the first "We should never get here" clause include this (i.e. a There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't know why -- I think this is one of those areas where there are no explicit tests, it's just tested implicitly by higher-level code. Presumably, updating the raw / templated mapping caused it to stop hitting this code. I can't say if it's no longer necessary or if there's just no test case that's hitting it anymore. |
||
else: | ||
start_slices = self.sliced_file[ts_start_sf_start:ts_start_sf_stop] | ||
if ts_stop_sf_start == ts_stop_sf_stop: # pragma: no cover TODO? | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -330,12 +330,52 @@ def _slice_template(cls, in_str: str) -> Iterator[RawFileSlice]: | |
} | ||
|
||
# https://jinja.palletsprojects.com/en/2.11.x/api/#jinja2.Environment.lex | ||
for _, elem_type, raw in env.lex(cls._preprocess_template(in_str)): | ||
for _, elem_type, raw in env.lex(in_str): | ||
if elem_type == "data": | ||
yield RawFileSlice(raw, "literal", idx) | ||
idx += len(raw) | ||
continue | ||
str_buff += raw | ||
|
||
if elem_type.endswith("_begin"): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we need a corresponding tag for skipped characters after the tag too? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Surprisingly, no. To confirm this, I added two more test cases to
I confirmed that test doesn't hit the Jinja lexer "strip" code. AFAICT, adding one is not needed. Also note that in the lexer, there's a token type There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'm not sure that's the case. If you put this SQL in a
And run So it definitely does something. So confused why it's not needed. Or is the fact you've not coded that the real reason for the strange "after" in above test case? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I confirmed your point, @tunetheweb, and confirmed this in detail by logging No right strip:
Right strip:
Very sloppy work on my part! I'll dig into There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It does seem, though, that with right stripping, no whitespace is missing from There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Partial answer: right stripping is handled entirely in the tokenizer regexes, e.g.
Note the left vs right hand side of the I'm unsure what the "correct" behavior is here, but if right stripping needs to behave the same as left stripping, we'd want to detect when this happens and split the end token into two slices: the end token itself and a second token with the trailing whitespace. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ok, I added handling for right stripping. As noted above, no whitespace was missing from I also updated a couple of right stripping test cases. |
||
# When a "begin" tag (whether block, comment, or data) uses | ||
# whitespace stripping | ||
# (https://jinja.palletsprojects.com/en/3.0.x/templates/#whitespace-control), | ||
# the Jinja lex() function handles this by discarding adjacent | ||
# whitespace from in_str. For more insight, see the tokeniter() | ||
# function in this file: | ||
# https://github.com/pallets/jinja/blob/main/src/jinja2/lexer.py | ||
# We want to detect and correct for this in order to: | ||
# - Correctly update "idx" (if this is wrong, that's a | ||
# potential DISASTER because lint fixes use this info to | ||
# update the source file, and incorrect values often result in | ||
# CORRUPTING the user's file so it's no longer valid SQL. :-O | ||
# - Guarantee that the slices we return fully "cover" the | ||
# contents of in_str. | ||
# | ||
# We detect skipped characters by looking ahead in in_str for | ||
# the token just returned from lex(). The token text will either | ||
# be at the current 'idx' position (if whitespace stripping did | ||
# not occur) OR it'll be farther along in in_str, but we're | ||
# GUARANTEED that lex() only skips over WHITESPACE; nothing else. | ||
|
||
# Find the token returned. Did lex() skip over any characters? | ||
num_chars_skipped = in_str.index(raw, idx) - idx | ||
if num_chars_skipped: | ||
# Yes. It skipped over some characters. Compute a string | ||
# containing the skipped characters. | ||
skipped_str = in_str[idx : idx + num_chars_skipped] | ||
|
||
# Sanity check: Verify that Jinja only skips over | ||
# WHITESPACE, never anything else. | ||
if not skipped_str.isspace(): # pragma: no cover | ||
templater_logger.warning( | ||
"Jinja lex() skipped non-whitespace: %s", skipped_str | ||
) | ||
# Treat the skipped whitespace as a literal. | ||
yield RawFileSlice(skipped_str, "literal", idx) | ||
idx += num_chars_skipped | ||
Comment on lines
+363
to
+377
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is amazing!!! Seems so simple. |
||
|
||
# raw_end and raw_begin behave a little differently in | ||
# that the whole tag shows up in one go rather than getting | ||
# parts of the tag at a time. | ||
|
@@ -361,22 +401,23 @@ def _slice_template(cls, in_str: str) -> Iterator[RawFileSlice]: | |
block_type = "block_start" | ||
if trimmed_content.split()[0] == "for": | ||
block_subtype = "loop" | ||
yield RawFileSlice(str_buff, block_type, idx, block_subtype) | ||
idx += len(str_buff) | ||
m = re.search(r"\s+$", raw, re.MULTILINE | re.DOTALL) | ||
if raw.startswith("-") and m: | ||
# Right whitespace was stripped. Split off the trailing | ||
# whitespace into a separate slice. The desired behavior is | ||
# to behave similarly as the left stripping case above. | ||
# Note that the stakes are a bit different, because lex() | ||
# hasn't *omitted* any characters from the strings it | ||
# returns, it has simply grouped them differently than we | ||
# want. | ||
trailing_chars = len(m.group(0)) | ||
yield RawFileSlice( | ||
str_buff[:-trailing_chars], block_type, idx, block_subtype | ||
) | ||
idx += len(str_buff) - trailing_chars | ||
yield RawFileSlice(str_buff[-trailing_chars:], "literal", idx) | ||
idx += trailing_chars | ||
else: | ||
yield RawFileSlice(str_buff, block_type, idx, block_subtype) | ||
idx += len(str_buff) | ||
str_buff = "" | ||
|
||
@classmethod | ||
def _preprocess_template(cls, in_str: str) -> str: | ||
"""Does any preprocessing of the template required before expansion.""" | ||
# Using Jinja whitespace stripping (e.g. `{%-` or `-%}`) breaks the | ||
# position markers between unlexed and lexed file. So let's ignore any | ||
# request to do that before lexing, by replacing '-' with '+' | ||
# | ||
# Note: '+' is the default, so shouldn't really be needed but we | ||
# explicitly state that to preserve the space for the missing '-' character | ||
# so it looks the same. | ||
in_str = in_str.replace("{%-", "{%+") | ||
in_str = in_str.replace("-%}", "+%}") | ||
in_str = in_str.replace("{#-", "{#+") | ||
in_str = in_str.replace("-#}", "+#}") | ||
return in_str |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Previously,
sqlfluff fix
was corrupting this templated line of code.