From 32687ca7209ba9ce8251689d403c91026c128dc6 Mon Sep 17 00:00:00 2001 From: Alan Cruickshank Date: Wed, 13 Jul 2022 11:15:15 +0100 Subject: [PATCH] Add large file check (#3600) * Add large file check * Change to decorator * Fix test cases --- .../sqlfluff_templater_dbt/templater.py | 3 +- src/sqlfluff/core/default_config.cfg | 5 +++ src/sqlfluff/core/templaters/base.py | 32 +++++++++++++++-- src/sqlfluff/core/templaters/jinja.py | 2 ++ src/sqlfluff/core/templaters/placeholder.py | 5 +-- src/sqlfluff/core/templaters/python.py | 2 ++ test/core/templaters/jinja_test.py | 35 +++++++++++++++++++ test/core/templaters/python_test.py | 22 ++++++++++++ 8 files changed, 101 insertions(+), 5 deletions(-) diff --git a/plugins/sqlfluff-templater-dbt/sqlfluff_templater_dbt/templater.py b/plugins/sqlfluff-templater-dbt/sqlfluff_templater_dbt/templater.py index 8047fbebc1b..e1dc9701509 100644 --- a/plugins/sqlfluff-templater-dbt/sqlfluff_templater_dbt/templater.py +++ b/plugins/sqlfluff-templater-dbt/sqlfluff_templater_dbt/templater.py @@ -25,7 +25,7 @@ from sqlfluff.core.cached_property import cached_property from sqlfluff.core.errors import SQLTemplaterError, SQLTemplaterSkipFile -from sqlfluff.core.templaters.base import TemplatedFile +from sqlfluff.core.templaters.base import TemplatedFile, large_file_check from sqlfluff.core.templaters.jinja import JinjaTemplater @@ -308,6 +308,7 @@ def sequence_files( if fname not in already_yielded: yield fname + @large_file_check def process(self, *, fname, in_str=None, config=None, formatter=None): """Compile a dbt model and return the compiled SQL. diff --git a/src/sqlfluff/core/default_config.cfg b/src/sqlfluff/core/default_config.cfg index 1d240f3632e..dfba2d65372 100644 --- a/src/sqlfluff/core/default_config.cfg +++ b/src/sqlfluff/core/default_config.cfg @@ -34,6 +34,11 @@ sql_file_exts = .sql,.sql.j2,.dml,.ddl # Allow fix to run on files, even if they contain parsing errors # Note altering this is NOT RECOMMENDED as can corrupt SQL fix_even_unparsable = False +# Very large files can make the parser effectively hang. +# This limit skips files over a certain character length +# and warns the user what has happened. +# Set this to 0 to disable. +large_file_skip_char_limit = 20000 [sqlfluff:indentation] # See https://docs.sqlfluff.com/en/stable/indentation.html diff --git a/src/sqlfluff/core/templaters/base.py b/src/sqlfluff/core/templaters/base.py index 2b91a08659e..c51b6bd0d1a 100644 --- a/src/sqlfluff/core/templaters/base.py +++ b/src/sqlfluff/core/templaters/base.py @@ -3,6 +3,7 @@ import logging from bisect import bisect_left from typing import Dict, Iterator, List, Tuple, Optional, NamedTuple, Iterable +from sqlfluff.core.config import FluffConfig from sqlfluff.core.errors import SQLTemplaterSkipFile @@ -22,6 +23,32 @@ def iter_indices_of_newlines(raw_str: str) -> Iterator[int]: break # pragma: no cover TODO? +def large_file_check(func): + """Raise an exception if the file is over a defined size. + + Designed to be implemented as a decorator on `.process()` methods. + + If no config is provided or the relevant config value is set + to zero then the check is skipped. + """ + + def _wrapped( + self, *, in_str: str, fname: str, config: FluffConfig = None, **kwargs + ): + if config: + limit = config.get("large_file_skip_char_limit") + if limit and len(in_str) > limit: + raise SQLTemplaterSkipFile( + f"Length of file {fname!r} is over {limit} characters. " + "Skipping to avoid parser lock. Users can increase this limit " + "in their config by setting the 'large_file_skip_char_limit' " + "value, or disable by setting it to zero." + ) + return func(self, in_str=in_str, fname=fname, config=config, **kwargs) + + return _wrapped + + class RawFileSlice(NamedTuple): """A slice referring to a raw file.""" @@ -432,14 +459,15 @@ def __init__(self, **kwargs): """ def sequence_files( - self, fnames: List[str], config=None, formatter=None + self, fnames: List[str], config: FluffConfig = None, formatter=None ) -> Iterable[str]: """Given files to be processed, return a valid processing sequence.""" # Default is to process in the original order. return fnames + @large_file_check def process( - self, *, in_str: str, fname: str, config=None, formatter=None + self, *, in_str: str, fname: str, config: FluffConfig = None, formatter=None ) -> Tuple[Optional[TemplatedFile], list]: """Process a string and return a TemplatedFile. diff --git a/src/sqlfluff/core/templaters/jinja.py b/src/sqlfluff/core/templaters/jinja.py index a1fa56c3360..1e9a8e25a73 100644 --- a/src/sqlfluff/core/templaters/jinja.py +++ b/src/sqlfluff/core/templaters/jinja.py @@ -22,6 +22,7 @@ RawFileSlice, TemplatedFile, TemplatedFileSlice, + large_file_check, ) from sqlfluff.core.templaters.python import PythonTemplater from sqlfluff.core.templaters.slicers.tracer import JinjaAnalyzer @@ -307,6 +308,7 @@ def make_template(in_str): return env, live_context, make_template + @large_file_check def process( self, *, in_str: str, fname: str, config=None, formatter=None ) -> Tuple[Optional[TemplatedFile], list]: diff --git a/src/sqlfluff/core/templaters/placeholder.py b/src/sqlfluff/core/templaters/placeholder.py index 6ce067cca9b..8627e4bf576 100644 --- a/src/sqlfluff/core/templaters/placeholder.py +++ b/src/sqlfluff/core/templaters/placeholder.py @@ -11,10 +11,10 @@ RawFileSlice, TemplatedFile, TemplatedFileSlice, + large_file_check, + RawTemplater, ) -from sqlfluff.core.templaters.base import RawTemplater - # Instantiate the templater logger templater_logger = logging.getLogger("sqlfluff.templater") @@ -110,6 +110,7 @@ def get_context(self, config) -> Dict: return live_context + @large_file_check def process( self, *, in_str: str, fname: str, config=None, formatter=None ) -> Tuple[Optional[TemplatedFile], list]: diff --git a/src/sqlfluff/core/templaters/python.py b/src/sqlfluff/core/templaters/python.py index 9c70829b508..512429a881e 100644 --- a/src/sqlfluff/core/templaters/python.py +++ b/src/sqlfluff/core/templaters/python.py @@ -13,6 +13,7 @@ templater_logger, RawFileSlice, TemplatedFileSlice, + large_file_check, ) @@ -197,6 +198,7 @@ def get_context(self, fname=None, config=None, **kw) -> Dict: live_context[k] = self.infer_type(live_context[k]) return live_context + @large_file_check def process( self, *, in_str: str, fname: str, config=None, formatter=None ) -> Tuple[Optional[TemplatedFile], list]: diff --git a/test/core/templaters/jinja_test.py b/test/core/templaters/jinja_test.py index 6aa2db357c3..c29267fde03 100644 --- a/test/core/templaters/jinja_test.py +++ b/test/core/templaters/jinja_test.py @@ -4,6 +4,7 @@ from typing import List, NamedTuple import pytest +from sqlfluff.core.errors import SQLTemplaterSkipFile from sqlfluff.core.templaters import JinjaTemplater from sqlfluff.core.templaters.base import RawFileSlice, TemplatedFile @@ -1158,3 +1159,37 @@ def test__templater_jinja_slice_file(raw_file, override_context, result, caplog) for templated_file_slice in sliced_file ] assert actual == result + + +def test__templater_jinja_large_file_check(): + """Test large file skipping. + + The check is seperately called on each .process() method + so it makes sense to test a few templaters. + """ + # First check we can process the file normally without specific config. + # i.e. check the defaults work and the default is high. + JinjaTemplater().process( + in_str="SELECT 1", + fname="", + config=FluffConfig(overrides={"dialect": "ansi"}), + ) + # Second check setting the value low disables the check + JinjaTemplater().process( + in_str="SELECT 1", + fname="", + config=FluffConfig( + overrides={"dialect": "ansi", "large_file_skip_char_limit": 0} + ), + ) + # Finally check we raise a skip exception when config is set low. + with pytest.raises(SQLTemplaterSkipFile) as excinfo: + JinjaTemplater().process( + in_str="SELECT 1", + fname="", + config=FluffConfig( + overrides={"dialect": "ansi", "large_file_skip_char_limit": 2}, + ), + ) + + assert "Length of file" in str(excinfo.value) diff --git a/test/core/templaters/python_test.py b/test/core/templaters/python_test.py index 27a86cdaf5c..118601a40e3 100644 --- a/test/core/templaters/python_test.py +++ b/test/core/templaters/python_test.py @@ -2,6 +2,7 @@ import pytest import logging +from sqlfluff.core.errors import SQLTemplaterSkipFile from sqlfluff.core.templaters import PythonTemplater from sqlfluff.core import SQLTemplaterError, FluffConfig @@ -473,3 +474,24 @@ def test__templater_python_slice_file(raw_file, templated_file, unwrap_wrapped, prev_slice = (templated_slice.source_slice, templated_slice.templated_slice) # check result assert resp == result + + +def test__templater_python_large_file_check(): + """Test large file skipping. + + The check is seperately called on each .process() method + so it makes sense to test a few templaters. + """ + # First check we can process the file normally without config. + PythonTemplater().process(in_str="SELECT 1", fname="") + # Then check we raise a skip exception when config is set low. + with pytest.raises(SQLTemplaterSkipFile) as excinfo: + PythonTemplater().process( + in_str="SELECT 1", + fname="", + config=FluffConfig( + overrides={"dialect": "ansi", "large_file_skip_char_limit": 2}, + ), + ) + + assert "Length of file" in str(excinfo.value)