Skip to content

Commit

Permalink
Add a file size check in bytes (#3770)
Browse files Browse the repository at this point in the history
* Add a file size check in bytes

* extend the test to get better coverage

* Update src/sqlfluff/core/templaters/base.py

Co-authored-by: Barry Hart <barrywhart@yahoo.com>

* Update src/sqlfluff/core/templaters/base.py

Co-authored-by: Barry Hart <barrywhart@yahoo.com>

Co-authored-by: Barry Hart <barrywhart@yahoo.com>
  • Loading branch information
alanmcruickshank and barrywhart committed Aug 23, 2022
1 parent 4f3ceb9 commit 568e698
Show file tree
Hide file tree
Showing 10 changed files with 102 additions and 28 deletions.
Expand Up @@ -23,7 +23,7 @@
from jinja2_simple_tags import StandaloneTag

from sqlfluff.core.cached_property import cached_property
from sqlfluff.core.errors import SQLTemplaterError, SQLTemplaterSkipFile
from sqlfluff.core.errors import SQLTemplaterError, SQLFluffSkipFile

from sqlfluff.core.templaters.base import TemplatedFile, large_file_check

Expand Down Expand Up @@ -399,10 +399,10 @@ def _find_node(self, fname, config=None):
if not results:
skip_reason = self._find_skip_reason(fname)
if skip_reason:
raise SQLTemplaterSkipFile(
raise SQLFluffSkipFile(
f"Skipped file {fname} because it is {skip_reason}"
)
raise SQLTemplaterSkipFile(
raise SQLFluffSkipFile(
"File %s was not found in dbt project" % fname
) # pragma: no cover
return results[0]
Expand Down Expand Up @@ -491,7 +491,7 @@ def make_template(in_str):
fname,
)
# Additional error logging in case we get a fatal dbt error.
raise SQLTemplaterSkipFile( # pragma: no cover
raise SQLFluffSkipFile( # pragma: no cover
f"Skipped file {fname} because dbt raised a fatal "
f"exception during compilation: {err!s}"
) from err
Expand Down
4 changes: 2 additions & 2 deletions plugins/sqlfluff-templater-dbt/test/templater_test.py
Expand Up @@ -11,7 +11,7 @@
import pytest

from sqlfluff.core import FluffConfig, Lexer, Linter
from sqlfluff.core.errors import SQLTemplaterSkipFile
from sqlfluff.core.errors import SQLFluffSkipFile
from sqlfluff_templater_dbt.templater import DBT_VERSION_TUPLE
from test.fixtures.dbt.templater import ( # noqa: F401
DBT_FLUFF_CONFIG,
Expand Down Expand Up @@ -268,7 +268,7 @@ def test__templater_dbt_skips_file(
path, reason, dbt_templater, project_dir # noqa: F811
):
"""A disabled dbt model should be skipped."""
with pytest.raises(SQLTemplaterSkipFile, match=reason):
with pytest.raises(SQLFluffSkipFile, match=reason):
dbt_templater.process(
in_str="",
fname=os.path.join(project_dir, path),
Expand Down
11 changes: 7 additions & 4 deletions src/sqlfluff/core/default_config.cfg
Expand Up @@ -35,10 +35,13 @@ sql_file_exts = .sql,.sql.j2,.dml,.ddl
# Note altering this is NOT RECOMMENDED as can corrupt SQL
fix_even_unparsable = False
# Very large files can make the parser effectively hang.
# This limit skips files over a certain character length
# and warns the user what has happened.
# Set this to 0 to disable.
large_file_skip_char_limit = 20000
# The more efficient check is the _byte_ limit check which
# is enabled by default. The previous _character_ limit check
# is still present for backward compatability. This will be
# removed in a future version.
# Set either to 0 to disable.
large_file_skip_char_limit = 0
large_file_skip_byte_limit = 20000
# CPU processes to use while linting.
# If positive, just implies number of processes.
# If negative or zero, implies number_of_cpus - specifed_number.
Expand Down
2 changes: 1 addition & 1 deletion src/sqlfluff/core/errors.py
Expand Up @@ -106,7 +106,7 @@ class SQLTemplaterError(SQLBaseError):
_identifier = "templating"


class SQLTemplaterSkipFile(RuntimeError):
class SQLFluffSkipFile(RuntimeError):
"""An error returned from a templater to skip a file."""

pass
Expand Down
27 changes: 22 additions & 5 deletions src/sqlfluff/core/linter/linter.py
Expand Up @@ -15,7 +15,7 @@
SQLLexError,
SQLLintError,
SQLParseError,
SQLTemplaterSkipFile,
SQLFluffSkipFile,
)
from sqlfluff.core.parser import Lexer, Parser, RegexLexer
from sqlfluff.core.file_helpers import get_encoding
Expand Down Expand Up @@ -105,6 +105,19 @@ def _load_raw_file_and_config(
"""Load a raw file and the associated config."""
file_config = root_config.make_child_from_path(fname)
encoding = get_encoding(fname=fname, config=file_config)
# Check file size before loading.
limit = file_config.get("large_file_skip_byte_limit")
if limit:
# Get the file size
file_size = os.path.getsize(fname)
if file_size > limit:
raise SQLFluffSkipFile(
f"Length of file {fname!r} is {file_size} bytes which is over "
f"the limit of {limit} bytes. Skipping to avoid parser lock. "
"Users can increase this limit in their config by setting the "
"'large_file_skip_byte_limit' value, or disable by setting it "
"to zero."
)
with open(fname, encoding=encoding, errors="backslashreplace") as target_file:
raw_file = target_file.read()
# Scan the raw file for config commands.
Expand Down Expand Up @@ -784,7 +797,7 @@ def render_string(
templated_file, templater_violations = self.templater.process(
in_str=in_str, fname=fname, config=config, formatter=self.formatter
)
except SQLTemplaterSkipFile as s: # pragma: no cover
except SQLFluffSkipFile as s: # pragma: no cover
linter_logger.warning(str(s))
templated_file = None
templater_violations = []
Expand Down Expand Up @@ -1181,9 +1194,13 @@ def parse_path(
if self.formatter:
self.formatter.dispatch_path(path)
# Load the file with the config and yield the result.
raw_file, config, encoding = self._load_raw_file_and_config(
fname, self.config
)
try:
raw_file, config, encoding = self._load_raw_file_and_config(
fname, self.config
)
except SQLFluffSkipFile as s:
linter_logger.warning(str(s))
continue
yield self.parse_string(
raw_file,
fname=fname,
Expand Down
6 changes: 5 additions & 1 deletion src/sqlfluff/core/linter/runner.py
Expand Up @@ -18,6 +18,7 @@
from typing import Callable, List, Tuple, Iterator

from sqlfluff.core import FluffConfig, Linter
from sqlfluff.core.errors import SQLFluffSkipFile
from sqlfluff.core.linter import LintedFile

linter_logger: logging.Logger = logging.getLogger("sqlfluff.linter")
Expand All @@ -41,7 +42,10 @@ def iter_rendered(self, fnames: List[str]) -> Iterator[Tuple]:
for fname in self.linter.templater.sequence_files(
fnames, config=self.config, formatter=self.linter.formatter
):
yield fname, self.linter.render_file(fname, self.config)
try:
yield fname, self.linter.render_file(fname, self.config)
except SQLFluffSkipFile as s:
linter_logger.warning(str(s))

def iter_partials(
self,
Expand Down
18 changes: 12 additions & 6 deletions src/sqlfluff/core/templaters/base.py
Expand Up @@ -5,7 +5,7 @@
from typing import Dict, Iterator, List, Tuple, Optional, NamedTuple, Iterable
from sqlfluff.core.config import FluffConfig

from sqlfluff.core.errors import SQLTemplaterSkipFile
from sqlfluff.core.errors import SQLFluffSkipFile

# Instantiate the templater logger
templater_logger = logging.getLogger("sqlfluff.templater")
Expand Down Expand Up @@ -37,8 +37,14 @@ def _wrapped(
):
if config:
limit = config.get("large_file_skip_char_limit")
if limit:
templater_logger.warning(
"The config value large_file_skip_char_limit was found set. "
"This feature will be removed in a future release, please "
"use the more efficient 'large_file_skip_byte_limit' instead."
)
if limit and len(in_str) > limit:
raise SQLTemplaterSkipFile(
raise SQLFluffSkipFile(
f"Length of file {fname!r} is over {limit} characters. "
"Skipping to avoid parser lock. Users can increase this limit "
"in their config by setting the 'large_file_skip_char_limit' "
Expand Down Expand Up @@ -162,8 +168,8 @@ def __init__(
for idx, tfs in enumerate(self.sliced_file):
if previous_slice:
if tfs.templated_slice.start != previous_slice.templated_slice.stop:
raise SQLTemplaterSkipFile( # pragma: no cover
"Templated slices found to be non contigious. "
raise SQLFluffSkipFile( # pragma: no cover
"Templated slices found to be non-contiguous. "
f"{tfs.templated_slice} (starting"
f" {self.templated_str[tfs.templated_slice]!r})"
f" does not follow {previous_slice.templated_slice} "
Expand All @@ -173,14 +179,14 @@ def __init__(
)
else:
if tfs.templated_slice.start != 0:
raise SQLTemplaterSkipFile( # pragma: no cover
raise SQLFluffSkipFile( # pragma: no cover
"First Templated slice not started at index 0 "
f"(found slice {tfs.templated_slice})"
)
previous_slice = tfs
if self.sliced_file and templated_str is not None:
if tfs.templated_slice.stop != len(templated_str):
raise SQLTemplaterSkipFile( # pragma: no cover
raise SQLFluffSkipFile( # pragma: no cover
"Length of templated file mismatch with final slice: "
f"{len(templated_str)} != {tfs.templated_slice.stop}."
)
Expand Down
46 changes: 45 additions & 1 deletion test/core/linter_test.py
Expand Up @@ -10,7 +10,13 @@
from sqlfluff.core import Linter, FluffConfig
from sqlfluff.core.dialects import load_raw_dialect
from sqlfluff.core.linter import runner
from sqlfluff.core.errors import SQLLexError, SQLBaseError, SQLLintError, SQLParseError
from sqlfluff.core.errors import (
SQLFluffSkipFile,
SQLLexError,
SQLBaseError,
SQLLintError,
SQLParseError,
)
from sqlfluff.cli.formatters import OutputStreamFormatter
from sqlfluff.cli.outputstream import make_output_stream
from sqlfluff.core.linter import LintingResult, NoQaDirective
Expand Down Expand Up @@ -75,6 +81,44 @@ def test__linter__path_from_paths__file():
assert normalise_paths(paths) == {"test.fixtures.linter.indentation_errors.sql"}


@pytest.mark.parametrize("filesize,raises_skip", [(0, False), (5, True), (2000, False)])
def test__linter__skip_large_bytes(filesize, raises_skip):
"""Test extracting paths from a file path."""
config = FluffConfig(
overrides={"large_file_skip_byte_limit": filesize, "dialect": "ansi"}
)
# First check the function directly
if raises_skip:
with pytest.raises(SQLFluffSkipFile) as excinfo:
Linter._load_raw_file_and_config(
"test/fixtures/linter/indentation_errors.sql", config
)
assert "Skipping" in str(excinfo.value)
assert f"over the limit of {filesize}" in str(excinfo.value)
# If NOT raises, then we'll catch the raise an error and the test will fail.

# Then check that it either is or isn't linted appropriately via lint_paths.
lntr = Linter(config)
result = lntr.lint_paths(
("test/fixtures/linter/indentation_errors.sql",),
)
if raises_skip:
assert not result.get_violations()
else:
assert result.get_violations()

# Same again via parse_path, which is the other entry point.
result = list(
lntr.parse_path(
"test/fixtures/linter/indentation_errors.sql",
)
)
if raises_skip:
assert not result
else:
assert result


def test__linter__path_from_paths__not_exist():
"""Test extracting paths from a file path."""
lntr = Linter()
Expand Down
4 changes: 2 additions & 2 deletions test/core/templaters/jinja_test.py
Expand Up @@ -4,7 +4,7 @@
from typing import List, NamedTuple

import pytest
from sqlfluff.core.errors import SQLTemplaterSkipFile
from sqlfluff.core.errors import SQLFluffSkipFile

from sqlfluff.core.templaters import JinjaTemplater
from sqlfluff.core.templaters.base import RawFileSlice, TemplatedFile
Expand Down Expand Up @@ -1218,7 +1218,7 @@ def test__templater_jinja_large_file_check():
),
)
# Finally check we raise a skip exception when config is set low.
with pytest.raises(SQLTemplaterSkipFile) as excinfo:
with pytest.raises(SQLFluffSkipFile) as excinfo:
JinjaTemplater().process(
in_str="SELECT 1",
fname="<string>",
Expand Down
4 changes: 2 additions & 2 deletions test/core/templaters/python_test.py
Expand Up @@ -2,7 +2,7 @@

import pytest
import logging
from sqlfluff.core.errors import SQLTemplaterSkipFile
from sqlfluff.core.errors import SQLFluffSkipFile

from sqlfluff.core.templaters import PythonTemplater
from sqlfluff.core import SQLTemplaterError, FluffConfig
Expand Down Expand Up @@ -485,7 +485,7 @@ def test__templater_python_large_file_check():
# First check we can process the file normally without config.
PythonTemplater().process(in_str="SELECT 1", fname="<string>")
# Then check we raise a skip exception when config is set low.
with pytest.raises(SQLTemplaterSkipFile) as excinfo:
with pytest.raises(SQLFluffSkipFile) as excinfo:
PythonTemplater().process(
in_str="SELECT 1",
fname="<string>",
Expand Down

0 comments on commit 568e698

Please sign in to comment.