diff --git a/README.md b/README.md index 7aa988a0da8..bb98ebc9607 100644 --- a/README.md +++ b/README.md @@ -117,6 +117,16 @@ Options: later. [default: /(\.eggs|\.git|\.hg|\.mypy _cache|\.nox|\.tox|\.venv|_build|buck- out|build|dist)/] + --check-shebang Whether or not to perform shebang check in + order to discover python files. The regex + can be overridden with --shebang option. + This might be slow with large number of + files in the directories to check. + Exclusions still apply. + --shebang TEXT A regular expression that will be matched to + the first line of each file in provided + directories. [default: ^#!/usr/bin/env + python] -q, --quiet Don't emit non-error messages to stderr. Errors are still emitted, silence those with 2>/dev/null. diff --git a/black.py b/black.py index 957e51a939f..16d1d5921c3 100644 --- a/black.py +++ b/black.py @@ -58,9 +58,9 @@ r"/(\.eggs|\.git|\.hg|\.mypy_cache|\.nox|\.tox|\.venv|_build|buck-out|build|dist)/" ) DEFAULT_INCLUDES = r"\.pyi?$" +DEFAULT_SHEBANGS = r"^#!/usr/bin/env python" CACHE_DIR = Path(user_cache_dir("black", version=__version__)) - # types FileContent = str Encoding = str @@ -337,6 +337,25 @@ def read_pyproject_toml( ), show_default=True, ) +@click.option( + "--check-shebang", + is_flag=True, + help=( + "Whether or not to perform shebang check in order to discover python files. " + "The regex can be overridden with --shebang option. This might be slow with " + "large number of files in the directories to check. Exclusions still apply." + ), +) +@click.option( + "--shebang", + type=str, + default=DEFAULT_SHEBANGS, + help=( + "A regular expression that will be matched to the first line of each file in " + "provided directories." 
+ ), + show_default=True, +) @click.option( "-q", "--quiet", @@ -389,6 +408,8 @@ def main( verbose: bool, include: str, exclude: str, + check_shebang: bool, + shebang: str, src: Tuple[str], config: Optional[str], ) -> None: @@ -420,16 +441,18 @@ def main( if code is not None: print(format_str(code, mode=mode)) ctx.exit(0) - try: - include_regex = re_compile_maybe_verbose(include) - except re.error: - err(f"Invalid regular expression for include given: {include!r}") - ctx.exit(2) - try: - exclude_regex = re_compile_maybe_verbose(exclude) - except re.error: - err(f"Invalid regular expression for exclude given: {exclude!r}") - ctx.exit(2) + + def get_regex(raw_string: str, name: str) -> Pattern[str]: + try: + return re_compile_maybe_verbose(raw_string) + except re.error: + err(f"Invalid regular expression for {name} given: {raw_string!r}") + ctx.exit(2) + + include_regex = get_regex(include, "include") + exclude_regex = get_regex(exclude, "exclude") + shebang_regex = get_regex(shebang, "shebang") + report = Report(check=check, quiet=quiet, verbose=verbose) root = find_project_root(src) sources: Set[Path] = set() @@ -438,7 +461,15 @@ def main( p = Path(s) if p.is_dir(): sources.update( - gen_python_files_in_dir(p, root, include_regex, exclude_regex, report) + gen_python_files_in_dir( + p, + root, + include_regex, + exclude_regex, + check_shebang, + shebang_regex, + report, + ) ) elif p.is_file() or s == "-": # if a file was explicitly given, we don't care about its extension @@ -3406,15 +3437,31 @@ def get_imports_from_children(children: List[LN]) -> Generator[str, None, None]: return imports +def matches_shebang(path: Path, shebang: Pattern[str]) -> bool: + """Return True if the first line of the file at `path` matches `shebang`.""" + max_shebang_length = 127 # default value in Linux kernel + try: + with open(path, encoding="utf-8") as f: # Python source defaults to UTF-8 + first_line = f.readline(max_shebang_length) + except (UnicodeDecodeError, OSError): + return False + + return bool(shebang.match(first_line)) + + def 
gen_python_files_in_dir( path: Path, root: Path, include: Pattern[str], exclude: Pattern[str], + check_shebang: bool, + shebang: Pattern[str], report: "Report", ) -> Iterator[Path]: """Generate all files under `path` whose paths are not excluded by the - `exclude` regex, but are included by the `include` regex. + `exclude` regex, but are included by the `include` regex. When + `check_shebang` is True, also include files that match the requested + `shebang` regex. Symbolic links pointing outside of the `root` directory are ignored. @@ -3441,11 +3488,13 @@ def gen_python_files_in_dir( continue if child.is_dir(): - yield from gen_python_files_in_dir(child, root, include, exclude, report) + yield from gen_python_files_in_dir( + child, root, include, exclude, check_shebang, shebang, report + ) elif child.is_file(): include_match = include.search(normalized_path) - if include_match: + if include_match or check_shebang and matches_shebang(child, shebang): yield child diff --git a/tests/data/shebang_tests/custom_shebang b/tests/data/shebang_tests/custom_shebang new file mode 100644 index 00000000000..d66b1f7b322 --- /dev/null +++ b/tests/data/shebang_tests/custom_shebang @@ -0,0 +1,2 @@ +#!/path/to/my/python -O +'' diff --git a/tests/data/shebang_tests/decoding_error b/tests/data/shebang_tests/decoding_error new file mode 100644 index 00000000000..e646bf7abb4 --- /dev/null +++ b/tests/data/shebang_tests/decoding_error @@ -0,0 +1,2 @@ + +'' diff --git a/tests/data/shebang_tests/default_shebang b/tests/data/shebang_tests/default_shebang new file mode 100644 index 00000000000..866bccded1a --- /dev/null +++ b/tests/data/shebang_tests/default_shebang @@ -0,0 +1,2 @@ +#!/usr/bin/env python +'' diff --git a/tests/data/shebang_tests/too_long_shebang b/tests/data/shebang_tests/too_long_shebang new file mode 100644 index 00000000000..ebd43d314b7 --- /dev/null +++ b/tests/data/shebang_tests/too_long_shebang @@ -0,0 +1,2 @@ 
+#!/my/ultra/long/path/to/custom/interpreter/that/exceeds/the/default/shebang/length/defined/in/linux/kernel/at/compile/time/python +'' diff --git a/tests/test_black.py b/tests/test_black.py index 66a0761ecf8..27503f7ce6a 100644 --- a/tests/test_black.py +++ b/tests/test_black.py @@ -1355,6 +1355,8 @@ def test_include_exclude(self) -> None: path = THIS_DIR / "data" / "include_exclude_tests" include = re.compile(r"\.pyi?$") exclude = re.compile(r"/exclude/|/\.definitely_exclude/") + check_shebang = False + empty = re.compile(r"") report = black.Report() sources: List[Path] = [] expected = [ @@ -1363,12 +1365,15 @@ def test_include_exclude(self) -> None: ] this_abs = THIS_DIR.resolve() sources.extend( - black.gen_python_files_in_dir(path, this_abs, include, exclude, report) + black.gen_python_files_in_dir( + path, this_abs, include, exclude, check_shebang, empty, report + ) ) self.assertEqual(sorted(expected), sorted(sources)) def test_empty_include(self) -> None: path = THIS_DIR / "data" / "include_exclude_tests" + check_shebang = False report = black.Report() empty = re.compile(r"") sources: List[Path] = [] @@ -1386,13 +1391,20 @@ def test_empty_include(self) -> None: this_abs = THIS_DIR.resolve() sources.extend( black.gen_python_files_in_dir( - path, this_abs, empty, re.compile(black.DEFAULT_EXCLUDES), report + path, + this_abs, + empty, + re.compile(black.DEFAULT_EXCLUDES), + check_shebang, + empty, + report, ) ) self.assertEqual(sorted(expected), sorted(sources)) def test_empty_exclude(self) -> None: path = THIS_DIR / "data" / "include_exclude_tests" + check_shebang = False report = black.Report() empty = re.compile(r"") sources: List[Path] = [] @@ -1407,7 +1419,13 @@ def test_empty_exclude(self) -> None: this_abs = THIS_DIR.resolve() sources.extend( black.gen_python_files_in_dir( - path, this_abs, re.compile(black.DEFAULT_INCLUDES), empty, report + path, + this_abs, + re.compile(black.DEFAULT_INCLUDES), + empty, + check_shebang, + empty, + report, ) ) 
self.assertEqual(sorted(expected), sorted(sources)) @@ -1416,6 +1434,48 @@ def test_invalid_include_exclude(self) -> None: for option in ["--include", "--exclude"]: self.invokeBlack(["-", option, "**()(!!*)"], exit_code=2) + @event_loop(close=False) + def test_check_shebang(self) -> None: + test_path = THIS_DIR / "data" / "shebang_tests" + empty_config = ["--config", str(THIS_DIR / "empty.toml")] + runner = BlackRunner() + + runner.invoke( + black.main, ["--check", "--check-shebang", str(test_path)] + empty_config + ) + output = runner.stderr_bytes.decode() + + self.assertIn(str(test_path / "default_shebang"), output) + self.assertNotIn(str(test_path / "decoding_error"), output) + self.assertIn("\n1 file would be reformatted", output) + + @event_loop(close=False) + def test_custom_shebang_regex(self) -> None: + test_path = THIS_DIR / "data" / "shebang_tests" + empty_config = ["--config", str(THIS_DIR / "empty.toml")] + runner = BlackRunner() + + runner.invoke( + black.main, + ["--check", "--check-shebang", "--shebang", "^#!.*python", str(test_path)] + + empty_config, + ) + output = runner.stderr_bytes.decode() + + self.assertIn(str(test_path / "default_shebang"), output) + self.assertIn(str(test_path / "custom_shebang"), output) + self.assertNotIn(str(test_path / "too_long_shebang"), output) + self.assertNotIn(str(test_path / "decoding_error"), output) + self.assertIn("\n2 files would be reformatted", output) + + def test_matches_shebang_handles_os_errors(self) -> None: + raising_mock = MagicMock(side_effect=PermissionError) + + with patch("builtins.open", raising_mock): + self.assertFalse( + black.matches_shebang(THIS_DIR / "some_file", re.compile("")) + ) + def test_preserves_line_endings(self) -> None: with TemporaryDirectory() as workspace: test_file = Path(workspace) / "test.py" @@ -1451,6 +1511,8 @@ def test_symlink_out_of_root_directory(self) -> None: child = MagicMock() include = re.compile(black.DEFAULT_INCLUDES) exclude = 
re.compile(black.DEFAULT_EXCLUDES) + check_shebang = False + shebang = re.compile(black.DEFAULT_SHEBANGS) report = black.Report() # `child` should behave like a symlink which resolved path is clearly # outside of the `root` directory. @@ -1458,7 +1520,11 @@ def test_symlink_out_of_root_directory(self) -> None: child.resolve.return_value = Path("/a/b/c") child.is_symlink.return_value = True try: - list(black.gen_python_files_in_dir(path, root, include, exclude, report)) + list( + black.gen_python_files_in_dir( + path, root, include, exclude, check_shebang, shebang, report + ) + ) except ValueError as ve: self.fail(f"`get_python_files_in_dir()` failed: {ve}") path.iterdir.assert_called_once() @@ -1468,7 +1534,11 @@ def test_symlink_out_of_root_directory(self) -> None: # outside of the `root` directory. child.is_symlink.return_value = False with self.assertRaises(ValueError): - list(black.gen_python_files_in_dir(path, root, include, exclude, report)) + list( + black.gen_python_files_in_dir( + path, root, include, exclude, check_shebang, shebang, report + ) + ) path.iterdir.assert_called() self.assertEqual(path.iterdir.call_count, 2) child.resolve.assert_called()