Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Read shebangs to discover python files #930

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,16 @@ Options:
later. [default: /(\.eggs|\.git|\.hg|\.mypy
_cache|\.nox|\.tox|\.venv|_build|buck-
out|build|dist)/]
--check-shebang Whether or not to perform shebang check in
order to discover python files. The regex
can be overridden with --shebang option.
This might be slow with large number of
files in the directories to check.
Exclusions still apply.
--shebang TEXT A regular expression that will be matched to
the first line of each file in provided
directories. [default: ^#!/usr/bin/env
python]
-q, --quiet Don't emit non-error messages to stderr.
Errors are still emitted, silence those with
2>/dev/null.
Expand Down
79 changes: 64 additions & 15 deletions black.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,9 +58,9 @@
r"/(\.eggs|\.git|\.hg|\.mypy_cache|\.nox|\.tox|\.venv|_build|buck-out|build|dist)/"
)
DEFAULT_INCLUDES = r"\.pyi?$"
DEFAULT_SHEBANGS = r"^#!/usr/bin/env python"
CACHE_DIR = Path(user_cache_dir("black", version=__version__))


# types
FileContent = str
Encoding = str
Expand Down Expand Up @@ -337,6 +337,25 @@ def read_pyproject_toml(
),
show_default=True,
)
@click.option(
"--check-shebang",
is_flag=True,
help=(
"Whether or not to perform shebang check in order to discover python files. "
"The regex can be overridden with --shebang option. This might be slow with "
"large number of files in the directories to check. Exclusions still apply."
),
)
@click.option(
"--shebang",
type=str,
default=DEFAULT_SHEBANGS,
help=(
"A regular expression that will be matched to the first line of each file in "
"provided directories."
),
show_default=True,
)
@click.option(
"-q",
"--quiet",
Expand Down Expand Up @@ -389,6 +408,8 @@ def main(
verbose: bool,
include: str,
exclude: str,
check_shebang: bool,
shebang: str,
src: Tuple[str],
config: Optional[str],
) -> None:
Expand Down Expand Up @@ -420,16 +441,18 @@ def main(
if code is not None:
print(format_str(code, mode=mode))
ctx.exit(0)
try:
include_regex = re_compile_maybe_verbose(include)
except re.error:
err(f"Invalid regular expression for include given: {include!r}")
ctx.exit(2)
try:
exclude_regex = re_compile_maybe_verbose(exclude)
except re.error:
err(f"Invalid regular expression for exclude given: {exclude!r}")
ctx.exit(2)

def get_regex(raw_string: str, name: str) -> Pattern[str]:
    """Compile `raw_string` with `re_compile_maybe_verbose` and return it.

    `name` is the option name interpolated into the error message. When
    compilation raises `re.error`, the message is emitted through `err`
    and `ctx.exit(2)` is called.
    """
    try:
        return re_compile_maybe_verbose(raw_string)
    except re.error:
        err(f"Invalid regular expression for {name} given: {raw_string!r}")
        # NOTE(review): no explicit return follows; this presumably relies on
        # `ctx.exit` raising rather than returning -- confirm against click.
        ctx.exit(2)

include_regex = get_regex(include, "include")
exclude_regex = get_regex(exclude, "exclude")
shebang_regex = get_regex(shebang, "shebang")

report = Report(check=check, quiet=quiet, verbose=verbose)
root = find_project_root(src)
sources: Set[Path] = set()
Expand All @@ -438,7 +461,15 @@ def main(
p = Path(s)
if p.is_dir():
sources.update(
gen_python_files_in_dir(p, root, include_regex, exclude_regex, report)
gen_python_files_in_dir(
p,
root,
include_regex,
exclude_regex,
check_shebang,
shebang_regex,
report,
)
)
elif p.is_file() or s == "-":
# if a file was explicitly given, we don't care about its extension
Expand Down Expand Up @@ -3406,15 +3437,31 @@ def get_imports_from_children(children: List[LN]) -> Generator[str, None, None]:
return imports


def matches_shebang(path: Path, shebang: Pattern[str]) -> bool:
    """Return True if shebang of file in `path` matches `shebang` regex.

    Only the start of the first line is inspected: at most
    `max_shebang_length` characters are read, so a longer first line is
    matched against its truncated form. The file is always decoded as UTF-8
    so that the result does not depend on the locale of the machine running
    the check.

    Files that cannot be opened or decoded are treated as non-matching
    instead of raising.
    """
    max_shebang_length = 127  # default value in Linux kernel
    try:
        # Pass the encoding explicitly: the platform default varies with the
        # locale, which would make file discovery differ between machines.
        with open(path, encoding="utf-8") as f:
            first_line = f.readline(max_shebang_length)
    except (UnicodeDecodeError, OSError):
        return False

    return bool(shebang.match(first_line))


def gen_python_files_in_dir(
path: Path,
root: Path,
include: Pattern[str],
exclude: Pattern[str],
check_shebang: bool,
shebang: Pattern[str],
report: "Report",
) -> Iterator[Path]:
"""Generate all files under `path` whose paths are not excluded by the
`exclude` regex, but are included by the `include` regex.
`exclude` regex, but are included by the `include` regex. When
`check_shebang` is True, also include files that match the requested
`shebang` regex.

Symbolic links pointing outside of the `root` directory are ignored.

Expand All @@ -3441,11 +3488,13 @@ def gen_python_files_in_dir(
continue

if child.is_dir():
yield from gen_python_files_in_dir(child, root, include, exclude, report)
yield from gen_python_files_in_dir(
child, root, include, exclude, check_shebang, shebang, report
)

elif child.is_file():
include_match = include.search(normalized_path)
if include_match:
if include_match or check_shebang and matches_shebang(child, shebang):
yield child


Expand Down
2 changes: 2 additions & 0 deletions tests/data/shebang_tests/custom_shebang
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
#!/path/to/my/python -O
''
2 changes: 2 additions & 0 deletions tests/data/shebang_tests/decoding_error
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
�����
''
2 changes: 2 additions & 0 deletions tests/data/shebang_tests/default_shebang
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
#!/usr/bin/env python
''
2 changes: 2 additions & 0 deletions tests/data/shebang_tests/too_long_shebang
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
#!/my/ultra/long/path/to/custom/interpreter/that/exceeds/the/default/shebang/length/defined/in/linux/kernel/at/compile/time/python
''
80 changes: 75 additions & 5 deletions tests/test_black.py
Original file line number Diff line number Diff line change
Expand Up @@ -1355,6 +1355,8 @@ def test_include_exclude(self) -> None:
path = THIS_DIR / "data" / "include_exclude_tests"
include = re.compile(r"\.pyi?$")
exclude = re.compile(r"/exclude/|/\.definitely_exclude/")
check_shebang = False
empty = re.compile(r"")
report = black.Report()
sources: List[Path] = []
expected = [
Expand All @@ -1363,12 +1365,15 @@ def test_include_exclude(self) -> None:
]
this_abs = THIS_DIR.resolve()
sources.extend(
black.gen_python_files_in_dir(path, this_abs, include, exclude, report)
black.gen_python_files_in_dir(
path, this_abs, include, exclude, check_shebang, empty, report
)
)
self.assertEqual(sorted(expected), sorted(sources))

def test_empty_include(self) -> None:
path = THIS_DIR / "data" / "include_exclude_tests"
check_shebang = False
report = black.Report()
empty = re.compile(r"")
sources: List[Path] = []
Expand All @@ -1386,13 +1391,20 @@ def test_empty_include(self) -> None:
this_abs = THIS_DIR.resolve()
sources.extend(
black.gen_python_files_in_dir(
path, this_abs, empty, re.compile(black.DEFAULT_EXCLUDES), report
path,
this_abs,
empty,
re.compile(black.DEFAULT_EXCLUDES),
check_shebang,
empty,
report,
)
)
self.assertEqual(sorted(expected), sorted(sources))

def test_empty_exclude(self) -> None:
path = THIS_DIR / "data" / "include_exclude_tests"
check_shebang = False
report = black.Report()
empty = re.compile(r"")
sources: List[Path] = []
Expand All @@ -1407,7 +1419,13 @@ def test_empty_exclude(self) -> None:
this_abs = THIS_DIR.resolve()
sources.extend(
black.gen_python_files_in_dir(
path, this_abs, re.compile(black.DEFAULT_INCLUDES), empty, report
path,
this_abs,
re.compile(black.DEFAULT_INCLUDES),
empty,
check_shebang,
empty,
report,
)
)
self.assertEqual(sorted(expected), sorted(sources))
Expand All @@ -1416,6 +1434,48 @@ def test_invalid_include_exclude(self) -> None:
for option in ["--include", "--exclude"]:
self.invokeBlack(["-", option, "**()(!!*)"], exit_code=2)

@event_loop(close=False)
def test_check_shebang(self) -> None:
    """With `--check-shebang` and the default regex, only the fixture whose
    first line is `#!/usr/bin/env python` is reported.
    """
    fixtures = THIS_DIR / "data" / "shebang_tests"
    cli_args = [
        "--check",
        "--check-shebang",
        str(fixtures),
        "--config",
        str(THIS_DIR / "empty.toml"),
    ]
    cli = BlackRunner()
    cli.invoke(black.main, cli_args)
    stderr_text = cli.stderr_bytes.decode()

    self.assertIn(str(fixtures / "default_shebang"), stderr_text)
    # The undecodable fixture must be skipped silently, not reported.
    self.assertNotIn(str(fixtures / "decoding_error"), stderr_text)
    self.assertIn("\n1 file would be reformatted", stderr_text)

@event_loop(close=False)
def test_custom_shebang_regex(self) -> None:
    """A regex given with `--shebang` replaces the default one: both the
    default and the custom interpreter fixtures are reported, while the
    over-long and the undecodable fixtures are not.
    """
    fixtures = THIS_DIR / "data" / "shebang_tests"
    cli_args = [
        "--check",
        "--check-shebang",
        "--shebang",
        "^#!.*python",
        str(fixtures),
        "--config",
        str(THIS_DIR / "empty.toml"),
    ]
    cli = BlackRunner()
    cli.invoke(black.main, cli_args)
    stderr_text = cli.stderr_bytes.decode()

    for discovered in ("default_shebang", "custom_shebang"):
        self.assertIn(str(fixtures / discovered), stderr_text)
    for skipped in ("too_long_shebang", "decoding_error"):
        self.assertNotIn(str(fixtures / skipped), stderr_text)
    self.assertIn("\n2 files would be reformatted", stderr_text)

def test_matches_shebang_handles_os_errors(self) -> None:
    """A file that cannot be opened is reported as not matching, even for
    a regex that would match any first line.
    """
    match_anything = re.compile("")
    unreadable = THIS_DIR / "some_file"
    failing_open = MagicMock(side_effect=PermissionError)

    with patch("builtins.open", failing_open):
        matched = black.matches_shebang(unreadable, match_anything)

    self.assertFalse(matched)

def test_preserves_line_endings(self) -> None:
with TemporaryDirectory() as workspace:
test_file = Path(workspace) / "test.py"
Expand Down Expand Up @@ -1451,14 +1511,20 @@ def test_symlink_out_of_root_directory(self) -> None:
child = MagicMock()
include = re.compile(black.DEFAULT_INCLUDES)
exclude = re.compile(black.DEFAULT_EXCLUDES)
check_shebang = False
shebang = re.compile(black.DEFAULT_SHEBANGS)
report = black.Report()
# `child` should behave like a symlink which resolved path is clearly
# outside of the `root` directory.
path.iterdir.return_value = [child]
child.resolve.return_value = Path("/a/b/c")
child.is_symlink.return_value = True
try:
list(black.gen_python_files_in_dir(path, root, include, exclude, report))
list(
black.gen_python_files_in_dir(
path, root, include, exclude, check_shebang, shebang, report
)
)
except ValueError as ve:
self.fail(f"`get_python_files_in_dir()` failed: {ve}")
path.iterdir.assert_called_once()
Expand All @@ -1468,7 +1534,11 @@ def test_symlink_out_of_root_directory(self) -> None:
# outside of the `root` directory.
child.is_symlink.return_value = False
with self.assertRaises(ValueError):
list(black.gen_python_files_in_dir(path, root, include, exclude, report))
list(
black.gen_python_files_in_dir(
path, root, include, exclude, check_shebang, shebang, report
)
)
path.iterdir.assert_called()
self.assertEqual(path.iterdir.call_count, 2)
child.resolve.assert_called()
Expand Down