diff --git a/.pre-commit-hooks.yaml b/.pre-commit-hooks.yaml index 3e4dc9ea..2d0457e9 100644 --- a/.pre-commit-hooks.yaml +++ b/.pre-commit-hooks.yaml @@ -100,6 +100,12 @@ entry: debug-statement-hook language: python types: [python] +- id: destroyed-symlinks + name: Detect Destroyed Symlinks + description: Detects symlinks which are changed to regular files with a content of a path which that symlink was pointing to. + entry: destroyed-symlinks + language: python + types: [file] - id: detect-aws-credentials name: Detect AWS Credentials description: Detects *your* aws credentials from the aws cli credentials file diff --git a/README.md b/README.md index 3552721f..f3c15323 100644 --- a/README.md +++ b/README.md @@ -82,6 +82,15 @@ Attempts to load all yaml files to verify syntax. #### `debug-statements` Check for debugger imports and py37+ `breakpoint()` calls in python source. +#### `destroyed-symlinks` +Detects symlinks which are changed to regular files with a content of a path which that symlink was pointing to. +This usually happens on Windows in case when user without a permission for creating symlinks clones repository with symlinks. +The following argument is available: +- `--autofix` - unstage detected broken symlinks so they won't be commited. + Note: this option won't fix the symlinks on the filesystem because, + if the symlink has been destroyed in the first place, there is some reason for that + (see above) which this hook most likely won't be able to fix. + #### `detect-aws-credentials` Checks for the existence of AWS secrets that you have set up with the AWS CLI. The following arguments are available: diff --git a/pre_commit_hooks/check_executables_have_shebangs.py b/pre_commit_hooks/check_executables_have_shebangs.py index a02d2a9c..2d2bd7df 100644 --- a/pre_commit_hooks/check_executables_have_shebangs.py +++ b/pre_commit_hooks/check_executables_have_shebangs.py @@ -8,18 +8,11 @@ from typing import Set from pre_commit_hooks.util import cmd_output +from pre_commit_hooks.util import zsplit EXECUTABLE_VALUES = frozenset(('1', '3', '5', '7')) -def zsplit(s: str) -> List[str]: - s = s.strip('\0') - if s: - return s.split('\0') - else: - return [] - - def check_executables(paths: List[str]) -> int: if sys.platform == 'win32': # pragma: win32 cover return _check_git_filemode(paths) diff --git a/pre_commit_hooks/destroyed_symlinks.py b/pre_commit_hooks/destroyed_symlinks.py new file mode 100755 index 00000000..34c36aac --- /dev/null +++ b/pre_commit_hooks/destroyed_symlinks.py @@ -0,0 +1,109 @@ +import argparse +import itertools +import shlex +import subprocess +from typing import List +from typing import Optional +from typing import Sequence + +from pre_commit_hooks.util import cmd_output +from pre_commit_hooks.util import zsplit + +ORDINARY_CHANGED_ENTRIES_MARKER = '1' +PERMS_LINK = '120000' +PERMS_NONEXIST = '000000' + + +def normalize_content(content: bytes) -> bytes: + return content.rstrip() + + +def find_destroyed_symlinks(files: Sequence[str]) -> Sequence[str]: + destroyed_links: List[str] = [] + if not files: + return destroyed_links + for line in zsplit( + cmd_output( + *itertools.chain( + ('git', 'status', '--porcelain=v2', '-z', '--'), + files, + ), + ), + ): + splitted = line.split(' ') + if splitted and splitted[0] == ORDINARY_CHANGED_ENTRIES_MARKER: + # https://git-scm.com/docs/git-status#_changed_tracked_entries + ( + _, _, _, + mode_HEAD, + mode_index, + _, + hash_HEAD, + hash_index, + *path_splitted, + ) = splitted + path = ' '.join(path_splitted) + if all(( + mode_HEAD == PERMS_LINK, + mode_index != PERMS_LINK, + mode_index != PERMS_NONEXIST, + )): + if hash_HEAD == hash_index: + # if old and new hashes are equal, it's not needed to check + # anything more, we've found a destroyed symlink for sure + destroyed_links.append(path) + else: + # if old and new hashes are *not* equal, it doesn't mean + # that everything is OK - new file may be altered + # by something like trailing-whitespace and/or + # mixed-line-ending hooks so we need to go deeper + SIZE_CMD = ('git', 'cat-file', '-s') + size_index = int(cmd_output(*SIZE_CMD, hash_index).strip()) + size_HEAD = int(cmd_output(*SIZE_CMD, hash_HEAD).strip()) + + # in the worst case new file may have CRLF added + # so check content only if new file is bigger + # not more than 2 bytes compared to the old one + if size_index <= size_HEAD + 2: + head_content = normalize_content( + subprocess.check_output( + ('git', 'cat-file', '-p', hash_HEAD), + ), + ) + index_content = normalize_content( + subprocess.check_output( + ('git', 'cat-file', '-p', hash_index), + ), + ) + if head_content == index_content: + destroyed_links.append(path) + return destroyed_links + + +def main(argv: Optional[Sequence[str]] = None) -> int: + parser = argparse.ArgumentParser() + parser.add_argument('filenames', nargs='*', help='Filenames to check.') + args = parser.parse_args(argv) + destroyed_links = find_destroyed_symlinks(files=args.filenames) + if destroyed_links: + print('Destroyed symlinks:') + for destroyed_link in destroyed_links: + print(f'- {destroyed_link}') + print('You should unstage affected files:') + print( + ' git reset HEAD -- {}'.format( + ' '.join(map(shlex.quote, destroyed_links)), + ), + ) + print( + 'And retry commit. As a long term solution ' + 'you may try to explicitly tell git that your ' + 'envionment does not support symlinks:', + ) + print(' git config --local core.symlinks false') + return 1 + return 0 + + +if __name__ == '__main__': + exit(main()) diff --git a/pre_commit_hooks/util.py b/pre_commit_hooks/util.py index e04b0150..4b9b5f5f 100644 --- a/pre_commit_hooks/util.py +++ b/pre_commit_hooks/util.py @@ -1,7 +1,10 @@ import subprocess from typing import Any +from typing import AnyStr +from typing import List from typing import Optional from typing import Set +from typing import Union class CalledProcessError(RuntimeError): @@ -13,7 +16,11 @@ def added_files() -> Set[str]: return set(cmd_output(*cmd).splitlines()) -def cmd_output(*cmd: str, retcode: Optional[int] = 0, **kwargs: Any) -> str: +def cmd_output( + *cmd: Union[str, bytes], + retcode: Optional[int] = 0, + **kwargs: Any, +) -> str: kwargs.setdefault('stdout', subprocess.PIPE) kwargs.setdefault('stderr', subprocess.PIPE) proc = subprocess.Popen(cmd, **kwargs) @@ -22,3 +29,21 @@ def cmd_output(*cmd: str, retcode: Optional[int] = 0, **kwargs: Any) -> str: if retcode is not None and proc.returncode != retcode: raise CalledProcessError(cmd, retcode, proc.returncode, stdout, stderr) return stdout + + +def zsplit(s: AnyStr) -> List[AnyStr]: + if isinstance(s, bytes): + separator = b'\0' + elif isinstance(s, str): + separator = '\0' + else: + raise TypeError( + 'Expected type of argument is str or bytes, got {}'.format( + type(s), + ), + ) + s = s.strip(separator) + if s: + return s.split(separator) + else: + return [] diff --git a/setup.cfg b/setup.cfg index 47b8bb6d..d6047c99 100644 --- a/setup.cfg +++ b/setup.cfg @@ -43,6 +43,7 @@ console_scripts = check-xml = pre_commit_hooks.check_xml:main check-yaml = pre_commit_hooks.check_yaml:main debug-statement-hook = pre_commit_hooks.debug_statement_hook:main + destroyed-symlinks = pre_commit_hooks.destroyed_symlinks:main detect-aws-credentials = pre_commit_hooks.detect_aws_credentials:main detect-private-key = pre_commit_hooks.detect_private_key:main double-quote-string-fixer = pre_commit_hooks.string_fixer:main diff --git a/tests/check_executables_have_shebangs_test.py b/tests/check_executables_have_shebangs_test.py index 7046081f..5703eded 100644 --- a/tests/check_executables_have_shebangs_test.py +++ b/tests/check_executables_have_shebangs_test.py @@ -102,16 +102,6 @@ def test_check_git_filemode_failing(tmpdir): assert check_executables_have_shebangs._check_git_filemode(files) == 1 -@pytest.mark.parametrize('out', ('\0f1\0f2\0', '\0f1\0f2', 'f1\0f2\0')) -def test_check_zsplits_correctly(out): - assert check_executables_have_shebangs.zsplit(out) == ['f1', 'f2'] - - -@pytest.mark.parametrize('out', ('\0\0', '\0', '')) -def test_check_zsplit_returns_empty(out): - assert check_executables_have_shebangs.zsplit(out) == [] - - @pytest.mark.parametrize( ('content', 'mode', 'expected'), ( diff --git a/tests/destroyed_symlinks_test.py b/tests/destroyed_symlinks_test.py new file mode 100644 index 00000000..cd716cc9 --- /dev/null +++ b/tests/destroyed_symlinks_test.py @@ -0,0 +1,89 @@ +import os +import subprocess + +import pytest + +from pre_commit_hooks.destroyed_symlinks import find_destroyed_symlinks +from pre_commit_hooks.destroyed_symlinks import main +from pre_commit_hooks.destroyed_symlinks import normalize_content + +TEST_SYMLINK = 'test_symlink' +TEST_SYMLINK_TARGET = '/doesnt/really/matters' +TEST_FILE = 'test_file' +TEST_FILE_RENAMED = f'{TEST_FILE}_renamed' + + +@pytest.fixture +def repo_with_destroyed_symlink(tmpdir): + source_repo = tmpdir.join('src') + os.makedirs(source_repo, exist_ok=True) + test_repo = tmpdir.join('test') + with source_repo.as_cwd(): + subprocess.check_call(('git', 'init')) + os.symlink(TEST_SYMLINK_TARGET, TEST_SYMLINK) + with open(TEST_FILE, 'w') as f: + print('some random content', file=f) + subprocess.check_call(('git', 'add', '.')) + subprocess.check_call( + ('git', 'commit', '--no-gpg-sign', '-m', 'initial'), + ) + assert b'120000 ' in subprocess.check_output( + ('git', 'cat-file', '-p', 'HEAD^{tree}'), + ) + subprocess.check_call( + ('git', '-c', 'core.symlinks=false', 'clone', source_repo, test_repo), + ) + with test_repo.as_cwd(): + subprocess.check_call( + ('git', 'config', '--local', 'core.symlinks', 'true'), + ) + subprocess.check_call(('git', 'mv', TEST_FILE, TEST_FILE_RENAMED)) + assert not os.path.islink(test_repo.join(TEST_SYMLINK)) + yield test_repo + + +@pytest.mark.parametrize( + ('content', 'result'), + ( + (b'qwer', b'qwer'), + (b'qwer\n', b'qwer'), + (b'qwer \n', b'qwer'), + (b'qwer ', b'qwer'), + (b'qwer \t\r\n', b'qwer'), + ), +) +def test_normalize_content(content: bytes, result: bytes) -> None: + assert normalize_content(content) == result + + +def test_find_destroyed_symlinks(repo_with_destroyed_symlink): + with repo_with_destroyed_symlink.as_cwd(): + assert find_destroyed_symlinks([]) == [] + assert main([]) == 0 + + subprocess.check_call(('git', 'add', TEST_SYMLINK)) + assert find_destroyed_symlinks([TEST_SYMLINK]) == [TEST_SYMLINK] + assert find_destroyed_symlinks([]) == [] + assert main([]) == 0 + assert find_destroyed_symlinks([TEST_FILE_RENAMED, TEST_FILE]) == [] + ALL_STAGED = [TEST_SYMLINK, TEST_FILE_RENAMED] + assert find_destroyed_symlinks(ALL_STAGED) == [TEST_SYMLINK] + assert main(ALL_STAGED) != 0 + + with open(TEST_SYMLINK, 'a') as f: + print(file=f) # add trailing newline + subprocess.check_call(['git', 'add', TEST_SYMLINK]) + assert find_destroyed_symlinks(ALL_STAGED) == [TEST_SYMLINK] + assert main(ALL_STAGED) != 0 + + with open(TEST_SYMLINK, 'w') as f: + print('0' * len(TEST_SYMLINK_TARGET), file=f) + subprocess.check_call(('git', 'add', TEST_SYMLINK)) + assert find_destroyed_symlinks(ALL_STAGED) == [] + assert main(ALL_STAGED) == 0 + + with open(TEST_SYMLINK, 'w') as f: + print('0' * (len(TEST_SYMLINK_TARGET) + 3), file=f) + subprocess.check_call(('git', 'add', TEST_SYMLINK)) + assert find_destroyed_symlinks(ALL_STAGED) == [] + assert main(ALL_STAGED) == 0 diff --git a/tests/util_test.py b/tests/util_test.py index b42ee6f9..9b4ecea1 100644 --- a/tests/util_test.py +++ b/tests/util_test.py @@ -2,6 +2,7 @@ from pre_commit_hooks.util import CalledProcessError from pre_commit_hooks.util import cmd_output +from pre_commit_hooks.util import zsplit def test_raises_on_error(): @@ -12,3 +13,23 @@ def test_raises_on_error(): def test_output(): ret = cmd_output('sh', '-c', 'echo hi') assert ret == 'hi\n' + + +@pytest.mark.parametrize('out', ('\0f1\0f2\0', '\0f1\0f2', 'f1\0f2\0')) +def test_check_zsplits_str_correctly(out): + assert zsplit(out) == ['f1', 'f2'] + + +@pytest.mark.parametrize('out', (b'\0f1\0f2\0', b'\0f1\0f2', b'f1\0f2\0')) +def test_check_zsplits_bytes_correctly(out): + assert zsplit(out) == [b'f1', b'f2'] + + +@pytest.mark.parametrize('out', ('\0\0', b'\0\0', '\0', b'\0', '', b'')) +def test_check_zsplit_returns_empty(out): + assert zsplit(out) == [] + + +def test_check_zsplit_raises_on_wrong_type(): + with pytest.raises(TypeError): + zsplit([1, 2, 3]) # type: ignore