From 1e87d59a2d02c17ef7c305e4c8e883b841eb1d8b Mon Sep 17 00:00:00 2001 From: Mikhail Khvoinitsky Date: Sun, 2 Aug 2020 21:25:07 +0300 Subject: [PATCH] New hook 'destroyed-symlinks' to detect symlinks which are changed to regular files with a content of a path which that symlink was pointing to; move zsplit to util --- .pre-commit-hooks.yaml | 6 ++ README.md | 6 ++ .../check_executables_have_shebangs.py | 9 +- pre_commit_hooks/destroyed_symlinks.py | 96 +++++++++++++++++++ pre_commit_hooks/util.py | 9 ++ setup.cfg | 1 + tests/check_executables_have_shebangs_test.py | 10 -- tests/destroyed_symlinks_test.py | 74 ++++++++++++++ tests/util_test.py | 11 +++ 9 files changed, 204 insertions(+), 18 deletions(-) create mode 100755 pre_commit_hooks/destroyed_symlinks.py create mode 100644 tests/destroyed_symlinks_test.py diff --git a/.pre-commit-hooks.yaml b/.pre-commit-hooks.yaml index a47f7339..fa617b97 100644 --- a/.pre-commit-hooks.yaml +++ b/.pre-commit-hooks.yaml @@ -100,6 +100,12 @@ entry: debug-statement-hook language: python types: [python] +- id: destroyed-symlinks + name: Detect Destroyed Symlinks + description: Detects symlinks which are changed to regular files with a content of a path which that symlink was pointing to. + entry: destroyed-symlinks + language: python + types: [file] - id: detect-aws-credentials name: Detect AWS Credentials description: Detects *your* aws credentials from the aws cli credentials file diff --git a/README.md b/README.md index 2a762680..3a87e9cf 100644 --- a/README.md +++ b/README.md @@ -87,6 +87,12 @@ Attempts to load all yaml files to verify syntax. #### `debug-statements` Check for debugger imports and py37+ `breakpoint()` calls in python source. +#### `destroyed-symlinks` +Detects symlinks which are changed to regular files with a content of a path +which that symlink was pointing to. +This usually happens on Windows when a user clones a repository that has +symlinks but they do not have the permission to create symlinks. + #### `detect-aws-credentials` Checks for the existence of AWS secrets that you have set up with the AWS CLI. The following arguments are available: diff --git a/pre_commit_hooks/check_executables_have_shebangs.py b/pre_commit_hooks/check_executables_have_shebangs.py index a02d2a9c..2d2bd7df 100644 --- a/pre_commit_hooks/check_executables_have_shebangs.py +++ b/pre_commit_hooks/check_executables_have_shebangs.py @@ -8,18 +8,11 @@ from typing import Set from pre_commit_hooks.util import cmd_output +from pre_commit_hooks.util import zsplit EXECUTABLE_VALUES = frozenset(('1', '3', '5', '7')) -def zsplit(s: str) -> List[str]: - s = s.strip('\0') - if s: - return s.split('\0') - else: - return [] - - def check_executables(paths: List[str]) -> int: if sys.platform == 'win32': # pragma: win32 cover return _check_git_filemode(paths) diff --git a/pre_commit_hooks/destroyed_symlinks.py b/pre_commit_hooks/destroyed_symlinks.py new file mode 100755 index 00000000..cfaf4e53 --- /dev/null +++ b/pre_commit_hooks/destroyed_symlinks.py @@ -0,0 +1,96 @@ +import argparse +import shlex +import subprocess +from typing import List +from typing import Optional +from typing import Sequence + +from pre_commit_hooks.util import cmd_output +from pre_commit_hooks.util import zsplit + +ORDINARY_CHANGED_ENTRIES_MARKER = '1' +PERMS_LINK = '120000' +PERMS_NONEXIST = '000000' + + +def find_destroyed_symlinks(files: Sequence[str]) -> List[str]: + destroyed_links: List[str] = [] + if not files: + return destroyed_links + for line in zsplit( + cmd_output('git', 'status', '--porcelain=v2', '-z', '--', *files), + ): + splitted = line.split(' ') + if splitted and splitted[0] == ORDINARY_CHANGED_ENTRIES_MARKER: + # https://git-scm.com/docs/git-status#_changed_tracked_entries + ( + _, _, _, + mode_HEAD, + mode_index, + _, + hash_HEAD, + hash_index, + *path_splitted, + ) = splitted + path = ' '.join(path_splitted) + if ( + mode_HEAD == PERMS_LINK and + mode_index != PERMS_LINK and + mode_index != PERMS_NONEXIST + ): + if hash_HEAD == hash_index: + # if old and new hashes are equal, it's not needed to check + # anything more, we've found a destroyed symlink for sure + destroyed_links.append(path) + else: + # if old and new hashes are *not* equal, it doesn't mean + # that everything is OK - new file may be altered + # by something like trailing-whitespace and/or + # mixed-line-ending hooks so we need to go deeper + SIZE_CMD = ('git', 'cat-file', '-s') + size_index = int(cmd_output(*SIZE_CMD, hash_index).strip()) + size_HEAD = int(cmd_output(*SIZE_CMD, hash_HEAD).strip()) + + # in the worst case new file may have CRLF added + # so check content only if new file is bigger + # not more than 2 bytes compared to the old one + if size_index <= size_HEAD + 2: + head_content = subprocess.check_output( + ('git', 'cat-file', '-p', hash_HEAD), + ).rstrip() + index_content = subprocess.check_output( + ('git', 'cat-file', '-p', hash_index), + ).rstrip() + if head_content == index_content: + destroyed_links.append(path) + return destroyed_links + + +def main(argv: Optional[Sequence[str]] = None) -> int: + parser = argparse.ArgumentParser() + parser.add_argument('filenames', nargs='*', help='Filenames to check.') + args = parser.parse_args(argv) + destroyed_links = find_destroyed_symlinks(files=args.filenames) + if destroyed_links: + print('Destroyed symlinks:') + for destroyed_link in destroyed_links: + print(f'- {destroyed_link}') + print('You should unstage affected files:') + print( + '\tgit reset HEAD -- {}'.format( + ' '.join(shlex.quote(link) for link in destroyed_links), + ), + ) + print( + 'And retry commit. As a long term solution ' + 'you may try to explicitly tell git that your ' + 'environment does not support symlinks:', + ) + print('\tgit config core.symlinks false') + return 1 + else: + return 0 + + +if __name__ == '__main__': + exit(main()) diff --git a/pre_commit_hooks/util.py b/pre_commit_hooks/util.py index e04b0150..402e33e6 100644 --- a/pre_commit_hooks/util.py +++ b/pre_commit_hooks/util.py @@ -1,5 +1,6 @@ import subprocess from typing import Any +from typing import List from typing import Optional from typing import Set @@ -22,3 +23,11 @@ def cmd_output(*cmd: str, retcode: Optional[int] = 0, **kwargs: Any) -> str: if retcode is not None and proc.returncode != retcode: raise CalledProcessError(cmd, retcode, proc.returncode, stdout, stderr) return stdout + + +def zsplit(s: str) -> List[str]: + s = s.strip('\0') + if s: + return s.split('\0') + else: + return [] diff --git a/setup.cfg b/setup.cfg index 3c401fc9..e2cad1ab 100644 --- a/setup.cfg +++ b/setup.cfg @@ -44,6 +44,7 @@ console_scripts = check-xml = pre_commit_hooks.check_xml:main check-yaml = pre_commit_hooks.check_yaml:main debug-statement-hook = pre_commit_hooks.debug_statement_hook:main + destroyed-symlinks = pre_commit_hooks.destroyed_symlinks:main detect-aws-credentials = pre_commit_hooks.detect_aws_credentials:main detect-private-key = pre_commit_hooks.detect_private_key:main double-quote-string-fixer = pre_commit_hooks.string_fixer:main diff --git a/tests/check_executables_have_shebangs_test.py b/tests/check_executables_have_shebangs_test.py index 7046081f..5703eded 100644 --- a/tests/check_executables_have_shebangs_test.py +++ b/tests/check_executables_have_shebangs_test.py @@ -102,16 +102,6 @@ def test_check_git_filemode_failing(tmpdir): assert check_executables_have_shebangs._check_git_filemode(files) == 1 -@pytest.mark.parametrize('out', ('\0f1\0f2\0', '\0f1\0f2', 'f1\0f2\0')) -def test_check_zsplits_correctly(out): - assert check_executables_have_shebangs.zsplit(out) == ['f1', 'f2'] - - -@pytest.mark.parametrize('out', ('\0\0', '\0', '')) -def test_check_zsplit_returns_empty(out): - assert check_executables_have_shebangs.zsplit(out) == [] - - @pytest.mark.parametrize( ('content', 'mode', 'expected'), ( diff --git a/tests/destroyed_symlinks_test.py b/tests/destroyed_symlinks_test.py new file mode 100644 index 00000000..d2c90310 --- /dev/null +++ b/tests/destroyed_symlinks_test.py @@ -0,0 +1,74 @@ +import os +import subprocess + +import pytest + +from pre_commit_hooks.destroyed_symlinks import find_destroyed_symlinks +from pre_commit_hooks.destroyed_symlinks import main + +TEST_SYMLINK = 'test_symlink' +TEST_SYMLINK_TARGET = '/doesnt/really/matters' +TEST_FILE = 'test_file' +TEST_FILE_RENAMED = f'{TEST_FILE}_renamed' + + +@pytest.fixture +def repo_with_destroyed_symlink(tmpdir): + source_repo = tmpdir.join('src') + os.makedirs(source_repo, exist_ok=True) + test_repo = tmpdir.join('test') + with source_repo.as_cwd(): + subprocess.check_call(('git', 'init')) + os.symlink(TEST_SYMLINK_TARGET, TEST_SYMLINK) + with open(TEST_FILE, 'w') as f: + print('some random content', file=f) + subprocess.check_call(('git', 'add', '.')) + subprocess.check_call( + ('git', 'commit', '--no-gpg-sign', '-m', 'initial'), + ) + assert b'120000 ' in subprocess.check_output( + ('git', 'cat-file', '-p', 'HEAD^{tree}'), + ) + subprocess.check_call( + ('git', '-c', 'core.symlinks=false', 'clone', source_repo, test_repo), + ) + with test_repo.as_cwd(): + subprocess.check_call( + ('git', 'config', '--local', 'core.symlinks', 'true'), + ) + subprocess.check_call(('git', 'mv', TEST_FILE, TEST_FILE_RENAMED)) + assert not os.path.islink(test_repo.join(TEST_SYMLINK)) + yield test_repo + + +def test_find_destroyed_symlinks(repo_with_destroyed_symlink): + with repo_with_destroyed_symlink.as_cwd(): + assert find_destroyed_symlinks([]) == [] + assert main([]) == 0 + + subprocess.check_call(('git', 'add', TEST_SYMLINK)) + assert find_destroyed_symlinks([TEST_SYMLINK]) == [TEST_SYMLINK] + assert find_destroyed_symlinks([]) == [] + assert main([]) == 0 + assert find_destroyed_symlinks([TEST_FILE_RENAMED, TEST_FILE]) == [] + ALL_STAGED = [TEST_SYMLINK, TEST_FILE_RENAMED] + assert find_destroyed_symlinks(ALL_STAGED) == [TEST_SYMLINK] + assert main(ALL_STAGED) != 0 + + with open(TEST_SYMLINK, 'a') as f: + print(file=f) # add trailing newline + subprocess.check_call(['git', 'add', TEST_SYMLINK]) + assert find_destroyed_symlinks(ALL_STAGED) == [TEST_SYMLINK] + assert main(ALL_STAGED) != 0 + + with open(TEST_SYMLINK, 'w') as f: + print('0' * len(TEST_SYMLINK_TARGET), file=f) + subprocess.check_call(('git', 'add', TEST_SYMLINK)) + assert find_destroyed_symlinks(ALL_STAGED) == [] + assert main(ALL_STAGED) == 0 + + with open(TEST_SYMLINK, 'w') as f: + print('0' * (len(TEST_SYMLINK_TARGET) + 3), file=f) + subprocess.check_call(('git', 'add', TEST_SYMLINK)) + assert find_destroyed_symlinks(ALL_STAGED) == [] + assert main(ALL_STAGED) == 0 diff --git a/tests/util_test.py b/tests/util_test.py index b42ee6f9..7f488161 100644 --- a/tests/util_test.py +++ b/tests/util_test.py @@ -2,6 +2,7 @@ from pre_commit_hooks.util import CalledProcessError from pre_commit_hooks.util import cmd_output +from pre_commit_hooks.util import zsplit def test_raises_on_error(): @@ -12,3 +13,13 @@ def test_raises_on_error(): def test_output(): ret = cmd_output('sh', '-c', 'echo hi') assert ret == 'hi\n' + + +@pytest.mark.parametrize('out', ('\0f1\0f2\0', '\0f1\0f2', 'f1\0f2\0')) +def test_check_zsplits_str_correctly(out): + assert zsplit(out) == ['f1', 'f2'] + + +@pytest.mark.parametrize('out', ('\0\0', '\0', '')) +def test_check_zsplit_returns_empty(out): + assert zsplit(out) == []