diff --git a/.pre-commit-hooks.yaml b/.pre-commit-hooks.yaml index 3e4dc9ea..4f118b6b 100644 --- a/.pre-commit-hooks.yaml +++ b/.pre-commit-hooks.yaml @@ -100,6 +100,13 @@ entry: debug-statement-hook language: python types: [python] +- id: destroyed-symlinks + name: Detect Destroyed Symlinks + description: Detects symlinks which are changed to regular files with a content of a path which that symlink was pointing to. + entry: destroyed-symlinks + language: python + types: [file] + pass_filenames: false - id: detect-aws-credentials name: Detect AWS Credentials description: Detects *your* aws credentials from the aws cli credentials file diff --git a/README.md b/README.md index 3552721f..f3c15323 100644 --- a/README.md +++ b/README.md @@ -82,6 +82,15 @@ Attempts to load all yaml files to verify syntax. #### `debug-statements` Check for debugger imports and py37+ `breakpoint()` calls in python source. +#### `destroyed-symlinks` +Detects symlinks which have been changed to regular files whose content is the path that the symlink was pointing to. +This usually happens on Windows when a user without permission to create symlinks clones a repository containing symlinks. +The following argument is available: +- `--autofix` - unstage detected broken symlinks so they won't be committed. + Note: this option won't fix the symlinks on the filesystem because, + if the symlink has been destroyed in the first place, there is some reason for that + (see above) which this hook most likely won't be able to fix. + #### `detect-aws-credentials` Checks for the existence of AWS secrets that you have set up with the AWS CLI. 
"""Detect symlinks that were replaced by regular files holding the link target.

File: ``pre_commit_hooks/destroyed_symlinks.py``.  ``setup.cfg`` registers
``main`` as a console script:
``destroyed-symlinks = pre_commit_hooks.destroyed_symlinks:main``.

A "destroyed" symlink is a path that is a symlink in ``HEAD`` (mode 120000)
but is staged as a regular file whose content is (modulo whitespace and line
endings) the path the symlink used to point to.
"""
import argparse
import os
import sys
from operator import methodcaller
from subprocess import check_call
from subprocess import check_output
from typing import Iterator
from typing import List
from typing import Optional
from typing import Sequence

ORDINARY_CHANGED_ENTRIES_MARKER = b'1'
RENAMED_OR_COPIED_ENTRIES_MARKER = b'2'
PERMS_LINK = b'120000'
PERMS_NONEXIST = b'000000'
# Most filesystems limit path length to 4096 bytes.  In the worst (insane)
# case, when a symlink points to a path consisting purely of newlines and
# slashes, converting it to Windows line breaks at most doubles its size.
# A staged file bigger than this is therefore assumed to be a legitimate new
# file that replaced the symlink, not a destroyed symlink.
MAX_DESTROYED_LINK_SIZE = 4096 * 2


def normalize_content(content: bytes) -> bytes:
    """Return *content* with every line stripped and empty lines removed.

    Lines are split with ``bytes.splitlines`` and re-joined with ``b'\\n'``,
    so contents that differ only in surrounding whitespace, blank lines or
    line-ending style normalize to the same value.
    """
    return b'\n'.join(
        filter(
            None,
            map(
                methodcaller('strip'),
                content.splitlines(),
            ),
        ),
    )


def _git_output(*args: object) -> bytes:
    """Run ``git`` with *args* and return its raw stdout.

    ``bytes`` arguments (hashes and paths parsed from git output) are decoded
    with ``os.fsdecode`` because subprocess accepts ``bytes`` items in an
    argument sequence on Windows only since Python 3.8.
    """
    return check_output(['git', *map(os.fsdecode, args)])


def _changed_entries(status_output: bytes) -> Iterator[List[bytes]]:
    """Yield the space-split fields of each ordinary changed entry.

    *status_output* is the output of ``git status --porcelain=v2 -z``.
    """
    tokens = iter(status_output.split(b'\0'))
    for token in tokens:
        fields = token.split(b' ')
        marker = fields[0]
        if marker == RENAMED_OR_COPIED_ENTRIES_MARKER:
            # With -z the original path of a rename/copy is emitted as a
            # separate NUL-terminated token.  Consume it here so that a path
            # such as b'1 notes.txt' is never mistaken for a changed entry.
            next(tokens, None)
        elif marker == ORDINARY_CHANGED_ENTRIES_MARKER:
            yield fields


def _is_destroyed_symlink(head_hash: bytes, index_hash: bytes) -> bool:
    """Tell whether the staged blob is just the old symlink's target path."""
    if head_hash == index_hash:
        # Identical blobs with a changed mode: a destroyed symlink for sure.
        return True
    # Different hashes do not mean everything is OK: the new file may have
    # been altered by something like the trailing-whitespace and/or
    # mixed-line-ending hooks, so compare normalized contents instead.
    index_size = int(_git_output('cat-file', '-s', index_hash).strip())
    if index_size > MAX_DESTROYED_LINK_SIZE:
        return False
    head_content = normalize_content(_git_output('cat-file', '-p', head_hash))
    index_content = normalize_content(_git_output('cat-file', '-p', index_hash))
    return head_content == index_content


def _restage_as_symlink(head_hash: bytes, path: bytes) -> None:
    """Put the ``HEAD`` symlink entry for *path* back into the index."""
    cacheinfo = b','.join((PERMS_LINK, head_hash, path))
    check_call(['git', 'update-index', '--cacheinfo', os.fsdecode(cacheinfo)])


def find_destroyed_symlinks(autofix: bool) -> Sequence[bytes]:
    """Return the paths of staged destroyed symlinks in the current repo.

    :param autofix: when true, each detected path is additionally restored
        in the index to the symlink entry recorded in ``HEAD`` (the working
        tree file is left untouched).
    :returns: list of raw path bytes as reported by ``git status``.
    :raises subprocess.CalledProcessError: if a ``git`` command fails.
    """
    destroyed_links = []
    status_output = _git_output('status', '--porcelain=v2', '-z')
    for fields in _changed_entries(status_output):
        # Field layout:
        # https://git-scm.com/docs/git-status#_changed_tracked_entries
        _, _, _, mode_head, mode_index, _, hash_head, hash_index, *path_parts = fields
        if mode_head != PERMS_LINK or mode_index in (PERMS_LINK, PERMS_NONEXIST):
            continue
        if not _is_destroyed_symlink(hash_head, hash_index):
            continue
        # The path itself may contain spaces, so re-join the remainder.
        path = b' '.join(path_parts)
        destroyed_links.append(path)
        if autofix:
            _restage_as_symlink(hash_head, path)
    return destroyed_links


def main(argv: Optional[Sequence[str]] = None) -> int:
    """Command-line entry point; return 1 if destroyed symlinks were found."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--autofix', action='store_true', help='unstage broken symlinks')
    args = parser.parse_args(argv)
    destroyed_links = find_destroyed_symlinks(autofix=args.autofix)
    if not destroyed_links:
        return 0
    print('Destroyed symlinks:', flush=True)
    # Paths are raw bytes and may not be decodable, so bypass the text layer.
    out = sys.stdout.buffer
    for destroyed_link in destroyed_links:
        out.write(b'- ' + destroyed_link + b'\n')
    out.flush()
    return 1


if __name__ == '__main__':
    # ``exit`` is injected by the ``site`` module and is not always available.
    raise SystemExit(main())
"""Tests for the ``destroyed-symlinks`` hook.

File: ``tests/destroyed_symlinks_test.py``.
"""
import os
from subprocess import check_call
from subprocess import check_output

import pytest

from pre_commit_hooks.destroyed_symlinks import find_destroyed_symlinks
from pre_commit_hooks.destroyed_symlinks import main
from pre_commit_hooks.destroyed_symlinks import normalize_content

TEST_SYMLINK = 'test_symlink'


@pytest.fixture
def repo_with_destroyed_symlink(tmpdir):
    """Yield a clone in which a committed symlink is checked out as a file.

    A source repository containing one symlink is created and then cloned
    with ``core.symlinks=false``, so the clone's working tree holds a regular
    file in place of the symlink.
    """
    source_repo = tmpdir.join('src')
    os.makedirs(source_repo, exist_ok=True)
    test_repo = tmpdir.join('test')
    with source_repo.as_cwd():
        check_call(['git', 'init'])
        os.symlink('/doesnt/really/matters', TEST_SYMLINK)
        check_call(['git', 'add', '.'])
        # Supply an identity explicitly so the commit also succeeds on
        # machines without a global user.name / user.email.
        check_call([
            'git',
            '-c', 'user.name=test',
            '-c', 'user.email=test@example.com',
            'commit', '--no-gpg-sign', '-m', 'initial',
        ])
        assert check_output(['git', 'cat-file', '-p', 'HEAD^{tree}']).startswith(b'120000')
    check_call([
        'git', '-c', 'core.symlinks=false', 'clone', str(source_repo), str(test_repo),
    ])
    assert not os.path.islink(test_repo.join(TEST_SYMLINK))
    yield test_repo


@pytest.mark.parametrize(
    ('content', 'result'),
    (
        (b'qwer', b'qwer'),
        (b'qwer\n', b'qwer'),
        (b'qwer\nasdf', b'qwer\nasdf'),
        (b'qwer\r\nasdf', b'qwer\nasdf'),
        (b' qwer\r\n\tasdf \r\n', b'qwer\nasdf'),
    ),
)
def test_normalize_content(content: bytes, result: bytes) -> None:
    assert normalize_content(content) == result


def test_find_destroyed_symlinks(repo_with_destroyed_symlink):
    """Walk through detection, autofix and the size cut-off in one repo."""
    with repo_with_destroyed_symlink.as_cwd():
        # Nothing is staged yet, so nothing is reported.
        assert find_destroyed_symlinks(autofix=False) == []
        assert main([]) == 0

        # Staging the regular file that replaced the symlink is detected.
        check_call(['git', 'add', TEST_SYMLINK])
        assert find_destroyed_symlinks(autofix=False) == [TEST_SYMLINK.encode()]
        assert main([]) != 0

        # Autofix still reports the path but unstages it.
        assert find_destroyed_symlinks(autofix=True) == [TEST_SYMLINK.encode()]
        assert check_output(['git', 'status', '--porcelain=v2']).startswith(b'1 .T ')
        check_call(['git', 'add', TEST_SYMLINK])
        assert main(['--autofix']) != 0
        assert check_output(['git', 'status', '--porcelain=v2']).startswith(b'1 .T ')

        # A trailing newline changes the blob hash but not the verdict.
        # Close the file explicitly so the data is on disk before `git add`.
        with open(TEST_SYMLINK, 'a') as f:
            print(file=f)
        check_call(['git', 'add', TEST_SYMLINK])
        assert find_destroyed_symlinks(autofix=False) == [TEST_SYMLINK.encode()]
        assert main([]) != 0

        # A file larger than the 8192-byte limit is treated as a real file.
        with open(TEST_SYMLINK, 'w') as f:
            print('0' * 8193, file=f)
        check_call(['git', 'add', TEST_SYMLINK])
        assert find_destroyed_symlinks(autofix=False) == []
        assert main([]) == 0