From 7d9da14b8487d348a707d44369b5747c715f2449 Mon Sep 17 00:00:00 2001 From: Mikhail Khvoinitsky Date: Sun, 2 Aug 2020 21:25:07 +0300 Subject: [PATCH] New hook 'destroyed-symlinks' to detect symlinks which are changed to regular files with a content of a path which that symlink was pointing to --- .pre-commit-hooks.yaml | 7 ++ README.md | 9 +++ pre_commit_hooks/destroyed_symlinks.py | 108 +++++++++++++++++++++++++ setup.cfg | 1 + tests/destroyed_symlinks_test.py | 80 ++++++++++++++++++ 5 files changed, 205 insertions(+) create mode 100755 pre_commit_hooks/destroyed_symlinks.py create mode 100644 tests/destroyed_symlinks_test.py diff --git a/.pre-commit-hooks.yaml b/.pre-commit-hooks.yaml index 3e4dc9ea..4f118b6b 100644 --- a/.pre-commit-hooks.yaml +++ b/.pre-commit-hooks.yaml @@ -100,6 +100,13 @@ entry: debug-statement-hook language: python types: [python] +- id: destroyed-symlinks + name: Detect Destroyed Symlinks + description: Detects symlinks which are changed to regular files with a content of a path which that symlink was pointing to. + entry: destroyed-symlinks + language: python + types: [file] + pass_filenames: false - id: detect-aws-credentials name: Detect AWS Credentials description: Detects *your* aws credentials from the aws cli credentials file diff --git a/README.md b/README.md index 3552721f..f3c15323 100644 --- a/README.md +++ b/README.md @@ -82,6 +82,15 @@ Attempts to load all yaml files to verify syntax. #### `debug-statements` Check for debugger imports and py37+ `breakpoint()` calls in python source. +#### `destroyed-symlinks` +Detects symlinks which are changed to regular files with a content of a path which that symlink was pointing to. +This usually happens on Windows when a user without permission to create symlinks clones a repository with symlinks. +The following argument is available: +- `--autofix` - unstage detected broken symlinks so they won't be committed. 
+ Note: this option won't fix the symlinks on the filesystem because, + if the symlink has been destroyed in the first place, there is some reason for that + (see above) which this hook most likely won't be able to fix. + #### `detect-aws-credentials` Checks for the existence of AWS secrets that you have set up with the AWS CLI. The following arguments are available: diff --git a/pre_commit_hooks/destroyed_symlinks.py b/pre_commit_hooks/destroyed_symlinks.py new file mode 100755 index 00000000..11ca745c --- /dev/null +++ b/pre_commit_hooks/destroyed_symlinks.py @@ -0,0 +1,108 @@ +import argparse +import sys +from operator import methodcaller +from subprocess import check_call +from subprocess import check_output +from typing import Optional +from typing import Sequence + +ORDINARY_CHANGED_ENTRIES_MARKER = b'1' +PERMS_LINK = b'120000' +PERMS_NONEXIST = b'000000' + + +def normalize_content(content: bytes) -> bytes: + return b'\n'.join( + filter( + None, + map( + methodcaller('strip'), + content.splitlines(), + ), + ), + ) + + +def find_destroyed_symlinks(autofix: bool) -> Sequence[bytes]: + destroyed_links = [] + for line in check_output( + ['git', 'status', '--porcelain=v2', '-z'], + ).split(b'\0'): + splitted = line.split(b' ') + if splitted and splitted[0] == ORDINARY_CHANGED_ENTRIES_MARKER: + # variable names are taken from + # https://git-scm.com/docs/git-status#_changed_tracked_entries + _, XY, sub, mH, mI, mW, hH, hI, *path_splitted = splitted + path = b' '.join(path_splitted) + if all(( + mH == PERMS_LINK, + mI != PERMS_LINK, + mI != PERMS_NONEXIST, + )): + found_destroyed_link = False + if hH == hI: + # if old and new hashes are equal, it's not needed to check + # anything more, we've found a destroyed symlink for sure + found_destroyed_link = True + else: + # if old and new hashes are *not* equal, it doesn't mean + # that everything is OK - new file may be altered + # by something like trailing-whitespace and/or + # mixed-line-ending hooks so we need to go 
deeper + index_size = int( + check_output(['git', 'cat-file', '-s', hI]).strip(), + ) + # Most filesystems limit path length to 4096 bytes. + # In the worst (insane) case when symlink points to a file + # which path consists of pure newlines and slashes, + # after converting it to Windows line break, its size + # in any case won't be bigger than 4096*2, so if new file + # is bigger than this, we can safely assume that it is not + # destroyed symlink but a valid new file + # instead of the symlink. + if index_size <= 8192: + head_content = normalize_content( + check_output(['git', 'cat-file', '-p', hH]), + ) + index_content = normalize_content( + check_output(['git', 'cat-file', '-p', hI]), + ) + found_destroyed_link = head_content == index_content + if found_destroyed_link: + destroyed_links.append(path) + if autofix: + check_call([ + 'git', + 'update-index', + '--cacheinfo', + b','.join(( + PERMS_LINK, + hH, + path, + )), + ]) + return destroyed_links + + +def main(argv: Optional[Sequence[str]] = None) -> int: + parser = argparse.ArgumentParser() + parser.add_argument( + '--autofix', action='store_true', help='unstage broken symlinks', + ) + args = parser.parse_args(argv) + destroyed_links = find_destroyed_symlinks( + autofix=args.autofix, + ) + if destroyed_links: + print('Destroyed symlinks:', flush=True) + for destroyed_link in destroyed_links: + sys.stdout.buffer.write(b'- ') + sys.stdout.buffer.write(destroyed_link) + sys.stdout.buffer.write(b'\n') + sys.stdout.buffer.flush() + return 1 + return 0 + + +if __name__ == '__main__': + exit(main()) diff --git a/setup.cfg b/setup.cfg index 47b8bb6d..d6047c99 100644 --- a/setup.cfg +++ b/setup.cfg @@ -43,6 +43,7 @@ console_scripts = check-xml = pre_commit_hooks.check_xml:main check-yaml = pre_commit_hooks.check_yaml:main debug-statement-hook = pre_commit_hooks.debug_statement_hook:main + destroyed-symlinks = pre_commit_hooks.destroyed_symlinks:main detect-aws-credentials = 
pre_commit_hooks.detect_aws_credentials:main detect-private-key = pre_commit_hooks.detect_private_key:main double-quote-string-fixer = pre_commit_hooks.string_fixer:main diff --git a/tests/destroyed_symlinks_test.py b/tests/destroyed_symlinks_test.py new file mode 100644 index 00000000..f8c4e5be --- /dev/null +++ b/tests/destroyed_symlinks_test.py @@ -0,0 +1,80 @@ +import os +from subprocess import check_call +from subprocess import check_output + +import pytest + +from pre_commit_hooks.destroyed_symlinks import find_destroyed_symlinks +from pre_commit_hooks.destroyed_symlinks import main +from pre_commit_hooks.destroyed_symlinks import normalize_content + +TEST_SYMLINK = 'test_symlink' + + +@pytest.fixture +def repo_with_destroyed_symlink(tmpdir): + source_repo = tmpdir.join('src') + os.makedirs(source_repo, exist_ok=True) + test_repo = tmpdir.join('test') + with source_repo.as_cwd(): + check_call(['git', 'init']) + os.symlink('/doesnt/really/matters', TEST_SYMLINK) + check_call(['git', 'add', '.']) + check_call(['git', 'commit', '--no-gpg-sign', '-m', 'initial']) + assert check_output( + ['git', 'cat-file', '-p', 'HEAD^{tree}'], + ).startswith(b'120000') + check_call( + ['git', '-c', 'core.symlinks=false', 'clone', source_repo, test_repo], + ) + with test_repo.as_cwd(): + check_call(['git', 'config', '--local', 'core.symlinks', 'true']) + assert not os.path.islink(test_repo.join(TEST_SYMLINK)) + yield test_repo + + +@pytest.mark.parametrize( + ('content', 'result'), + ( + (b'qwer', b'qwer'), + (b'qwer\n', b'qwer'), + (b'qwer\nasdf', b'qwer\nasdf'), + (b'qwer\r\nasdf', b'qwer\nasdf'), + (b' qwer\r\n\tasdf \r\n', b'qwer\nasdf'), + ), +) +def test_normalize_content(content: bytes, result: bytes) -> None: + assert normalize_content(content) == result + + +def test_find_destroyed_symlinks(repo_with_destroyed_symlink): + with repo_with_destroyed_symlink.as_cwd(): + assert find_destroyed_symlinks(autofix=False) == [] + assert main([]) == 0 + check_call(['git', 'add', 
TEST_SYMLINK]) + assert find_destroyed_symlinks(autofix=False) == [ + TEST_SYMLINK.encode(), + ] + assert main([]) != 0 + assert find_destroyed_symlinks(autofix=True) == [TEST_SYMLINK.encode()] + # check that file is not staged anymore + assert check_output(['git', 'status', '--porcelain=v2']).startswith( + b'1 .T ', + ) + check_call(['git', 'add', TEST_SYMLINK]) + assert main(['--autofix']) != 0 + assert check_output(['git', 'status', '--porcelain=v2']).startswith( + b'1 .T ', + ) + with open(TEST_SYMLINK, 'a') as f: + print(file=f) # add trailing newline + check_call(['git', 'add', TEST_SYMLINK]) + assert find_destroyed_symlinks(autofix=False) == [ + TEST_SYMLINK.encode(), + ] + assert main([]) != 0 + with open(TEST_SYMLINK, 'w') as f: + print('0' * 8193, file=f) + check_call(['git', 'add', TEST_SYMLINK]) + assert find_destroyed_symlinks(autofix=False) == [] + assert main([]) == 0