Skip to content

Commit

Permalink
New hook 'destroyed-symlinks' to detect symlinks which are changed to…
Browse files Browse the repository at this point in the history
… regular files with a content of a path which that symlink was pointing to; move zsplit to util; add support for bytes to zsplit
  • Loading branch information
m-khvoinitsky committed Nov 18, 2020
1 parent e1668fe commit 6a43b59
Show file tree
Hide file tree
Showing 9 changed files with 262 additions and 19 deletions.
6 changes: 6 additions & 0 deletions .pre-commit-hooks.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,12 @@
entry: debug-statement-hook
language: python
types: [python]
- id: destroyed-symlinks
name: Detect Destroyed Symlinks
description: Detects symlinks which are changed to regular files with a content of a path which that symlink was pointing to.
entry: destroyed-symlinks
language: python
types: [file]
- id: detect-aws-credentials
name: Detect AWS Credentials
description: Detects *your* aws credentials from the aws cli credentials file
Expand Down
9 changes: 9 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,15 @@ Attempts to load all yaml files to verify syntax.
#### `debug-statements`
Check for debugger imports and py37+ `breakpoint()` calls in python source.

#### `destroyed-symlinks`
Detects symlinks which are changed to regular files with a content of a path which that symlink was pointing to.
This usually happens on Windows in case when user without a permission for creating symlinks clones repository with symlinks.
The following argument is available:
- `--autofix` - unstage detected broken symlinks so they won't be commited.
Note: this option won't fix the symlinks on the filesystem because,
if the symlink has been destroyed in the first place, there is some reason for that
(see above) which this hook most likely won't be able to fix.

#### `detect-aws-credentials`
Checks for the existence of AWS secrets that you have set up with the AWS CLI.
The following arguments are available:
Expand Down
9 changes: 1 addition & 8 deletions pre_commit_hooks/check_executables_have_shebangs.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,18 +8,11 @@
from typing import Set

from pre_commit_hooks.util import cmd_output
from pre_commit_hooks.util import zsplit

EXECUTABLE_VALUES = frozenset(('1', '3', '5', '7'))


def zsplit(s: str) -> List[str]:
s = s.strip('\0')
if s:
return s.split('\0')
else:
return []


def check_executables(paths: List[str]) -> int:
if sys.platform == 'win32': # pragma: win32 cover
return _check_git_filemode(paths)
Expand Down
109 changes: 109 additions & 0 deletions pre_commit_hooks/destroyed_symlinks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
import argparse
import itertools
import shlex
import subprocess
from typing import List
from typing import Optional
from typing import Sequence

from pre_commit_hooks.util import cmd_output
from pre_commit_hooks.util import zsplit

ORDINARY_CHANGED_ENTRIES_MARKER = '1'
PERMS_LINK = '120000'
PERMS_NONEXIST = '000000'


def normalize_content(content: bytes) -> bytes:
return content.rstrip()


def find_destroyed_symlinks(files: Sequence[str]) -> Sequence[str]:
destroyed_links: List[str] = []
if not files:
return destroyed_links
for line in zsplit(
cmd_output(
*itertools.chain(
('git', 'status', '--porcelain=v2', '-z', '--'),
files,
),
),
):
splitted = line.split(' ')
if splitted and splitted[0] == ORDINARY_CHANGED_ENTRIES_MARKER:
# https://git-scm.com/docs/git-status#_changed_tracked_entries
(
_, _, _,
mode_HEAD,
mode_index,
_,
hash_HEAD,
hash_index,
*path_splitted,
) = splitted
path = ' '.join(path_splitted)
if all((
mode_HEAD == PERMS_LINK,
mode_index != PERMS_LINK,
mode_index != PERMS_NONEXIST,
)):
if hash_HEAD == hash_index:
# if old and new hashes are equal, it's not needed to check
# anything more, we've found a destroyed symlink for sure
destroyed_links.append(path)
else:
# if old and new hashes are *not* equal, it doesn't mean
# that everything is OK - new file may be altered
# by something like trailing-whitespace and/or
# mixed-line-ending hooks so we need to go deeper
SIZE_CMD = ('git', 'cat-file', '-s')
size_index = int(cmd_output(*SIZE_CMD, hash_index).strip())
size_HEAD = int(cmd_output(*SIZE_CMD, hash_HEAD).strip())

# in the worst case new file may have CRLF added
# so check content only if new file is bigger
# not more than 2 bytes compared to the old one
if size_index <= size_HEAD + 2:
head_content = normalize_content(
subprocess.check_output(
('git', 'cat-file', '-p', hash_HEAD),
),
)
index_content = normalize_content(
subprocess.check_output(
('git', 'cat-file', '-p', hash_index),
),
)
if head_content == index_content:
destroyed_links.append(path)
return destroyed_links


def main(argv: Optional[Sequence[str]] = None) -> int:
parser = argparse.ArgumentParser()
parser.add_argument('filenames', nargs='*', help='Filenames to check.')
args = parser.parse_args(argv)
destroyed_links = find_destroyed_symlinks(files=args.filenames)
if destroyed_links:
print('Destroyed symlinks:')
for destroyed_link in destroyed_links:
print(f'- {destroyed_link}')
print('You should unstage affected files:')
print(
' git reset HEAD -- {}'.format(
' '.join(map(shlex.quote, destroyed_links)),
),
)
print(
'And retry commit. As a long term solution '
'you may try to explicitly tell git that your '
'envionment does not support symlinks:',
)
print(' git config --local core.symlinks false')
return 1
return 0


if __name__ == '__main__':
exit(main())
27 changes: 26 additions & 1 deletion pre_commit_hooks/util.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
import subprocess
from typing import Any
from typing import AnyStr
from typing import List
from typing import Optional
from typing import Set
from typing import Union


class CalledProcessError(RuntimeError):
Expand All @@ -13,7 +16,11 @@ def added_files() -> Set[str]:
return set(cmd_output(*cmd).splitlines())


def cmd_output(*cmd: str, retcode: Optional[int] = 0, **kwargs: Any) -> str:
def cmd_output(
*cmd: Union[str, bytes],
retcode: Optional[int] = 0,
**kwargs: Any,
) -> str:
kwargs.setdefault('stdout', subprocess.PIPE)
kwargs.setdefault('stderr', subprocess.PIPE)
proc = subprocess.Popen(cmd, **kwargs)
Expand All @@ -22,3 +29,21 @@ def cmd_output(*cmd: str, retcode: Optional[int] = 0, **kwargs: Any) -> str:
if retcode is not None and proc.returncode != retcode:
raise CalledProcessError(cmd, retcode, proc.returncode, stdout, stderr)
return stdout


def zsplit(s: AnyStr) -> List[AnyStr]:
if isinstance(s, bytes):
separator = b'\0'
elif isinstance(s, str):
separator = '\0'
else:
raise TypeError(
'Expected type of argument is str or bytes, got {}'.format(
type(s),
),
)
s = s.strip(separator)
if s:
return s.split(separator)
else:
return []
1 change: 1 addition & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ console_scripts =
check-xml = pre_commit_hooks.check_xml:main
check-yaml = pre_commit_hooks.check_yaml:main
debug-statement-hook = pre_commit_hooks.debug_statement_hook:main
destroyed-symlinks = pre_commit_hooks.destroyed_symlinks:main
detect-aws-credentials = pre_commit_hooks.detect_aws_credentials:main
detect-private-key = pre_commit_hooks.detect_private_key:main
double-quote-string-fixer = pre_commit_hooks.string_fixer:main
Expand Down
10 changes: 0 additions & 10 deletions tests/check_executables_have_shebangs_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,16 +102,6 @@ def test_check_git_filemode_failing(tmpdir):
assert check_executables_have_shebangs._check_git_filemode(files) == 1


@pytest.mark.parametrize('out', ('\0f1\0f2\0', '\0f1\0f2', 'f1\0f2\0'))
def test_check_zsplits_correctly(out):
assert check_executables_have_shebangs.zsplit(out) == ['f1', 'f2']


@pytest.mark.parametrize('out', ('\0\0', '\0', ''))
def test_check_zsplit_returns_empty(out):
assert check_executables_have_shebangs.zsplit(out) == []


@pytest.mark.parametrize(
('content', 'mode', 'expected'),
(
Expand Down
89 changes: 89 additions & 0 deletions tests/destroyed_symlinks_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
import os
import subprocess

import pytest

from pre_commit_hooks.destroyed_symlinks import find_destroyed_symlinks
from pre_commit_hooks.destroyed_symlinks import main
from pre_commit_hooks.destroyed_symlinks import normalize_content

TEST_SYMLINK = 'test_symlink'
TEST_SYMLINK_TARGET = '/doesnt/really/matters'
TEST_FILE = 'test_file'
TEST_FILE_RENAMED = f'{TEST_FILE}_renamed'


@pytest.fixture
def repo_with_destroyed_symlink(tmpdir):
source_repo = tmpdir.join('src')
os.makedirs(source_repo, exist_ok=True)
test_repo = tmpdir.join('test')
with source_repo.as_cwd():
subprocess.check_call(('git', 'init'))
os.symlink(TEST_SYMLINK_TARGET, TEST_SYMLINK)
with open(TEST_FILE, 'w') as f:
print('some random content', file=f)
subprocess.check_call(('git', 'add', '.'))
subprocess.check_call(
('git', 'commit', '--no-gpg-sign', '-m', 'initial'),
)
assert b'120000 ' in subprocess.check_output(
('git', 'cat-file', '-p', 'HEAD^{tree}'),
)
subprocess.check_call(
('git', '-c', 'core.symlinks=false', 'clone', source_repo, test_repo),
)
with test_repo.as_cwd():
subprocess.check_call(
('git', 'config', '--local', 'core.symlinks', 'true'),
)
subprocess.check_call(('git', 'mv', TEST_FILE, TEST_FILE_RENAMED))
assert not os.path.islink(test_repo.join(TEST_SYMLINK))
yield test_repo


@pytest.mark.parametrize(
('content', 'result'),
(
(b'qwer', b'qwer'),
(b'qwer\n', b'qwer'),
(b'qwer \n', b'qwer'),
(b'qwer ', b'qwer'),
(b'qwer \t\r\n', b'qwer'),
),
)
def test_normalize_content(content: bytes, result: bytes) -> None:
assert normalize_content(content) == result


def test_find_destroyed_symlinks(repo_with_destroyed_symlink):
with repo_with_destroyed_symlink.as_cwd():
assert find_destroyed_symlinks([]) == []
assert main([]) == 0

subprocess.check_call(('git', 'add', TEST_SYMLINK))
assert find_destroyed_symlinks([TEST_SYMLINK]) == [TEST_SYMLINK]
assert find_destroyed_symlinks([]) == []
assert main([]) == 0
assert find_destroyed_symlinks([TEST_FILE_RENAMED, TEST_FILE]) == []
ALL_STAGED = [TEST_SYMLINK, TEST_FILE_RENAMED]
assert find_destroyed_symlinks(ALL_STAGED) == [TEST_SYMLINK]
assert main(ALL_STAGED) != 0

with open(TEST_SYMLINK, 'a') as f:
print(file=f) # add trailing newline
subprocess.check_call(['git', 'add', TEST_SYMLINK])
assert find_destroyed_symlinks(ALL_STAGED) == [TEST_SYMLINK]
assert main(ALL_STAGED) != 0

with open(TEST_SYMLINK, 'w') as f:
print('0' * len(TEST_SYMLINK_TARGET), file=f)
subprocess.check_call(('git', 'add', TEST_SYMLINK))
assert find_destroyed_symlinks(ALL_STAGED) == []
assert main(ALL_STAGED) == 0

with open(TEST_SYMLINK, 'w') as f:
print('0' * (len(TEST_SYMLINK_TARGET) + 3), file=f)
subprocess.check_call(('git', 'add', TEST_SYMLINK))
assert find_destroyed_symlinks(ALL_STAGED) == []
assert main(ALL_STAGED) == 0
21 changes: 21 additions & 0 deletions tests/util_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from pre_commit_hooks.util import CalledProcessError
from pre_commit_hooks.util import cmd_output
from pre_commit_hooks.util import zsplit


def test_raises_on_error():
Expand All @@ -12,3 +13,23 @@ def test_raises_on_error():
def test_output():
ret = cmd_output('sh', '-c', 'echo hi')
assert ret == 'hi\n'


@pytest.mark.parametrize('out', ('\0f1\0f2\0', '\0f1\0f2', 'f1\0f2\0'))
def test_check_zsplits_str_correctly(out):
assert zsplit(out) == ['f1', 'f2']


@pytest.mark.parametrize('out', (b'\0f1\0f2\0', b'\0f1\0f2', b'f1\0f2\0'))
def test_check_zsplits_bytes_correctly(out):
assert zsplit(out) == [b'f1', b'f2']


@pytest.mark.parametrize('out', ('\0\0', b'\0\0', '\0', b'\0', '', b''))
def test_check_zsplit_returns_empty(out):
assert zsplit(out) == []


def test_check_zsplit_raises_on_wrong_type():
with pytest.raises(TypeError):
zsplit([1, 2, 3]) # type: ignore

0 comments on commit 6a43b59

Please sign in to comment.