-
Notifications
You must be signed in to change notification settings - Fork 148
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: add linter script that purifies text files
Text files with trailing white spaces (one of \t \v \n \r and space) lead to unnecessary diffs and make merging more difficult. We should remove them. However, developers should NOT be drained by such repetitive tasks as 'removing trailing white spaces'. Indeed, code reviews should focus on 'what was said' rather than 'how it was said'. This commit enables the following workflow: - After a fresh clone of this repository, the developer runs the following commands to perform a one-time setup of the pre-commit hooks for this clone of the repository: # https://pre-commit.com/ sudo pip3 install pre-commit pre-commit install These commands will install a .git/hooks/pre-commit script that looks for the file .pre-commit-config.yaml in the root directory of this repository. - Every `git commit` afterwards will trigger the hooks configured in the .pre-commit-config.yaml file. In this case, the Python script ./tools/lint/purify_text.py will be called to process each staged file to strip trailing whitespace. - In the future, if this repository needs more linters and formatters, just configure the .pre-commit-config.yaml file to add more or modify existing ones.
- Loading branch information
1 parent
d6d5617
commit 72d63cc
Showing
4 changed files
with
167 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
__pycache__ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
repos: | ||
- repo: local | ||
hooks: | ||
# Purify text files. | ||
# | ||
# This hook combines the EOF-fixer and trailing-whitespace fixers, and | ||
# converts line endings to UNIX new-line characters. | ||
# | ||
# When running against an empty file, it does not append the new-line | ||
# character to EOF. | ||
- id: purify-text | ||
entry: ./tools/lint/purify_text.py | ||
language: python | ||
name: purify-text | ||
types: [text] | ||
stages: | ||
- commit |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,98 @@ | ||
#!/usr/bin/env python3 | ||
|
||
r'''Purify text files. | ||
1. In each line, remove trailing whitespace characters. | ||
Whitespace characters are: | ||
\t \v \n \r space | ||
2. In each file, remove leading and trailing empty lines, i.e., the whole line | ||
only consists of whitespace characters. | ||
3. Add a UNIX new-line character \n to EOF if the EOF does not have one | ||
already. The only exception to this rule is when the file is actually | ||
empty. | ||
4. Convert to UNIX line ending characters. | ||
This means to convert \r\n to \n. | ||
''' | ||
|
||
import argparse | ||
import re | ||
|
||
|
||
|
||
# Matches a line that contains nothing but whitespace (or nothing at all).
# fix_lines() uses it to detect empty lines at the start and end of a file.
__EMPTYLINE_REGEX__ = re.compile(r'^\s*$')
|
||
|
||
def fix_lines(lines):
    '''Purify a list of lines.

    Every line is first normalized with fix_line(); afterwards the empty
    lines at the beginning and at the end of the list are dropped. Empty
    lines between non-empty lines are kept.

    Args:
        lines: The list of lines to process.

    Returns:
        The purified lines, as a new list of strings. The input list is not
        modified.
    '''
    # Fix each line.
    retval = [fix_line(line) for line in lines]

    # Remove leading and trailing empty lines. Find the surviving range
    # first and slice once, rather than calling pop(0) repeatedly, which
    # shifts the whole list on every call.
    start = 0
    end = len(retval)
    while start < end and __EMPTYLINE_REGEX__.match(retval[start]):
        start += 1
    while end > start and __EMPTYLINE_REGEX__.match(retval[end - 1]):
        end -= 1

    return retval[start:end]
|
||
|
||
def fix_line(line):
    '''Normalize the end of a single line.

    Args:
        line: String. The line to process, with or without a line terminator.

    Returns:
        The line with all trailing whitespace (including any line terminator)
        removed and exactly one UNIX new-line character appended.
    '''
    stripped = line.rstrip()
    return stripped + '\n'
|
||
|
||
def purify_text_files(filelist):
    '''Purify text files in-place.

    The detailed specs of the purification are described in the module
    docstring. A file is rewritten, and its path printed, only when the
    purified content differs from what was read.

    Args:
        filelist: A list of file paths.
    '''
    for path in filelist:
        # newline='' disables newline translation on read, so '\r\n' and '\r'
        # terminators reach fix_lines() untouched. With the default
        # translation they would already look like '\n' here, and a file
        # whose only problem is its line endings would compare equal below
        # and never be converted.
        with open(path, 'r', newline='') as f:
            old_lines = f.readlines()
        new_lines = fix_lines(old_lines)
        if old_lines != new_lines:
            # newline='\n' writes '\n' verbatim on every platform instead of
            # translating it to the platform's default line separator.
            with open(path, 'w', newline='\n') as f:
                f.writelines(new_lines)
            print('Fixing', path)
|
||
|
||
def main():
    '''Command-line entry point: purify every file named on the command line.'''
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        'files',
        help='Paths to the files to format.',
        nargs=argparse.REMAINDER,
    )
    parsed = arg_parser.parse_args()
    purify_text_files(parsed.files)
|
||
|
||
if __name__ == '__main__': | ||
main() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
#!/usr/bin/env python3 | ||
|
||
# pylint: disable=missing-docstring | ||
import unittest | ||
|
||
import purify_text # pylint: disable=import-error | ||
|
||
|
||
class TestPurifyTest(unittest.TestCase):
    '''Table-driven tests for purify_text.fix_lines().'''

    def test_fix_lines(self):
        '''Check fix_lines() against (expected, original) pairs.'''
        data = [
            # 2-tuples of (expected processed lines, original lines)

            # empty and nearly empty files
            ([], []),
            ([], ['\n']),
            ([], ['\r']),
            ([], ['\t']),
            ([], ['\v']),
            ([], [' \n']),
            ([], [' \t']),
            ([], [' \v']),
            ([], ['\v\t']),
            ([], [' \v\t']),

            # one-line files
            (['foo\n'], ['foo\n']),
            ([' foo\n'], [' foo\n']),
            (['\tfoo\n'], ['\tfoo\n']),
            (['\vfoo\n'], ['\vfoo\n']),

            # trailing empty lines
            (['foo\n'], ['foo\n', '\n']),
            (['foo\n'], ['foo\n', ' \n']),
            (['foo\n'], ['foo\n', '\t\n']),
            (['foo\n'], ['foo\n', '\v\n']),

            # leading empty lines
            (['foo\n'], ['\n', 'foo\n']),
            (['foo\n'], [' \n', 'foo\n']),
            (['foo\n'], ['\t\n', 'foo\n']),
            (['foo\n'], ['\v\n', 'foo\n']),

            # trailing whitespace on a content line
            (['foo\n'], ['foo \t\n']),

            # missing new-line character at EOF
            (['foo\n'], ['foo']),

            # Windows line endings
            (['foo\n'], ['foo\r\n']),
            (['foo\n', 'bar\n'], ['foo\r\n', 'bar\r\n']),

            # empty lines between content lines are kept (but cleaned)
            (['foo\n', '\n', 'bar\n'], ['foo\n', ' \n', 'bar\n']),
        ]
        for expected, lines in data:
            # subTest reports the failing input and lets the remaining cases
            # run instead of stopping at the first mismatch.
            with self.subTest(lines=lines):
                actual = purify_text.fix_lines(lines)
                self.assertEqual(expected, actual)
|
||
|
||
if __name__ == '__main__': | ||
unittest.main() |