feat: add linter script that purifies text files

Text files with trailing white spaces (one of \t \v \n \r and space) lead to unnecessary diffs and make merging more difficult. We should remove them. However, developers should NOT be drained by such repetitive tasks as 'removing trailing white spaces'. Indeed, code reviews should focus on 'what was said' rather than 'how it was said'. This commit enables the following workflow: - After a fresh clone of this repository, the developer runs the following commands to perform a one-time setup of the pre-commit hooks for this clone of the repository: # https://pre-commit.com/ sudo pip3 install pre-commit pre-commit install These commands will install a .git/hooks/pre-commit script that searches for the file .pre-commit-config.yaml in the root directory of this repository. - Every `git commit` afterwards will trigger the hooks configured in the .pre-commit-config.yaml file. In this case, the Python script ./tools/lint/purify_text.py will be called to process each staged file to strip trailing whitespace. - In the future, if this repository needs more linters and formatters, just configure the .pre-commit-config.yaml file to add more or modify existing ones.
stackb · Dec 18, 2018 · 72d63cc · 72d63cc
1 parent d6d5617
commit 72d63cc
Show file tree

Hide file tree

Showing 4 changed files with 167 additions and 0 deletions.
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1 @@
+__pycache__
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -0,0 +1,17 @@
+repos:
+    - repo: local
+      hooks:
+          # Purify text files.
+          #
+          # This hook combines the EOF-fixer and trailing-whitespace fixers,
+          # and converts line endings to UNIX new-line characters.
+          #
+          # When running against an empty file, it does not append the new-line
+          # character to EOF.
+          - id: purify-text
+            entry: ./tools/lint/purify_text.py
+            language: python
+            name: purify-text
+            types: [text]
+            stages:
+                - commit
diff --git a/tools/lint/purify_text.py b/tools/lint/purify_text.py
@@ -0,0 +1,98 @@
+#!/usr/bin/env python3
+
+r'''Purify text files.
+
+
+1.  In each line, remove trailing whitespace characters.
+    Whitespace characters are:
+            \t \v \n \r space
+
+2.  In each file, remove leading and trailing empty lines, i.e., lines that
+    consist only of whitespace characters.
+
+
+3.  Add a UNIX new-line character \n to EOF if the EOF does not have one
+    already.  The only exception to this rule is when the file is actually
+    empty.
+
+
+4.  Convert to UNIX line ending characters.
+    This means to convert \r\n to \n.
+'''
+
+import argparse
+import re
+
+
+
+# Matches a line that is empty or consists solely of whitespace characters.
+__EMPTYLINE_REGEX__ = re.compile(r'^\s*$')
+
+
+def fix_lines(lines):
+    '''Purify a list of lines.
+
+    Args:
+        line: The list of lines to process.
+
+    Returns:
+        The purified lines, as a list of strings.
+    '''
+    retval = []
+
+    # Fix each line.
+    for line in lines:
+        line = fix_line(line)
+        retval.append(line)
+
+    # Remove leading and trailing empty lines.
+    while retval and __EMPTYLINE_REGEX__.match(retval[0]):
+        retval.pop(0)
+    while retval and __EMPTYLINE_REGEX__.match(retval[-1]):
+        retval.pop(-1)
+
+    return retval
+
+
+def fix_line(line):
+    '''Purify one line.
+
+    Args:
+        line: String. The line to process.
+
+    Returns:
+        The purified line.
+    '''
+    return line.rstrip() + '\n'
+
+
+def purify_text_files(filelist):
+    '''Purify text files in-place.
+
+    The detailed specs of the purification are described in the module
+    docstring.
+
+    Args:
+        filelist: A list of file paths.
+    '''
+    for path in filelist:
+        with open(path, 'r') as f:
+            old_lines = f.readlines()
+        new_lines = fix_lines(old_lines)
+        if old_lines != new_lines:
+            with open(path, 'w') as f:
+                for line in new_lines:
+                    f.write(line)
+            print('Fixing', path)
+
+
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        'files', nargs=argparse.REMAINDER, help='Paths to the files to format.'
+    )
+    args = parser.parse_args()
+    purify_text_files(args.files)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/tools/lint/purify_text_test.py b/tools/lint/purify_text_test.py
@@ -0,0 +1,51 @@
+#!/usr/bin/env python3
+
+# pylint: disable=missing-docstring
+import unittest
+
+import purify_text  # pylint: disable=import-error
+
+
+class TestPurifyTest(unittest.TestCase):
+
+    def test_fix_lines(self):
+        data = [
+            # 2-tuples of (expected processed lines, original lines)
+
+            # empty and nearly empty files
+            ([], []),
+            ([], ['\n']),
+            ([], ['\r']),
+            ([], ['\t']),
+            ([], ['\v']),
+            ([], [' \n']),
+            ([], [' \t']),
+            ([], [' \v']),
+            ([], ['\v\t']),
+            ([], [' \v\t']),
+
+            # one-line files
+            (['foo\n'], ['foo\n']),
+            (['  foo\n'], ['  foo\n']),
+            (['\tfoo\n'], ['\tfoo\n']),
+            (['\vfoo\n'], ['\vfoo\n']),
+
+            # trailing empty lines
+            (['foo\n'], ['foo\n', '\n']),
+            (['foo\n'], ['foo\n', ' \n']),
+            (['foo\n'], ['foo\n', '\t\n']),
+            (['foo\n'], ['foo\n', '\v\n']),
+
+            # leading empty lines
+            (['foo\n'], ['\n', 'foo\n']),
+            (['foo\n'], [' \n', 'foo\n']),
+            (['foo\n'], ['\t\n', 'foo\n']),
+            (['foo\n'], ['\v\n', 'foo\n']),
+        ]
+        for expected, lines in data:
+            actual = purify_text.fix_lines(lines)
+            self.assertEqual(expected, actual)
+
+
+if __name__ == '__main__':
+    unittest.main()