Skip to content

Commit

Permalink
checks.whitespace: add initial check for bad whitespace character usage
Browse files Browse the repository at this point in the history
  • Loading branch information
radhermit committed Dec 3, 2019
1 parent a821ba7 commit 81f97bf
Show file tree
Hide file tree
Showing 5 changed files with 94 additions and 1 deletion.
48 changes: 47 additions & 1 deletion src/pkgcheck/checks/whitespace.py
@@ -1,3 +1,7 @@
import re
import sys
from collections import namedtuple

from snakeoil.demandload import demand_compile_regexp
from snakeoil.strings import pluralism as _pl

Expand Down Expand Up @@ -63,15 +67,52 @@ class NoFinalNewline(results.VersionResult, results.Warning):
desc = "ebuild lacks an ending newline"


class BadWhitespaceCharacter(results.LineResult, results.Error):
    # NOTE(review): the docstring is not a raw string, so the escapes inside it
    # are interpreted and ``__doc__`` holds a literal tab and newline.
    """Ebuild uses whitespace that isn't one of '\t', '\n', or ' '."""

    def __init__(self, char, **kwargs):
        """Store the offending character alongside the inherited result data.

        Args:
            char: the bad whitespace character as it should appear in the
                report (the check in this file passes its ``repr()``).
            **kwargs: forwarded unchanged to the parent result classes.
        """
        super().__init__(**kwargs)
        self.char = char

    @property
    def desc(self):
        """Human-readable description naming the character, line number and line."""
        message = f"bad whitespace character {self.char} on line {self.lineno}: {self.line}"
        return message

    @staticmethod
    def bad_whitespace_chars():
        """Generate tuple of bad whitespace characters.

        Scans every code point up to ``sys.maxunicode`` with the regex class
        ``\\s`` and drops the three permitted characters (tab, newline, space).

        Returns:
            A sorted tuple of single-character strings.
        """
        permitted = {'\t', '\n', ' '}
        # One string holding every code point the interpreter can represent.
        every_codepoint = ''.join(map(chr, range(sys.maxunicode + 1)))
        disallowed = {
            found for found in re.findall(r'\s', every_codepoint)
            if found not in permitted
        }
        return tuple(sorted(disallowed))


# Pairs the hard-coded list of disallowed whitespace characters with the
# Unicode database version it was recorded for, so a mismatch with the running
# interpreter's ``unicodedata.unidata_version`` can be detected.
WhitespaceData = namedtuple('WhitespaceData', ['unicode_version', 'chars'])
whitespace_data = WhitespaceData(
    unicode_version='12.1.0',
    chars=(
        # ASCII / Latin-1 control characters and no-break space
        '\x0b', '\x0c', '\r',
        '\x1c', '\x1d', '\x1e', '\x1f',
        '\x85', '\xa0',
        # Ogham space mark
        '\u1680',
        # General Punctuation space characters
        '\u2000', '\u2001', '\u2002', '\u2003', '\u2004', '\u2005',
        '\u2006', '\u2007', '\u2008', '\u2009', '\u200a',
        # line/paragraph separators and remaining space characters
        '\u2028', '\u2029', '\u202f', '\u205f', '\u3000',
    ),
)


class WhitespaceCheck(Check):
"""Scan ebuild for useless whitespace."""

_source = sources.EbuildFileRepoSource
known_results = frozenset([
WhitespaceFound, WrongIndentFound, DoubleEmptyLine,
TrailingEmptyLine, NoFinalNewline,
TrailingEmptyLine, NoFinalNewline, BadWhitespaceCharacter
])

def __init__(self, *args):
    """Initialise the check and precompile the bad-whitespace matcher.

    Args:
        *args: forwarded unchanged to the parent class initialiser.
    """
    super().__init__(*args)
    # Fold every disallowed character into a single regex character class;
    # the named group lets callers retrieve the matched character.
    char_class = ''.join(whitespace_data.chars)
    pattern = rf'(?P<char>[{char_class}])'
    self.bad_whitespace_regex = re.compile(pattern)

def feed(self, pkg):
lastlineempty = False
trailing = []
Expand All @@ -80,6 +121,11 @@ def feed(self, pkg):
double_empty = []

for lineno, line in enumerate(pkg.lines, 1):
if line[0] != '#':
for match in self.bad_whitespace_regex.finditer(line):
yield BadWhitespaceCharacter(
repr(match.group('char')), line=repr(line), lineno=lineno, pkg=pkg)

if line != '\n':
lastlineempty = False
if line[-2:-1] == ' ' or line[-2:-1] == '\t':
Expand Down
@@ -0,0 +1,2 @@
WhitespaceCheck/BadWhitespaceCharacter
BadWhitespaceCharacter: version 0: bad whitespace character '\xa0' on line 8: '\tcd "${S}"/cpp ||\xa0die\n'
@@ -0,0 +1,11 @@
diff -Naur standalone/WhitespaceCheck/BadWhitespaceCharacter/BadWhitespaceCharacter-0.ebuild fixed/WhitespaceCheck/BadWhitespaceCharacter/BadWhitespaceCharacter-0.ebuild
--- standalone/WhitespaceCheck/BadWhitespaceCharacter/BadWhitespaceCharacter-0.ebuild 2019-12-02 18:58:31.312559463 -0700
+++ fixed/WhitespaceCheck/BadWhitespaceCharacter/BadWhitespaceCharacter-0.ebuild 2019-12-02 18:59:44.859861442 -0700
@@ -5,6 +5,6 @@
LICENSE="BSD"

src_test() {
- cd "${S}"/cpp || die
+ cd "${S}"/cpp || die
default
}
24 changes: 24 additions & 0 deletions tests/module/checks/test_whitespace.py
@@ -1,3 +1,5 @@
import unicodedata

from pkgcheck.checks import whitespace

from .. import misc
Expand Down Expand Up @@ -101,6 +103,28 @@ def test_it(self):
assert 'trailing blank line(s)' in str(r)


class TestBadWhitespaceCharacter(WhitespaceCheckTest):
    """Tests for reporting of disallowed whitespace characters."""

    def test_outdated_bad_whitespace_chars(self):
        """Check the hard-coded character list against a freshly generated one.

        The comparison only runs when the interpreter's Unicode database
        version differs from the version recorded with the list.
        """
        recorded = whitespace.whitespace_data
        if unicodedata.unidata_version == recorded.unicode_version:
            return
        generated = whitespace.BadWhitespaceCharacter.bad_whitespace_chars()
        assert generated == recorded.chars, \
            f'outdated character list for Unicode version {unicodedata.unidata_version}'

    def test_bad_whitespace_chars(self):
        """Each listed character must yield a BadWhitespaceCharacter result on line 2."""
        for char in whitespace.whitespace_data.chars:
            # The bad character sits between `||` and `die` on the second line.
            offending_line = f'\tcd "${{S}}"/cpp ||{char}die\n'
            fake_pkg = misc.FakePkg(
                "dev-util/diffball-0.5",
                lines=['src_prepare() {\n', offending_line, '}\n'])

            report = self.assertReport(self.check, fake_pkg)
            assert isinstance(report, whitespace.BadWhitespaceCharacter)
            expected = f'bad whitespace character {repr(char)} on line 2'
            assert expected in str(report)


class TestMultipleChecks(WhitespaceCheckTest):

def test_it(self):
Expand Down
@@ -0,0 +1,10 @@
EAPI=7
DESCRIPTION="Ebuild uses bad whitespace character"
HOMEPAGE="https://github.com/pkgcore/pkgcheck"
SLOT="0"
LICENSE="BSD"

src_test() {
cd "${S}"/cpp || die
default
}

0 comments on commit 81f97bf

Please sign in to comment.