Skip to content

Commit

Permalink
make license check more-strict
Browse files Browse the repository at this point in the history
The license text is now expected to match almost exactly (not
accounting for formatting differences between file types, e.g. rst vs.
bash script vs. python).
  • Loading branch information
scheibelp committed May 9, 2019
1 parent 7e94774 commit 53ec16c
Show file tree
Hide file tree
Showing 2 changed files with 93 additions and 39 deletions.
130 changes: 92 additions & 38 deletions lib/spack/spack/cmd/license.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

import os
import re
from collections import defaultdict

import llnl.util.tty as tty

Expand Down Expand Up @@ -89,50 +90,103 @@ def list_files(args):
print(os.path.join(spack.paths.spack_root, relpath))


# Error codes reported by license verification. Each code is a nonzero
# integer, so bool(code) is True for every one of them.
OLD_LICENSE = 1
SPDX_MISMATCH = 2
GENERAL_MISMATCH = 3


class LicenseError(object):
    """Tally of license-verification failures, keyed by error code."""

    def __init__(self):
        # Maps an error code to the number of times it has been recorded.
        self.error_counts = defaultdict(int)

    def add_error(self, error):
        """Record one more occurrence of the error code ``error``."""
        self.error_counts[error] = self.error_counts[error] + 1

    def has_errors(self):
        """Return True when the recorded counts add up to more than zero."""
        return 0 < sum(self.error_counts.values())

    def error_messages(self):
        """Return a 4-tuple of summary strings.

        The first entry is the overall total; the remaining three report
        the SPDX-mismatch, old-license and general-mismatch counts, in
        that order.
        """
        counts = self.error_counts
        total = sum(counts.values())
        n_general = counts[GENERAL_MISMATCH]
        n_spdx = counts[SPDX_MISMATCH]
        n_old = counts[OLD_LICENSE]

        headline = '%d improperly licensed files' % (total)
        spdx_line = 'files with wrong SPDX-License-Identifier: %d' % n_spdx
        old_line = 'files with old license header: %d' % n_old
        general_line = 'files not containing expected license: %d' % n_general
        return (headline, spdx_line, old_line, general_line)


def _check_license(lines, path):
    """Check that ``lines`` carry the expected Spack license header.

    Before comparison, leading whitespace, ``#`` and ``.`` characters and
    trailing whitespace are stripped from each line, so the same header
    text is recognized regardless of how the file comments it out.

    Args:
        lines: list of text lines taken from the top of the file (it is
            iterated twice, so it must not be a one-shot iterator)
        path: path of the file being checked; used only in messages

    Returns:
        ``None`` if every expected header line occurs exactly once and in
        order. Otherwise ``OLD_LICENSE`` if a line contains the old LGPL
        wording, ``SPDX_MISMATCH`` if a line has an SPDX identifier other
        than ``apache2_mit_spdx``, and ``GENERAL_MISMATCH`` in all other
        cases. A message naming ``path`` is printed for each failure.
    """
    license_lines = [
        r'Copyright 2013-(?:201[789]|202\d) Lawrence Livermore National Security, LLC and other',  # noqa: E501
        r'Spack Project Developers\. See the top-level COPYRIGHT file for details\.',  # noqa: E501
        r'SPDX-License-Identifier: \(Apache-2\.0 OR MIT\)'
    ]

    strict_date = r'Copyright 2013-2019'

    # Indices into license_lines, in the order the lines were encountered.
    found = []

    for line in lines:
        # Drop comment/markup prefixes and trailing whitespace.
        line = re.sub(r'^[\s#\.]*', '', line)
        line = line.rstrip()
        for i, license_line in enumerate(license_lines):
            if re.match(license_line, line):
                # The first line of the license contains the copyright date.
                # We allow it to be out of date but print a warning if it is
                # out of date.
                if i == 0:
                    if not re.search(strict_date, line):
                        tty.debug('{0}: copyright date mismatch'.format(path))
                found.append(i)

    # Require each expected line exactly once, in order. Comparing only the
    # length and sortedness would also accept e.g. [0, 0, 0], where one
    # header line is repeated and the others are missing.
    if found == list(range(len(license_lines))):
        return None

    def old_license(line, path):
        if re.search('This program is free software', line):
            print('{0}: has old LGPL license header'.format(path))
            return OLD_LICENSE

    # If the SPDX identifier is present, then there is a mismatch (since it
    # did not match the above regex)
    def wrong_spdx_identifier(line, path):
        m = re.search(r'SPDX-License-Identifier: ([^\n]*)', line)
        if m and m.group(1) != apache2_mit_spdx:
            print('{0}: SPDX license identifier mismatch '
                  '(expecting {1}, found {2})'
                  .format(path, apache2_mit_spdx, m.group(1)))
            return SPDX_MISMATCH

    checks = [old_license, wrong_spdx_identifier]

    # Report the first specific problem found; checks run per line, in the
    # order listed above.
    for line in lines:
        for check in checks:
            error = check(line, path)
            if error:
                return error

    print('{0}: the license does not match the expected format'.format(path))
    return GENERAL_MISMATCH


def verify(args):
"""verify that files in spack have the right license header"""
# NOTE(review): this span appears to interleave the pre-commit and
# post-commit bodies of verify() (the diff's +/- markers are not visible
# here): `lines` is assigned twice and two separate counting/reporting
# schemes are present. Confirm against the commit before treating this
# text as runnable code.

# Integer counters, one per failure category; incremented in the loop and
# reported through the first tty.die call below.
errors = 0
missing = 0
old_license = 0

# Collects the error codes returned by _check_license.
license_errors = LicenseError()

for relpath in _licensed_files(args.root):
path = os.path.join(args.root, relpath)
with open(path) as f:
lines = [line for line in f]

# Paths matching a pattern in lgpl_exceptions skip the old-LGPL-header
# check (nesting inferred; indentation is not visible here).
if not any(re.match(regex, relpath) for regex in lgpl_exceptions):
if any(re.match(r'^# This program is free software', line)
for line in lines):
print('%s: has old LGPL license header' % path)
old_license += 1
continue

# how we'll find licenses in files
spdx_expr = r'SPDX-License-Identifier: ([^\n]*)'

# check first <license_lines> lines for required header
first_n_lines = ''.join(lines[:license_lines])
match = re.search(spdx_expr, first_n_lines)

if not match:
print('%s: no license header' % path)
missing += 1
continue

# Compare the captured identifier with the expected apache2_mit_spdx value.
correct = apache2_mit_spdx
actual = match.group(1)
if actual != correct:
print("%s: labeled as '%s', but should be '%s'"
% (path, actual, correct))
errors += 1
continue

if any([errors, missing, old_license]):
tty.die(
'%d improperly licensed files' % (errors + missing + old_license),
'files with no SPDX-License-Identifier: %d' % missing,
'files with wrong SPDX-License-Identifier: %d' % errors,
'files with old license header: %d' % old_license)
# Keep only the first license_lines lines of the file for checking.
lines = [line for line in f][:license_lines]

# _check_license returns a truthy error code on failure; only those are
# tallied.
error = _check_license(lines, path)
if error:
license_errors.add_error(error)

# Pass the summary strings to tty.die if anything was recorded; otherwise
# print a success message.
if license_errors.has_errors():
tty.die(*license_errors.error_messages())
else:
tty.msg('No license issues found.')

Expand Down
2 changes: 1 addition & 1 deletion lib/spack/spack/test/cmd/license.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ def test_verify(tmpdir):
assert str(old_lgpl_header) in out
assert str(correct_header) not in out
assert '3 improperly licensed files' in out
assert re.search(r'files with no SPDX-License-Identifier:\s*1', out)
assert re.search(r'files not containing expected license:\s*1', out)
assert re.search(r'files with wrong SPDX-License-Identifier:\s*1', out)
assert re.search(r'files with old license header:\s*1', out)

Expand Down

0 comments on commit 53ec16c

Please sign in to comment.