diff --git a/polymath_code_standard/checkers/copyright.py b/polymath_code_standard/checkers/copyright.py
index 563365a..6a67e39 100644
--- a/polymath_code_standard/checkers/copyright.py
+++ b/polymath_code_standard/checkers/copyright.py
@@ -8,6 +8,7 @@
 from pathlib import Path
 
 from polymath_code_standard.checker import CheckerGroup, Result, check_group, filter_files
+from polymath_code_standard.insert_license import COPYRIGHT_ORG_SENTINEL
 from polymath_code_standard.licenses import PROPRIETARY, get_license_full_text, get_license_header
 
 
@@ -24,12 +25,23 @@ def register_args(self, subparser: argparse.ArgumentParser) -> None:
             metavar='SPDX_ID',
             help="SPDX license ID (e.g. MIT, Apache-2.0) or 'proprietary'",
         )
-        subparser.add_argument(
+        org_group = subparser.add_mutually_exclusive_group(required=True)
+        org_group.add_argument(
             '--copyright-org',
-            required=True,
+            default=None,
             metavar='ORG',
             help='Organization name for the copyright line',
         )
+        org_group.add_argument(
+            '--wildcard-copyright-org',
+            action='store_true',
+            dest='wildcard_copyright_org',
+            help=(
+                'Accept any copyright holder on the copyright line (for multi-contributor repos). '
+                f'New headers are inserted with the sentinel "{COPYRIGHT_ORG_SENTINEL}" '
+                "which fails the check until replaced with the contributor's organization."
+            ),
+        )
         subparser.add_argument(
             '--copyright-year',
             default=str(datetime.date.today().year),
@@ -48,8 +60,9 @@ def register_args(self, subparser: argparse.ArgumentParser) -> None:
         )
 
     def run(self, args: argparse.Namespace) -> list[Result]:
+        insert_org = COPYRIGHT_ORG_SENTINEL if args.wildcard_copyright_org else args.copyright_org
         header_text = get_license_header(
-            args.license_id, args.copyright_year, args.copyright_org, reuse_style_header=args.reuse_style
+            args.license_id, args.copyright_year, insert_org, reuse_style_header=args.reuse_style
         )
         py_cmake_shell = filter_files(args.files, frozenset({'python', 'cmake', 'shell'}))
         cpp = filter_files(args.files, frozenset({'c', 'c++'}))
@@ -60,6 +73,7 @@ def run(self, args: argparse.Namespace) -> list[Result]:
             for f in cpp:
                 self._strip_leading_comment_block(f, '//')
 
+        wildcard_flag = ['--wildcard-copyright-org'] if args.wildcard_copyright_org else []
         fd, license_filepath = tempfile.mkstemp(suffix='.txt', prefix='polymath_license_')
         try:
             os.write(fd, header_text.encode('utf-8'))
@@ -74,13 +88,15 @@ def run(self, args: argparse.Namespace) -> list[Result]:
                         '#',
                         '--allow-past-years',
                         '--no-extra-eol',
-                    ],
+                    ]
+                    + wildcard_flag,
                     py_cmake_shell,
                     name='copyright (py/cmake/shell)',
                 ),
                 self._check(
                     'polymath_copyright_header',
-                    ['--license-filepath', license_filepath, '--comment-style', '//', '--allow-past-years'],
+                    ['--license-filepath', license_filepath, '--comment-style', '//', '--allow-past-years']
+                    + wildcard_flag,
                     cpp,
                     name='copyright (cpp)',
                 ),
@@ -91,7 +107,7 @@ def run(self, args: argparse.Namespace) -> list[Result]:
             except OSError:
                 pass
 
-        results.append(self._check_license_file(args.license_id, args.copyright_year, args.copyright_org))
+        results.append(self._check_license_file(args.license_id, args.copyright_year, args.copyright_org or ''))
         return results
 
     @staticmethod
diff --git a/polymath_code_standard/insert_license.py b/polymath_code_standard/insert_license.py
index 937c45c..d38a65f 100644
--- a/polymath_code_standard/insert_license.py
+++ b/polymath_code_standard/insert_license.py
@@ -26,6 +26,8 @@
 
 SKIP_LICENSE_INSERTION_COMMENT = 'SKIP LICENSE INSERTION'
 
+COPYRIGHT_ORG_SENTINEL = 'SET_YOUR_ORGANIZATION_HERE'
+
 DEBUG_LEVENSHTEIN_DISTANCE_CALCULATION = False
 
 LicenseInfo = collections.namedtuple(
@@ -89,6 +91,16 @@ def main(argv=None) -> int:
         help="Insert license after line matching regex (ex: '^<\\?php$')",
     )
     parser.add_argument('--remove-header', action='store_true')
+    parser.add_argument(
+        '--wildcard-copyright-org',
+        action='store_true',
+        dest='wildcard_copyright_org',
+        help=(
+            'Accept any copyright holder on the copyright line. '
+            f'New headers are inserted with the sentinel "{COPYRIGHT_ORG_SENTINEL}" '
+            'which fails the check until replaced.'
+        ),
+    )
     parser.add_argument(
         '--use-current-year',
         action='store_true',
@@ -194,6 +206,15 @@ def process_files(
             top_lines_count=args.detect_license_in_X_top_lines,
         ):
             continue
+        if args.wildcard_copyright_org and copyright_sentinel_found(
+            src_file_content, args.detect_license_in_X_top_lines
+        ):
+            print(
+                f'{src_filepath}: copyright org placeholder "{COPYRIGHT_ORG_SENTINEL}" '
+                'not yet replaced — update the copyright line with your organization name'
+            )
+            license_update_failed = True
+            continue
         if fail_license_todo_found(
             src_file_content=src_file_content,
             fuzzy_match_todo_comment=args.fuzzy_match_todo_comment,
@@ -210,6 +231,7 @@ def process_files(
                 license_info=license_info,
                 top_lines_count=args.detect_license_in_X_top_lines,
                 match_years_strictly=not args.allow_past_years,
+                wildcard_copyright_org=args.wildcard_copyright_org,
             )
             if license_header_index is not None:
                 break
@@ -421,22 +443,24 @@ def _strip_years(line):
     return _YEARS_PATTERN.sub('', line)
 
 
-def _license_line_matches(license_line, src_file_line, match_years_strictly):
+def _license_line_matches(license_line, src_file_line, match_years_strictly, wildcard_copyright_org=False):
     license_line = license_line.strip()
     src_file_line = src_file_line.strip()
+    if wildcard_copyright_org and _is_copyright_line(license_line):
+        return _is_copyright_line(src_file_line)
     if match_years_strictly:
         return license_line == src_file_line
     return _strip_years(license_line) == _strip_years(src_file_line)
 
 
 def find_license_header_index(
-    src_file_content, license_info: LicenseInfo, top_lines_count, match_years_strictly
+    src_file_content, license_info: LicenseInfo, top_lines_count, match_years_strictly, wildcard_copyright_org=False
 ) -> int | None:
     for i in range(top_lines_count):
         license_match = True
         for j, license_line in enumerate(license_info.prefixed_license):
             if i + j >= len(src_file_content) or not _license_line_matches(
-                license_line, src_file_content[i + j], match_years_strictly
+                license_line, src_file_content[i + j], match_years_strictly, wildcard_copyright_org
             ):
                 license_match = False
                 break
@@ -445,6 +469,23 @@ def find_license_header_index(
     return None
 
 
+def _is_copyright_line(stripped_line: str) -> bool:
+    """Return True if a stripped line is a copyright attribution (any comment style).
+
+    Recognizes both classic ``Copyright ...`` lines and REUSE-style
+    ``SPDX-FileCopyrightText: ...`` lines, after dropping leading comment
+    punctuation such as ``#``, ``//``, ``*``, ``;``, ``--`` or ``<!--``.
+    """
+    text = stripped_line.lstrip('/#*;-!<% \t').lower()
+    return text.startswith(('copyright', 'spdx-filecopyrighttext'))
+
+
+def copyright_sentinel_found(src_file_content, top_lines_count):
+    """Return True if the org placeholder appears in the first ``top_lines_count`` lines."""
+    scan_limit = min(top_lines_count, len(src_file_content))
+    return any(COPYRIGHT_ORG_SENTINEL in src_file_content[i] for i in range(scan_limit))
+
+
 def skip_license_insert_found(src_file_content, skip_license_insertion_comment, top_lines_count):
     for i in range(top_lines_count):
         if i < len(src_file_content) and skip_license_insertion_comment in src_file_content[i]:
diff --git a/tests/test_insert_license_wildcard.py b/tests/test_insert_license_wildcard.py
new file mode 100644
index 0000000..f756773
--- /dev/null
+++ b/tests/test_insert_license_wildcard.py
@@ -0,0 +1,262 @@
+# SPDX-FileCopyrightText: 2026 Polymath Robotics, Inc.
+# SPDX-License-Identifier: Apache-2.0
+"""Tests for wildcard copyright org matching and sentinel insertion in insert_license."""
+
+from polymath_code_standard.insert_license import (
+    COPYRIGHT_ORG_SENTINEL,
+    LicenseInfo,
+    _is_copyright_line,
+    _license_line_matches,
+    copyright_sentinel_found,
+    find_license_header_index,
+    main,
+)
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+_APACHE_PREFIXED = [
+    '# Copyright 2026 Polymath Robotics, Inc.\n',
+    '#\n',
+    '# Licensed under the Apache License, Version 2.0 (the "License");\n',
+    '# you may not use this file except in compliance with the License.\n',
+    '# You may obtain a copy of the License at\n',
+    '#\n',
+    '# http://www.apache.org/licenses/LICENSE-2.0\n',
+    '#\n',
+    '# Unless required by applicable law or agreed to in writing, software\n',
+    '# distributed under the License is distributed on an "AS IS" BASIS,\n',
+    '# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n',
+    '# See the License for the specific language governing permissions and\n',
+    '# limitations under the License.\n',
+]
+
+_LICENSE_INFO = LicenseInfo(
+    prefixed_license=_APACHE_PREFIXED,
+    plain_license=[line.lstrip('# ') for line in _APACHE_PREFIXED],
+    eol='',
+    comment_start=None,
+    comment_prefix='#',
+    comment_end=None,
+    num_extra_lines=0,
+)
+
+
+def _file_with_header(org: str) -> list[str]:
+    lines = list(_APACHE_PREFIXED)
+    lines[0] = f'# Copyright 2024 {org}\n'
+    return lines + ['import foo\n']
+
+
+# ---------------------------------------------------------------------------
+# _is_copyright_line
+# ---------------------------------------------------------------------------
+
+
+class TestIsCopyrightLine:
+    def test_hash_style(self):
+        assert _is_copyright_line('# Copyright 2026 Acme Corp')
+
+    def test_slash_style(self):
+        assert _is_copyright_line('// Copyright 2026 Acme Corp')
+
+    def test_plain(self):
+        assert _is_copyright_line('Copyright 2026 Acme Corp')
+
+    def test_non_copyright(self):
+        assert not _is_copyright_line('# Licensed under the Apache License')
+
+    def test_blank_comment(self):
+        assert not _is_copyright_line('#')
+
+    def test_reuse_spdx_style(self):
+        assert _is_copyright_line('# SPDX-FileCopyrightText: 2026 Acme Corp')
+
+    def test_spdx_license_identifier_is_not_copyright(self):
+        assert not _is_copyright_line('# SPDX-License-Identifier: Apache-2.0')
+
+    def test_block_comment_style(self):
+        assert _is_copyright_line('* Copyright 2026 Acme Corp')
+
+
+# ---------------------------------------------------------------------------
+# _license_line_matches — wildcard_copyright_org
+# ---------------------------------------------------------------------------
+
+
+class TestLicenseLineMatchesWildcard:
+    def test_different_org_matches_with_wildcard(self):
+        assert _license_line_matches(
+            '# Copyright 2026 Polymath Robotics, Inc.',
+            '# Copyright 2023 Some Other Corp.',
+            match_years_strictly=False,
+            wildcard_copyright_org=True,
+        )
+
+    def test_different_org_fails_without_wildcard(self):
+        assert not _license_line_matches(
+            '# Copyright 2026 Polymath Robotics, Inc.',
+            '# Copyright 2026 Some Other Corp.',
+            match_years_strictly=False,
+            wildcard_copyright_org=False,
+        )
+
+    def test_non_copyright_line_still_requires_exact_match(self):
+        assert not _license_line_matches(
+            '# Licensed under the Apache License, Version 2.0',
+            '# Licensed under the MIT License',
+            match_years_strictly=False,
+            wildcard_copyright_org=True,
+        )
+
+    def test_non_copyright_line_matches_exactly(self):
+        assert _license_line_matches(
+            '# Licensed under the Apache License, Version 2.0',
+            '# Licensed under the Apache License, Version 2.0',
+            match_years_strictly=False,
+            wildcard_copyright_org=True,
+        )
+
+    def test_reuse_style_different_org_matches_with_wildcard(self):
+        assert _license_line_matches(
+            '# SPDX-FileCopyrightText: 2026 Polymath Robotics, Inc.',
+            '# SPDX-FileCopyrightText: 2023 Some Other Corp.',
+            match_years_strictly=False,
+            wildcard_copyright_org=True,
+        )
+
+
+# ---------------------------------------------------------------------------
+# copyright_sentinel_found
+# ---------------------------------------------------------------------------
+
+
+class TestCopyrightSentinelFound:
+    def test_detects_sentinel(self):
+        content = [f'# Copyright 2026 {COPYRIGHT_ORG_SENTINEL}\n', '#\n', 'import foo\n']
+        assert copyright_sentinel_found(content, top_lines_count=5)
+
+    def test_no_sentinel(self):
+        content = ['# Copyright 2026 Acme Corp\n', '#\n', 'import foo\n']
+        assert not copyright_sentinel_found(content, top_lines_count=5)
+
+    def test_sentinel_beyond_top_lines_not_detected(self):
+        content = ['import foo\n', 'import bar\n', f'# {COPYRIGHT_ORG_SENTINEL}\n']
+        assert not copyright_sentinel_found(content, top_lines_count=2)
+
+
+# ---------------------------------------------------------------------------
+# find_license_header_index — wildcard matching
+# ---------------------------------------------------------------------------
+
+
+class TestFindLicenseHeaderIndexWildcard:
+    def test_matches_different_org_with_wildcard(self):
+        content = _file_with_header('Contributor Corp.')
+        idx = find_license_header_index(
+            content, _LICENSE_INFO, top_lines_count=5, match_years_strictly=False, wildcard_copyright_org=True
+        )
+        assert idx == 0
+
+    def test_rejects_different_org_without_wildcard(self):
+        content = _file_with_header('Contributor Corp.')
+        idx = find_license_header_index(
+            content, _LICENSE_INFO, top_lines_count=5, match_years_strictly=False, wildcard_copyright_org=False
+        )
+        assert idx is None
+
+    def test_matches_same_org_with_wildcard(self):
+        content = _file_with_header('Polymath Robotics, Inc.')
+        idx = find_license_header_index(
+            content, _LICENSE_INFO, top_lines_count=5, match_years_strictly=False, wildcard_copyright_org=True
+        )
+        assert idx == 0
+
+    def test_still_requires_correct_license_boilerplate(self):
+        content = ['# Copyright 2026 Acme Corp\n', '# MIT License\n', 'import foo\n']
+        idx = find_license_header_index(
+            content, _LICENSE_INFO, top_lines_count=5, match_years_strictly=False, wildcard_copyright_org=True
+        )
+        assert idx is None
+
+
+# ---------------------------------------------------------------------------
+# End-to-end: main() with wildcard flag
+# ---------------------------------------------------------------------------
+
+
+class TestMainWildcard:
+    def _write(self, tmp_path, name, content):
+        p = tmp_path / name
+        p.write_text(content, encoding='utf-8')
+        return p
+
+    def _license_file(self, tmp_path, org=COPYRIGHT_ORG_SENTINEL):
+        lines = list(_APACHE_PREFIXED)
+        lines[0] = f'# Copyright 2026 {org}\n'
+        p = tmp_path / 'license.txt'
+        p.write_text(''.join(line.lstrip('# ') for line in lines), encoding='utf-8')
+        return str(p)
+
+    def test_sentinel_in_file_fails(self, tmp_path):
+        src = self._write(tmp_path, 'f.py', f'# Copyright 2026 {COPYRIGHT_ORG_SENTINEL}\n#\nimport foo\n')
+        lf = self._license_file(tmp_path)
+        ret = main([
+            '--license-filepath',
+            lf,
+            '--comment-style',
+            '#',
+            '--allow-past-years',
+            '--no-extra-eol',
+            '--wildcard-copyright-org',
+            str(src),
+        ])
+        assert ret == 1
+
+    def test_any_org_passes_with_wildcard(self, tmp_path):
+        src = self._write(tmp_path, 'f.py', ''.join(_file_with_header('Contributor Corp.')))
+        lf = self._license_file(tmp_path, org='Polymath Robotics, Inc.')
+        ret = main([
+            '--license-filepath',
+            lf,
+            '--comment-style',
+            '#',
+            '--allow-past-years',
+            '--no-extra-eol',
+            '--wildcard-copyright-org',
+            str(src),
+        ])
+        assert ret == 0
+
+    def test_no_header_inserts_sentinel(self, tmp_path):
+        src = self._write(tmp_path, 'f.py', 'import foo\n')
+        lf = self._license_file(tmp_path)
+        ret = main([
+            '--license-filepath',
+            lf,
+            '--comment-style',
+            '#',
+            '--allow-past-years',
+            '--no-extra-eol',
+            '--wildcard-copyright-org',
+            str(src),
+        ])
+        assert ret == 1
+        assert COPYRIGHT_ORG_SENTINEL in src.read_text(encoding='utf-8')
+
+    def test_without_wildcard_rejects_different_org(self, tmp_path):
+        src = self._write(tmp_path, 'f.py', ''.join(_file_with_header('Contributor Corp.')))
+        lf = self._license_file(tmp_path, org='Polymath Robotics, Inc.')
+        original = src.read_text(encoding='utf-8')
+        ret = main([
+            '--license-filepath',
+            lf,
+            '--comment-style',
+            '#',
+            '--allow-past-years',
+            '--no-extra-eol',
+            str(src),
+        ])
+        assert ret == 1
+        assert src.read_text(encoding='utf-8') != original