Skip to content

Commit

Permalink
Add unknown license reference to package dereferencing #2965 #1379
Browse files Browse the repository at this point in the history
Some unknown license statements, such as "This file is the same
license as the package django", refer to the package that the
file is a part of. This is fixed by extending the dereferencing logic
to look up the packages that the file belongs to and use the licenses
detected for those packages.

Signed-off-by: Ayan Sinha Mahapatra <ayansmahapatra@gmail.com>
  • Loading branch information
AyanSinhaMahapatra committed Oct 13, 2022
1 parent 02ab56c commit bfcc3ca
Show file tree
Hide file tree
Showing 67 changed files with 14,748 additions and 222 deletions.
2 changes: 1 addition & 1 deletion src/licensedcode/data/rules/apache-2.0_306.RULE
@@ -1 +1 @@
This file is distributed under the same license as the Puppet automation framework package.
This file is distributed under the same license as the {{Puppet}} {{automation}} framework package.
2 changes: 2 additions & 0 deletions src/licensedcode/data/rules/apache-2.0_306.yml
@@ -1,3 +1,5 @@
license_expression: apache-2.0
is_license_notice: yes
relevance: 100
referenced_filenames:
- package
2 changes: 1 addition & 1 deletion src/licensedcode/data/rules/bsd-new_1043.RULE
@@ -1,2 +1,2 @@
This file is distributed under the same license as the
debian files of the p11-kit package.
{{debian}} files of the {{p11-kit}} package.
2 changes: 2 additions & 0 deletions src/licensedcode/data/rules/bsd-new_1043.yml
Expand Up @@ -2,3 +2,5 @@ license_expression: bsd-new
is_license_notice: yes
relevance: 100
minimum_coverage: 100
referenced_filenames:
- package
4 changes: 2 additions & 2 deletions src/licensedcode/data/rules/bsd-new_1044.RULE
@@ -1,2 +1,2 @@
License: same-as-rest-of-p11kit
This file is distributed under the same license as the p11-kit package.
License: {{same-as-rest-of-p11kit}}
This file is distributed under the same license as the {{p11-kit}} package.
3 changes: 3 additions & 0 deletions src/licensedcode/data/rules/bsd-new_1044.yml
@@ -1,3 +1,6 @@
license_expression: bsd-new
is_license_notice: yes
minimum_coverage: 100
referenced_filenames:
- package

1 change: 1 addition & 0 deletions src/licensedcode/data/rules/free-unknown-package_1.RULE
@@ -0,0 +1 @@
This file is distributed under the same license as the package.
7 changes: 7 additions & 0 deletions src/licensedcode/data/rules/free-unknown-package_1.yml
@@ -0,0 +1,7 @@
license_expression: free-unknown
is_license_reference: yes
is_continuous: yes
relevance: 100
minimum_coverage: 100
referenced_filenames:
- package
@@ -1 +1 @@
This file is distributed under the same license as the PACKAGE package
This file is distributed under the same license as the PACKAGE package.
7 changes: 7 additions & 0 deletions src/licensedcode/data/rules/free-unknown-package_2.yml
@@ -0,0 +1,7 @@
license_expression: free-unknown
is_license_reference: yes
is_continuous: yes
relevance: 100
minimum_coverage: 100
referenced_filenames:
- package
1 change: 1 addition & 0 deletions src/licensedcode/data/rules/free-unknown-package_3.RULE
@@ -0,0 +1 @@
This file is distributed under the same license as the DJANGO package.
7 changes: 7 additions & 0 deletions src/licensedcode/data/rules/free-unknown-package_3.yml
@@ -0,0 +1,7 @@
license_expression: free-unknown
is_license_reference: yes
is_continuous: yes
relevance: 100
minimum_coverage: 100
referenced_filenames:
- package
1 change: 0 additions & 1 deletion src/licensedcode/data/rules/free-unknown_79.RULE

This file was deleted.

4 changes: 0 additions & 4 deletions src/licensedcode/data/rules/free-unknown_79.yml

This file was deleted.

4 changes: 0 additions & 4 deletions src/licensedcode/data/rules/free-unknown_80.yml

This file was deleted.

1 change: 1 addition & 0 deletions src/licensedcode/data/rules/gpl-2.0-plus_1070.RULE
@@ -0,0 +1 @@
This file is distributed under the same license as the {{fusiondirectory}}
6 changes: 6 additions & 0 deletions src/licensedcode/data/rules/gpl-2.0-plus_1070.yml
@@ -0,0 +1,6 @@
license_expression: gpl-2.0-plus
is_license_reference: yes
relevance: 80
minimum_coverage: 100
referenced_filenames:
- package
2 changes: 1 addition & 1 deletion src/licensedcode/data/rules/gpl-2.0-plus_31.RULE
@@ -1 +1 @@
# This file is distributed under the same license as the Pychess package.
This file is distributed under the same license as the {{Pychess}} package.
2 changes: 2 additions & 0 deletions src/licensedcode/data/rules/gpl-2.0-plus_31.yml
Expand Up @@ -2,4 +2,6 @@ license_expression: gpl-2.0-plus
is_license_reference: yes
relevance: 80
minimum_coverage: 100
referenced_filenames:
- package
notes: In 2006 Pychess used the GPL 2.0. It became GPL 3.0 in 2011
2 changes: 1 addition & 1 deletion src/licensedcode/data/rules/gpl-2.0-plus_793.RULE
@@ -1,2 +1,2 @@
GNU GPL:
This file is distributed under the same license as the dialog package.
This file is distributed under the same license as the {{dialog}} package.
2 changes: 2 additions & 0 deletions src/licensedcode/data/rules/gpl-2.0-plus_793.yml
Expand Up @@ -2,3 +2,5 @@ license_expression: gpl-2.0-plus
is_license_notice: yes
relevance: 100
minimum_coverage: 100
referenced_filenames:
- package
2 changes: 1 addition & 1 deletion src/licensedcode/data/rules/gpl-2.0_109.RULE
@@ -1 +1 @@
# This file is distributed under the same license as the cpufrequtils package.
This file is distributed under the same license as the {{cpufrequtils}} package.
2 changes: 2 additions & 0 deletions src/licensedcode/data/rules/gpl-2.0_109.yml
Expand Up @@ -2,4 +2,6 @@ license_expression: gpl-2.0
is_license_reference: yes
relevance: 100
minimum_coverage: 100
referenced_filenames:
- package
notes: cpufrequtils is gpl-2.0 only
2 changes: 1 addition & 1 deletion src/licensedcode/data/rules/lgpl-2.1-plus_283.RULE
@@ -1 +1 @@
This file is distributed under the same license as the dialog package.
This file is distributed under the same license as the {{dialog}} package.
2 changes: 2 additions & 0 deletions src/licensedcode/data/rules/lgpl-2.1-plus_283.yml
Expand Up @@ -2,3 +2,5 @@ license_expression: lgpl-2.1-plus
is_license_notice: yes
relevance: 100
minimum_coverage: 100
referenced_filenames:
- package
1 change: 0 additions & 1 deletion src/licensedcode/data/rules/license-intro_6.RULE

This file was deleted.

3 changes: 0 additions & 3 deletions src/licensedcode/data/rules/license-intro_6.yml

This file was deleted.

2 changes: 1 addition & 1 deletion src/licensedcode/data/rules/openssl-ssleay_44.RULE
@@ -1 +1 @@
This file is distributed under the same license as OpenSSL
This file is distributed under the same license as {{OpenSSL}}
2 changes: 2 additions & 0 deletions src/licensedcode/data/rules/openssl-ssleay_44.yml
@@ -1,4 +1,6 @@
license_expression: openssl-ssleay
is_license_notice: yes
relevance: 100
referenced_filenames:
- package
notes: Seen in https://github.com/schmel/openssl-gost/blob/18028d11296f4675276f0814674c5143caaa75e7/engine/gost_grasshopper_mac.c#L3
5 changes: 0 additions & 5 deletions src/licensedcode/data/rules/other-permissive_91.RULE

This file was deleted.

3 changes: 0 additions & 3 deletions src/licensedcode/data/rules/other-permissive_91.yml

This file was deleted.

2 changes: 1 addition & 1 deletion src/licensedcode/data/rules/ruby_or_lgpl-3.0-plus_3.RULE
@@ -1 +1 @@
This file is distributed under the same license as gettext.
This file is distributed under the same license as {{gettext}}
2 changes: 2 additions & 0 deletions src/licensedcode/data/rules/ruby_or_lgpl-3.0-plus_3.yml
Expand Up @@ -2,4 +2,6 @@ license_expression: ruby OR lgpl-3.0-plus
is_license_notice: yes
relevance: 90
minimum_coverage: 100
referenced_filenames:
- package
notes: this is found mostly in Ruby gettext
10 changes: 8 additions & 2 deletions src/licensedcode/detection.py
Expand Up @@ -81,6 +81,7 @@ class DetectionCategory(Enum):
PERFECT_DETECTION = 'perfect-detection'
UNKNOWN_INTRO_BEFORE_DETECTION = 'unknown-intro-before-detection'
UNKNOWN_FILE_REFERENCE_LOCAL = 'unknown-file-reference-local'
UNKNOWN_REFERENCE_IN_FILE_TO_PACKAGE = 'unknown-reference-in-file-to-package'
PACKAGE_UNKNOWN_FILE_REFERENCE_LOCAL = 'package-unknown-file-reference-local'
PACKAGE_ADD_FROM_SIBLING_FILE = 'from-package-sibling-file'
PACKAGE_ADD_FROM_FILE = 'from-package-file'
Expand All @@ -99,6 +100,7 @@ class DetectionRule(Enum):
FALSE_POSITIVE = 'false-positive'
UNKNOWN_REFERENCE_TO_LOCAL_FILE = 'unknown-reference-to-local-file'
UNKNOWN_INTRO_FOLLOWED_BY_MATCH = 'unknown-intro-followed-by-match'
UNKNOWN_REFERENCE_IN_FILE_TO_PACKAGE = 'unknown-reference-in-file-to-package'
CONTAINED_SAME_LICENSE = 'contained-with-same-license'
NOTICE_FOLLOWED_BY_TEXT = 'notice-followed-by-text'
CONTIGUOUS_SAME_LICENSE = 'contiguous-with-same-license'
Expand Down Expand Up @@ -443,6 +445,7 @@ def is_correct_detection(license_matches):
Return True if all the matches in `license_matches` List of LicenseMatch
are correct license detections.
"""
#TODO: Add matches with full match coverage
matchers = (license_match.matcher for license_match in license_matches)
return (
all(matcher in ("1-hash", "1-spdx-id") for matcher in matchers)
Expand Down Expand Up @@ -578,7 +581,6 @@ def is_license_clues(license_matches):
license detection and are mere license clues.
"""
return not is_correct_detection(license_matches) and (
has_unknown_matches(license_matches) or
is_match_coverage_less_than_threshold(
license_matches=license_matches,
threshold=CLUES_MATCH_COVERAGE_THR,
Expand Down Expand Up @@ -718,7 +720,11 @@ def get_detected_license_expression(matches, analysis, post_scan=False):
reasons.append(DetectionRule.UNKNOWN_INTRO_FOLLOWED_BY_MATCH.value)

elif post_scan:
if analysis == DetectionCategory.UNKNOWN_FILE_REFERENCE_LOCAL.value:
if analysis == DetectionCategory.UNKNOWN_REFERENCE_IN_FILE_TO_PACKAGE.value:
matches_for_expression = filter_license_references(matches)
reasons.append(DetectionRule.UNKNOWN_REFERENCE_IN_FILE_TO_PACKAGE.value)

elif analysis == DetectionCategory.UNKNOWN_FILE_REFERENCE_LOCAL.value:
matches_for_expression = filter_license_references(matches)
reasons.append(DetectionRule.UNKNOWN_REFERENCE_TO_LOCAL_FILE.value)

Expand Down
81 changes: 81 additions & 0 deletions src/packagedcode/licensing.py
Expand Up @@ -143,6 +143,87 @@ def add_referenced_license_matches_for_package(resource, codebase, no_licenses):
yield resource


def add_referenced_license_detection_from_package(resource, codebase):
    """
    Yield ``resource`` after updating and saving it in place, when license
    matches from the packages it belongs to were added to at least one of its
    license detections. Yield nothing when the resource is not a file, has no
    license detections, or no detection was modified.

    A detection is updated when one of the ``referenced_filenames`` collected
    from its matches contains the string "package". The matches of every
    license detection of each package listed in ``resource.for_packages`` are
    then appended to the detection matches, and the detection
    ``license_expression`` and ``detection_rules`` are recomputed. When any
    detection was modified, the resource-level ``detected_license_expression``
    and ``detected_license_expression_spdx`` are rebuilt from all detections.
    """
    if TRACE:
        logger_debug(f'packagedcode.licensing: add_referenced_license_detection_from_package: resource: {resource.path}')

    if not resource.is_file:
        return

    license_detections = resource.license_detections
    if not license_detections:
        return

    codebase_packages = codebase.attributes.packages

    modified = False

    for detection in license_detections:
        detection_modified = False
        matches = detection["matches"]
        referenced_filenames = get_referenced_filenames(matches)
        if not referenced_filenames:
            continue

        has_reference_to_package = any(
            'package' in referenced_filename
            for referenced_filename in referenced_filenames
        )

        if not has_reference_to_package:
            continue

        for package_uid in resource.for_packages:
            # Use the first codebase package with this uid. When none is
            # found, skip this uid: falling through would either merge the
            # detections of an unrelated package or fail on an unbound name
            # when there are no codebase packages at all.
            codebase_package = next(
                (
                    package
                    for package in codebase_packages
                    if package["package_uid"] == package_uid
                ),
                None,
            )
            if codebase_package is None:
                continue

            for pkg_detection in codebase_package["license_detections"]:
                modified = True
                detection_modified = True
                # This extends the detection matches list in place.
                matches.extend(pkg_detection["matches"])

        if not detection_modified:
            continue

        reasons, license_expression = get_detected_license_expression(
            matches=matches,
            analysis=DetectionCategory.UNKNOWN_REFERENCE_IN_FILE_TO_PACKAGE.value,
            post_scan=True,
        )
        detection["license_expression"] = str(license_expression)
        detection["detection_rules"] = reasons

    if modified:
        # Rebuild the resource-level expressions from all the detections,
        # including the ones that were left untouched.
        license_expressions = [
            detection["license_expression"]
            for detection in license_detections
        ]

        resource.detected_license_expression = combine_expressions(
            expressions=license_expressions,
            relation='AND',
            unique=True,
        )

        resource.detected_license_expression_spdx = str(build_spdx_license_expression(
            license_expression=resource.detected_license_expression,
            licensing=get_cache().licensing,
        ))

        codebase.save_resource(resource)
        yield resource


def add_license_from_sibling_file(resource, codebase, no_licenses):

if TRACE:
Expand Down
12 changes: 12 additions & 0 deletions src/packagedcode/plugin_package.py
Expand Up @@ -27,6 +27,7 @@
from licensedcode.detection import DetectionRule
from packagedcode import get_package_handler
from packagedcode.licensing import add_referenced_license_matches_for_package
from packagedcode.licensing import add_referenced_license_detection_from_package
from packagedcode.licensing import add_license_from_sibling_file
from packagedcode.licensing import get_license_detection_mappings
from packagedcode.licensing import get_license_expression_from_detection_mappings
Expand Down Expand Up @@ -162,6 +163,8 @@ def process_codebase(self, codebase, strip_root=False, **kwargs):
"""
no_licenses = False

# These steps add proper license detections to package_data and hence
# this is performed before top level packages creation
for resource in codebase.walk(topdown=False):
if not hasattr(resource, 'license_detections'):
no_licenses=True
Expand Down Expand Up @@ -190,6 +193,15 @@ def process_codebase(self, codebase, strip_root=False, **kwargs):
# Create codebase-level packages and dependencies
create_package_and_deps(codebase, strip_root=strip_root, **kwargs)

if not no_licenses:
# This step is dependent on top level packages
for resource in codebase.walk(topdown=False):
# If there is an unknown reference to a package we add the license
# from the package license detection
modified = list(add_referenced_license_detection_from_package(resource, codebase))
if TRACE and modified:
logger_debug(f'packagedcode: process_codebase: add_referenced_license_matches_from_package: modified: {modified}')


def add_license_from_file(resource, codebase, no_licenses):
"""
Expand Down

0 comments on commit bfcc3ca

Please sign in to comment.