Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refine referenced filenames #3547 #3681

Merged
merged 5 commits into from
Mar 19, 2024
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
66 changes: 66 additions & 0 deletions src/licensedcode/detection.py
Original file line number Diff line number Diff line change
Expand Up @@ -334,6 +334,25 @@ def identifier_with_expression(self):
id_safe_expression = python_safe_name(s=str(self.license_expression))
return "{}-{}".format(id_safe_expression, self._identifier)

@property
def is_unknown(self):
    """
    Return True if there are unknown license keys in the license expression
    for this detection, return False otherwise.

    A detection without a license expression (None) is never reported as
    unknown. The check is a substring test on the string form of the
    expression, so any expression containing one of the keys below matches.
    """
    license_expression = self.license_expression
    if license_expression is None:
        # Nothing was detected: there is no key that could be unknown, and
        # a membership test on None would raise a TypeError.
        return False

    unknown_license_keys = (
        "unknown-license-reference",
        "unknown-spdx",
        "unknown",
        "free-unknown",
    )

    # Coerce to str so that an expression object is handled the same way as
    # a plain expression string.
    expression = str(license_expression)
    return any(license_key in expression for license_key in unknown_license_keys)

def get_start_end_line(self):
"""
Return start and end line for a license detection issue, from the
Expand Down Expand Up @@ -1356,6 +1375,53 @@ def has_references_to_local_files(license_matches):
)


def use_referenced_license_expression(referenced_license_expression, license_detection, licensing=Licensing()):
    """
    Return True if the `license_detection` LicenseDetection object should
    include the referenced LicenseMatch objects that it references, or return
    False if the LicenseDetection object should remain intact.

    `referenced_license_expression` is the combined license expression string
    for the referenced matches. `licensing` is the object used to extract the
    license keys of both expressions through its `license_keys()` method.

    Reference: https://github.com/nexB/scancode-toolkit/issues/3547
    """
    # TODO: Also determine if referenced matches could be added but
    # resulting license expression should not be modified.

    if not referenced_license_expression or not license_detection:
        return False

    # We should always include referenced license matches to resolve an unknown
    # license reference
    if license_detection.is_unknown:
        return True

    detection_license_expression = license_detection.license_expression

    # We should always include referenced license matches when the license
    # expression from the referenced license matches match the license
    # expression for the detection
    if referenced_license_expression == detection_license_expression:
        return True

    license_keys = set(
        licensing.license_keys(expression=detection_license_expression)
    )
    referenced_license_keys = set(
        licensing.license_keys(expression=referenced_license_expression)
    )

    # The two expressions are known to differ at this point (equal expressions
    # returned above). If they still use the same license keys then the
    # reference could merely be pointing to notices, combining which produces
    # a different expression, and the original detection is correct
    if license_keys == referenced_license_keys:
        return False

    # NOTE(review): presumably a reference resolving to this many distinct
    # license keys is considered too broad to merge into the detection; the
    # cutoff of 5 looks like a heuristic -- confirm against the linked issue.
    if len(referenced_license_keys) > 5:
        return False

    return True


def get_detected_license_expression(
analysis,
license_matches=None,
Expand Down
38 changes: 38 additions & 0 deletions src/licensedcode/plugin_license.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from commoncode.cliutils import PluggableCommandLineOption
from commoncode.cliutils import SCAN_GROUP
from commoncode.cliutils import SCAN_OPTIONS_GROUP
from license_expression import combine_expressions
from plugincode.scan import ScanPlugin
from plugincode.scan import scan_impl

Expand All @@ -30,10 +31,12 @@
from licensedcode.detection import LicenseDetectionFromResult
from licensedcode.detection import sort_unique_detections
from licensedcode.detection import UniqueDetection
from licensedcode.detection import use_referenced_license_expression
from packagedcode.utils import combine_expressions
from scancode.api import SCANCODE_LICENSEDB_URL

# Debug tracing switches, read once from the environment when this module is
# loaded. os.environ.get() returns the raw string value, so any non-empty
# value (including "0" or "false") makes the flag truthy; when the variable is
# not set the flag is False.
TRACE = os.environ.get('SCANCODE_DEBUG_PLUGIN_LICENSE', False)
TRACE_REFERENCE = os.environ.get('SCANCODE_DEBUG_PLUGIN_LICENSE_REFERENCE', False)


def logger_debug(*args):
Expand Down Expand Up @@ -259,6 +262,11 @@ def add_referenced_filenames_license_matches_for_detections(resource, codebase):

modified = False

if TRACE_REFERENCE:
logger_debug(
f'add_referenced_license_matches: resource_path: {resource.path}',
)

for license_detection_mapping in license_detection_mappings:

license_detection = LicenseDetectionFromResult.from_license_detection_mapping(
Expand All @@ -271,6 +279,10 @@ def add_referenced_filenames_license_matches_for_detections(resource, codebase):
referenced_filenames = get_referenced_filenames(license_detection.matches)

if not referenced_filenames:
if TRACE_REFERENCE:
logger_debug(
f'No references at license detection with expression: {license_detection.license_expression}',
)
continue

for referenced_filename in referenced_filenames:
Expand All @@ -281,6 +293,31 @@ def add_referenced_filenames_license_matches_for_detections(resource, codebase):
)

if referenced_resource and referenced_resource.license_detections:
referenced_license_expression = combine_expressions(
expressions=[
detection["license_expression"]
for detection in referenced_resource.license_detections
],
)
if not use_referenced_license_expression(
referenced_license_expression=referenced_license_expression,
license_detection=license_detection,
):
if TRACE_REFERENCE:
logger_debug(
f'use_referenced_license_expression: False for '
f'resource: {referenced_resource.path} and '
f'license_expression: {referenced_license_expression}',
)
continue

if TRACE_REFERENCE:
logger_debug(
f'use_referenced_license_expression: True for '
f'resource: {referenced_resource.path} and '
f'license_expression: {referenced_license_expression}',
)

modified = True
detection_modified = True
detections_added.extend(referenced_resource.license_detections)
Expand All @@ -301,6 +338,7 @@ def add_referenced_filenames_license_matches_for_detections(resource, codebase):
analysis=DetectionCategory.UNKNOWN_FILE_REFERENCE_LOCAL.value,
post_scan=True,
)

license_expression_spdx = build_spdx_license_expression(
license_expression=str(license_expression),
licensing=get_cache().licensing,
Expand Down
52 changes: 45 additions & 7 deletions src/packagedcode/licensing.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
from licensedcode.detection import detect_licenses
from licensedcode.detection import LicenseDetectionFromResult
from licensedcode.detection import populate_matches_with_path
from licensedcode.detection import use_referenced_license_expression
from licensedcode.spans import Span
from licensedcode import query

Expand Down Expand Up @@ -93,6 +94,7 @@ def add_referenced_license_matches_for_package(resource, codebase):
file_path=resource.path,
)

detections_added = []
detection_modified = False
license_match_mappings = license_detection_mapping["matches"]
referenced_filenames = get_referenced_filenames(license_detection_object.matches)
Expand All @@ -106,16 +108,24 @@ def add_referenced_license_matches_for_package(resource, codebase):
codebase=codebase,
)

if not referenced_resource:
continue

referenced_license_detections = referenced_resource.license_detections
if referenced_resource and referenced_resource.license_detections:
referenced_license_expression = combine_expressions(
expressions=[
detection["license_expression"]
for detection in referenced_resource.license_detections
],
)
if not use_referenced_license_expression(
referenced_license_expression=referenced_license_expression,
license_detection=license_detection_object,
):
continue

if referenced_license_detections:
modified = True
detection_modified = True
detections_added.extend(referenced_resource.license_detections)
matches_to_extend = get_matches_from_detection_mappings(
license_detections=referenced_license_detections
license_detections=referenced_resource.license_detections
)
# For LicenseMatches with different resources as origin, add the
# resource path to these matches as origin info
Expand All @@ -142,7 +152,7 @@ def add_referenced_license_matches_for_package(resource, codebase):
license_detection_mapping["detection_log"] = detection_log
license_detection_mapping["identifier"] = get_new_identifier_from_detections(
initial_detection=license_detection_mapping,
detections_added=referenced_license_detections,
detections_added=detections_added,
license_expression=license_expression,
)

Expand Down Expand Up @@ -223,7 +233,20 @@ def add_referenced_license_detection_from_package(resource, codebase):
f'sibling_license_detections: {sibling_license_detections}'
)

referenced_license_expression = combine_expressions(
expressions=[
detection["license_expression"]
for detection in sibling_license_detections
],
)
if not use_referenced_license_expression(
referenced_license_expression=referenced_license_expression,
license_detection=license_detection_object,
):
continue

for sibling_detection in sibling_license_detections:

modified = True
detection_modified = True
license_match_mappings.extend(sibling_detection["matches"])
Expand All @@ -239,6 +262,21 @@ def add_referenced_license_detection_from_package(resource, codebase):
break

pkg_detections = codebase_package["license_detections"]
if not pkg_detections:
continue

referenced_license_expression = combine_expressions(
expressions=[
detection["license_expression"]
for detection in pkg_detections
],
)
if not use_referenced_license_expression(
referenced_license_expression=referenced_license_expression,
license_detection=license_detection_object,
):
continue

for pkg_detection in pkg_detections:
modified = True
detection_modified = True
Expand Down

Large diffs are not rendered by default.