Skip to content

Commit

Permalink
Update referenced license detection from multiple files
Browse files Browse the repository at this point in the history
Signed-off-by: Ayan Sinha Mahapatra <ayansmahapatra@gmail.com>
  • Loading branch information
AyanSinhaMahapatra committed Mar 19, 2024
1 parent 49da477 commit d6f8109
Show file tree
Hide file tree
Showing 6 changed files with 120 additions and 1,155 deletions.
18 changes: 13 additions & 5 deletions src/licensedcode/detection.py
Original file line number Diff line number Diff line change
Expand Up @@ -1377,11 +1377,15 @@ def has_references_to_local_files(license_matches):

def use_referenced_license_expression(referenced_license_expression, license_detection, licensing=Licensing()):
"""
Return True if the `license_detection` LicenseDetection object should
include the referenced LicenseMatch objects (the `referenced_license_expression`
LicenseExpression string is the combined License Expression for these matches)
that it references, otherwise if return False if the LicenseDetection object
should remain intact.
Return True if the ``license_detection`` LicenseDetection should include
the matches represented by the ``referenced_license_expression`` string.
Return False otherwise.
Used when we have a ``license_detection`` with a match to a license rule like
"See license in COPYING" and where the ``referenced_license_expression`` is the
expression found in the "COPYING" file, which is the combined expression from
all license detections found in "COPYING" (or multiple referenced files).
Reference: https://github.com/nexB/scancode-toolkit/issues/3547
"""
#TODO: Also determine if referenced matches could be added but
Expand Down Expand Up @@ -1416,6 +1420,10 @@ def use_referenced_license_expression(referenced_license_expression, license_det
if same_license_keys and not same_expression:
return False

# When there are many license keys in an expression, and there are no
# unknown or other cases, we cannot safely conclude that we should
# follow the license in the referenced filenames. This is likely
# a case where we have larger notices and several combined expressions.
if len(referenced_license_keys) > 5:
return False

Expand Down
80 changes: 40 additions & 40 deletions src/licensedcode/plugin_license.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ def logger_debug(*args):

logger = logging.getLogger(__name__)

if TRACE:
if TRACE or TRACE_REFERENCE:
import sys
logging.basicConfig(stream=sys.stdout)
logger.setLevel(logging.DEBUG)
Expand Down Expand Up @@ -217,6 +217,8 @@ def process_codebase(self, codebase, license_text=False, license_diagnostics=Fal
f'before: {license_expressions_before}\n'
f'after : {license_expressions_after}'
)

#raise Exception()

license_detections = collect_license_detections(
codebase=codebase,
Expand Down Expand Up @@ -273,8 +275,6 @@ def add_referenced_filenames_license_matches_for_detections(resource, codebase):
license_detection_mapping=license_detection_mapping,
file_path=resource.path,
)
detection_modified = False
detections_added = []
license_match_mappings = license_detection_mapping["matches"]
referenced_filenames = get_referenced_filenames(license_detection.matches)

Expand All @@ -285,6 +285,7 @@ def add_referenced_filenames_license_matches_for_detections(resource, codebase):
)
continue

referenced_detections = []
for referenced_filename in referenced_filenames:
referenced_resource = find_referenced_resource(
referenced_filename=referenced_filename,
Expand All @@ -293,46 +294,45 @@ def add_referenced_filenames_license_matches_for_detections(resource, codebase):
)

if referenced_resource and referenced_resource.license_detections:
referenced_license_expression = combine_expressions(
expressions=[
detection["license_expression"]
for detection in referenced_resource.license_detections
],
referenced_detections.extend(
referenced_resource.license_detections
)
if not use_referenced_license_expression(
referenced_license_expression=referenced_license_expression,
license_detection=license_detection,
):
if TRACE_REFERENCE:
logger_debug(
f'use_referenced_license_expression: False for '
f'resource: {referenced_resource.path} and '
f'license_expression: {referenced_license_expression}',
)
continue

if TRACE_REFERENCE:
logger_debug(
f'use_referenced_license_expression: True for '
f'resource: {referenced_resource.path} and '
f'license_expression: {referenced_license_expression}',
)

modified = True
detection_modified = True
detections_added.extend(referenced_resource.license_detections)
matches_to_extend = get_matches_from_detection_mappings(
license_detections=referenced_resource.license_detections
)
populate_matches_with_path(
matches=matches_to_extend,
path=referenced_resource.path
)
license_match_mappings.extend(matches_to_extend)

if not detection_modified:
referenced_license_expression = combine_expressions(
expressions=[
detection["license_expression"]
for detection in referenced_detections
],
)
if not use_referenced_license_expression(
referenced_license_expression=referenced_license_expression,
license_detection=license_detection,
):
if TRACE_REFERENCE:
logger_debug(
f'use_referenced_license_expression: False for '
f'resource: {referenced_resource.path} and '
f'license_expression: {referenced_license_expression}',
)
continue

if TRACE_REFERENCE:
logger_debug(
f'use_referenced_license_expression: True for '
f'resource: {referenced_resource.path} and '
f'license_expression: {referenced_license_expression}',
)

modified = True
matches_to_extend = get_matches_from_detection_mappings(
license_detections=referenced_detections
)
populate_matches_with_path(
matches=matches_to_extend,
path=referenced_resource.path
)
license_match_mappings.extend(matches_to_extend)

detection_log, license_expression = get_detected_license_expression(
license_match_mappings=license_match_mappings,
analysis=DetectionCategory.UNKNOWN_FILE_REFERENCE_LOCAL.value,
Expand All @@ -348,7 +348,7 @@ def add_referenced_filenames_license_matches_for_detections(resource, codebase):
license_detection_mapping["detection_log"] = detection_log
license_detection_mapping["identifier"] = get_new_identifier_from_detections(
initial_detection=license_detection_mapping,
detections_added=detections_added,
detections_added=referenced_detections,
license_expression=license_expression,
)

Expand Down
52 changes: 26 additions & 26 deletions src/packagedcode/licensing.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,12 +95,12 @@ def add_referenced_license_matches_for_package(resource, codebase):
)

detections_added = []
detection_modified = False
license_match_mappings = license_detection_mapping["matches"]
referenced_filenames = get_referenced_filenames(license_detection_object.matches)
if not referenced_filenames:
continue

referenced_detections = []
for referenced_filename in referenced_filenames:
referenced_resource = find_referenced_resource(
referenced_filename=referenced_filename,
Expand All @@ -109,35 +109,35 @@ def add_referenced_license_matches_for_package(resource, codebase):
)

if referenced_resource and referenced_resource.license_detections:
referenced_license_expression = combine_expressions(
expressions=[
detection["license_expression"]
for detection in referenced_resource.license_detections
],
referenced_detections.extend(
referenced_resource.license_detections
)
if not use_referenced_license_expression(
referenced_license_expression=referenced_license_expression,
license_detection=license_detection_object,
):
continue

modified = True
detection_modified = True
detections_added.extend(referenced_resource.license_detections)
matches_to_extend = get_matches_from_detection_mappings(
license_detections=referenced_resource.license_detections
)
# For LicenseMatches with different resources as origin, add the
# resource path to these matches as origin info
populate_matches_with_path(
matches=matches_to_extend,
path=referenced_resource.path
)
license_match_mappings.extend(matches_to_extend)

if not detection_modified:
referenced_license_expression = combine_expressions(
expressions=[
detection["license_expression"]
for detection in referenced_detections
],
)
if not use_referenced_license_expression(
referenced_license_expression=referenced_license_expression,
license_detection=license_detection_object,
):
continue

modified = True
detections_added.extend(referenced_resource.license_detections)
matches_to_extend = get_matches_from_detection_mappings(
license_detections=referenced_resource.license_detections,
)
# For LicenseMatches with different resources as origin, add the
# resource path to these matches as origin info
populate_matches_with_path(
matches=matches_to_extend,
path=referenced_resource.path
)
license_match_mappings.extend(matches_to_extend)

detection_log, license_expression = get_detected_license_expression(
license_match_mappings=license_match_mappings,
analysis=DetectionCategory.PACKAGE_UNKNOWN_FILE_REFERENCE_LOCAL.value,
Expand Down

Large diffs are not rendered by default.

0 comments on commit d6f8109

Please sign in to comment.