Skip to content

Commit ee2970e

Browse files
committed
feat(scan): add license source filtering and priority-based selection
1 parent 4cb946d commit ee2970e

File tree

2 files changed

+56
-3
lines changed

2 files changed

+56
-3
lines changed

src/scanoss/inspection/policy_check/scanoss/copyleft.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,8 @@ def __init__( # noqa: PLR0913
9696
self.filepath,
9797
include,
9898
exclude,
99-
explicit)
99+
explicit,
100+
self.license_sources)
100101

101102
def _json(self, components: list[Component]) -> PolicyOutput:
102103
"""
@@ -179,7 +180,7 @@ def _get_components_with_copyleft_licenses(self, components: list) -> list[Dict]
179180
"""
180181
filtered_components = []
181182
for component in components:
182-
copyleft_licenses = [lic for lic in component['licenses'] if (lic['source'] in self.license_sources or lic['source'] == 'unknown') and lic['copyleft']]
183+
copyleft_licenses = [lic for lic in component['licenses'] if lic['copyleft']]
183184
if copyleft_licenses:
184185
# Remove unused keys
185186
del component['count']

src/scanoss/inspection/utils/scan_result_processor.py

Lines changed: 53 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,11 +71,13 @@ def __init__( # noqa: PLR0913
7171
include: str = None,
7272
exclude: str = None,
7373
explicit: str = None,
74+
license_sources: list = None,
7475
):
7576
super().__init__(debug, trace, quiet)
7677
self.result_file_path = result_file_path
7778
self.license_util = LicenseUtil()
7879
self.license_util.init(include, exclude, explicit)
80+
self.license_sources = license_sources
7981
self.results = self._load_input_file()
8082

8183
def get_results(self) -> Dict[str, Any]:
@@ -162,8 +164,11 @@ def _append_license_to_component(self,
162164
self.print_debug(f'WARNING: Results missing licenses. Skipping: {new_component}')
163165
return
164166

167+
# Select licenses based on configuration (filtering or priority mode)
168+
selected_licenses = self._select_licenses(new_component['licenses'])
169+
165170
# Process licenses for this component
166-
for license_item in new_component['licenses']:
171+
for license_item in selected_licenses:
167172
if license_item.get('name'):
168173
spdxid = license_item['name']
169174
source = license_item.get('source')
@@ -308,6 +313,53 @@ def convert_components_to_list(self, components: dict):
308313
component['licenses'] = []
309314
return results_list
310315

316+
def _select_licenses(self, licenses_data):
317+
"""
318+
Select licenses based on configuration.
319+
320+
Two modes:
321+
- Filtering mode: If license_sources specified, filter to those sources
322+
- Priority mode: Otherwise, use original priority-based selection
323+
324+
Args:
325+
licenses_data: List of license dictionaries
326+
327+
Returns:
328+
Filtered list of licenses based on configuration
329+
"""
330+
# Filtering mode, when license_sources is explicitly provided
331+
if self.license_sources:
332+
sources_to_include = set(self.license_sources) | {'unknown'}
333+
return [lic for lic in licenses_data
334+
if lic.get('source') in sources_to_include or lic.get('source') is None]
335+
336+
# Define priority order (highest to lowest)
337+
priority_sources = ['component_declared', 'license_file', 'file_header', 'scancode']
338+
339+
# Group licenses by source
340+
licenses_by_source = {}
341+
for license_item in licenses_data:
342+
343+
source = license_item.get('source', 'unknown')
344+
if source not in licenses_by_source:
345+
licenses_by_source[source] = {}
346+
347+
license_name = license_item.get('name')
348+
if license_name:
349+
# Use license name as key, store full license object as value
350+
# If duplicate license names exist in same source, the last one wins
351+
licenses_by_source[source][license_name] = license_item
352+
353+
# Find the highest priority source that has licenses
354+
for priority_source in priority_sources:
355+
if priority_source in licenses_by_source:
356+
self.print_trace(f'Choosing {priority_source} as source')
357+
return list(licenses_by_source[priority_source].values())
358+
359+
# If no priority sources found, combine all licenses into a single list
360+
self.print_debug("No priority sources found, returning all licenses as list")
361+
return licenses_data
362+
311363
def group_components_by_license(self,components):
312364
"""
313365
Groups components by their unique component-license pairs.

0 commit comments

Comments
 (0)