Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Added
- Upcoming changes...

## [1.37.1] - 2025-10-21
### Added
- Added license source filtering to CycloneDX conversion
### Fixed
- Fixed dependencies being skipped during SPDX conversion

## [1.37.0] - 2025-10-17
### Added
- Added delta folder and file copy command
Expand Down Expand Up @@ -689,3 +695,4 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
[1.35.0]: https://github.com/scanoss/scanoss.py/compare/v1.34.0...v1.35.0
[1.36.0]: https://github.com/scanoss/scanoss.py/compare/v1.35.0...v1.36.0
[1.37.0]: https://github.com/scanoss/scanoss.py/compare/v1.36.0...v1.37.0
[1.37.1]: https://github.com/scanoss/scanoss.py/compare/v1.37.0...v1.37.1
2 changes: 1 addition & 1 deletion src/scanoss/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,4 +22,4 @@
THE SOFTWARE.
"""

__version__ = '1.37.0'
__version__ = '1.37.1'
9 changes: 7 additions & 2 deletions src/scanoss/cyclonedx.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,11 @@ def parse(self, data: dict): # noqa: PLR0912, PLR0915
fdl = []
if licenses:
for lic in licenses:
fdl.append({'id': lic.get('name')})
name = lic.get('name')
source = lic.get('source')
if source not in ('component_declared', 'license_file', 'file_header'):
continue
fdl.append({'id': name})
fd['licenses'] = fdl
Comment on lines +155 to 160
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

License source filtering for file matches looks good; mirror this for dependencies.

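You correctly keep only ('component_declared', 'license_file', 'file_header') here. The dependency path above (Lines 91–103) still accepts all sources and should apply the same filter for consistency. One caveat to confirm first: if dependency license entries can omit the 'source' field, a strict filter would drop every one of them, so the dependency check should also admit an unspecified source (None or ''), as _process_licenses in src/scanoss/spdxlite.py does in this PR.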

Suggested change for the dependency block:

-                        licenses = deps.get('licenses')
-                        fdl = []
-                        if licenses:
-                            dc = []
-                            for lic in licenses:
-                                name = lic.get('name')
-                                if name not in dc:  # Only save the license name once
-                                    fdl.append({'id': name})
-                                    dc.append(name)
+                        licenses = deps.get('licenses')
+                        fdl = []
+                        if licenses:
+                            seen = set()
+                            for lic in licenses:
+                                name = lic.get('name')
+                                source = lic.get('source')
+                                if source not in ('component_declared', 'license_file', 'file_header'):
+                                    continue
+                                if name and name not in seen:
+                                    fdl.append({'id': name})
+                                    seen.add(name)
                         fd['licenses'] = fdl

Committable suggestion skipped: line range outside the PR's diff.

🤖 Prompt for AI Agents
In src/scanoss/cyclonedx.py around lines 91 to 103, the dependency license
handling currently accepts all license sources while the file-level code (lines
155–160) restricts to ('component_declared', 'license_file', 'file_header');
update the dependency block to apply the same filter by checking
lic.get('source') and only appending licenses whose source is one of those three
values so dependency licenses mirror the file-level filtering logic.

cdx[purl] = fd
# self.print_stderr(f'VD: {vdx}')
Expand Down Expand Up @@ -295,7 +299,8 @@ def produce_from_str(self, json_str: str, output_file: str = None) -> bool:
except Exception as e:
self.print_stderr(f'ERROR: Problem parsing input JSON: {e}')
return False
return self.produce_from_json(data, output_file)
success, _ = self.produce_from_json(data, output_file)
return success

def _normalize_vulnerability_id(self, vuln: dict) -> tuple[str, str]:
"""
Expand Down
6 changes: 4 additions & 2 deletions src/scanoss/spdxlite.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,9 @@ def _process_licenses(self, licenses: list) -> list:
Process license information and remove duplicates.

This method filters license information to include only licenses from trusted sources
('component_declared' or 'license_file') and removes any duplicate license names.
('component_declared', 'license_file', 'file_header'). Licenses with an unspecified
source (None or '') are allowed. Non-empty, non-allowed sources are excluded. It also
removes any duplicate license names.
The result is a simplified list of license dictionaries containing only the 'id' field.

Args:
Expand All @@ -247,7 +249,7 @@ def _process_licenses(self, licenses: list) -> list:
for license_info in licenses:
name = license_info.get('name')
source = license_info.get('source')
if source not in ("component_declared", "license_file", "file_header"):
if source not in (None, '') and source not in ("component_declared", "license_file", "file_header"):
Copy link
Contributor

@matiasdaloia matiasdaloia Oct 21, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This condition could be simplified to a single membership test:

if source not in (None, '', "component_declared", "license_file", "file_header"):

continue
if name and name not in seen_names:
processed_licenses.append({'id': name})
Expand Down