diff --git a/CHANGELOG.md b/CHANGELOG.md index 47e58f01..aef5fbb2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added - Upcoming changes... +## [1.39.0] - 2025-10-24 +### Added +- Added `glc-codequality` format to convert subcommand +- Added `inspect gitlab matches` subcommand to generate GitLab-compatible Markdown match summary from SCANOSS scan results +- Added utility modules for shared functionality (`markdown_utils.py` and `file_utils.py`) +### Changed +- Refactored table generation utilities into shared `markdown_utils` module +- Refactored JSON file loading into shared `file_utils` module + ## [1.38.0] - 2025-10-24 ### Added - Add support for settings debug mode via `SCANOSS_DEBUG` environment variable @@ -701,3 +710,4 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 [1.37.0]: https://github.com/scanoss/scanoss.py/compare/v1.36.0...v1.37.0 [1.37.1]: https://github.com/scanoss/scanoss.py/compare/v1.37.0...v1.37.1 [1.38.0]: https://github.com/scanoss/scanoss.py/compare/v1.37.1...v1.38.0 +[1.39.0]: https://github.com/scanoss/scanoss.py/compare/v1.38.0...v1.39.0 \ No newline at end of file diff --git a/CLIENT_HELP.md b/CLIENT_HELP.md index 074e3b20..877c6b8a 100644 --- a/CLIENT_HELP.md +++ b/CLIENT_HELP.md @@ -260,7 +260,7 @@ scanoss-py scan src -hdr "x-api-key:12345" -hdr "Authorization: Bearer --dt-url --dt-projectname --dt-projectversion --dt-apikey --format md --output project-violations.md ``` +#### Inspect GitLab Component Match Summary Markdown Output +The following command can be used to generate a component match summary in Markdown format for GitLab: +```bash +scanoss-py inspect gitlab matches --input scanoss-results.json -lpr https://gitlab.com/org/project/-/blob/main --output gitlab-component-match-summary.md +``` ### Folder-Scan a Project Folder diff --git a/src/scanoss/cli.py b/src/scanoss/cli.py index b4984184..0542ebeb 100644 --- a/src/scanoss/cli.py +++ b/src/scanoss/cli.py
@@ -40,6 +40,7 @@
 )
 from scanoss.inspection.raw.component_summary import ComponentSummary
 from scanoss.inspection.raw.license_summary import LicenseSummary
+from scanoss.inspection.raw.match_summary import MatchSummary
 from scanoss.scanners.container_scanner import (
     DEFAULT_SYFT_COMMAND,
     DEFAULT_SYFT_TIMEOUT,
@@ -73,6 +74,7 @@
 from .csvoutput import CsvOutput
 from .cyclonedx import CycloneDx
 from .filecount import FileCount
+from .gitlabqualityreport import GitLabQualityReport
 from .inspection.raw.copyleft import Copyleft
 from .inspection.raw.undeclared_component import UndeclaredComponent
 from .results import Results
@@ -283,7 +285,7 @@ def setup_args() -> None:  # noqa: PLR0912, PLR0915
         '--format',
         '-f',
         type=str,
-        choices=['cyclonedx', 'spdxlite', 'csv'],
+        choices=['cyclonedx', 'spdxlite', 'csv', 'glc-codequality'],
         default='spdxlite',
         help='Output format (optional - default: spdxlite)',
     )
@@ -794,6 +796,66 @@ def setup_args() -> None:  # noqa: PLR0912, PLR0915
         help='Timeout (in seconds) for API communication (optional - default 300 sec)',
     )
 
+
+    # ==============================================================================
+    # GitLab Integration Parser
+    # ==============================================================================
+    # Main parser for GitLab-specific inspection commands and report generation
+    p_gitlab_sub = p_inspect_sub.add_parser(
+        'gitlab',
+        aliases=['glc'],
+        description='Generate GitLab-compatible reports from SCANOSS scan results (Markdown summaries)',
+        help='Generate GitLab integration reports',
+    )
+
+    # GitLab sub-commands parser
+    # Provides access to different GitLab report formats and inspection tools
+    p_gitlab_sub_parser = p_gitlab_sub.add_subparsers(
+        title='GitLab Report Types',
+        dest='subparser_subcmd',
+        description='Available GitLab report formats for scan result analysis',
+        help='Select the type of GitLab report to generate',
+    )
+
+    # ==============================================================================
+    # GitLab Matches Summary Command
+    # ==============================================================================
+    # Analyzes scan results and generates a GitLab-compatible Markdown summary
+    p_gl_inspect_matches = p_gitlab_sub_parser.add_parser(
+        'matches',
+        aliases=['ms'],
+        description='Generate a Markdown summary report of scan matches for GitLab integration',
+        help='Generate Markdown summary report of scan matches',
+    )
+
+    # Input file argument - SCANOSS scan results in JSON format
+    p_gl_inspect_matches.add_argument(
+        '-i',
+        '--input',
+        required=True,
+        type=str,
+        help='Path to SCANOSS scan results file (JSON format) to analyze',
+    )
+
+    # Line range prefix for GitLab file navigation
+    # Enables clickable file references in the generated report that link to specific lines in GitLab
+    p_gl_inspect_matches.add_argument(
+        '-lpr',
+        '--line-range-prefix',
+        required=True,
+        type=str,
+        help='Base URL prefix for GitLab file links with line ranges (e.g., https://gitlab.com/org/project/-/blob/main)',
+    )
+
+    # Output file argument - where to save the generated Markdown report
+    p_gl_inspect_matches.add_argument(
+        '--output',
+        '-o',
+        required=False,
+        type=str,
+        help='Output file path for the generated Markdown report (default: stdout)',
+    )
+
     # TODO Move to the command call def location
     # RAW results
     p_inspect_raw_undeclared.set_defaults(func=inspect_undeclared)
@@ -807,6 +869,8 @@ def setup_args() -> None:  # noqa: PLR0912, PLR0915
     p_inspect_legacy_component_summary.set_defaults(func=inspect_component_summary)
     # Dependency Track
     p_inspect_dt_project_violation.set_defaults(func=inspect_dep_track_project_violations)
+    # GitLab
+    p_gl_inspect_matches.set_defaults(func=inspect_gitlab_matches)
 
     # =========================================================================
     # END INSPECT SUBCOMMAND CONFIGURATION
@@ -1153,6 +1217,7 @@ def setup_args() -> None:  # noqa: PLR0912, PLR0915
         p_inspect_legacy_license_summary,
         p_inspect_legacy_component_summary,
         p_inspect_dt_project_violation,
+        p_gl_inspect_matches,
         c_provenance,
         p_folder_scan,
         p_folder_hash,
@@ -1207,7 +1272,11 @@ def setup_args() -> None:  # noqa: PLR0912, PLR0915
     ) and not args.subparsercmd:
         parser.parse_args([args.subparser, '--help'])  # Force utils helps to be displayed
         sys.exit(1)
-    elif (args.subparser in 'inspect') and (args.subparsercmd in ('raw', 'dt')) and (args.subparser_subcmd is None):
+    elif (
+        (args.subparser in 'inspect')
+        and (args.subparsercmd in ('raw', 'dt', 'glc', 'gitlab'))
+        and (args.subparser_subcmd is None)
+    ):
         parser.parse_args([args.subparser, args.subparsercmd, '--help'])  # Force utils helps to be displayed
         sys.exit(1)
     args.func(parser, args)  # Execute the function associated with the sub-command
@@ -1628,6 +1697,11 @@ def convert(parser, args):
         print_stderr('Producing CSV report...')
         csvo = CsvOutput(debug=args.debug, output_file=args.output)
         success = csvo.produce_from_file(args.input)
+    elif args.format == 'glc-codequality':
+        if not args.quiet:
+            print_stderr('Producing GitLab code quality report...')
+        glc_code_quality = GitLabQualityReport(debug=args.debug, trace=args.trace, quiet=args.quiet)
+        success = glc_code_quality.produce_from_file(args.input, output_file=args.output)
     else:
         print_stderr(f'ERROR: Unknown output format (--format): {args.format}')
     if not success:
@@ -1901,6 +1975,64 @@ def inspect_dep_track_project_violations(parser, args):
         sys.exit(1)
 
 
+def inspect_gitlab_matches(parser, args):
+    """
+    Handle the GitLab match summary inspection command.
+
+    Analyzes SCANOSS scan results and generates a GitLab-compatible Markdown summary
+    report of component matches. The report generator is given the results file, the
+    output destination and the URL prefix used to build links to the matched files.
+
+    Parameters
+    ----------
+    parser : ArgumentParser
+        Command line parser object for help display
+    args : Namespace
+        Parsed command line arguments containing:
+        - input: Path to SCANOSS scan results file (JSON format) to analyze
+        - line_range_prefix: Base URL prefix for generating GitLab file links with line ranges
+          (e.g., 'https://gitlab.com/org/project/-/blob/main')
+        - output: Optional output file path for the generated Markdown report (default: stdout)
+        - debug: Enable debug output for troubleshooting
+        - trace: Enable trace-level logging
+        - quiet: Suppress informational messages
+
+    Notes
+    -----
+    - The output is formatted in Markdown for optimal display in GitLab
+    - If output is not specified, the report is written to stdout
+    """
+    # Both options are declared required=True, so this only triggers if the handler is called directly
+    if args.input is None or args.line_range_prefix is None:
+        # Show the help of the sub-command that was invoked, not the top-level `inspect` help
+        parser.parse_args([args.subparser, args.subparsercmd, args.subparser_subcmd, '-h'])
+        sys.exit(1)
+
+    # Initialize output file if specified (create/truncate)
+    if args.output:
+        initialise_empty_file(args.output)
+
+    try:
+        # Create GitLab matches summary generator with configuration
+        match_summary = MatchSummary(
+            debug=args.debug,
+            trace=args.trace,
+            quiet=args.quiet,
+            scanoss_results_path=args.input,  # Path to SCANOSS JSON results
+            output=args.output,  # Output file path or None for stdout
+            line_range_prefix=args.line_range_prefix,  # GitLab URL prefix for file links
+        )
+
+        # Execute the summary generation
+        match_summary.run()
+    except Exception as e:
+        # Handle any errors during report generation
+        print_stderr(e)
+        if args.debug:
+            traceback.print_exc()
+        sys.exit(1)
+
+
 # =============================================================================
 # END INSPECT COMMAND HANDLERS
 # =============================================================================
diff --git a/src/scanoss/gitlabqualityreport.py b/src/scanoss/gitlabqualityreport.py
new file mode 100644
index 00000000..62dc25f4
--- /dev/null
+++ b/src/scanoss/gitlabqualityreport.py
@@ -0,0 +1,199 @@
+"""
+SPDX-License-Identifier: MIT
+
+  Copyright (c) 2025, SCANOSS
+
+  Permission is hereby granted, free of charge, to any person obtaining a copy
+  of this software and associated documentation files (the "Software"), to deal
+  in the Software without restriction, including without limitation the rights
+  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+  copies of the Software, and to permit persons to whom the Software is
+  furnished to do so, subject to the following conditions:
+
+  The above copyright notice and this permission notice shall be included in
+  all copies or substantial portions of the Software.
+
+  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+  THE SOFTWARE.
+"""
+
+import json
+import os
+import sys
+from dataclasses import dataclass
+from typing import Optional
+
+from .scanossbase import ScanossBase
+from .utils import scanoss_scan_results_utils
+
+
+@dataclass
+class Lines:
+    """Line information of a reported issue (only the first line is recorded)."""
+
+    begin: int
+
+
+@dataclass
+class Location:
+    """File path and line information of a reported issue."""
+
+    path: str
+    lines: Lines
+
+
+@dataclass
+class CodeQuality:
+    """Single entry of the Code Quality report, serialised through to_dict()."""
+
+    description: str
+    check_name: str
+    fingerprint: str
+    severity: str
+    location: Location
+
+    def to_dict(self):
+        """Convert to dictionary for JSON serialization."""
+        return {
+            "description": self.description,
+            "check_name": self.check_name,
+            "fingerprint": self.fingerprint,
+            "severity": self.severity,
+            "location": {
+                "path": self.location.path,
+                "lines": {
+                    "begin": self.location.lines.begin
+                }
+            }
+        }
+
+
+class GitLabQualityReport(ScanossBase):
+    """
+    GitLabQualityReport management class
+    Handle all interaction with GitLab Code Quality Report formatting
+    """
+
+    def __init__(self, debug: bool = False, trace: bool = False, quiet: bool = False):
+        """
+        Initialise the GitLabQualityReport class
+        """
+        super().__init__(debug, trace, quiet)
+
+    def _get_code_quality(self, file_name: str, result: dict) -> Optional[CodeQuality]:
+        """
+        Build the Code Quality entry for a single 'file' or 'snippet' result
+        :param file_name: Scanned file the result belongs to
+        :param result: SCANOSS scan result for the file
+        :return: CodeQuality entry, or None if the result has no file hash or no usable lines
+        """
+        if not result.get('file_hash'):
+            self.print_debug(f"Warning: no hash found for result: {result}")
+            return None
+
+        if result.get('id') == 'file':
+            # A full file match is reported against the first line of the file
+            begin = 1
+            description = f"File match found in: {file_name}"
+        else:
+            if not result.get('lines'):
+                self.print_debug(f"Warning: No lines found for result: {result}")
+                return None
+            lines = scanoss_scan_results_utils.get_lines(result.get('lines'))
+            if not lines:
+                self.print_debug(f"Warning: empty lines for result: {result}")
+                return None
+            begin = lines[0]
+            description = f"Snippet found in: {file_name} - lines {begin}-{lines[-1]}"
+        return CodeQuality(
+            description=description,
+            check_name=file_name,
+            fingerprint=result.get('file_hash'),
+            severity="info",
+            location=Location(path=file_name, lines=Lines(begin=begin)),
+        )
+
+    def _write_output(self, data: list[CodeQuality], output_file: Optional[str] = None) -> bool:
+        """
+        Write the GitLab Code Quality Report to output
+        :param data: Code Quality entries to serialise
+        :param output_file: Output file (optional - default: stdout)
+        :return: True if successful, False otherwise
+        """
+        try:
+            report = json.dumps([item.to_dict() for item in data], indent=2)
+            if output_file:
+                # The context manager closes the file even when the write fails
+                with open(output_file, 'w', encoding='utf-8') as file:
+                    print(report, file=file)
+            else:
+                print(report, file=sys.stdout)
+            return True
+        except Exception as e:
+            self.print_stderr(f'Error writing output: {str(e)}')
+            return False
+
+    def _produce_from_json(self, data: dict, output_file: Optional[str] = None) -> bool:
+        """
+        Produce GitLab Code Quality Report output from parsed SCANOSS results
+        :param data: Parsed results, mapping each file name to its list of results
+        :param output_file: Output file (optional)
+        :return: True if successful, False otherwise
+        """
+        if not isinstance(data, dict):
+            self.print_stderr('ERROR: Input JSON is not an object keyed by file name.')
+            return False
+        code_quality = []
+        for file_name, results in data.items():
+            for result in results:
+                if not result.get('id'):
+                    self.print_debug(f"Warning: No ID found for result: {result}")
+                    continue
+                if result.get('id') != 'snippet' and result.get('id') != 'file':
+                    self.print_debug(f"Skipping non-snippet/file match: {result}")
+                    continue
+                code_quality_item = self._get_code_quality(file_name, result)
+                if code_quality_item:
+                    code_quality.append(code_quality_item)
+                else:
+                    self.print_debug(f"Warning: No Code Quality found for result: {result}")
+        # Report a failed write to the caller instead of always returning True
+        return self._write_output(data=code_quality, output_file=output_file)
+
+    def _produce_from_str(self, json_str: str, output_file: Optional[str] = None) -> bool:
+        """
+        Produce Gitlab Code Quality Report output from input JSON string
+        :param json_str: input JSON string
+        :param output_file: Output file (optional)
+        :return: True if successful, False otherwise
+        """
+        if not json_str:
+            self.print_stderr('ERROR: No JSON string provided to parse.')
+            return False
+        try:
+            data = json.loads(json_str)
+        except Exception as e:
+            self.print_stderr(f'ERROR: Problem parsing input JSON: {e}')
+            return False
+        return self._produce_from_json(data, output_file)
+
+    def produce_from_file(self, json_file: str, output_file: Optional[str] = None) -> bool:
+        """
+        Parse plain/raw input JSON file and produce GitLab Code Quality JSON output
+        :param json_file: SCANOSS results file (JSON format)
+        :param output_file: Output file (optional - default: stdout)
+        :return: True if successful, False otherwise
+        """
+        if not json_file:
+            self.print_stderr('ERROR: No JSON file provided to parse.')
+            return False
+        if not os.path.isfile(json_file):
+            self.print_stderr(f'ERROR: JSON file does not exist or is not a file: {json_file}')
+            return False
+        with open(json_file, 'r', encoding='utf-8') as f:
+            return self._produce_from_str(f.read(), output_file)
diff --git a/src/scanoss/inspection/dependency_track/project_violation.py b/src/scanoss/inspection/dependency_track/project_violation.py
index b1c7597c..c891d76c 100644
--- a/src/scanoss/inspection/dependency_track/project_violation.py
+++ b/src/scanoss/inspection/dependency_track/project_violation.py
@@ -28,6 +28,7 @@
 
 from ...services.dependency_track_service import DependencyTrackService
 from ..policy_check import PolicyCheck, PolicyStatus
+from ..utils.markdown_utils import generate_jira_table, generate_table
 
 # Constants
 PROCESSING_RETRY_DELAY = 5  # seconds
@@ -195,7 +196,7 @@ def _markdown(self, project_violations: list[PolicyViolationDict]) -> Dict[str,
         Returns:
             Dictionary with formatted Markdown details and summary
         """
-        return self._md_summary_generator(project_violations, self.generate_table)
+        return self._md_summary_generator(project_violations, generate_table)
 
     def _jira_markdown(self, data: list[PolicyViolationDict]) -> Dict[str, Any]:
         """
@@ -207,7 +208,7 @@ def _jira_markdown(self, data: list[PolicyViolationDict]) -> Dict[str, Any]:
         Returns:
             Dictionary containing Jira markdown formatted results and summary
         """
-        return self._md_summary_generator(data, self.generate_jira_table)
+        return self._md_summary_generator(data, generate_jira_table)
 
     def is_project_updated(self, dt_project: Dict[str, Any]) -> bool:
         """
diff --git a/src/scanoss/inspection/policy_check.py b/src/scanoss/inspection/policy_check.py
index
decde1aa..cd01972f 100644
--- a/src/scanoss/inspection/policy_check.py
+++ b/src/scanoss/inspection/policy_check.py
@@ -137,48 +137,6 @@ def _jira_markdown(self, data: list[T]) -> Dict[str, Any]:
         """
         pass
 
-    def generate_table(self, headers, rows, centered_columns=None):
-        """
-        Generate a Markdown table.
-
-        :param headers: List of headers for the table.
-        :param rows: List of rows for the table.
-        :param centered_columns: List of column indices to be centered.
-        :return: A string representing the Markdown table.
-        """
-        col_sep = ' | '
-        centered_column_set = set(centered_columns or [])
-        if headers is None:
-            self.print_stderr('ERROR: Header are no set')
-            return None
-
-        # Decide which separator to use
-        def create_separator(index):
-            if centered_columns is None:
-                return '-'
-            return ':-:' if index in centered_column_set else '-'
-
-        # Build the row separator
-        row_separator = col_sep + col_sep.join(create_separator(index) for index, _ in enumerate(headers)) + col_sep
-        # build table rows
-        table_rows = [col_sep + col_sep.join(headers) + col_sep, row_separator]
-        table_rows.extend(col_sep + col_sep.join(row) + col_sep for row in rows)
-        return '\n'.join(table_rows)
-
-    def generate_jira_table(self, headers, rows, centered_columns=None):
-        col_sep = '*|*'
-        if headers is None:
-            self.print_stderr('ERROR: Header are no set')
-            return None
-
-        table_header = '|*' + col_sep.join(headers) + '*|\n'
-        table = table_header
-        for row in rows:
-            if len(headers) == len(row):
-                table += '|' + '|'.join(row) + '|\n'
-
-        return table
-
     def _get_formatter(self) -> Callable[[List[dict]], Dict[str, Any]] or None:
         """
         Get the appropriate formatter function based on the specified format.
diff --git a/src/scanoss/inspection/raw/__init__.py b/src/scanoss/inspection/raw/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/src/scanoss/inspection/raw/component_summary.py b/src/scanoss/inspection/raw/component_summary.py
index 6c337a82..03bddbd8 100644
--- a/src/scanoss/inspection/raw/component_summary.py
+++ b/src/scanoss/inspection/raw/component_summary.py
@@ -21,13 +21,58 @@
   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
   THE SOFTWARE.
 """
-
 import json
+from typing import Any
 
+from ..policy_check import T
 from .raw_base import RawBase
 
 
 class ComponentSummary(RawBase):
+
+    def _json(self, data: dict[str,Any]) -> dict[str,Any]:
+        """
+        Format component summary data as JSON.
+
+        This method returns the component summary data in its original JSON structure
+        without any transformation. The data can be directly serialized to JSON format.
+
+        :param data: Dictionary containing component summary information including:
+            - components: List of component-license pairs with status and metadata
+            - totalComponents: Total number of unique components
+            - undeclaredComponents: Number of components with 'pending' status
+            - declaredComponents: Number of components with 'identified' status
+            - totalFilesDetected: Total count of files where components were detected
+            - totalFilesUndeclared: Count of files with undeclared components
+            - totalFilesDeclared: Count of files with declared components
+        :return: The same data dictionary, ready for JSON serialization
+        """
+        return data
+
+    def _markdown(self, data: list[T]) -> dict[str, Any]:
+        """
+        Format component summary data as Markdown (not yet implemented).
+
+        This method is intended to convert component summary data into a human-readable
+        Markdown format with tables and formatted sections.
+
+        :param data: List of component summary items to format
+        :return: None for now; intended to be a dictionary containing formatted Markdown output
+        """
+        pass
+
+    def _jira_markdown(self, data: list[T]) -> dict[str, Any]:
+        """
+        Format component summary data as Jira-flavored Markdown (not yet implemented).
+
+        This method is intended to convert component summary data into Jira-compatible
+        Markdown format, which may include Jira-specific syntax for tables and formatting.
+
+        :param data: List of component summary items to format
+        :return: None for now; intended to be a dictionary containing Jira-formatted Markdown output
+        """
+        pass
+
     def _get_component_summary_from_components(self, scan_components: list)-> dict:
         """
         Get a component summary from detected components.
@@ -84,10 +129,16 @@ def _get_components(self):
         self._get_components_data(self.results, components)
         return self._convert_components_to_list(components)
 
+    def _format(self, component_summary) -> str:
+        # TODO: Implement formatter to support dynamic outputs
+        json_data = self._json(component_summary)
+        return json.dumps(json_data, indent=2)
+
     def run(self):
         components = self._get_components()
         component_summary = self._get_component_summary_from_components(components)
-        self.print_to_file_or_stdout(json.dumps(component_summary, indent=2), self.output)
+        output = self._format(component_summary)
+        self.print_to_file_or_stdout(output, self.output)
         return component_summary
 #
 # End of ComponentSummary Class
diff --git a/src/scanoss/inspection/raw/copyleft.py b/src/scanoss/inspection/raw/copyleft.py
index d778c2e5..97c25bab 100644
--- a/src/scanoss/inspection/raw/copyleft.py
+++ b/src/scanoss/inspection/raw/copyleft.py
@@ -27,6 +27,7 @@
 from typing import Any, Dict, List
 
 from ..policy_check import PolicyStatus
+from ..utils.markdown_utils import generate_jira_table, generate_table
 from .raw_base import RawBase
 
 
@@ -111,7 +112,7 @@ def _markdown(self, components: list[Component]) -> Dict[str, Any]:
         :param components: List of components with copyleft licenses
         :return: Dictionary with formatted Markdown details and summary
         """
-        return self._md_summary_generator(components, self.generate_table)
+        return self._md_summary_generator(components, generate_table)
 
     def _jira_markdown(self, components: list[Component]) -> Dict[str, Any]:
         """
@@ -120,7 +121,7 @@ def _jira_markdown(self, components: list[Component]) -> Dict[str, Any]:
         :param components: List of components with copyleft licenses
         :return: Dictionary with formatted Markdown details and summary
         """
-        return self._md_summary_generator(components, self.generate_jira_table)
+        return self._md_summary_generator(components, generate_jira_table)
 
     def _md_summary_generator(self, components: list[Component], table_generator):
         """
diff --git a/src/scanoss/inspection/raw/license_summary.py b/src/scanoss/inspection/raw/license_summary.py
index bd85c56d..c849ea0a 100644
--- a/src/scanoss/inspection/raw/license_summary.py
+++ b/src/scanoss/inspection/raw/license_summary.py
@@ -23,7 +23,9 @@
 """
 
 import json
+from typing import Any
 
+from ..policy_check import T
 from .raw_base import RawBase
 
 
@@ -36,6 +38,46 @@ class LicenseSummary(RawBase):
     information, providing detailed summaries including copyleft analysis and license statistics.
     """
 
+    def _json(self, data: dict[str,Any]) -> dict[str, Any]:
+        """
+        Format license summary data as JSON.
+
+        This method returns the license summary data unchanged, in the structure
+        that run() serializes to JSON. The data is expected to include license
+        information with copyleft analysis and license statistics.
+
+        :param data: Dictionary of license summary data to format
+        :return: The same dictionary, containing license summary information including:
+            - licenses: List of detected licenses with SPDX IDs, URLs, and copyleft status
+            - detectedLicenses: Total number of unique licenses
+            - detectedLicensesWithCopyleft: Count of licenses marked as copyleft
+        """
+        return data
+
+    def _markdown(self, data: list[T]) -> dict[str, Any]:
+        """
+        Format license summary data as Markdown (not yet implemented).
+
+        This method is intended to convert license summary data into a human-readable
+        Markdown format with tables and formatted sections.
+
+        :param data: List of license summary items to format
+        :return: None for now; intended to be a dictionary containing formatted Markdown output
+        """
+        pass
+
+    def _jira_markdown(self, data: list[T]) -> dict[str, Any]:
+        """
+        Format license summary data as Jira-flavored Markdown (not yet implemented).
+
+        This method is intended to convert license summary data into Jira-compatible
+        Markdown format, which may include Jira-specific syntax for tables and formatting.
+ + :param data: List of license summary items to format + :return: Dictionary containing Jira-formatted Markdown output + """ + pass + # Define required license fields as class constants REQUIRED_LICENSE_FIELDS = ['spdxid', 'url', 'copyleft', 'source'] @@ -131,10 +173,16 @@ def _get_components(self): self._get_dependencies_data(self.results, components) return self._convert_components_to_list(components) + def _format(self, license_summary) -> str: + # TODO: Implement formatter to support dynamic outputs + json_data = self._json(license_summary) + return json.dumps(json_data, indent=2) + def run(self): components = self._get_components() license_summary = self._get_licenses_summary_from_components(components) - self.print_to_file_or_stdout(json.dumps(license_summary, indent=2), self.output) + output = self._format(license_summary) + self.print_to_file_or_stdout(output, self.output) return license_summary # # End of LicenseSummary Class diff --git a/src/scanoss/inspection/raw/match_summary.py b/src/scanoss/inspection/raw/match_summary.py new file mode 100644 index 00000000..645824bb --- /dev/null +++ b/src/scanoss/inspection/raw/match_summary.py @@ -0,0 +1,290 @@ +""" +SPDX-License-Identifier: MIT + + Copyright (c) 2025, SCANOSS + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. 
+ + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. +""" + +from dataclasses import dataclass + +from ...scanossbase import ScanossBase +from ...utils import scanoss_scan_results_utils +from ..utils.file_utils import load_json_file +from ..utils.markdown_utils import generate_table + + +@dataclass +class MatchSummaryItem: + """ + Represents a single match entry in the SCANOSS results. + + This data class encapsulates all the relevant information about a component + match found during scanning, including file location, license details, and + match quality metrics. + """ + file: str + file_url: str + license: str + similarity: str + purl: str + purl_url: str + version: str + lines: str + + +@dataclass +class ComponentMatchSummary: + """ + Container for categorized SCANOSS match results. + + Organizes matches into two categories: full file matches and snippet matches. + This separation allows for different presentation and analysis of match types. + """ + files: list[MatchSummaryItem] + snippet: list[MatchSummaryItem] + +class MatchSummary(ScanossBase): + """ + Generates Markdown summaries from SCANOSS scan results. + + This class processes SCANOSS scan results and creates human-readable Markdown + reports with collapsible sections for file and snippet matches. The reports + include clickable links to files when a line range + prefix is provided. 
+ """ + + def __init__( # noqa: PLR0913 + self, + debug: bool = False, + trace: bool = False, + quiet: bool = False, + line_range_prefix: str = None, + scanoss_results_path: str = None, + output: str = None, + ): + """ + Initialize the Matches Summary generator. + + :param debug: Enable debug output for troubleshooting + :param trace: Enable trace-level logging for detailed execution tracking + :param quiet: Suppress informational messages + :param line_range_prefix: Base URL prefix for GitLab file links with line ranges + (e.g., 'https://gitlab.com/org/project/-/blob/main') + :param scanoss_results_path: Path to SCANOSS scan results file in JSON format + :param output: Output file path for the generated Markdown report (default: stdout) + """ + super().__init__(debug=debug, trace=trace, quiet=quiet) + self.scanoss_results_path = scanoss_results_path + self.line_range_prefix = line_range_prefix + self.output = output + + + def _get_match_summary_item(self, file_name: str, result: dict) -> MatchSummaryItem: + """ + Create a MatchSummaryItem from a single scan result. + + Processes a SCANOSS scan result and creates a MatchSummaryItem with appropriate + file URLs, license information, and line ranges. Handles both snippet matches + (with specific line ranges) and file matches (entire file). 
+ + :param file_name: Name of the scanned file (relative path in the repository) + :param result: SCANOSS scan result dictionary containing match details + :return: Populated match summary item with all relevant information + """ + if result.get('id') == "snippet": + # Snippet match: create URL with line range anchor + lines = scanoss_scan_results_utils.get_lines(result.get('lines')) + end_line = lines[len(lines) - 1] if len(lines) > 1 else lines[0] + file_url = f"{self.line_range_prefix}/{file_name}#L{lines[0]}-L{end_line}" + return MatchSummaryItem( + file_url=file_url, + file=file_name, + license=result.get('licenses')[0].get('name'), + similarity=result.get('matched'), + purl=result.get('purl')[0], + purl_url=result.get('url'), + version=result.get('version'), + lines=f"{lines[0]}-{lines[len(lines) - 1] if len(lines) > 1 else lines[0]}" + ) + # File match: create URL without line range + return MatchSummaryItem( + file=file_name, + file_url=f"{self.line_range_prefix}/{file_name}", + license=result.get('licenses')[0].get('name'), + similarity=result.get('matched'), + purl=result.get('purl')[0], + purl_url=result.get('url'), + version=result.get('version'), + lines="all" + ) + + def _validate_result(self, file_name: str, result: dict) -> bool: + """ + Validate that a scan result has all required fields. 
+ + :param file_name: Name of the file being validated + :param result: The scan result to validate + :return: True if valid, False otherwise + """ + validations = [ + ('id', 'No id found'), + ('lines', 'No lines found'), + ('purl', 'No purl found'), + ('licenses', 'No licenses found'), + ('version', 'No version found'), + ('matched', 'No matched found'), + ('url', 'No url found'), + ] + + for field, error_msg in validations: + if not result.get(field): + self.print_debug(f'ERROR: {error_msg} for file {file_name}') + return False + + # Additional validation for non-empty lists + if len(result.get('purl')) == 0: + self.print_debug(f'ERROR: No purl found for file {file_name}') + return False + if len(result.get('licenses')) == 0: + self.print_debug(f'ERROR: Empty licenses list for file {file_name}') + return False + + return True + + def _get_matches_summary(self) -> ComponentMatchSummary: + """ + Parse SCANOSS scan results and create categorized match summaries. + + Loads the SCANOSS scan results file and processes each match, validating + required fields and categorizing matches into file matches and snippet matches. + Skips invalid or incomplete results with debug messages. 
def _get_matches_summary(self) -> ComponentMatchSummary:
    """
    Build the categorized match summary from the SCANOSS results file.

    Reads the JSON results at ``self.scanoss_results_path`` and walks every
    result of every file. Results whose id is "none" and results that fail
    ``self._validate_result`` are skipped; the remaining ones are converted to
    summary items and stored under snippet matches or file matches.

    :return: Summary container holding the file matches and the snippet matches
    """
    # Read the raw scan output first, then prepare the empty container
    scan_results = load_json_file(self.scanoss_results_path)
    summary = ComponentMatchSummary(files=[], snippet=[])

    for file_name, results in scan_results.items():
        for result in results:
            # Entries flagged as "none" carry no component match
            if result.get('id') == "none":
                self.print_debug(f'Skipping non-match for file {file_name}')
                continue

            # Incomplete entries are dropped (the validator reports the reason)
            if not self._validate_result(file_name, result):
                continue

            item = self._get_match_summary_item(file_name, result)
            # Snippet matches and whole-file matches are reported separately
            if result.get('id') != "snippet":
                summary.files.append(item)
            else:
                summary.snippet.append(item)

    return summary
def _markdown(self, gitlab_matches_summary: ComponentMatchSummary) -> str:
    """
    Generate Markdown from match summaries.

    Creates a Markdown document with a title followed by two collapsible
    ``<details>`` sections: one wrapping the file match table and one wrapping
    the snippet match table.

    :param gitlab_matches_summary: Container with categorized file and snippet matches to format
    :return: Complete Markdown document with formatted match tables, or an
             empty string when there are neither file nor snippet matches
    """
    file_matches = gitlab_matches_summary.files
    snippet_matches = gitlab_matches_summary.snippet
    if not file_matches and not snippet_matches:
        return ""

    # Define table headers
    file_match_headers = ['File', 'License', 'Similarity', 'PURL', 'Version']
    snippet_match_headers = ['File', 'License', 'Similarity', 'PURL', 'Version', 'Lines']

    # Build file matches table
    file_match_rows = [
        [
            f"[{match.file}]({match.file_url})",
            match.license,
            match.similarity,
            f"[{match.purl}]({match.purl_url})",
            match.version,
        ]
        for match in file_matches
    ]
    file_match_table = generate_table(file_match_headers, file_match_rows)

    # Build snippet matches table (same columns plus the matched line range)
    snippet_match_rows = [
        [
            f"[{match.file}]({match.file_url})",
            match.license,
            match.similarity,
            f"[{match.purl}]({match.purl_url})",
            match.version,
            match.lines,
        ]
        for match in snippet_matches
    ]
    snippet_match_table = generate_table(snippet_match_headers, snippet_match_rows)

    def collapsible_section(title: str, table: str) -> str:
        # The blank line after </summary> separates the HTML tags from the
        # Markdown table so the table is rendered as Markdown inside the block.
        return f"<details>\n<summary>{title}</summary>\n\n{table}\n</details>\n"

    # Assemble complete Markdown document
    return "".join(
        [
            "### SCANOSS Match Summary\n\n",
            collapsible_section("File Match Summary", file_match_table),
            collapsible_section("Snippet Match Summary", snippet_match_table),
        ]
    )
def run(self):
    """
    Generate the matches summary report and emit it.

    Steps performed:
        1. Build the categorized match summary from the scan results
        2. Render that summary as Markdown
        3. Write the Markdown to ``self.output`` (or stdout), or print a
           "No matches found." notice when the rendered report is empty
    """
    # Collect the categorized matches, then render them
    summary = self._get_matches_summary()
    report = self._markdown(summary)

    if report != "":
        # Output to file or stdout
        self.print_to_file_or_stdout(report, self.output)
        return

    # Nothing to report
    self.print_stdout("No matches found.")
a/src/scanoss/inspection/raw/undeclared_component.py +++ b/src/scanoss/inspection/raw/undeclared_component.py @@ -27,6 +27,7 @@ from typing import Any, Dict, List from ..policy_check import PolicyStatus +from ..utils.markdown_utils import generate_jira_table, generate_table from .raw_base import RawBase @@ -193,7 +194,7 @@ def _markdown(self, components: list[Component]) -> Dict[str, Any]: for component in component_licenses: rows.append([component.get('purl'), component.get('spdxid')]) return { - 'details': f'### Undeclared components\n{self.generate_table(headers, rows)}\n', + 'details': f'### Undeclared components\n{generate_table(headers, rows)}\n', 'summary': self._get_summary(component_licenses), } @@ -211,7 +212,7 @@ def _jira_markdown(self, components: list) -> Dict[str, Any]: for component in component_licenses: rows.append([component.get('purl'), component.get('spdxid')]) return { - 'details': f'{self.generate_jira_table(headers, rows)}', + 'details': f'{generate_jira_table(headers, rows)}', 'summary': self._get_jira_summary(component_licenses), } diff --git a/src/scanoss/inspection/utils/file_utils.py b/src/scanoss/inspection/utils/file_utils.py new file mode 100644 index 00000000..a7e5de41 --- /dev/null +++ b/src/scanoss/inspection/utils/file_utils.py @@ -0,0 +1,44 @@ +""" +SPDX-License-Identifier: MIT + + Copyright (c) 2025, SCANOSS + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. 
def load_json_file(file_path: str) -> dict:
    """
    Load and parse a JSON file.

    :param file_path: file path to the JSON file
    :return: The parsed JSON data
    :raises ValueError: if the file does not exist or its content cannot be
                        parsed as JSON (the original error is kept as the cause)
    """
    if not os.path.exists(file_path):
        raise ValueError(f'The file "{file_path}" does not exist.')
    # Read as UTF-8 explicitly: the platform default encoding (e.g. cp1252 on
    # Windows) would corrupt or reject non-ASCII content.
    with open(file_path, 'r', encoding='utf-8') as jsonfile:
        try:
            return json.load(jsonfile)
        except Exception as e:
            # Chain the original exception so the traceback shows the parse error
            raise ValueError(f'ERROR: Problem parsing input JSON: {e}') from e
def _cell_text(cell):
    """Return the text to print for a table cell; ``None`` becomes an empty cell."""
    return '' if cell is None else str(cell)


def generate_table(headers, rows, centered_columns=None):
    """
    Generate a Markdown table.

    Cells and headers are converted with ``str`` (``None`` renders as an empty
    cell), so rows may contain non-string values such as numbers.

    :param headers: List of headers for the table.
    :param rows: List of rows for the table; ``None`` is treated as no rows.
    :param centered_columns: List of column indices to be centered.
    :return: A string representing the Markdown table, or None when headers is None.
    """
    if headers is None:
        return None

    col_sep = ' | '
    centered_column_set = set(centered_columns or [])

    def format_row(cells):
        return col_sep + col_sep.join(_cell_text(cell) for cell in cells) + col_sep

    # ':-:' marks a centered column; every other column gets the plain '-' marker
    separator_cells = [':-:' if index in centered_column_set else '-' for index, _ in enumerate(headers)]

    table_rows = [format_row(headers), format_row(separator_cells)]
    table_rows.extend(format_row(row) for row in rows or [])
    return '\n'.join(table_rows)


def generate_jira_table(headers, rows, centered_columns=None):
    """
    Generate a table in Jira markup, with the header cells in bold.

    Rows whose number of cells differs from the number of headers are skipped.
    Cells and headers are converted with ``str`` (``None`` renders as an empty cell).

    :param headers: List of headers for the table.
    :param rows: List of rows for the table; ``None`` is treated as no rows.
    :param centered_columns: Unused; accepted so the signature mirrors ``generate_table``.
    :return: A string representing the Jira table, or None when headers is None.
    """
    if headers is None:
        return None

    col_sep = '*|*'
    lines = ['|*' + col_sep.join(_cell_text(header) for header in headers) + '*|\n']
    for row in rows or []:
        if len(headers) == len(row):
            lines.append('|' + '|'.join(_cell_text(cell) for cell in row) + '|\n')
    return ''.join(lines)
def get_lines(lines: str) -> list:
    """
    Parse a line range string into a flat list of range boundaries.

    Each comma-separated range contributes its boundary numbers in order; the
    ranges are NOT expanded into every line they cover. For example,
    '10-20,25-30' yields [10, 20, 25, 30] and '5' yields [5].

    :param lines: Comma-separated line ranges (e.g., '10-20,25-30')
    :return: Flat list with the integer boundaries of every range; an empty list
             when ``lines`` is None or empty
    :raises ValueError: if a boundary is not an integer (e.g. 'all')
    """
    if not lines:
        return []

    boundaries = []
    for line_range in lines.split(','):
        for bound in line_range.split('-'):
            # Ignore empty pieces left by a trailing comma or stray separator
            if bound.strip():
                boundaries.append(int(bound))
    return boundaries