From 0e1b2b05f81640fec49387195c3e43a7a26140e1 Mon Sep 17 00:00:00 2001 From: Matias Daloia Date: Tue, 28 Oct 2025 11:44:59 +0100 Subject: [PATCH 1/4] [SP-3557] fix: sort scan results by ascending order --- internal/service/scan_service_impl.go | 20 +- internal/service/scan_service_impl_test.go | 539 +++++++++++++++++++++ 2 files changed, 556 insertions(+), 3 deletions(-) create mode 100644 internal/service/scan_service_impl_test.go diff --git a/internal/service/scan_service_impl.go b/internal/service/scan_service_impl.go index bd8d8b7..39c56d0 100644 --- a/internal/service/scan_service_impl.go +++ b/internal/service/scan_service_impl.go @@ -221,9 +221,23 @@ func (s *ScanServiceImpl) deduplicateComponents(results []*entities.ScanResult) } } - sort.Slice(deduplicatedResults, func(i, j int) bool { - return deduplicatedResults[i].PathID < deduplicatedResults[j].PathID - }) + sortByBestComponentOrder(deduplicatedResults) return deduplicatedResults } + +// sortByBestComponentOrder sorts the results by the lowest order of the component groups (lower order is better). +func sortByBestComponentOrder(results []*entities.ScanResult) { + sort.Slice(results, func(i, j int) bool { + minOrderI := int32(^uint32(0) >> 1) // Max int32 + for _, group := range results[i].ComponentGroups { + minOrderI = min(minOrderI, group.Order) + } + + minOrderJ := int32(^uint32(0) >> 1) // Max int32 + for _, group := range results[j].ComponentGroups { + minOrderJ = min(minOrderJ, group.Order) + } + return minOrderI < minOrderJ + }) +} diff --git a/internal/service/scan_service_impl_test.go b/internal/service/scan_service_impl_test.go new file mode 100644 index 0000000..2b5cb39 --- /dev/null +++ b/internal/service/scan_service_impl_test.go @@ -0,0 +1,539 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2024 SCANOSS.COM + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +package service + +import ( + "testing" + + "github.com/scanoss/folder-hashing-api/internal/domain/entities" +) + +func TestSortByBestComponentOrder(t *testing.T) { + tests := []struct { + name string + input []*entities.ScanResult + expected []string // Expected PathIDs in order + }{ + { + name: "Empty slice", + input: []*entities.ScanResult{}, + expected: []string{}, + }, + { + name: "Single result", + input: []*entities.ScanResult{ + { + PathID: "/path/one", + ComponentGroups: []*entities.ComponentGroup{ + {Order: 5}, + }, + }, + }, + expected: []string{"/path/one"}, + }, + { + name: "Two results - ascending order", + input: []*entities.ScanResult{ + { + PathID: "/path/one", + ComponentGroups: []*entities.ComponentGroup{ + {Order: 1}, + }, + }, + { + PathID: "/path/two", + ComponentGroups: []*entities.ComponentGroup{ + {Order: 2}, + }, + }, + }, + expected: []string{"/path/one", "/path/two"}, + }, + { + name: "Two results - descending order (needs sorting)", + input: []*entities.ScanResult{ + { + PathID: "/path/two", + ComponentGroups: []*entities.ComponentGroup{ + {Order: 5}, + }, + }, + { + PathID: "/path/one", + ComponentGroups: []*entities.ComponentGroup{ + {Order: 2}, + }, + }, + }, + expected: []string{"/path/one", "/path/two"}, + }, + { + name: "Multiple results with various orders", + input: []*entities.ScanResult{ + { + PathID: "/path/c", + ComponentGroups: []*entities.ComponentGroup{ + {Order: 10}, + }, + }, + { + PathID: "/path/a", + ComponentGroups: []*entities.ComponentGroup{ + {Order: 1}, + }, + }, + { + PathID: "/path/b", + ComponentGroups: []*entities.ComponentGroup{ + {Order: 5}, + }, + }, + }, + expected: []string{"/path/a", "/path/b", "/path/c"}, + }, + { + name: "Multiple component groups - uses minimum order", + input: []*entities.ScanResult{ + { + PathID: "/path/one", + ComponentGroups: []*entities.ComponentGroup{ + {Order: 10}, + {Order: 2}, // Minimum is 2 + {Order: 15}, + }, + }, + { + PathID: "/path/two", + ComponentGroups: []*entities.ComponentGroup{ + {Order: 5}, + {Order: 3}, // Minimum is 3 + {Order: 8}, + }, + }, + }, + expected: []string{"/path/one", "/path/two"}, + }, + { + name: "Equal minimum orders - maintains stable sort", + input: []*entities.ScanResult{ + { + PathID: "/path/one", + ComponentGroups: []*entities.ComponentGroup{ + {Order: 5}, + }, + }, + { + PathID: "/path/two", + ComponentGroups: []*entities.ComponentGroup{ + {Order: 5}, + }, + }, + }, + expected: []string{"/path/one", "/path/two"}, + }, + { + name: "Complex scenario with multiple groups and varying orders", + input: []*entities.ScanResult{ + { + PathID: "/path/high", + ComponentGroups: []*entities.ComponentGroup{ + {Order: 100}, + {Order: 50}, + }, + }, + { + PathID: "/path/best", + ComponentGroups: []*entities.ComponentGroup{ + {Order: 1}, + }, + }, + { + PathID: "/path/medium", + ComponentGroups: []*entities.ComponentGroup{ + {Order: 20}, + {Order: 15}, + {Order: 25}, + }, + }, + { + PathID: "/path/low", + ComponentGroups: []*entities.ComponentGroup{ + {Order: 3}, + {Order: 5}, + }, + }, + }, + expected: []string{"/path/best", "/path/low", "/path/medium", "/path/high"}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Make a copy to avoid modifying the original + input := make([]*entities.ScanResult, len(tt.input)) + copy(input, tt.input) + + // Sort the results + sortByBestComponentOrder(input) + + // Verify the order + if len(input) != len(tt.expected) { + t.Errorf("Expected %d results, got %d", len(tt.expected), len(input)) + return + } + + for i, expectedPath := range tt.expected { + if input[i].PathID != expectedPath { + t.Errorf("Position %d: expected PathID %s, got %s", i, expectedPath, input[i].PathID) + } + } + }) + } +} + +func TestHasHighScoreMatch(t *testing.T) { + service := &ScanServiceImpl{} + + tests := []struct { + name string + componentGroups []entities.ComponentGroup + threshold float32 + expected bool + }{ + { + name: "Empty component groups", + componentGroups: []entities.ComponentGroup{}, + threshold: 0.5, + expected: false, + }, + { + name: "No versions meet threshold", + componentGroups: []entities.ComponentGroup{ + { + Versions: []entities.Version{ + {Score: 0.3}, + {Score: 0.4}, + }, + }, + }, + threshold: 0.5, + expected: false, + }, + { + name: "One version meets threshold", + componentGroups: []entities.ComponentGroup{ + { + Versions: []entities.Version{ + {Score: 0.3}, + {Score: 0.6}, + }, + }, + }, + threshold: 0.5, + expected: true, + }, + { + name: "Version equals threshold", + componentGroups: []entities.ComponentGroup{ + { + Versions: []entities.Version{ + {Score: 0.5}, + }, + }, + }, + threshold: 0.5, + expected: true, + }, + { + name: "Multiple groups, one has high score", + componentGroups: []entities.ComponentGroup{ + { + Versions: []entities.Version{ + {Score: 0.2}, + }, + }, + { + Versions: []entities.Version{ + {Score: 0.9}, + }, + }, + }, + threshold: 0.8, + expected: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := service.hasHighScoreMatch(tt.componentGroups, tt.threshold) + if result != tt.expected { + t.Errorf("Expected %v, got %v", tt.expected, result) + } + }) + } +} + +func TestProcessComponentGroups(t *testing.T) { + service := &ScanServiceImpl{} + + tests := []struct { + name string + componentGroups []entities.ComponentGroup + path string + minAcceptedScore float32 + expectedCount int // Number of results + expectedVersions int // Number of versions in first group + }{ + { + name: "Empty component groups", + componentGroups: []entities.ComponentGroup{}, + path: "/test/path", + minAcceptedScore: 0.5, + expectedCount: 0, + expectedVersions: 0, + }, + { + name: "All versions below threshold", + componentGroups: []entities.ComponentGroup{ + { + Versions: []entities.Version{ + {Version: "1.0.0", Score: 0.3}, + {Version: "2.0.0", Score: 0.4}, + }, + }, + }, + path: "/test/path", + minAcceptedScore: 0.5, + expectedCount: 0, + expectedVersions: 0, + }, + { + name: "Some versions above threshold", + componentGroups: []entities.ComponentGroup{ + { + Versions: []entities.Version{ + {Version: "1.0.0", Score: 0.3}, + {Version: "2.0.0", Score: 0.6}, + {Version: "3.0.0", Score: 0.8}, + }, + }, + }, + path: "/test/path", + minAcceptedScore: 0.5, + expectedCount: 1, + expectedVersions: 2, + }, + { + name: "All versions above threshold", + componentGroups: []entities.ComponentGroup{ + { + Versions: []entities.Version{ + {Version: "1.0.0", Score: 0.6}, + {Version: "2.0.0", Score: 0.7}, + }, + }, + }, + path: "/test/path", + minAcceptedScore: 0.5, + expectedCount: 1, + expectedVersions: 2, + }, + { + name: "Multiple groups, mixed results", + componentGroups: []entities.ComponentGroup{ + { + Versions: []entities.Version{ + {Version: "1.0.0", Score: 0.3}, + }, + }, + { + Versions: []entities.Version{ + {Version: "2.0.0", Score: 0.8}, + }, + }, + }, + path: "/test/path", + minAcceptedScore: 0.5, + expectedCount: 1, + expectedVersions: 1, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + results := service.processComponentGroups(tt.componentGroups, tt.path, tt.minAcceptedScore) + + if len(results) != tt.expectedCount { + t.Errorf("Expected %d results, got %d", tt.expectedCount, len(results)) + return + } + + if tt.expectedCount > 0 { + if results[0].PathID != tt.path { + t.Errorf("Expected PathID %s, got %s", tt.path, results[0].PathID) + } + + totalVersions := 0 + for _, group := range results[0].ComponentGroups { + totalVersions += len(group.Versions) + } + + if totalVersions != tt.expectedVersions { + t.Errorf("Expected %d versions, got %d", tt.expectedVersions, totalVersions) + } + } + }) + } +} + +func TestDeduplicateComponents(t *testing.T) { + service := &ScanServiceImpl{} + + tests := []struct { + name string + input []*entities.ScanResult + expectedCount int + expectedPaths []string + }{ + { + name: "Empty results", + input: []*entities.ScanResult{}, + expectedCount: 0, + expectedPaths: []string{}, + }, + { + name: "No duplicates", + input: []*entities.ScanResult{ + { + PathID: "/path/one", + ComponentGroups: []*entities.ComponentGroup{ + { + PURL: "pkg:npm/component-a", + Order: 1, + Versions: []entities.Version{ + {Score: 0.8}, + }, + }, + }, + }, + { + PathID: "/path/two", + ComponentGroups: []*entities.ComponentGroup{ + { + PURL: "pkg:npm/component-b", + Order: 2, + Versions: []entities.Version{ + {Score: 0.7}, + }, + }, + }, + }, + }, + expectedCount: 2, + expectedPaths: []string{"/path/one", "/path/two"}, + }, + { + name: "Duplicate component - keep higher score", + input: []*entities.ScanResult{ + { + PathID: "/path/one", + ComponentGroups: []*entities.ComponentGroup{ + { + PURL: "pkg:npm/component-a", + Order: 2, + Versions: []entities.Version{ + {Score: 0.6}, + }, + }, + }, + }, + { + PathID: "/path/two", + ComponentGroups: []*entities.ComponentGroup{ + { + PURL: "pkg:npm/component-a", + Order: 1, + Versions: []entities.Version{ + {Score: 0.9}, + }, + }, + }, + }, + }, + expectedCount: 1, + expectedPaths: []string{"/path/two"}, + }, + { + name: "Multiple duplicates across paths", + input: []*entities.ScanResult{ + { + PathID: "/path/one", + ComponentGroups: []*entities.ComponentGroup{ + { + PURL: "pkg:npm/component-a", + Order: 1, + Versions: []entities.Version{ + {Score: 0.5}, + }, + }, + { + PURL: "pkg:npm/component-b", + Order: 2, + Versions: []entities.Version{ + {Score: 0.6}, + }, + }, + }, + }, + { + PathID: "/path/two", + ComponentGroups: []*entities.ComponentGroup{ + { + PURL: "pkg:npm/component-a", + Order: 3, + Versions: []entities.Version{ + {Score: 0.9}, + }, + }, + }, + }, + }, + expectedCount: 2, + // After deduplication: /path/one keeps component-b (order 2), /path/two keeps component-a (order 3) + // Sorted by order: /path/one (2) < /path/two (3) + expectedPaths: []string{"/path/one", "/path/two"}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + results := service.deduplicateComponents(tt.input) + + if len(results) != tt.expectedCount { + t.Errorf("Expected %d results, got %d", tt.expectedCount, len(results)) + return + } + + // Verify paths match expected (already sorted by order) + for i, expectedPath := range tt.expectedPaths { + if results[i].PathID != expectedPath { + t.Errorf("Position %d: expected PathID %s, got %s", i, expectedPath, results[i].PathID) + } + } + }) + } +} From 1345074174e4d0397e0acbe5d79bebe9a5cb692b Mon Sep 17 00:00:00 2001 From: Matias Daloia Date: Tue, 28 Oct 2025 11:46:54 +0100 Subject: [PATCH 2/4] [SP-3557] chore: update changelog --- CHANGELOG.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index ceafe4f..feb0c2a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,10 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [0.7.1] - 2025-10-28 +### Added +- Fix sorting order of results. Now sorting by `Order` field in ascending order, instead of `PathId`. + ## [0.7.0] - 2025-10-24 ## Changed - Update qdrant's docker compose file name @@ -46,3 +50,4 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 [0.5.0]: https://github.com/scanoss/folder-hashing-api/compare/v0.4.2...v0.5.0 [0.6.0]: https://github.com/scanoss/folder-hashing-api/compare/v0.5.0...v0.6.0 [0.7.0]: https://github.com/scanoss/folder-hashing-api/compare/v0.6.0...v0.7.0 +[0.7.1]: https://github.com/scanoss/folder-hashing-api/compare/v0.7.0...v0.7.1 From f5cb7889311a1ffe09ca1ab49d75a131520848c0 Mon Sep 17 00:00:00 2001 From: Matias Daloia Date: Tue, 28 Oct 2025 12:17:16 +0100 Subject: [PATCH 3/4] [SP-3557] chore: fix changelog, add test --- CHANGELOG.md | 2 +- internal/service/scan_service_impl.go | 7 ++++--- internal/service/scan_service_impl_test.go | 24 +++++++++++++++++++++- 3 files changed, 28 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index feb0c2a..6ddea8c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,7 +6,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ## [0.7.1] - 2025-10-28 -### Added +### Fixed - Fix sorting order of results. Now sorting by `Order` field in ascending order, instead of `PathId`. ## [0.7.0] - 2025-10-24 diff --git a/internal/service/scan_service_impl.go b/internal/service/scan_service_impl.go index 39c56d0..d9f1e92 100644 --- a/internal/service/scan_service_impl.go +++ b/internal/service/scan_service_impl.go @@ -18,6 +18,7 @@ package service import ( "context" + "math" "sort" "github.com/grpc-ecosystem/go-grpc-middleware/logging/zap/ctxzap" @@ -228,13 +229,13 @@ func (s *ScanServiceImpl) deduplicateComponents(results []*entities.ScanResult) // sortByBestComponentOrder sorts the results by the lowest order of the component groups (lower order is better). func sortByBestComponentOrder(results []*entities.ScanResult) { - sort.Slice(results, func(i, j int) bool { - minOrderI := int32(^uint32(0) >> 1) // Max int32 + sort.SliceStable(results, func(i, j int) bool { + minOrderI := int32(math.MaxInt32) for _, group := range results[i].ComponentGroups { minOrderI = min(minOrderI, group.Order) } - minOrderJ := int32(^uint32(0) >> 1) // Max int32 + minOrderJ := int32(math.MaxInt32) for _, group := range results[j].ComponentGroups { minOrderJ = min(minOrderJ, group.Order) } diff --git a/internal/service/scan_service_impl_test.go b/internal/service/scan_service_impl_test.go index 2b5cb39..09e245d 100644 --- a/internal/service/scan_service_impl_test.go +++ b/internal/service/scan_service_impl_test.go @@ -81,6 +81,28 @@ func TestSortByBestComponentOrder(t *testing.T) { }, expected: []string{"/path/one", "/path/two"}, }, + { + name: "Result with empty ComponentGroups sorts last", + input: []*entities.ScanResult{ + { + PathID: "/path/with-groups", + ComponentGroups: []*entities.ComponentGroup{ + {Order: 10}, + }, + }, + { + PathID: "/path/empty-groups", + ComponentGroups: []*entities.ComponentGroup{}, + }, + { + PathID: "/path/with-low-order", + ComponentGroups: []*entities.ComponentGroup{ + {Order: 1}, + }, + }, + }, + expected: []string{"/path/with-low-order", "/path/with-groups", "/path/empty-groups"}, + }, { name: "Multiple results with various orders", input: []*entities.ScanResult{ @@ -524,7 +546,7 @@ func TestDeduplicateComponents(t *testing.T) { results := service.deduplicateComponents(tt.input) if len(results) != tt.expectedCount { - t.Errorf("Expected %d results, got %d", tt.expectedCount, len(results)) + t.Errorf("Expected %d results, got %d. Results: %v", tt.expectedCount, len(results)) return } From b1a560a2b61680332b8f66c53362546f0da753ae Mon Sep 17 00:00:00 2001 From: Matias Daloia Date: Tue, 28 Oct 2025 12:21:11 +0100 Subject: [PATCH 4/4] [SP-3557] fix: lint error --- internal/service/scan_service_impl_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/service/scan_service_impl_test.go b/internal/service/scan_service_impl_test.go index 09e245d..7a1bc37 100644 --- a/internal/service/scan_service_impl_test.go +++ b/internal/service/scan_service_impl_test.go @@ -546,7 +546,7 @@ func TestDeduplicateComponents(t *testing.T) { results := service.deduplicateComponents(tt.input) if len(results) != tt.expectedCount { - t.Errorf("Expected %d results, got %d. Results: %v", tt.expectedCount, len(results)) + t.Errorf("Expected %d results, got %d", tt.expectedCount, len(results)) return }