Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 17 additions & 1 deletion search/shards.go
Original file line number Diff line number Diff line change
Expand Up @@ -436,6 +436,22 @@ func doSelectRepoSet(shards []*rankedShard, and *query.And) ([]*rankedShard, que
}
return false
})
case *query.Meta:
// Meta queries filter repositories based on metadata fields.
// By checking this at the shard level, we can skip entire shards
// that don't contain any matching repositories, avoiding expensive
// I/O operations.
setSize = 0 // Unknown size, we'll filter based on metadata
hasRepos = hasReposForPredicate(func(repo *zoekt.Repository) bool {
if repo.Metadata == nil {
return false
}
v, ok := repo.Metadata[setQuery.Field]
if !ok {
return false
}
return setQuery.Value.MatchString(v)
})
default:
continue
}
Expand Down Expand Up @@ -486,7 +502,7 @@ func doSelectRepoSet(shards []*rankedShard, and *query.And) ([]*rankedShard, que
// shard indexData.simplify will simplify to (and true (content baz)) ->
// (content baz). This work can be done now once, rather than per shard.
switch c := c.(type) {
case *query.RepoSet, *query.RepoIDs, *query.Repo:
case *query.RepoSet, *query.RepoIDs, *query.Repo, *query.Meta:
and.Children[i] = &query.Const{Value: true}
return filtered, query.Simplify(and)

Expand Down
176 changes: 176 additions & 0 deletions search/shards_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -387,6 +387,182 @@ func TestFilteringShardsByRepoSetOrBranchesReposOrRepoIDs(t *testing.T) {
}
}

func TestFilteringShardsByMeta(t *testing.T) {
ss := newShardedSearcher(1)

// Create repos with different metadata values
// We'll create 30 repos total:
// - 10 with nickname="project-A"
// - 10 with nickname="project-B"
// - 10 with no metadata
n := 30
projectARepos := []string{}
projectBRepos := []string{}

// Common document that will be in all repos
doc := index.Document{
Name: "common.go",
Content: []byte("needle haystack"),
}

for i := range n {
shardName := fmt.Sprintf("shard%d", i)
repoName := fmt.Sprintf("repository%.3d", i)

var metadata map[string]string
if i < 10 {
// First 10 repos have project-A
metadata = map[string]string{"nickname": "project-A", "visibility": "public"}
projectARepos = append(projectARepos, repoName)
} else if i < 20 {
// Next 10 repos have project-B
metadata = map[string]string{"nickname": "project-B", "visibility": "private"}
projectBRepos = append(projectBRepos, repoName)
}
// Last 10 repos have no metadata

repo := &zoekt.Repository{
ID: uint32(i + 1),
Name: repoName,
Metadata: metadata,
}

ss.replace(map[string]zoekt.Searcher{
shardName: searcherForTest(t, testShardBuilder(t, repo, doc)),
})
}

// Test 1: Search without Meta filter - should search all shards
res, err := ss.Search(context.Background(), &query.Substring{Pattern: "needle"}, &zoekt.SearchOptions{})
if err != nil {
t.Fatalf("Search without filter: %v", err)
}
if len(res.Files) != n {
t.Fatalf("no meta filter: got %d results, want %d", len(res.Files), n)
}

sub := &query.Substring{Pattern: "needle"}

// Helper function to extract unique repo names from search results
getRepoNames := func(files []zoekt.FileMatch) []string {
repoSet := make(map[string]struct{})
for _, f := range files {
repoSet[f.Repository] = struct{}{}
}
repos := make([]string, 0, len(repoSet))
for repo := range repoSet {
repos = append(repos, repo)
}
sort.Strings(repos)
return repos
}

// Test 2: Filter by nickname="project-A" - should only search 10 shards
metaQueryA := &query.Meta{
Field: "nickname",
Value: regexp.MustCompile("^project-A$"),
}
res, err = ss.Search(context.Background(), query.NewAnd(metaQueryA, sub), &zoekt.SearchOptions{})
if err != nil {
t.Fatalf("Search with Meta filter A: %v", err)
}
gotRepos := getRepoNames(res.Files)
wantRepos := append([]string{}, projectARepos...)
sort.Strings(wantRepos)
if !reflect.DeepEqual(gotRepos, wantRepos) {
t.Fatalf("Meta(nickname=project-A):\ngot repos: %v\nwant repos: %v", gotRepos, wantRepos)
}

// Test 3: Filter by nickname="project-B" - should only search 10 shards
metaQueryB := &query.Meta{
Field: "nickname",
Value: regexp.MustCompile("^project-B$"),
}
res, err = ss.Search(context.Background(), query.NewAnd(metaQueryB, sub), &zoekt.SearchOptions{})
if err != nil {
t.Fatalf("Search with Meta filter B: %v", err)
}
gotRepos = getRepoNames(res.Files)
wantRepos = append([]string{}, projectBRepos...)
sort.Strings(wantRepos)
if !reflect.DeepEqual(gotRepos, wantRepos) {
t.Fatalf("Meta(nickname=project-B):\ngot repos: %v\nwant repos: %v", gotRepos, wantRepos)
}

// Test 4: Filter by visibility="public" - should only search 10 shards (project-A repos)
metaQueryPublic := &query.Meta{
Field: "visibility",
Value: regexp.MustCompile("^public$"),
}
res, err = ss.Search(context.Background(), query.NewAnd(metaQueryPublic, sub), &zoekt.SearchOptions{})
if err != nil {
t.Fatalf("Search with Meta filter public: %v", err)
}
gotRepos = getRepoNames(res.Files)
wantRepos = append([]string{}, projectARepos...)
sort.Strings(wantRepos)
if !reflect.DeepEqual(gotRepos, wantRepos) {
t.Fatalf("Meta(visibility=public):\ngot repos: %v\nwant repos: %v", gotRepos, wantRepos)
}

// Test 5: Filter by non-existent field - should return 0 results
metaQueryNonExistent := &query.Meta{
Field: "nonexistent_field",
Value: regexp.MustCompile(".*"),
}
res, err = ss.Search(context.Background(), query.NewAnd(metaQueryNonExistent, sub), &zoekt.SearchOptions{})
if err != nil {
t.Fatalf("Search with Meta filter non-existent: %v", err)
}
if len(res.Files) != 0 {
t.Fatalf("Meta(nonexistent_field): got %d results, want 0", len(res.Files))
}

// Test 6: Filter by regex pattern matching multiple values
metaQueryRegex := &query.Meta{
Field: "nickname",
Value: regexp.MustCompile("project-.*"), // Matches both project-A and project-B
}
res, err = ss.Search(context.Background(), query.NewAnd(metaQueryRegex, sub), &zoekt.SearchOptions{})
if err != nil {
t.Fatalf("Search with Meta regex filter: %v", err)
}
gotRepos = getRepoNames(res.Files)
wantRepos = append(append([]string{}, projectARepos...), projectBRepos...)
sort.Strings(wantRepos)
if !reflect.DeepEqual(gotRepos, wantRepos) {
t.Fatalf("Meta(nickname=project-.*):\ngot repos: %v\nwant repos: %v", gotRepos, wantRepos)
}

// Test 7: Test that Meta query alone (without content search) works
res, err = ss.Search(context.Background(), metaQueryA, &zoekt.SearchOptions{})
if err != nil {
t.Fatalf("Search with Meta query alone: %v", err)
}
gotRepos = getRepoNames(res.Files)
wantRepos = append([]string{}, projectARepos...)
sort.Strings(wantRepos)
if !reflect.DeepEqual(gotRepos, wantRepos) {
t.Fatalf("Meta query alone:\ngot repos: %v\nwant repos: %v", gotRepos, wantRepos)
}

// Test 8: Test with List operation (not just Search)
listRes, err := ss.List(context.Background(), metaQueryA, nil)
if err != nil {
t.Fatalf("List with Meta filter: %v", err)
}
gotListRepos := make([]string, len(listRes.Repos))
for i, r := range listRes.Repos {
gotListRepos[i] = r.Repository.Name
}
sort.Strings(gotListRepos)
wantRepos = append([]string{}, projectARepos...)
sort.Strings(wantRepos)
if !reflect.DeepEqual(gotListRepos, wantRepos) {
t.Fatalf("List with Meta(nickname=project-A):\ngot repos: %v\nwant repos: %v", gotListRepos, wantRepos)
}
}

func hash(name string) uint32 {
h := fnv.New32()
h.Write([]byte(name))
Expand Down
Loading