Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 18 additions & 15 deletions shards/shards.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ import (
"github.com/google/zoekt/query"
)

type Repositorer interface {
type repositorer interface {
Repository() *zoekt.Repository
}

Expand Down Expand Up @@ -111,33 +111,36 @@ func (ss *shardedSearcher) Close() {
}
}

func selectRepoSet(shards []rankedShard, q query.Q) []rankedShard {
filtered := shards[:0]
func selectRepoSet(shards []rankedShard, q query.Q) ([]rankedShard, query.Q) {
and, ok := q.(*query.And)
if !ok {
return shards, q
}

eval := query.Map(q, func(q query.Q) query.Q {
setQuery, ok := q.(*query.RepoSet)
for i, c := range and.Children {
setQuery, ok := c.(*query.RepoSet)
if !ok {
return q
continue
}

filtered := shards[:0]

for _, s := range shards {
if repositorer, ok := s.Searcher.(Repositorer); ok {
if repositorer, ok := s.Searcher.(repositorer); ok {
repo := repositorer.Repository()
if setQuery.Set[repo.Name] {
filtered = append(filtered, s)
}
}
}
and.Children[i] = &query.Const{Value: len(filtered) > 0}

return &query.Const{Value: true}
})
query.Simplify(eval)

if len(filtered) != 0 {
return filtered
// Stop after first RepoSet, otherwise we might append duplicate
// shards to `filtered`
return filtered, query.Simplify(and)
}

return shards
return shards, and
}

func (ss *shardedSearcher) Search(ctx context.Context, q query.Q, opts *zoekt.SearchOptions) (sr *zoekt.SearchResult, err error) {
Expand Down Expand Up @@ -174,7 +177,7 @@ func (ss *shardedSearcher) Search(ctx context.Context, q query.Q, opts *zoekt.Se
start = time.Now()

shards := ss.getShards()
shards = selectRepoSet(shards, q)
shards, q = selectRepoSet(shards, q)

all := make(chan shardResult, len(shards))

Expand Down
36 changes: 25 additions & 11 deletions shards/shards_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ func TestFilteringShardsByRepoSet(t *testing.T) {
shardName := fmt.Sprintf("shard%d", i)
repoName := fmt.Sprintf("repository%d", i)

if i%2 == 0 {
if i%3 == 0 {
repoSetNames = append(repoSetNames, repoName)
}

Expand Down Expand Up @@ -195,6 +195,15 @@ func TestFilteringShardsByRepoSet(t *testing.T) {
if len(res.Files) != len(repoSetNames) {
t.Fatalf("with reposet: got %d results, want %d", len(res.Files), len(repoSetNames))
}

// With the same reposet multiple times
res, err = ss.Search(context.Background(), query.NewAnd(set, set, sub), &zoekt.SearchOptions{})
if err != nil {
t.Errorf("Search: %v", err)
}
if len(res.Files) != len(repoSetNames) {
t.Fatalf("with reposet multiple times: got %d results, want %d", len(res.Files), len(repoSetNames))
}
}

type memSeeker struct {
Expand Down Expand Up @@ -333,11 +342,16 @@ func BenchmarkShardedSearch(b *testing.B) {
ctx := context.Background()
opts := &zoekt.SearchOptions{}

set := query.NewRepoSet(repoSetNames...)
needleSub := &query.Substring{Pattern: "needle"}
haystackSub := &query.Substring{Pattern: "haystack"}
helloworldSub := &query.Substring{Pattern: "helloworld"}

setAnd := func(q query.Q) func() query.Q {
return func() query.Q {
return query.NewAnd(query.NewRepoSet(repoSetNames...), q)
}
}

search := func(b *testing.B, q query.Q, wantFiles int) {
b.Helper()

Expand All @@ -352,25 +366,25 @@ func BenchmarkShardedSearch(b *testing.B) {

benchmarks := []struct {
name string
q query.Q
q func() query.Q
wantFiles int
}{
{"substring all results", haystackSub, len(repos) * filesPerRepo},
{"substring no results", helloworldSub, 0},
{"substring some results", needleSub, len(repos)},
{"substring all results", func() query.Q { return haystackSub }, len(repos) * filesPerRepo},
{"substring no results", func() query.Q { return helloworldSub }, 0},
{"substring some results", func() query.Q { return needleSub }, len(repos)},

{"substring all results and repo set", query.NewAnd(set, haystackSub), len(repoSetNames) * filesPerRepo},
{"substring some results and repo set", query.NewAnd(set, needleSub), len(repoSetNames)},
{"substring no results and repo set", query.NewAnd(set, helloworldSub), 0},
{"substring all results and repo set", setAnd(haystackSub), len(repoSetNames) * filesPerRepo},
{"substring some results and repo set", setAnd(needleSub), len(repoSetNames)},
{"substring no results and repo set", setAnd(helloworldSub), 0},
}

for _, bb := range benchmarks {
b.Run(bb.name, func(b *testing.B) {
q := bb.q

b.ReportAllocs()

for n := 0; n < b.N; n++ {
q := bb.q()

search(b, q, bb.wantFiles)
}
})
Expand Down