From 46e7a82e9ae20458c996838647f2e7d7f2c34ba9 Mon Sep 17 00:00:00 2001 From: Thorsten Ball Date: Fri, 19 Jul 2019 16:02:26 +0200 Subject: [PATCH 1/5] Reuse simplified query to reduce work in shardedSearch This commit does a couple of things to make the added behavior correct: 1. It reuses the simplified query produced by `selectRepoSet` which in turn reduces the work the shards have to do when evaluating the query 2. It only evaluates the `query.RepoSet` if it's part of a top-level `query.And`. The previous code was too naive when considering that a `query.RepoSet` can appear anywhere in a `query.Q`. See this comment for more details: https://github.com/sourcegraph/zoekt/pull/14#discussion_r305324783 3. It changes the const value that's returned in the simplified version so that `typeRepoSearcher`, which evaluates `query.TypeRepo` queries to a `query.RepoSet` works (the tests in `shards/eval_test.go` break without this. --- shards/shards.go | 30 ++++++++++++++++++------------ shards/shards_test.go | 11 ++++++++++- 2 files changed, 28 insertions(+), 13 deletions(-) diff --git a/shards/shards.go b/shards/shards.go index dbf901b4f..88ddb8be8 100644 --- a/shards/shards.go +++ b/shards/shards.go @@ -111,15 +111,20 @@ func (ss *shardedSearcher) Close() { } } -func selectRepoSet(shards []rankedShard, q query.Q) []rankedShard { - filtered := shards[:0] +func selectRepoSet(shards []rankedShard, q query.Q) ([]rankedShard, query.Q) { + and, ok := q.(*query.And) + if !ok { + return shards, q + } - eval := query.Map(q, func(q query.Q) query.Q { - setQuery, ok := q.(*query.RepoSet) + for i, c := range and.Children { + setQuery, ok := c.(*query.RepoSet) if !ok { - return q + continue } + filtered := shards[:0] + for _, s := range shards { if repositorer, ok := s.Searcher.(Repositorer); ok { repo := repositorer.Repository() @@ -129,15 +134,16 @@ func selectRepoSet(shards []rankedShard, q query.Q) []rankedShard { } } - return &query.Const{Value: true} - }) - query.Simplify(eval) + and.Children[i] = &query.Const{Value: len(filtered) > 0} - if len(filtered) != 0 { - return filtered + // Stop after first RepoSet, otherwise we might append duplicate + // shards to `filtered` + if len(filtered) != 0 { + return filtered, and + } } - return shards + return shards, and } func (ss *shardedSearcher) Search(ctx context.Context, q query.Q, opts *zoekt.SearchOptions) (sr *zoekt.SearchResult, err error) { @@ -174,7 +180,7 @@ func (ss *shardedSearcher) Search(ctx context.Context, q query.Q, opts *zoekt.Se start = time.Now() shards := ss.getShards() - shards = selectRepoSet(shards, q) + shards, q = selectRepoSet(shards, q) all := make(chan shardResult, len(shards)) diff --git a/shards/shards_test.go b/shards/shards_test.go index a1ab70ed3..717bd3c3f 100644 --- a/shards/shards_test.go +++ b/shards/shards_test.go @@ -166,7 +166,7 @@ func TestFilteringShardsByRepoSet(t *testing.T) { shardName := fmt.Sprintf("shard%d", i) repoName := fmt.Sprintf("repository%d", i) - if i%2 == 0 { + if i%3 == 0 { repoSetNames = append(repoSetNames, repoName) } @@ -195,6 +195,15 @@ func TestFilteringShardsByRepoSet(t *testing.T) { if len(res.Files) != len(repoSetNames) { t.Fatalf("with reposet: got %d results, want %d", len(res.Files), len(repoSetNames)) } + + // With the same reposet multiple times + res, err = ss.Search(context.Background(), query.NewAnd(set, set, sub), &zoekt.SearchOptions{}) + if err != nil { + t.Errorf("Search: %v", err) + } + if len(res.Files) != len(repoSetNames) { + t.Fatalf("with reposet multiple times: got %d results, want %d", len(res.Files), len(repoSetNames)) + } } type memSeeker struct { From 94f2bfea35ffe1419f611edd1b73bd5d70d2a462 Mon Sep 17 00:00:00 2001 From: Thorsten Ball Date: Fri, 19 Jul 2019 16:08:38 +0200 Subject: [PATCH 2/5] Unexport Repositorer interface --- shards/shards.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/shards/shards.go b/shards/shards.go index 88ddb8be8..799c95b9b 100644 --- a/shards/shards.go +++ b/shards/shards.go @@ -31,7 +31,7 @@ import ( "github.com/google/zoekt/query" ) -type Repositorer interface { +type repositorer interface { Repository() *zoekt.Repository } @@ -126,7 +126,7 @@ func selectRepoSet(shards []rankedShard, q query.Q) ([]rankedShard, query.Q) { filtered := shards[:0] for _, s := range shards { - if repositorer, ok := s.Searcher.(Repositorer); ok { + if repositorer, ok := s.Searcher.(repositorer); ok { repo := repositorer.Repository() if setQuery.Set[repo.Name] { filtered = append(filtered, s) From d356677ef2360bd81de76cb865b02f54ed335e74 Mon Sep 17 00:00:00 2001 From: Thorsten Ball Date: Fri, 19 Jul 2019 17:06:39 +0200 Subject: [PATCH 3/5] Fix benchmarks by using fresh queries --- shards/shards.go | 1 - shards/shards_test.go | 25 +++++++++++++++---------- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/shards/shards.go b/shards/shards.go index 799c95b9b..3916092d2 100644 --- a/shards/shards.go +++ b/shards/shards.go @@ -133,7 +133,6 @@ func selectRepoSet(shards []rankedShard, q query.Q) ([]rankedShard, query.Q) { } } } - and.Children[i] = &query.Const{Value: len(filtered) > 0} // Stop after first RepoSet, otherwise we might append duplicate diff --git a/shards/shards_test.go b/shards/shards_test.go index 717bd3c3f..393913918 100644 --- a/shards/shards_test.go +++ b/shards/shards_test.go @@ -342,11 +342,16 @@ func BenchmarkShardedSearch(b *testing.B) { ctx := context.Background() opts := &zoekt.SearchOptions{} - set := query.NewRepoSet(repoSetNames...) needleSub := &query.Substring{Pattern: "needle"} haystackSub := &query.Substring{Pattern: "haystack"} helloworldSub := &query.Substring{Pattern: "helloworld"} + setAnd := func(q query.Q) func() query.Q { + return func() query.Q { + return query.NewAnd(query.NewRepoSet(repoSetNames...), q) + } + } + search := func(b *testing.B, q query.Q, wantFiles int) { b.Helper() @@ -361,25 +366,25 @@ func BenchmarkShardedSearch(b *testing.B) { benchmarks := []struct { name string - q query.Q + q func() query.Q wantFiles int }{ - {"substring all results", haystackSub, len(repos) * filesPerRepo}, - {"substring no results", helloworldSub, 0}, - {"substring some results", needleSub, len(repos)}, + {"substring all results", func() query.Q { return haystackSub }, len(repos) * filesPerRepo}, + {"substring no results", func() query.Q { return helloworldSub }, 0}, + {"substring some results", func() query.Q { return needleSub }, len(repos)}, - {"substring all results and repo set", query.NewAnd(set, haystackSub), len(repoSetNames) * filesPerRepo}, - {"substring some results and repo set", query.NewAnd(set, needleSub), len(repoSetNames)}, - {"substring no results and repo set", query.NewAnd(set, helloworldSub), 0}, + {"substring all results and repo set", setAnd(haystackSub), len(repoSetNames) * filesPerRepo}, + {"substring some results and repo set", setAnd(needleSub), len(repoSetNames)}, + {"substring no results and repo set", setAnd(helloworldSub), 0}, } for _, bb := range benchmarks { b.Run(bb.name, func(b *testing.B) { - q := bb.q - b.ReportAllocs() for n := 0; n < b.N; n++ { + q := bb.q() + search(b, q, bb.wantFiles) } }) From 7756eadf9ba6e644647631adc6808ced1f1ae4a6 Mon Sep 17 00:00:00 2001 From: Keegan Carruthers-Smith Date: Mon, 22 Jul 2019 14:54:57 +0200 Subject: [PATCH 4/5] unconditionally return Change-Id: I113b30a29a1b8f61e4969c52b155d7fa09e0cc75 --- shards/shards.go | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/shards/shards.go b/shards/shards.go index 3916092d2..6601895cb 100644 --- a/shards/shards.go +++ b/shards/shards.go @@ -137,9 +137,7 @@ func selectRepoSet(shards []rankedShard, q query.Q) ([]rankedShard, query.Q) { // Stop after first RepoSet, otherwise we might append duplicate // shards to `filtered` - if len(filtered) != 0 { - return filtered, and - } + return filtered, and } return shards, and From f093f569b0bf805e129c6d626d59edcc5f460191 Mon Sep 17 00:00:00 2001 From: Keegan Carruthers-Smith Date: Mon, 22 Jul 2019 15:01:11 +0200 Subject: [PATCH 5/5] Simplify after evaluating reposet Change-Id: I44c3f3face83f5d737a5f9413cf3db3d00c02850 --- shards/shards.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/shards/shards.go b/shards/shards.go index 6601895cb..a41c7a894 100644 --- a/shards/shards.go +++ b/shards/shards.go @@ -137,7 +137,7 @@ func selectRepoSet(shards []rankedShard, q query.Q) ([]rankedShard, query.Q) { // Stop after first RepoSet, otherwise we might append duplicate // shards to `filtered` - return filtered, and + return filtered, query.Simplify(and) } return shards, and