Skip to content

Commit

Permalink
gitserver: Make ReadDir return an iterator on client side
Browse files Browse the repository at this point in the history
There is potentially a lot of FDs in a repo, so we return an iterator instead to encourage callers to consume it one by one and not load all of it into memory.

Test plan:

Existing tests still pass.
  • Loading branch information
eseliger committed May 13, 2024
1 parent 0be15f8 commit 76f8aca
Show file tree
Hide file tree
Showing 23 changed files with 454 additions and 161 deletions.
25 changes: 22 additions & 3 deletions cmd/frontend/backend/inventory.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,14 @@ import (
"context"
"encoding/json"
"fmt"
"github.com/sourcegraph/sourcegraph/internal/trace"
"golang.org/x/sync/semaphore"
"io"
"io/fs"
"strconv"

"golang.org/x/sync/semaphore"

"github.com/sourcegraph/sourcegraph/internal/trace"

"github.com/sourcegraph/log"

"github.com/sourcegraph/sourcegraph/cmd/frontend/internal/inventory"
Expand Down Expand Up @@ -73,7 +75,24 @@ func InventoryContext(logger log.Logger, repo api.RepoName, gsClient gitserver.C
}
defer gitServerSemaphore.Release(1)
// Using recurse=true does not yield a significant performance improvement. See https://github.com/sourcegraph/sourcegraph/pull/62011/files#r1577513913.
return gsClient.ReadDir(ctx, repo, commitID, path, false)

fds := make([]fs.FileInfo, 0)
it, err := gsClient.ReadDir(ctx, repo, commitID, path, false)
if err != nil {
return nil, err
}
defer it.Close()
for {
fd, err := it.Next()
if errors.Is(err, io.EOF) {
break
}
if err != nil {
return nil, err
}
fds = append(fds, fd)
}
return fds, nil
},
NewFileReader: func(ctx context.Context, path string) (io.ReadCloser, error) {
trc, ctx := trace.New(ctx, "NewFileReader waits for semaphore")
Expand Down
8 changes: 4 additions & 4 deletions cmd/frontend/backend/repos_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -173,18 +173,18 @@ func TestReposGetInventory(t *testing.T) {
}
return &fileutil.FileInfo{Name_: path, Mode_: os.ModeDir, Sys_: gitObjectInfo(wantRootOID)}, nil
})
gitserverClient.ReadDirFunc.SetDefaultHook(func(_ context.Context, _ api.RepoName, commit api.CommitID, name string, _ bool) ([]fs.FileInfo, error) {
gitserverClient.ReadDirFunc.SetDefaultHook(func(_ context.Context, _ api.RepoName, commit api.CommitID, name string, _ bool) (gitserver.ReadDirIterator, error) {
if commit != wantCommitID {
t.Errorf("got commit %q, want %q", commit, wantCommitID)
}
switch name {
case "":
return []fs.FileInfo{
return gitserver.NewReadDirIteratorFromSlice([]fs.FileInfo{
&fileutil.FileInfo{Name_: "a", Mode_: os.ModeDir, Sys_: gitObjectInfo("oid-a")},
&fileutil.FileInfo{Name_: "b.go", Size_: 12},
}, nil
}), nil
case "a":
return []fs.FileInfo{&fileutil.FileInfo{Name_: "a/c.m", Size_: 24}}, nil
return gitserver.NewReadDirIteratorFromSlice([]fs.FileInfo{&fileutil.FileInfo{Name_: "a/c.m", Size_: 24}}), nil
default:
panic("unhandled mock ReadDir " + name)
}
Expand Down
21 changes: 16 additions & 5 deletions cmd/frontend/graphqlbackend/git_commit.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package graphqlbackend

import (
"context"
"io"
"io/fs"
"net/url"
"os"
Expand Down Expand Up @@ -320,18 +321,28 @@ func (*rootTreeFileInfo) Size() int64 { return 0 }
func (*rootTreeFileInfo) Sys() any { return nil }

func (r *GitCommitResolver) FileNames(ctx context.Context) ([]string, error) {
fds, err := r.gitserverClient.ReadDir(ctx, r.gitRepo, api.CommitID(r.oid), "", true)
it, err := r.gitserverClient.ReadDir(ctx, r.gitRepo, api.CommitID(r.oid), "", true)
if err != nil {
return nil, err
}
defer it.Close()

names := make([]string, 0, len(fds))
for _, fd := range fds {
if fd.IsDir() {
names := make([]string, 0)

for {
entry, err := it.Next()
if errors.Is(err, io.EOF) {
break
}
if err != nil {
return nil, err
}
if entry.IsDir() {
continue
}
names = append(names, fd.Name())
names = append(names, entry.Name())
}

return names, nil
}

Expand Down
8 changes: 4 additions & 4 deletions cmd/frontend/graphqlbackend/git_commit_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -291,12 +291,12 @@ func TestGitCommitFileNames(t *testing.T) {
}
backend.Mocks.Repos.MockGetCommit_Return_NoCheck(t, &gitdomain.Commit{ID: exampleCommitSHA1})
gitserverClient := gitserver.NewMockClient()
gitserverClient.ReadDirFunc.SetDefaultReturn([]fs.FileInfo{
gitserverClient.ReadDirFunc.SetDefaultReturn(gitserver.NewReadDirIteratorFromSlice([]fs.FileInfo{
&fileutil.FileInfo{Name_: "a"},
&fileutil.FileInfo{Name_: "b"},
// We also return a dir to check that it's skipped in the output.
&fileutil.FileInfo{Name_: "dir", Mode_: os.ModeDir},
}, nil)
}), nil)
defer func() {
backend.Mocks = backend.MockServices{}
}()
Expand Down Expand Up @@ -340,12 +340,12 @@ func TestGitCommitAncestors(t *testing.T) {
backend.Mocks.Repos.MockGetCommit_Return_NoCheck(t, &gitdomain.Commit{ID: exampleCommitSHA1})

client := gitserver.NewMockClient()
client.ReadDirFunc.SetDefaultReturn([]fs.FileInfo{
client.ReadDirFunc.SetDefaultReturn(gitserver.NewReadDirIteratorFromSlice([]fs.FileInfo{
&fileutil.FileInfo{Name_: "a"},
&fileutil.FileInfo{Name_: "b"},
// We also return a dir to check that it's skipped in the output.
&fileutil.FileInfo{Name_: "dir", Mode_: os.ModeDir},
}, nil)
}), nil)

// A linear commit tree:
// * -> c1 -> c2 -> c3 -> c4 -> c5 (HEAD)
Expand Down
17 changes: 16 additions & 1 deletion cmd/frontend/graphqlbackend/git_tree.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package graphqlbackend

import (
"context"
"io"
"io/fs"
"path"
"path/filepath"
Expand Down Expand Up @@ -47,14 +48,28 @@ func (r *GitTreeEntryResolver) entries(ctx context.Context, args *gitTreeEntryCo
return nil, errors.Newf("invalid argument for first, must be non-negative")
}

entries, err := r.gitserverClient.ReadDir(ctx, r.commit.repoResolver.RepoName(), api.CommitID(r.commit.OID()), r.Path(), args.Recursive)
it, err := r.gitserverClient.ReadDir(ctx, r.commit.repoResolver.RepoName(), api.CommitID(r.commit.OID()), r.Path(), args.Recursive)
if err != nil {
if strings.Contains(err.Error(), "file does not exist") { // TODO proper error value
// empty tree is not an error
} else {
return nil, err
}
}
defer it.Close()

entries := make([]fs.FileInfo, 0)

for {
entry, err := it.Next()
if err != nil {
if errors.Is(err, io.EOF) {
break
}
return nil, err
}
entries = append(entries, entry)
}

// When using recursive: true on gitserverClient.ReadDir, we get entries for
// all parent trees (directories) from git as well, so we filter those out.
Expand Down
17 changes: 15 additions & 2 deletions cmd/frontend/graphqlbackend/git_tree_entry.go
Original file line number Diff line number Diff line change
Expand Up @@ -431,12 +431,25 @@ func (r *GitTreeEntryResolver) IsSingleChild(ctx context.Context) (bool, error)
return false, nil
}

entries, err := r.gitserverClient.ReadDir(ctx, r.commit.repoResolver.RepoName(), api.CommitID(r.commit.OID()), path.Dir(r.Path()), false)
it, err := r.gitserverClient.ReadDir(ctx, r.commit.repoResolver.RepoName(), api.CommitID(r.commit.OID()), path.Dir(r.Path()), false)
if err != nil {
return false, err
}
defer it.Close()

return len(entries) == 1, nil
// Read the entry for the file we are interested in.
_, err = it.Next()
if err == nil {
return false, err
}

// Read one more entry. If that fails with io.EOF then we know we have a single child.
_, err = it.Next()
if err != io.EOF {
return false, err
}

return err == io.EOF, nil
}

func (r *GitTreeEntryResolver) LSIF(ctx context.Context, args *struct{ ToolName *string }) (resolverstubs.GitBlobLSIFDataResolver, error) {
Expand Down

0 comments on commit 76f8aca

Please sign in to comment.