diff --git a/cmd/frontend/backend/inventory.go b/cmd/frontend/backend/inventory.go index 1ad5d0068856..41330fada612 100644 --- a/cmd/frontend/backend/inventory.go +++ b/cmd/frontend/backend/inventory.go @@ -4,12 +4,14 @@ import ( "context" "encoding/json" "fmt" - "github.com/sourcegraph/sourcegraph/internal/trace" - "golang.org/x/sync/semaphore" "io" "io/fs" "strconv" + "golang.org/x/sync/semaphore" + + "github.com/sourcegraph/sourcegraph/internal/trace" + "github.com/sourcegraph/log" "github.com/sourcegraph/sourcegraph/cmd/frontend/internal/inventory" @@ -73,7 +75,24 @@ func InventoryContext(logger log.Logger, repo api.RepoName, gsClient gitserver.C } defer gitServerSemaphore.Release(1) // Using recurse=true does not yield a significant performance improvement. See https://github.com/sourcegraph/sourcegraph/pull/62011/files#r1577513913. - return gsClient.ReadDir(ctx, repo, commitID, path, false) + + fds := make([]fs.FileInfo, 0) + it, err := gsClient.ReadDir(ctx, repo, commitID, path, false) + if err != nil { + return nil, err + } + defer it.Close() + for { + fd, err := it.Next() + if errors.Is(err, io.EOF) { + break + } + if err != nil { + return nil, err + } + fds = append(fds, fd) + } + return fds, nil }, NewFileReader: func(ctx context.Context, path string) (io.ReadCloser, error) { trc, ctx := trace.New(ctx, "NewFileReader waits for semaphore") diff --git a/cmd/frontend/backend/repos_test.go b/cmd/frontend/backend/repos_test.go index d7f4f40f161c..7d1c75a0a66d 100644 --- a/cmd/frontend/backend/repos_test.go +++ b/cmd/frontend/backend/repos_test.go @@ -173,18 +173,18 @@ func TestReposGetInventory(t *testing.T) { } return &fileutil.FileInfo{Name_: path, Mode_: os.ModeDir, Sys_: gitObjectInfo(wantRootOID)}, nil }) - gitserverClient.ReadDirFunc.SetDefaultHook(func(_ context.Context, _ api.RepoName, commit api.CommitID, name string, _ bool) ([]fs.FileInfo, error) { + gitserverClient.ReadDirFunc.SetDefaultHook(func(_ context.Context, _ api.RepoName, commit api.CommitID, name string, _ bool) (gitserver.ReadDirIterator, error) { if commit != wantCommitID { t.Errorf("got commit %q, want %q", commit, wantCommitID) } switch name { case "": - return []fs.FileInfo{ + return gitserver.NewReadDirIteratorFromSlice([]fs.FileInfo{ &fileutil.FileInfo{Name_: "a", Mode_: os.ModeDir, Sys_: gitObjectInfo("oid-a")}, &fileutil.FileInfo{Name_: "b.go", Size_: 12}, - }, nil + }), nil case "a": - return []fs.FileInfo{&fileutil.FileInfo{Name_: "a/c.m", Size_: 24}}, nil + return gitserver.NewReadDirIteratorFromSlice([]fs.FileInfo{&fileutil.FileInfo{Name_: "a/c.m", Size_: 24}}), nil default: panic("unhandled mock ReadDir " + name) } diff --git a/cmd/frontend/graphqlbackend/git_commit.go b/cmd/frontend/graphqlbackend/git_commit.go index 047c43e14e70..782a8a430ed1 100644 --- a/cmd/frontend/graphqlbackend/git_commit.go +++ b/cmd/frontend/graphqlbackend/git_commit.go @@ -2,6 +2,7 @@ package graphqlbackend import ( "context" + "io" "io/fs" "net/url" "os" @@ -320,18 +321,28 @@ func (*rootTreeFileInfo) Size() int64 { return 0 } func (*rootTreeFileInfo) Sys() any { return nil } func (r *GitCommitResolver) FileNames(ctx context.Context) ([]string, error) { - fds, err := r.gitserverClient.ReadDir(ctx, r.gitRepo, api.CommitID(r.oid), "", true) + it, err := r.gitserverClient.ReadDir(ctx, r.gitRepo, api.CommitID(r.oid), "", true) if err != nil { return nil, err } + defer it.Close() - names := make([]string, 0, len(fds)) - for _, fd := range fds { - if fd.IsDir() { + names := make([]string, 0) + + for { + entry, err := it.Next() + if errors.Is(err, io.EOF) { + break + } + if err != nil { + return nil, err + } + if entry.IsDir() { continue } - names = append(names, fd.Name()) + names = append(names, entry.Name()) } + return names, nil } diff --git a/cmd/frontend/graphqlbackend/git_commit_test.go b/cmd/frontend/graphqlbackend/git_commit_test.go index 5250d9275ac3..32b213374501 100644 --- a/cmd/frontend/graphqlbackend/git_commit_test.go +++ b/cmd/frontend/graphqlbackend/git_commit_test.go @@ -291,12 +291,12 @@ func TestGitCommitFileNames(t *testing.T) { } backend.Mocks.Repos.MockGetCommit_Return_NoCheck(t, &gitdomain.Commit{ID: exampleCommitSHA1}) gitserverClient := gitserver.NewMockClient() - gitserverClient.ReadDirFunc.SetDefaultReturn([]fs.FileInfo{ + gitserverClient.ReadDirFunc.SetDefaultReturn(gitserver.NewReadDirIteratorFromSlice([]fs.FileInfo{ &fileutil.FileInfo{Name_: "a"}, &fileutil.FileInfo{Name_: "b"}, // We also return a dir to check that it's skipped in the output. &fileutil.FileInfo{Name_: "dir", Mode_: os.ModeDir}, - }, nil) + }), nil) defer func() { backend.Mocks = backend.MockServices{} }() @@ -340,12 +340,12 @@ func TestGitCommitAncestors(t *testing.T) { backend.Mocks.Repos.MockGetCommit_Return_NoCheck(t, &gitdomain.Commit{ID: exampleCommitSHA1}) client := gitserver.NewMockClient() - client.ReadDirFunc.SetDefaultReturn([]fs.FileInfo{ + client.ReadDirFunc.SetDefaultReturn(gitserver.NewReadDirIteratorFromSlice([]fs.FileInfo{ &fileutil.FileInfo{Name_: "a"}, &fileutil.FileInfo{Name_: "b"}, // We also return a dir to check that it's skipped in the output. &fileutil.FileInfo{Name_: "dir", Mode_: os.ModeDir}, - }, nil) + }), nil) // A linear commit tree: // * -> c1 -> c2 -> c3 -> c4 -> c5 (HEAD) diff --git a/cmd/frontend/graphqlbackend/git_tree.go b/cmd/frontend/graphqlbackend/git_tree.go index 2c1c249e9323..e7371e118c81 100644 --- a/cmd/frontend/graphqlbackend/git_tree.go +++ b/cmd/frontend/graphqlbackend/git_tree.go @@ -2,6 +2,7 @@ package graphqlbackend import ( "context" + "io" "io/fs" "path" "path/filepath" @@ -47,7 +48,7 @@ func (r *GitTreeEntryResolver) entries(ctx context.Context, args *gitTreeEntryCo return nil, errors.Newf("invalid argument for first, must be non-negative") } - entries, err := r.gitserverClient.ReadDir(ctx, r.commit.repoResolver.RepoName(), api.CommitID(r.commit.OID()), r.Path(), args.Recursive) + it, err := r.gitserverClient.ReadDir(ctx, r.commit.repoResolver.RepoName(), api.CommitID(r.commit.OID()), r.Path(), args.Recursive) if err != nil { if strings.Contains(err.Error(), "file does not exist") { // TODO proper error value // empty tree is not an error @@ -55,6 +56,20 @@ func (r *GitTreeEntryResolver) entries(ctx context.Context, args *gitTreeEntryCo return nil, err } } + defer it.Close() + + entries := make([]fs.FileInfo, 0) + + for { + entry, err := it.Next() + if err != nil { + if errors.Is(err, io.EOF) { + break + } + return nil, err + } + entries = append(entries, entry) + } // When using recursive: true on gitserverClient.ReadDir, we get entries for // all parent trees (directories) from git as well, so we filter those out. diff --git a/cmd/frontend/graphqlbackend/git_tree_entry.go b/cmd/frontend/graphqlbackend/git_tree_entry.go index 5a69eb1cd765..077d9534f9c2 100644 --- a/cmd/frontend/graphqlbackend/git_tree_entry.go +++ b/cmd/frontend/graphqlbackend/git_tree_entry.go @@ -431,12 +431,25 @@ func (r *GitTreeEntryResolver) IsSingleChild(ctx context.Context) (bool, error) return false, nil } - entries, err := r.gitserverClient.ReadDir(ctx, r.commit.repoResolver.RepoName(), api.CommitID(r.commit.OID()), path.Dir(r.Path()), false) + it, err := r.gitserverClient.ReadDir(ctx, r.commit.repoResolver.RepoName(), api.CommitID(r.commit.OID()), path.Dir(r.Path()), false) if err != nil { return false, err } + defer it.Close() - return len(entries) == 1, nil + // Read the entry for the file we are interested in. + _, err = it.Next() + if err == nil { + return false, err + } + + // Read one more entry. If that fails with io.EOF then we know we have a single child. + _, err = it.Next() + if err != io.EOF { + return false, err + } + + return err == io.EOF, nil } func (r *GitTreeEntryResolver) LSIF(ctx context.Context, args *struct{ ToolName *string }) (resolverstubs.GitBlobLSIFDataResolver, error) { diff --git a/cmd/frontend/graphqlbackend/git_tree_test.go b/cmd/frontend/graphqlbackend/git_tree_test.go index 606a4c7e81f8..d737caec6242 100644 --- a/cmd/frontend/graphqlbackend/git_tree_test.go +++ b/cmd/frontend/graphqlbackend/git_tree_test.go @@ -25,9 +25,9 @@ func TestGitTree_History(t *testing.T) { ctx := context.Background() gs := gitserver.NewMockClientFrom(gitserver.NewTestClient(t)) gs.ResolveRevisionFunc.SetDefaultReturn("deadbeef", nil) - gs.ReadDirFunc.SetDefaultReturn([]fs.FileInfo{ + gs.ReadDirFunc.SetDefaultReturn(gitserver.NewReadDirIteratorFromSlice([]fs.FileInfo{ &fileutil.FileInfo{Name_: "file1"}, - }, nil) + }), nil) gs.CommitsFunc.SetDefaultReturn([]*gitdomain.Commit{ {ID: "deadbeef"}, }, nil) @@ -56,11 +56,11 @@ func TestGitTree_Entries(t *testing.T) { wantPath := "" - gitserverClient.ReadDirFunc.SetDefaultHook(func(ctx context.Context, repo api.RepoName, commit api.CommitID, path string, recursive bool) ([]fs.FileInfo, error) { + gitserverClient.ReadDirFunc.SetDefaultHook(func(ctx context.Context, repo api.RepoName, commit api.CommitID, path string, recursive bool) (gitserver.ReadDirIterator, error) { switch path { case "", ".", "/": if recursive { - return []fs.FileInfo{ + return gitserver.NewReadDirIteratorFromSlice([]fs.FileInfo{ CreateFileInfo(".aspect/", true), CreateFileInfo(".aspect/rules/", true), CreateFileInfo(".aspect/rules/external_repository_action_cache/", true), @@ -78,17 +78,17 @@ func TestGitTree_Entries(t *testing.T) { CreateFileInfo("folder2/", true), CreateFileInfo("folder2/file", false), CreateFileInfo("file", false), - }, nil + }), nil } - return []fs.FileInfo{ + return gitserver.NewReadDirIteratorFromSlice([]fs.FileInfo{ CreateFileInfo(".aspect/", true), CreateFileInfo("folder/", true), CreateFileInfo("folder2/", true), CreateFileInfo("file", false), - }, nil + }), nil case "folder/", "folder": if recursive { - return []fs.FileInfo{ + return gitserver.NewReadDirIteratorFromSlice([]fs.FileInfo{ CreateFileInfo("folder/", true), CreateFileInfo("folder/nestedfile", false), CreateFileInfo("folder/subfolder/", true), @@ -96,16 +96,16 @@ func TestGitTree_Entries(t *testing.T) { CreateFileInfo("folder/subfolder2/", true), CreateFileInfo("folder/subfolder2/file", false), CreateFileInfo("folder/subfolder2/file2", false), - }, nil + }), nil } - return []fs.FileInfo{ + return gitserver.NewReadDirIteratorFromSlice([]fs.FileInfo{ CreateFileInfo("folder/nestedfile", false), CreateFileInfo("folder/subfolder/", true), CreateFileInfo("folder/subfolder2/", true), - }, nil + }), nil case ".aspect/", ".aspect": if recursive { - return []fs.FileInfo{ + return gitserver.NewReadDirIteratorFromSlice([]fs.FileInfo{ CreateFileInfo(".aspect/", true), CreateFileInfo(".aspect/rules/", true), CreateFileInfo(".aspect/rules/external_repository_action_cache/", true), @@ -113,83 +113,83 @@ func TestGitTree_Entries(t *testing.T) { CreateFileInfo(".aspect/cli/", true), CreateFileInfo(".aspect/cli/file1", false), CreateFileInfo(".aspect/cli/file2", false), - }, nil + }), nil } - return []fs.FileInfo{ + return gitserver.NewReadDirIteratorFromSlice([]fs.FileInfo{ CreateFileInfo(".aspect/rules/", true), CreateFileInfo(".aspect/cli/", true), - }, nil + }), nil case ".aspect/rules/", ".aspect/rules": if recursive { - return []fs.FileInfo{ + return gitserver.NewReadDirIteratorFromSlice([]fs.FileInfo{ CreateFileInfo(".aspect/", true), CreateFileInfo(".aspect/rules/", true), CreateFileInfo(".aspect/rules/external_repository_action_cache/", true), CreateFileInfo(".aspect/rules/external_repository_action_cache/file", false), - }, nil + }), nil } - return []fs.FileInfo{ + return gitserver.NewReadDirIteratorFromSlice([]fs.FileInfo{ CreateFileInfo(".aspect/rules/external_repository_action_cache/", true), - }, nil + }), nil case ".aspect/rules/external_repository_action_cache/", ".aspect/rules/external_repository_action_cache": if recursive { - return []fs.FileInfo{ + return gitserver.NewReadDirIteratorFromSlice([]fs.FileInfo{ CreateFileInfo(".aspect/", true), CreateFileInfo(".aspect/rules/", true), CreateFileInfo(".aspect/rules/external_repository_action_cache/", true), CreateFileInfo(".aspect/rules/external_repository_action_cache/file", false), - }, nil + }), nil } - return []fs.FileInfo{ + return gitserver.NewReadDirIteratorFromSlice([]fs.FileInfo{ CreateFileInfo(".aspect/rules/external_repository_action_cache/file", false), - }, nil + }), nil case ".aspect/cli/", ".aspect/cli": if recursive { - return []fs.FileInfo{ + return gitserver.NewReadDirIteratorFromSlice([]fs.FileInfo{ CreateFileInfo(".aspect/", true), CreateFileInfo(".aspect/cli/", true), CreateFileInfo(".aspect/cli/file1", false), CreateFileInfo(".aspect/cli/file2", false), - }, nil + }), nil } - return []fs.FileInfo{ + return gitserver.NewReadDirIteratorFromSlice([]fs.FileInfo{ CreateFileInfo(".aspect/cli/file1", false), CreateFileInfo(".aspect/cli/file2", false), - }, nil + }), nil case "folder/subfolder/", "folder/subfolder": if recursive { - return []fs.FileInfo{ + return gitserver.NewReadDirIteratorFromSlice([]fs.FileInfo{ CreateFileInfo("folder/", true), CreateFileInfo("folder/subfolder/", true), CreateFileInfo("folder/subfolder/deeplynestedfile", false), - }, nil + }), nil } - return []fs.FileInfo{ + return gitserver.NewReadDirIteratorFromSlice([]fs.FileInfo{ CreateFileInfo("folder/subfolder/deeplynestedfile", false), - }, nil + }), nil case "folder/subfolder2/", "folder/subfolder2": if recursive { - return []fs.FileInfo{ + return gitserver.NewReadDirIteratorFromSlice([]fs.FileInfo{ CreateFileInfo("folder/", true), CreateFileInfo("folder/subfolder2/", true), CreateFileInfo("folder/subfolder2/file", false), CreateFileInfo("folder/subfolder2/file2", false), - }, nil + }), nil } - return []fs.FileInfo{ + return gitserver.NewReadDirIteratorFromSlice([]fs.FileInfo{ CreateFileInfo("folder/subfolder2/file", false), CreateFileInfo("folder/subfolder2/file2", false), - }, nil + }), nil case "folder2/", "folder2": if recursive { - return []fs.FileInfo{ + return gitserver.NewReadDirIteratorFromSlice([]fs.FileInfo{ CreateFileInfo("folder2/", true), CreateFileInfo("folder2/file", false), - }, nil + }), nil } - return []fs.FileInfo{ + return gitserver.NewReadDirIteratorFromSlice([]fs.FileInfo{ CreateFileInfo("folder2/file", false), - }, nil + }), nil default: return nil, errors.Newf("bad argument %q", path) } @@ -443,16 +443,16 @@ func TestGitTree(t *testing.T) { func setupGitserverClient(t *testing.T) gitserver.Client { t.Helper() gsClient := gitserver.NewMockClient() - gsClient.ReadDirFunc.SetDefaultHook(func(_ context.Context, _ api.RepoName, commit api.CommitID, name string, recurse bool) ([]fs.FileInfo, error) { + gsClient.ReadDirFunc.SetDefaultHook(func(_ context.Context, _ api.RepoName, commit api.CommitID, name string, recurse bool) (gitserver.ReadDirIterator, error) { assert.Equal(t, api.CommitID(exampleCommitSHA1), commit) assert.Equal(t, "foo bar", name) assert.False(t, recurse) - return []fs.FileInfo{ + return gitserver.NewReadDirIteratorFromSlice([]fs.FileInfo{ &fileutil.FileInfo{Name_: name + "/testDirectory", Mode_: os.ModeDir}, &fileutil.FileInfo{Name_: name + "/Geoffrey's random queries.32r242442bf", Mode_: os.ModeDir}, &fileutil.FileInfo{Name_: name + "/testFile", Mode_: 0}, &fileutil.FileInfo{Name_: name + "/% token.4288249258.sql", Mode_: 0}, - }, nil + }), nil }) gsClient.StatFunc.SetDefaultHook(func(_ context.Context, _ api.RepoName, commit api.CommitID, path string) (fs.FileInfo, error) { assert.Equal(t, api.CommitID(exampleCommitSHA1), commit) diff --git a/cmd/frontend/graphqlbackend/search_results_stats_languages_test.go b/cmd/frontend/graphqlbackend/search_results_stats_languages_test.go index 2b06aa49c5bc..c8f6f4c30512 100644 --- a/cmd/frontend/graphqlbackend/search_results_stats_languages_test.go +++ b/cmd/frontend/graphqlbackend/search_results_stats_languages_test.go @@ -109,8 +109,8 @@ func TestSearchResultsStatsLanguages(t *testing.T) { } for name, test := range tests { t.Run(name, func(t *testing.T) { - gsClient.ReadDirFunc.SetDefaultHook(func(context.Context, api.RepoName, api.CommitID, string, bool) ([]fs.FileInfo, error) { - return test.getFiles, nil + gsClient.ReadDirFunc.SetDefaultHook(func(context.Context, api.RepoName, api.CommitID, string, bool) (gitserver.ReadDirIterator, error) { + return gitserver.NewReadDirIteratorFromSlice(test.getFiles), nil }) langs, err := searchResultsStatsLanguages(context.Background(), logger, dbmocks.NewMockDB(), gsClient, test.results) diff --git a/cmd/frontend/internal/app/ui/raw.go b/cmd/frontend/internal/app/ui/raw.go index 41f7ce3e9b33..6c6b6d299a82 100644 --- a/cmd/frontend/internal/app/ui/raw.go +++ b/cmd/frontend/internal/app/ui/raw.go @@ -24,6 +24,7 @@ import ( "github.com/sourcegraph/sourcegraph/internal/database" "github.com/sourcegraph/sourcegraph/internal/errcode" "github.com/sourcegraph/sourcegraph/internal/gitserver" + "github.com/sourcegraph/sourcegraph/lib/errors" ) // Examples: @@ -258,13 +259,21 @@ func serveRaw(logger log.Logger, db database.DB, gitserverClient gitserver.Clien if fi.IsDir() { requestType = "dir" - infos, err := gitserverClient.ReadDir(r.Context(), common.Repo.Name, common.CommitID, requestedPath, false) + it, err := gitserverClient.ReadDir(r.Context(), common.Repo.Name, common.CommitID, requestedPath, false) if err != nil { return err } - size = int64(len(infos)) + defer it.Close() + var names []string - for _, info := range infos { + for { + info, err := it.Next() + if err != nil { + if errors.Is(err, io.EOF) { + break + } + return err + } // A previous version of this code returned relative paths so we trim the paths // here too so as not to break backwards compatibility name := path.Base(info.Name()) diff --git a/cmd/frontend/internal/app/ui/raw_test.go b/cmd/frontend/internal/app/ui/raw_test.go index 8217435cd52a..02dd7cd186a0 100644 --- a/cmd/frontend/internal/app/ui/raw_test.go +++ b/cmd/frontend/internal/app/ui/raw_test.go @@ -281,12 +281,12 @@ func Test_serveRawWithContentTypePlain(t *testing.T) { gsClient := gitserver.NewMockClient() gsClient.StatFunc.SetDefaultReturn(&fileutil.FileInfo{Mode_: os.ModeDir}, nil) - gsClient.ReadDirFunc.SetDefaultHook(func(context.Context, api.RepoName, api.CommitID, string, bool) ([]fs.FileInfo, error) { - return []fs.FileInfo{ + gsClient.ReadDirFunc.SetDefaultHook(func(context.Context, api.RepoName, api.CommitID, string, bool) (gitserver.ReadDirIterator, error) { + return gitserver.NewReadDirIteratorFromSlice([]fs.FileInfo{ &fileutil.FileInfo{Name_: "test/a", Mode_: os.ModeDir}, &fileutil.FileInfo{Name_: "test/b", Mode_: os.ModeDir}, &fileutil.FileInfo{Name_: "c.go", Mode_: 0}, - }, nil + }), nil }) req := httptest.NewRequest("GET", "/github.com/sourcegraph/sourcegraph/-/raw", nil) diff --git a/cmd/gitserver/internal/integration_tests/tree_test.go b/cmd/gitserver/internal/integration_tests/tree_test.go index fee48b12f4e7..927933a6db15 100644 --- a/cmd/gitserver/internal/integration_tests/tree_test.go +++ b/cmd/gitserver/internal/integration_tests/tree_test.go @@ -2,9 +2,12 @@ package inttests import ( "context" + "io" + "io/fs" "testing" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" "github.com/sourcegraph/sourcegraph/internal/actor" "github.com/sourcegraph/sourcegraph/internal/api" @@ -52,11 +55,21 @@ func TestReadDir_SubRepoFiltering(t *testing.T) { source := gitserver.NewTestClientSource(t, GitserverAddresses) client := gitserver.NewTestClient(t).WithClientSource(source).WithChecker(checker) - files, err := client.ReadDir(ctx, repo, commitID, "", false) + it, err := client.ReadDir(ctx, repo, commitID, "", false) if err != nil { t.Fatalf("unexpected error: %s", err) } + files := []fs.FileInfo{} + for { + f, err := it.Next() + if err == io.EOF { + break + } + require.NoError(t, err) + files = append(files, f) + } + // Because we have a wildcard matcher we still allow directory visibility assert.Len(t, files, 1) assert.Equal(t, "file1", files[0].Name()) diff --git a/cmd/worker/internal/embeddings/repo/handler.go b/cmd/worker/internal/embeddings/repo/handler.go index ef0f1d0e6d3e..d97e56c997cf 100644 --- a/cmd/worker/internal/embeddings/repo/handler.go +++ b/cmd/worker/internal/embeddings/repo/handler.go @@ -210,13 +210,22 @@ func (r *revisionFetcher) Read(ctx context.Context, fileName string) ([]byte, er } func (r *revisionFetcher) List(ctx context.Context) ([]embed.FileEntry, error) { - fileInfos, err := r.gitserver.ReadDir(ctx, r.repo, r.revision, "", true) + it, err := r.gitserver.ReadDir(ctx, r.repo, r.revision, "", true) if err != nil { return nil, err } + defer it.Close() + + entries := make([]embed.FileEntry, 0) + for { + fileInfo, err := it.Next() + if err != nil { + if errors.Is(err, io.EOF) { + break + } + return nil, err + } - entries := make([]embed.FileEntry, 0, len(fileInfos)) - for _, fileInfo := range fileInfos { if !fileInfo.IsDir() { entries = append(entries, embed.FileEntry{ Name: fileInfo.Name(), diff --git a/cmd/worker/internal/embeddings/repo/handler_test.go b/cmd/worker/internal/embeddings/repo/handler_test.go index b7c55735b208..ea88969e4f71 100644 --- a/cmd/worker/internal/embeddings/repo/handler_test.go +++ b/cmd/worker/internal/embeddings/repo/handler_test.go @@ -32,8 +32,8 @@ func TestDiff(t *testing.T) { }) readDirFunc := &gitserver.ClientReadDirFunc{} - readDirFunc.SetDefaultHook(func(context.Context, api.RepoName, api.CommitID, string, bool) ([]fs.FileInfo, error) { - return []fs.FileInfo{ + readDirFunc.SetDefaultHook(func(context.Context, api.RepoName, api.CommitID, string, bool) (gitserver.ReadDirIterator, error) { + return gitserver.NewReadDirIteratorFromSlice([]fs.FileInfo{ FakeFileInfo{ name: "modifiedFile", size: 900, @@ -50,7 +50,7 @@ func TestDiff(t *testing.T) { name: "anotherFile", size: 1200, }, - }, nil + }), nil }) mockGitServer := &gitserver.MockClient{ diff --git a/internal/batches/sources/mocks_test.go b/internal/batches/sources/mocks_test.go index 3bc99faa4d22..b4eae7d44dba 100644 --- a/internal/batches/sources/mocks_test.go +++ b/internal/batches/sources/mocks_test.go @@ -11225,7 +11225,7 @@ func NewMockGitserverClient() *MockGitserverClient { }, }, ReadDirFunc: &GitserverClientReadDirFunc{ - defaultHook: func(context.Context, api.RepoName, api.CommitID, string, bool) (r0 []fs.FileInfo, r1 error) { + defaultHook: func(context.Context, api.RepoName, api.CommitID, string, bool) (r0 gitserver.ReadDirIterator, r1 error) { return }, }, @@ -11412,7 +11412,7 @@ func NewStrictMockGitserverClient() *MockGitserverClient { }, }, ReadDirFunc: &GitserverClientReadDirFunc{ - defaultHook: func(context.Context, api.RepoName, api.CommitID, string, bool) ([]fs.FileInfo, error) { + defaultHook: func(context.Context, api.RepoName, api.CommitID, string, bool) (gitserver.ReadDirIterator, error) { panic("unexpected invocation of MockGitserverClient.ReadDir") }, }, @@ -14497,15 +14497,15 @@ func (c GitserverClientPerforceUsersFuncCall) Results() []interface{} { // GitserverClientReadDirFunc describes the behavior when the ReadDir method // of the parent MockGitserverClient instance is invoked. type GitserverClientReadDirFunc struct { - defaultHook func(context.Context, api.RepoName, api.CommitID, string, bool) ([]fs.FileInfo, error) - hooks []func(context.Context, api.RepoName, api.CommitID, string, bool) ([]fs.FileInfo, error) + defaultHook func(context.Context, api.RepoName, api.CommitID, string, bool) (gitserver.ReadDirIterator, error) + hooks []func(context.Context, api.RepoName, api.CommitID, string, bool) (gitserver.ReadDirIterator, error) history []GitserverClientReadDirFuncCall mutex sync.Mutex } // ReadDir delegates to the next hook function in the queue and stores the // parameter and result values of this invocation. -func (m *MockGitserverClient) ReadDir(v0 context.Context, v1 api.RepoName, v2 api.CommitID, v3 string, v4 bool) ([]fs.FileInfo, error) { +func (m *MockGitserverClient) ReadDir(v0 context.Context, v1 api.RepoName, v2 api.CommitID, v3 string, v4 bool) (gitserver.ReadDirIterator, error) { r0, r1 := m.ReadDirFunc.nextHook()(v0, v1, v2, v3, v4) m.ReadDirFunc.appendCall(GitserverClientReadDirFuncCall{v0, v1, v2, v3, v4, r0, r1}) return r0, r1 @@ -14514,7 +14514,7 @@ func (m *MockGitserverClient) ReadDir(v0 context.Context, v1 api.RepoName, v2 ap // SetDefaultHook sets function that is called when the ReadDir method of // the parent MockGitserverClient instance is invoked and the hook queue is // empty. -func (f *GitserverClientReadDirFunc) SetDefaultHook(hook func(context.Context, api.RepoName, api.CommitID, string, bool) ([]fs.FileInfo, error)) { +func (f *GitserverClientReadDirFunc) SetDefaultHook(hook func(context.Context, api.RepoName, api.CommitID, string, bool) (gitserver.ReadDirIterator, error)) { f.defaultHook = hook } @@ -14522,7 +14522,7 @@ func (f *GitserverClientReadDirFunc) SetDefaultHook(hook func(context.Context, a // ReadDir method of the parent MockGitserverClient instance invokes the // hook at the front of the queue and discards it. After the queue is empty, // the default hook function is invoked for any future action. -func (f *GitserverClientReadDirFunc) PushHook(hook func(context.Context, api.RepoName, api.CommitID, string, bool) ([]fs.FileInfo, error)) { +func (f *GitserverClientReadDirFunc) PushHook(hook func(context.Context, api.RepoName, api.CommitID, string, bool) (gitserver.ReadDirIterator, error)) { f.mutex.Lock() f.hooks = append(f.hooks, hook) f.mutex.Unlock() @@ -14530,20 +14530,20 @@ func (f *GitserverClientReadDirFunc) PushHook(hook func(context.Context, api.Rep // SetDefaultReturn calls SetDefaultHook with a function that returns the // given values. -func (f *GitserverClientReadDirFunc) SetDefaultReturn(r0 []fs.FileInfo, r1 error) { - f.SetDefaultHook(func(context.Context, api.RepoName, api.CommitID, string, bool) ([]fs.FileInfo, error) { +func (f *GitserverClientReadDirFunc) SetDefaultReturn(r0 gitserver.ReadDirIterator, r1 error) { + f.SetDefaultHook(func(context.Context, api.RepoName, api.CommitID, string, bool) (gitserver.ReadDirIterator, error) { return r0, r1 }) } // PushReturn calls PushHook with a function that returns the given values. -func (f *GitserverClientReadDirFunc) PushReturn(r0 []fs.FileInfo, r1 error) { - f.PushHook(func(context.Context, api.RepoName, api.CommitID, string, bool) ([]fs.FileInfo, error) { +func (f *GitserverClientReadDirFunc) PushReturn(r0 gitserver.ReadDirIterator, r1 error) { + f.PushHook(func(context.Context, api.RepoName, api.CommitID, string, bool) (gitserver.ReadDirIterator, error) { return r0, r1 }) } -func (f *GitserverClientReadDirFunc) nextHook() func(context.Context, api.RepoName, api.CommitID, string, bool) ([]fs.FileInfo, error) { +func (f *GitserverClientReadDirFunc) nextHook() func(context.Context, api.RepoName, api.CommitID, string, bool) (gitserver.ReadDirIterator, error) { f.mutex.Lock() defer f.mutex.Unlock() @@ -14593,7 +14593,7 @@ type GitserverClientReadDirFuncCall struct { Arg4 bool // Result0 is the value of the 1st result returned from this method // invocation. - Result0 []fs.FileInfo + Result0 gitserver.ReadDirIterator // Result1 is the value of the 2nd result returned from this method // invocation. Result1 error diff --git a/internal/codeintel/autoindexing/internal/inference/iface.go b/internal/codeintel/autoindexing/internal/inference/iface.go index 78163236849d..16bcb250cc6f 100644 --- a/internal/codeintel/autoindexing/internal/inference/iface.go +++ b/internal/codeintel/autoindexing/internal/inference/iface.go @@ -8,6 +8,7 @@ import ( "github.com/sourcegraph/sourcegraph/internal/api" "github.com/sourcegraph/sourcegraph/internal/gitserver" "github.com/sourcegraph/sourcegraph/internal/luasandbox" + "github.com/sourcegraph/sourcegraph/lib/errors" ) type SandboxService interface { @@ -30,7 +31,24 @@ func NewDefaultGitService() GitService { } func (s *gitService) ReadDir(ctx context.Context, repo api.RepoName, commit api.CommitID, path string, recurse bool) ([]fs.FileInfo, error) { - return s.client.ReadDir(ctx, repo, commit, path, recurse) + it, err := s.client.ReadDir(ctx, repo, commit, path, recurse) + if err != nil { + return nil, err + } + it.Close() + + files := make([]fs.FileInfo, 0) + for { + file, err := it.Next() + if errors.Is(err, io.EOF) { + break + } + if err != nil { + return nil, err + } + files = append(files, file) + } + return files, nil } func (s *gitService) Archive(ctx context.Context, repo api.RepoName, opts gitserver.ArchiveOptions) (io.ReadCloser, error) { diff --git a/internal/codeintel/uploads/internal/background/processor/job_worker_handler.go b/internal/codeintel/uploads/internal/background/processor/job_worker_handler.go index 822110f50688..1747e67f0dc3 100644 --- a/internal/codeintel/uploads/internal/background/processor/job_worker_handler.go +++ b/internal/codeintel/uploads/internal/background/processor/job_worker_handler.go @@ -224,11 +224,20 @@ func (h *handler) HandleRawUpload(ctx context.Context, logger log.Logger, upload allFDs := make([]fs.FileInfo, 0) seen := make(map[string]struct{}) for _, d := range dirnames { - fds, err := h.gitserverClient.ReadDir(ctx, repo.Name, api.CommitID(upload.Commit), d, false) + it, err := h.gitserverClient.ReadDir(ctx, repo.Name, api.CommitID(upload.Commit), d, false) if err != nil { return nil, errors.Wrap(err, "gitserverClient.ReadDir") } - for _, fd := range fds { + defer it.Close() + + for { + fd, err := it.Next() + if err != nil { + if errors.Is(err, io.EOF) { + break + } + return nil, errors.Wrap(err, "gitserverClient.ReadDir.Next") + } if fd.IsDir() { continue } diff --git a/internal/codeintel/uploads/internal/background/processor/job_worker_handler_test.go b/internal/codeintel/uploads/internal/background/processor/job_worker_handler_test.go index 3818bced13cd..30df37a10c67 100644 --- a/internal/codeintel/uploads/internal/background/processor/job_worker_handler_test.go +++ b/internal/codeintel/uploads/internal/background/processor/job_worker_handler_test.go @@ -72,16 +72,16 @@ func TestHandle(t *testing.T) { mockUploadStore.GetFunc.SetDefaultHook(copyTestDumpScip) // Allowlist all files in dump - gitserverClient.ReadDirFunc.SetDefaultHook(func(_ context.Context, _ api.RepoName, _ api.CommitID, path string, _ bool) ([]fs.FileInfo, error) { + gitserverClient.ReadDirFunc.SetDefaultHook(func(_ context.Context, _ api.RepoName, _ api.CommitID, path string, _ bool) (gitserver.ReadDirIterator, error) { children, ok := scipDirectoryChildren[path] if !ok { - return nil, nil + return gitserver.NewReadDirIteratorFromSlice(nil), nil } fis := make([]fs.FileInfo, 0, len(children)) for _, c := range children { fis = append(fis, &fileutil.FileInfo{Name_: c}) } - return fis, nil + return gitserver.NewReadDirIteratorFromSlice(fis), nil }) expectedCommitDate := time.Unix(1587396557, 0).UTC() @@ -324,6 +324,8 @@ func TestHandleError(t *testing.T) { }, nil }) + gitserverClient.ReadDirFunc.SetDefaultReturn(gitserver.NewReadDirIteratorFromSlice(nil), nil) + // Set a different tip commit mockDBStore.SetRepositoryAsDirtyFunc.SetDefaultReturn(errors.Errorf("uh-oh!")) diff --git a/internal/gitserver/client.go b/internal/gitserver/client.go index f86c29b89147..a3b68570fa59 100644 --- a/internal/gitserver/client.go +++ b/internal/gitserver/client.go @@ -411,7 +411,7 @@ type Client interface { Stat(ctx context.Context, repo api.RepoName, commit api.CommitID, path string) (fs.FileInfo, error) // ReadDir reads the contents of the named directory at commit. - ReadDir(ctx context.Context, repo api.RepoName, commit api.CommitID, path string, recurse bool) ([]fs.FileInfo, error) + ReadDir(ctx context.Context, repo api.RepoName, commit api.CommitID, path string, recurse bool) (ReadDirIterator, error) // NewFileReader returns an io.ReadCloser reading from the named file at commit. // The caller should always close the reader after use. @@ -1164,6 +1164,25 @@ func (c *clientImplementor) ListGitoliteRepos(ctx context.Context, gitoliteHost return list, nil } +// ReadDirIterator is an iterator for retrieving the file descriptors of files/dirs +// in a Git repository. +// +// The caller must ensure that they call Close() when the iterator is no longer +// needed to release any associated resources. +type ReadDirIterator interface { + // Next returns the next file descriptor. + // + // If there are no more files, Next returns an io.EOF error. + // If an error occurs during iteration, Next returns the error that occurred. + Next() (fs.FileInfo, error) + + // Close closes the iterator and releases any associated resources. + // + // It is important to call Close when the iterator is no longer needed to avoid resource leaks. + // After calling Close, any subsequent calls to Next will return an io.EOF error. + Close() +} + // ChangedFilesIterator is an iterator for retrieving the status of changed files in a Git repository. // It allows iterating over the changed files and retrieving their paths and statuses. // @@ -1220,3 +1239,42 @@ func (c *changedFilesSliceIterator) Close() { } var _ ChangedFilesIterator = &changedFilesSliceIterator{} + +// NewReadDirIteratorFromSlice returns a new ReadDirIterator that iterates over +// the given slice which is useful for testing. +func NewReadDirIteratorFromSlice(fds []fs.FileInfo) ReadDirIterator { + return &readDirSliceIterator{fds: fds} +} + +type readDirSliceIterator struct { + mu sync.Mutex + fds []fs.FileInfo + closed bool +} + +func (c *readDirSliceIterator) Next() (fs.FileInfo, error) { + c.mu.Lock() + defer c.mu.Unlock() + + if c.closed { + return nil, io.EOF + } + + if len(c.fds) == 0 { + return nil, io.EOF + } + + fd := c.fds[0] + c.fds = c.fds[1:] + + return fd, nil +} + +func (c *readDirSliceIterator) Close() { + c.mu.Lock() + defer c.mu.Unlock() + + c.closed = true +} + +var _ ReadDirIterator = &readDirSliceIterator{} diff --git a/internal/gitserver/commands.go b/internal/gitserver/commands.go index 06ebf2e5e83d..db814b7cfb2e 100644 --- a/internal/gitserver/commands.go +++ b/internal/gitserver/commands.go @@ -342,7 +342,7 @@ func (i *changedFilesIterator) Close() { }) } -func (c *clientImplementor) ReadDir(ctx context.Context, repo api.RepoName, commit api.CommitID, path string, recurse bool) (_ []fs.FileInfo, err error) { +func (c *clientImplementor) ReadDir(ctx context.Context, repo api.RepoName, commit api.CommitID, path string, recurse bool) (_ ReadDirIterator, err error) { ctx, _, endObservation := c.operations.readDir.With(ctx, &err, observation.Args{ MetricLabelValues: []string{c.scope}, Attrs: []attribute.KeyValue{ @@ -352,77 +352,124 @@ func (c *clientImplementor) ReadDir(ctx context.Context, repo api.RepoName, comm attribute.Bool("recurse", recurse), }, }) - defer endObservation(1, observation.Args{}) client, err := c.clientSource.ClientForRepo(ctx, repo) if err != nil { + endObservation(1, observation.Args{}) return nil, err } - res, err := client.ReadDir(ctx, &proto.ReadDirRequest{ + // We create a context here to have a way to cancel the RPC if the caller + // doesn't read the entire result. + ctx, cancel := context.WithCancel(ctx) + cc, err := client.ReadDir(ctx, &proto.ReadDirRequest{ RepoName: string(repo), CommitSha: string(commit), Path: []byte(path), Recursive: recurse, }) if err != nil { + cancel() + endObservation(1, observation.Args{}) return nil, err } - fis := []fs.FileInfo{} + // We receive the first chunk here so that we can return an error if the + // file/path is not found. + firstChunk, err := cc.Recv() + if err != nil { + defer endObservation(1, observation.Args{}) + defer cancel() - for { - chunk, err := res.Recv() - if err == io.EOF { - break - } - if err != nil { - if s, ok := status.FromError(err); ok { - // If sub repo permissions deny access to the file, we return os.ErrNotExist. - if s.Code() == codes.NotFound { - for _, d := range s.Details() { - fp, ok := d.(*proto.FileNotFoundPayload) - if ok { - return nil, &os.PathError{Op: "open", Path: fp.Path, Err: os.ErrNotExist} - } + if s, ok := status.FromError(err); ok { + if s.Code() == codes.NotFound { + for _, d := range s.Details() { + fp, ok := d.(*proto.FileNotFoundPayload) + if ok { + return nil, &os.PathError{Op: "open", Path: fp.Path, Err: os.ErrNotExist} } } } - - return nil, err - } - for _, fi := range chunk.GetFileInfo() { - fis = append(fis, gitdomain.ProtoFileInfoToFS(fi)) } + + return nil, err } - if err != nil || !authz.SubRepoEnabled(c.subRepoPermsChecker) { - return fis, err + return &readDirIterator{ + ctx: ctx, + firstChunk: firstChunk, + cc: cc, + onClose: func() { + cancel() + endObservation(1, observation.Args{}) + }, + subRepoPermsChecker: c.subRepoPermsChecker, + repo: repo, + }, nil +} + +type readDirIterator struct { + ctx context.Context + firstChunk *proto.ReadDirResponse + firstChunkConsumed bool + cc proto.GitserverService_ReadDirClient + onClose func() + subRepoPermsChecker authz.SubRepoPermissionChecker + repo api.RepoName + buffer []fs.FileInfo +} + +func (i *readDirIterator) Next() (fs.FileInfo, error) { + if len(i.buffer) == 0 { + for { + if i.ctx.Err() != nil { + return nil, i.ctx.Err() + } + + chunk, err := i.fetchChunk() + if err != nil { + return nil, err + } + fds := []fs.FileInfo{} + for _, f := range chunk.GetFileInfo() { + fds = append(fds, gitdomain.ProtoFileInfoToFS(f)) + } + if authz.SubRepoEnabled(i.subRepoPermsChecker) { + a := actor.FromContext(i.ctx) + filtered, filteringErr := authz.FilterActorFileInfos(i.ctx, i.subRepoPermsChecker, a, i.repo, fds) + if filteringErr != nil { + return nil, errors.Wrap(err, "filtering paths") + } + i.buffer = filtered + } else { + i.buffer = fds + } + if len(i.buffer) > 0 { + break + } + } } - a := actor.FromContext(ctx) - filtered, filteringErr := authz.FilterActorFileInfos(ctx, c.subRepoPermsChecker, a, repo, fis) - if filteringErr != nil { - return nil, errors.Wrap(err, "filtering paths") - } else { - return filtered, nil + if len(i.buffer) == 0 { + return nil, io.EOF } + + fd := i.buffer[0] + i.buffer = i.buffer[1:] + + return fd, nil } -func pathspecsToStrings(pathspecs []gitdomain.Pathspec) []string { - var strings []string - for _, pathspec := range pathspecs { - strings = append(strings, string(pathspec)) +func (i *readDirIterator) fetchChunk() (*proto.ReadDirResponse, error) { + if !i.firstChunkConsumed { + i.firstChunkConsumed = true + return i.firstChunk, nil } - return strings + return i.cc.Recv() } -func pathspecsToBytes(pathspecs []gitdomain.Pathspec) [][]byte { - var s [][]byte - for _, pathspec := range pathspecs { - s = append(s, []byte(pathspec)) - } - return s +func (i *readDirIterator) Close() { + i.onClose() } func (c *clientImplementor) LogReverseEach(ctx context.Context, repo string, commit string, n int, onLogEntry func(entry gitdomain.LogEntry) error) (err error) { diff --git a/internal/gitserver/commands_test.go b/internal/gitserver/commands_test.go index 2ac0f973b1fc..ee92e6adbfb2 100644 --- a/internal/gitserver/commands_test.go +++ b/internal/gitserver/commands_test.go @@ -2187,10 +2187,65 @@ func TestClient_ReadDir(t *testing.T) { c := NewTestClient(t).WithClientSource(source) - res, err := c.ReadDir(context.Background(), "repo", "HEAD", "", true) + it, err := c.ReadDir(context.Background(), "repo", "HEAD", "", true) require.NoError(t, err) - require.Equal(t, "file", res[0].Name()) - require.Equal(t, "dir/file", res[1].Name()) + + fd, err := it.Next() + require.NoError(t, err) + require.Equal(t, "file", fd.Name()) + + fd, err = it.Next() + require.NoError(t, err) + require.Equal(t, "dir/file", fd.Name()) + + _, err = it.Next() + require.Equal(t, io.EOF, err) + + it.Close() + }) + + t.Run("correctly memoizes multiple results in one chunk", func(t *testing.T) { + source := NewTestClientSource(t, []string{"gitserver"}, func(o *TestClientSourceOptions) { + o.ClientFunc = func(cc *grpc.ClientConn) proto.GitserverServiceClient { + c := NewMockGitserverServiceClient() + s := NewMockGitserverService_ReadDirClient() + s.RecvFunc.PushReturn(&proto.ReadDirResponse{ + FileInfo: []*proto.FileInfo{ + { + Name: []byte("file"), + Size: 10, + Mode: 0644, + }, + { + Name: []byte("dir/file"), + Size: 12, + Mode: 0644, + }, + }, + }, nil) + s.RecvFunc.PushReturn(nil, io.EOF) + c.ReadDirFunc.SetDefaultReturn(s, nil) + return c + } + }) + + c := NewTestClient(t).WithClientSource(source) + + it, err := c.ReadDir(context.Background(), "repo", "HEAD", "", true) + require.NoError(t, err) + + fd, err := it.Next() + require.NoError(t, err) + require.Equal(t, "file", fd.Name()) + + fd, err = it.Next() + require.NoError(t, err) + require.Equal(t, "dir/file", fd.Name()) + + _, err = it.Next() + require.Equal(t, io.EOF, err) + + it.Close() }) t.Run("returns common errors correctly", func(t *testing.T) { @@ -2274,9 +2329,14 @@ func TestClient_ReadDir(t *testing.T) { checker := getTestSubRepoPermsChecker("file") c := NewTestClient(t).WithClientSource(source).WithChecker(checker) - res, err := c.ReadDir(ctx, "repo", "HEAD", "file", true) + it, err := c.ReadDir(ctx, "repo", "HEAD", "file", true) + require.NoError(t, err) + fd, err := it.Next() require.NoError(t, err) - require.Len(t, res, 1) - require.Equal(t, "dir/file", res[0].Name()) + require.Equal(t, "dir/file", fd.Name()) + _, err = it.Next() + require.Equal(t, io.EOF, err) + + it.Close() }) } diff --git a/internal/gitserver/mocks_temp.go b/internal/gitserver/mocks_temp.go index a4ef685c9b29..cdadaf142ca3 100644 --- a/internal/gitserver/mocks_temp.go +++ b/internal/gitserver/mocks_temp.go @@ -269,7 +269,7 @@ func NewMockClient() *MockClient { }, }, ReadDirFunc: &ClientReadDirFunc{ - defaultHook: func(context.Context, api.RepoName, api.CommitID, string, bool) (r0 []fs.FileInfo, r1 error) { + defaultHook: func(context.Context, api.RepoName, api.CommitID, string, bool) (r0 ReadDirIterator, r1 error) { return }, }, @@ -456,7 +456,7 @@ func NewStrictMockClient() *MockClient { }, }, ReadDirFunc: &ClientReadDirFunc{ - defaultHook: func(context.Context, api.RepoName, api.CommitID, string, bool) ([]fs.FileInfo, error) { + defaultHook: func(context.Context, api.RepoName, api.CommitID, string, bool) (ReadDirIterator, error) { panic("unexpected invocation of MockClient.ReadDir") }, }, @@ -3489,15 +3489,15 @@ func (c ClientPerforceUsersFuncCall) Results() []interface{} { // ClientReadDirFunc describes the behavior when the ReadDir method of the // parent MockClient instance is invoked. type ClientReadDirFunc struct { - defaultHook func(context.Context, api.RepoName, api.CommitID, string, bool) ([]fs.FileInfo, error) - hooks []func(context.Context, api.RepoName, api.CommitID, string, bool) ([]fs.FileInfo, error) + defaultHook func(context.Context, api.RepoName, api.CommitID, string, bool) (ReadDirIterator, error) + hooks []func(context.Context, api.RepoName, api.CommitID, string, bool) (ReadDirIterator, error) history []ClientReadDirFuncCall mutex sync.Mutex } // ReadDir delegates to the next hook function in the queue and stores the // parameter and result values of this invocation. -func (m *MockClient) ReadDir(v0 context.Context, v1 api.RepoName, v2 api.CommitID, v3 string, v4 bool) ([]fs.FileInfo, error) { +func (m *MockClient) ReadDir(v0 context.Context, v1 api.RepoName, v2 api.CommitID, v3 string, v4 bool) (ReadDirIterator, error) { r0, r1 := m.ReadDirFunc.nextHook()(v0, v1, v2, v3, v4) m.ReadDirFunc.appendCall(ClientReadDirFuncCall{v0, v1, v2, v3, v4, r0, r1}) return r0, r1 @@ -3505,7 +3505,7 @@ func (m *MockClient) ReadDir(v0 context.Context, v1 api.RepoName, v2 api.CommitI // SetDefaultHook sets function that is called when the ReadDir method of // the parent MockClient instance is invoked and the hook queue is empty. -func (f *ClientReadDirFunc) SetDefaultHook(hook func(context.Context, api.RepoName, api.CommitID, string, bool) ([]fs.FileInfo, error)) { +func (f *ClientReadDirFunc) SetDefaultHook(hook func(context.Context, api.RepoName, api.CommitID, string, bool) (ReadDirIterator, error)) { f.defaultHook = hook } @@ -3513,7 +3513,7 @@ func (f *ClientReadDirFunc) SetDefaultHook(hook func(context.Context, api.RepoNa // ReadDir method of the parent MockClient instance invokes the hook at the // front of the queue and discards it. After the queue is empty, the default // hook function is invoked for any future action. -func (f *ClientReadDirFunc) PushHook(hook func(context.Context, api.RepoName, api.CommitID, string, bool) ([]fs.FileInfo, error)) { +func (f *ClientReadDirFunc) PushHook(hook func(context.Context, api.RepoName, api.CommitID, string, bool) (ReadDirIterator, error)) { f.mutex.Lock() f.hooks = append(f.hooks, hook) f.mutex.Unlock() @@ -3521,20 +3521,20 @@ func (f *ClientReadDirFunc) PushHook(hook func(context.Context, api.RepoName, ap // SetDefaultReturn calls SetDefaultHook with a function that returns the // given values. -func (f *ClientReadDirFunc) SetDefaultReturn(r0 []fs.FileInfo, r1 error) { - f.SetDefaultHook(func(context.Context, api.RepoName, api.CommitID, string, bool) ([]fs.FileInfo, error) { +func (f *ClientReadDirFunc) SetDefaultReturn(r0 ReadDirIterator, r1 error) { + f.SetDefaultHook(func(context.Context, api.RepoName, api.CommitID, string, bool) (ReadDirIterator, error) { return r0, r1 }) } // PushReturn calls PushHook with a function that returns the given values. -func (f *ClientReadDirFunc) PushReturn(r0 []fs.FileInfo, r1 error) { - f.PushHook(func(context.Context, api.RepoName, api.CommitID, string, bool) ([]fs.FileInfo, error) { +func (f *ClientReadDirFunc) PushReturn(r0 ReadDirIterator, r1 error) { + f.PushHook(func(context.Context, api.RepoName, api.CommitID, string, bool) (ReadDirIterator, error) { return r0, r1 }) } -func (f *ClientReadDirFunc) nextHook() func(context.Context, api.RepoName, api.CommitID, string, bool) ([]fs.FileInfo, error) { +func (f *ClientReadDirFunc) nextHook() func(context.Context, api.RepoName, api.CommitID, string, bool) (ReadDirIterator, error) { f.mutex.Lock() defer f.mutex.Unlock() @@ -3584,7 +3584,7 @@ type ClientReadDirFuncCall struct { Arg4 bool // Result0 is the value of the 1st result returned from this method // invocation. - Result0 []fs.FileInfo + Result0 ReadDirIterator // Result1 is the value of the 2nd result returned from this method // invocation. Result1 error diff --git a/internal/own/background/analytics.go b/internal/own/background/analytics.go index b8955b177fb1..b34da35b506b 100644 --- a/internal/own/background/analytics.go +++ b/internal/own/background/analytics.go @@ -2,6 +2,7 @@ package background import ( "context" + "io" "time" "github.com/prometheus/client_golang/prometheus" @@ -66,15 +67,24 @@ func (r *analyticsIndexer) indexRepo(ctx context.Context, repoId api.RepoID, che if err != nil { return errcode.MakeNonRetryable(errors.Wrapf(err, "cannot resolve HEAD")) } - files, err := r.client.ReadDir(ctx, repo.Name, commitID, "", true) + it, err := r.client.ReadDir(ctx, repo.Name, commitID, "", true) if err != nil { return errors.Wrap(err, "ls-tree") } + defer it.Close() + isOwnedViaCodeowners := r.codeowners(ctx, repo, commitID) isOwnedViaAssignedOwnership := r.assignedOwners(ctx, repo, commitID) var totalCount int var ownCounts database.PathAggregateCounts - for _, f := range files { + for { + f, err := it.Next() + if err != nil { + if errors.Is(err, io.EOF) { + break + } + return err + } if f.IsDir() { continue } diff --git a/internal/own/background/analytics_test.go b/internal/own/background/analytics_test.go index 227933a486aa..d35b4e6fd3e6 100644 --- a/internal/own/background/analytics_test.go +++ b/internal/own/background/analytics_test.go @@ -30,14 +30,14 @@ type fakeGitServer struct { fileContents map[string]string } -func (f fakeGitServer) ReadDir(ctx context.Context, repo api.RepoName, commit api.CommitID, path string, recursive bool) ([]fs.FileInfo, error) { +func (f fakeGitServer) ReadDir(ctx context.Context, repo api.RepoName, commit api.CommitID, path string, recursive bool) (gitserver.ReadDirIterator, error) { fis := make([]fs.FileInfo, 0, len(f.files)) for _, file := range f.files { fis = append(fis, &fileutil.FileInfo{ Name_: file, }) } - return fis, nil + return gitserver.NewReadDirIteratorFromSlice(fis), nil } func (f fakeGitServer) ResolveRevision(ctx context.Context, repo api.RepoName, spec string, opt gitserver.ResolveRevisionOptions) (api.CommitID, error) {