diff --git a/CHANGELOG.md b/CHANGELOG.md index d4814a2379b3..ad9046c745dd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -26,7 +26,7 @@ All notable changes to Sourcegraph are documented in this file. ### Fixed -- +- Fixed a bug in gitserver where it was possible to use expired Github App authorization tokens when syncing a large repository. Now, gitserver will use the latest tokens for each step of the syncing process (and refresh them if necessary). [#61179](https://github.com/sourcegraph/sourcegraph/pull/61179/) ### Removed diff --git a/cmd/gitserver/internal/executil/executil.go b/cmd/gitserver/internal/executil/executil.go index 58cb1a4cbb10..26ff4280d8fd 100644 --- a/cmd/gitserver/internal/executil/executil.go +++ b/cmd/gitserver/internal/executil/executil.go @@ -347,6 +347,11 @@ func RunCommandWriteOutput(ctx context.Context, cmd wrexec.Cmder, writer io.Writ } err = cmd.Wait() + var exitErr *exec.ExitError + if errors.As(err, &exitErr) { + // Redact the stderr output if we have it. + exitErr.Stderr = []byte(redactor(string(exitErr.Stderr))) + } if ps := cmd.Unwrap().ProcessState; ps != nil && ps.Sys() != nil { if ws, ok := ps.Sys().(syscall.WaitStatus); ok { diff --git a/cmd/gitserver/internal/integration_tests/BUILD.bazel b/cmd/gitserver/internal/integration_tests/BUILD.bazel index 10c740d19f1f..00cb84363782 100644 --- a/cmd/gitserver/internal/integration_tests/BUILD.bazel +++ b/cmd/gitserver/internal/integration_tests/BUILD.bazel @@ -25,7 +25,9 @@ go_library( "//internal/observation", "//internal/ratelimit", "//internal/types", + "//internal/vcs", "//internal/wrexec", + "//lib/errors", "@com_github_sourcegraph_log//:log", "@com_github_sourcegraph_log//logtest", "@org_golang_x_sync//semaphore", diff --git a/cmd/gitserver/internal/integration_tests/archivereader_test.go b/cmd/gitserver/internal/integration_tests/archivereader_test.go index ba9b01cb1042..4f99920d63ca 100644 --- a/cmd/gitserver/internal/integration_tests/archivereader_test.go +++ b/cmd/gitserver/internal/integration_tests/archivereader_test.go @@ -6,6 +6,7 @@ import ( "context" "encoding/base64" "fmt" + "github.com/sourcegraph/sourcegraph/internal/vcs" "io" "net/http/httptest" "net/url" @@ -96,6 +97,14 @@ func TestClient_ArchiveReader(t *testing.T) { runArchiveReaderTestfunc := func(t *testing.T, mkClient func(t *testing.T, addrs []string) gitserver.Client, name api.RepoName, test test) { t.Run(string(name), func(t *testing.T) { // Setup: Prepare the test Gitserver server + register the gRPC server + + getRemoteURLFunc := func(_ context.Context, name api.RepoName) (string, error) { + if test.remote != "" { + return test.remote, nil + } + return "", errors.Errorf("no remote for %s", test.name) + } + s := &server.Server{ Logger: logtest.Scoped(t), ReposDir: filepath.Join(root, "repos"), @@ -103,14 +112,27 @@ func TestClient_ArchiveReader(t *testing.T) { GetBackendFunc: func(dir common.GitDir, repoName api.RepoName) git.GitBackend { return gitcli.NewBackend(logtest.Scoped(t), wrexec.NewNoOpRecordingCommandFactory(), dir, repoName) }, - GetRemoteURLFunc: func(_ context.Context, name api.RepoName) (string, error) { - if test.remote != "" { - return test.remote, nil - } - return "", errors.Errorf("no remote for %s", test.name) - }, + GetRemoteURLFunc: getRemoteURLFunc, GetVCSSyncer: func(ctx context.Context, name api.RepoName) (vcssyncer.VCSSyncer, error) { - return vcssyncer.NewGitRepoSyncer(logtest.Scoped(t), wrexec.NewNoOpRecordingCommandFactory()), nil + getRemoteURLSource := func(ctx context.Context, name api.RepoName) (vcssyncer.RemoteURLSource, error) { + source := vcssyncer.RemoteURLSourceFunc(func(ctx context.Context) (*vcs.URL, error) { + raw, err := getRemoteURLFunc(ctx, name) + if err != nil { + return nil, errors.Wrapf(err, "failed to get remote URL for %s", name) + } + + u, err := vcs.ParseURL(raw) + if err != nil { + return nil, errors.Wrapf(err, "failed to parse remote URL for repository: %s, raw: %s", name, raw) + } + + return u, nil + }) + + return source, nil + } + + return vcssyncer.NewGitRepoSyncer(logtest.Scoped(t), wrexec.NewNoOpRecordingCommandFactory(), getRemoteURLSource), nil }, RecordingCommandFactory: wrexec.NewNoOpRecordingCommandFactory(), Locker: server.NewRepositoryLocker(), diff --git a/cmd/gitserver/internal/integration_tests/clone_test.go b/cmd/gitserver/internal/integration_tests/clone_test.go index 54729435a0a4..9f787c62fdda 100644 --- a/cmd/gitserver/internal/integration_tests/clone_test.go +++ b/cmd/gitserver/internal/integration_tests/clone_test.go @@ -3,6 +3,7 @@ package inttests import ( "container/list" "context" + "github.com/sourcegraph/sourcegraph/lib/errors" "net/http/httptest" "net/url" "os" @@ -51,19 +52,37 @@ func TestClone(t *testing.T) { lock := NewMockRepositoryLock() locker.TryAcquireFunc.SetDefaultReturn(lock, true) + getRemoteURLFunc := func(_ context.Context, name api.RepoName) (string, error) { //nolint:unparam + require.Equal(t, repo, name) + return remote, nil + } + s := server.Server{ Logger: logger, ReposDir: reposDir, GetBackendFunc: func(dir common.GitDir, repoName api.RepoName) git.GitBackend { return gitcli.NewBackend(logtest.Scoped(t), wrexec.NewNoOpRecordingCommandFactory(), dir, repoName) }, - GetRemoteURLFunc: func(_ context.Context, name api.RepoName) (string, error) { - require.Equal(t, repo, name) - return remote, nil - }, + GetRemoteURLFunc: getRemoteURLFunc, GetVCSSyncer: func(ctx context.Context, name api.RepoName) (vcssyncer.VCSSyncer, error) { + getRemoteURLSource := func(ctx context.Context, name api.RepoName) (vcssyncer.RemoteURLSource, error) { + require.Equal(t, repo, name) + return vcssyncer.RemoteURLSourceFunc(func(ctx context.Context) (*vcs.URL, error) { + raw, err := getRemoteURLFunc(ctx, name) + if err != nil { + return nil, errors.Wrapf(err, "failed to get remote URL for %s", name) + } + + u, err := vcs.ParseURL(raw) + if err != nil { + return nil, errors.Wrapf(err, "failed to parse remote URL %q", raw) + } + return u, nil + }), nil + } + require.Equal(t, repo, name) - return vcssyncer.NewGitRepoSyncer(logtest.Scoped(t), wrexec.NewNoOpRecordingCommandFactory()), nil + return vcssyncer.NewGitRepoSyncer(logtest.Scoped(t), wrexec.NewNoOpRecordingCommandFactory(), getRemoteURLSource), nil }, DB: db, Perforce: perforce.NewService(ctx, observation.TestContextTB(t), logger, db, list.New()), @@ -93,10 +112,10 @@ func TestClone(t *testing.T) { // Should have acquired a lock. mockassert.CalledOnce(t, locker.TryAcquireFunc) - // Should have reported status. 21 lines is the output git currently produces. + // Should have reported status. 24 lines is the output git currently produces. // This number might need to be adjusted over time, but before doing so please // check that the calls actually use the args you would expect them to use. - mockassert.CalledN(t, lock.SetStatusFunc, 21) + mockassert.CalledN(t, lock.SetStatusFunc, 24) // Should have released the lock. mockassert.CalledOnce(t, lock.ReleaseFunc) @@ -147,19 +166,36 @@ func TestClone_Fail(t *testing.T) { lock := NewMockRepositoryLock() locker.TryAcquireFunc.SetDefaultReturn(lock, true) + getRemoteURLFunc := func(_ context.Context, name api.RepoName) (string, error) { //nolint:unparam + require.Equal(t, repo, name) + return remote, nil + } + s := server.Server{ Logger: logger, ReposDir: reposDir, GetBackendFunc: func(dir common.GitDir, repoName api.RepoName) git.GitBackend { return gitcli.NewBackend(logtest.Scoped(t), wrexec.NewNoOpRecordingCommandFactory(), dir, repoName) }, - GetRemoteURLFunc: func(_ context.Context, name api.RepoName) (string, error) { - require.Equal(t, repo, name) - return remote, nil - }, + GetRemoteURLFunc: getRemoteURLFunc, GetVCSSyncer: func(ctx context.Context, name api.RepoName) (vcssyncer.VCSSyncer, error) { require.Equal(t, repo, name) - return vcssyncer.NewGitRepoSyncer(logtest.Scoped(t), wrexec.NewNoOpRecordingCommandFactory()), nil + getRemoteURLSource := func(ctx context.Context, name api.RepoName) (vcssyncer.RemoteURLSource, error) { + require.Equal(t, repo, name) + return vcssyncer.RemoteURLSourceFunc(func(ctx context.Context) (*vcs.URL, error) { + raw, err := getRemoteURLFunc(ctx, name) + if err != nil { + return nil, errors.Wrapf(err, "failed to get remote URL for %s", name) + } + + u, err := vcs.ParseURL(raw) + if err != nil { + return nil, errors.Wrapf(err, "failed to parse remote URL %q", raw) + } + return u, nil + }), nil + } + return vcssyncer.NewGitRepoSyncer(logtest.Scoped(t), wrexec.NewNoOpRecordingCommandFactory(), getRemoteURLSource), nil }, DB: db, Perforce: perforce.NewService(ctx, observation.TestContextTB(t), logger, db, list.New()), @@ -210,7 +246,7 @@ func TestClone_Fail(t *testing.T) { // Now, fake that the IsCloneable check passes, then Clone will be called // and is expected to fail. - vcssyncer.TestGitRepoExists = func(ctx context.Context, remoteURL *vcs.URL) error { + vcssyncer.TestGitRepoExists = func(ctx context.Context, name api.RepoName) error { return nil } t.Cleanup(func() { diff --git a/cmd/gitserver/internal/integration_tests/resolverevisions_test.go b/cmd/gitserver/internal/integration_tests/resolverevisions_test.go index a17b706f75ba..758b51ce04ea 100644 --- a/cmd/gitserver/internal/integration_tests/resolverevisions_test.go +++ b/cmd/gitserver/internal/integration_tests/resolverevisions_test.go @@ -3,6 +3,8 @@ package inttests import ( "container/list" "context" + "github.com/sourcegraph/sourcegraph/internal/vcs" + "github.com/sourcegraph/sourcegraph/lib/errors" "net/http/httptest" "net/url" "path/filepath" @@ -69,17 +71,33 @@ func TestClient_ResolveRevisions(t *testing.T) { db := newMockDB() ctx := context.Background() + getRemoteURLFunc := func(_ context.Context, name api.RepoName) (string, error) { //nolint:unparam + return remote, nil + } + s := server.Server{ Logger: logger, ReposDir: filepath.Join(root, "repos"), GetBackendFunc: func(dir common.GitDir, repoName api.RepoName) git.GitBackend { return gitcli.NewBackend(logtest.Scoped(t), wrexec.NewNoOpRecordingCommandFactory(), dir, repoName) }, - GetRemoteURLFunc: func(_ context.Context, name api.RepoName) (string, error) { - return remote, nil - }, + GetRemoteURLFunc: getRemoteURLFunc, GetVCSSyncer: func(ctx context.Context, name api.RepoName) (vcssyncer.VCSSyncer, error) { - return vcssyncer.NewGitRepoSyncer(logtest.Scoped(t), wrexec.NewNoOpRecordingCommandFactory()), nil + getRemoteURLSource := func(ctx context.Context, name api.RepoName) (vcssyncer.RemoteURLSource, error) { + return vcssyncer.RemoteURLSourceFunc(func(ctx context.Context) (*vcs.URL, error) { + raw, err := getRemoteURLFunc(ctx, name) + if err != nil { + return nil, errors.Wrapf(err, "failed to get remote URL for %s", name) + } + + u, err := vcs.ParseURL(raw) + if err != nil { + return nil, errors.Wrapf(err, "failed to parse remote URL %q", raw) + } + return u, nil + }), nil + } + return vcssyncer.NewGitRepoSyncer(logtest.Scoped(t), wrexec.NewNoOpRecordingCommandFactory(), getRemoteURLSource), nil }, DB: db, Perforce: perforce.NewService(ctx, observation.TestContextTB(t), logger, db, list.New()), diff --git a/cmd/gitserver/internal/integration_tests/test_utils.go b/cmd/gitserver/internal/integration_tests/test_utils.go index 4ca896543e22..052b33ad4790 100644 --- a/cmd/gitserver/internal/integration_tests/test_utils.go +++ b/cmd/gitserver/internal/integration_tests/test_utils.go @@ -2,6 +2,8 @@ package inttests import ( "context" + "github.com/sourcegraph/sourcegraph/internal/vcs" + "github.com/sourcegraph/sourcegraph/lib/errors" "net" "net/http" "os" @@ -76,6 +78,10 @@ func InitGitserver() { }) db.ReposFunc.SetDefaultReturn(r) + getRemoteURLFunc := func(_ context.Context, name api.RepoName) (string, error) { //nolint:unparam // context is unused but required by the interface, error is not used in this test + return filepath.Join(root, "remotes", string(name)), nil + } + s := server.Server{ Logger: sglog.Scoped("server"), ObservationCtx: &observation.TestContext, @@ -83,11 +89,25 @@ func InitGitserver() { GetBackendFunc: func(dir common.GitDir, repoName api.RepoName) git.GitBackend { return gitcli.NewBackend(logtest.Scoped(&t), wrexec.NewNoOpRecordingCommandFactory(), dir, repoName) }, - GetRemoteURLFunc: func(ctx context.Context, name api.RepoName) (string, error) { - return filepath.Join(root, "remotes", string(name)), nil - }, + GetRemoteURLFunc: getRemoteURLFunc, GetVCSSyncer: func(ctx context.Context, name api.RepoName) (vcssyncer.VCSSyncer, error) { - return vcssyncer.NewGitRepoSyncer(logger, wrexec.NewNoOpRecordingCommandFactory()), nil + getRemoteURLSource := func(ctx context.Context, name api.RepoName) (vcssyncer.RemoteURLSource, error) { + return vcssyncer.RemoteURLSourceFunc(func(ctx context.Context) (*vcs.URL, error) { + raw, err := getRemoteURLFunc(ctx, name) + if err != nil { + return nil, errors.Wrapf(err, "failed to get remote URL for %s", name) + } + + u, err := vcs.ParseURL(raw) + if err != nil { + return nil, errors.Wrapf(err, "failed to parse remote URL %q", raw) + } + + return u, nil + }), nil + } + + return vcssyncer.NewGitRepoSyncer(logger, wrexec.NewNoOpRecordingCommandFactory(), getRemoteURLSource), nil }, GlobalBatchLogSemaphore: semaphore.NewWeighted(32), DB: db, diff --git a/cmd/gitserver/internal/server.go b/cmd/gitserver/internal/server.go index 2e5317808c8d..d71e4cfc4533 100644 --- a/cmd/gitserver/internal/server.go +++ b/cmd/gitserver/internal/server.go @@ -10,7 +10,6 @@ import ( "io" "net/http" "os" - "os/exec" "path/filepath" "strconv" "strings" @@ -47,7 +46,6 @@ import ( "github.com/sourcegraph/sourcegraph/internal/gitserver/protocol" "github.com/sourcegraph/sourcegraph/internal/goroutine" "github.com/sourcegraph/sourcegraph/internal/honey" - "github.com/sourcegraph/sourcegraph/internal/lazyregexp" "github.com/sourcegraph/sourcegraph/internal/limiter" "github.com/sourcegraph/sourcegraph/internal/observation" "github.com/sourcegraph/sourcegraph/internal/ratelimit" @@ -357,7 +355,7 @@ func (p *clonePipelineRoutine) cloneJobConsumer(ctx context.Context, tasks <-cha go func(task *cloneTask) { defer cancel() - err := p.s.doClone(ctx, task.repo, task.dir, task.syncer, task.lock, task.remoteURL, task.options) + err := p.s.doClone(ctx, task.repo, task.dir, task.syncer, task.lock, task.options) if err != nil { logger.Error("failed to clone repo", log.Error(err)) } @@ -434,11 +432,8 @@ func (s *Server) IsRepoCloneable(ctx context.Context, repo api.RepoName) (protoc // we return is a bool indicating whether the repo is cloneable or not. Perhaps // the only things that could leak here is whether a private repo exists although // the endpoint is only available internally so it's low risk. - remoteURL, err := s.getRemoteURL(actor.WithInternalActor(ctx), repo) - if err != nil { - return protocol.IsRepoCloneableResponse{}, errors.Wrap(err, "getRemoteURL") - } + ctx = actor.WithInternalActor(ctx) syncer, err := s.GetVCSSyncer(ctx, repo) if err != nil { return protocol.IsRepoCloneableResponse{}, errors.Wrap(err, "GetVCSSyncer") @@ -447,7 +442,7 @@ func (s *Server) IsRepoCloneable(ctx context.Context, repo api.RepoName) (protoc resp := protocol.IsRepoCloneableResponse{ Cloned: repoCloned(gitserverfs.RepoDirFromName(s.ReposDir, repo)), } - err = syncer.IsCloneable(ctx, repo, remoteURL) + err = syncer.IsCloneable(ctx, repo) if err != nil { resp.Reason = err.Error() } @@ -734,10 +729,6 @@ func (s *Server) LogIfCorrupt(ctx context.Context, repo api.RepoName, err error) } } -// testRepoCorrupter is used by tests to disrupt a cloned repository (e.g. deleting -// HEAD, zeroing it out, etc.) -var testRepoCorrupter func(ctx context.Context, tmpDir common.GitDir) - // cloneOptions specify optional behaviour for the cloneRepo function. type CloneOptions struct { // Block will wait for the clone to finish before returning. If the clone @@ -765,6 +756,9 @@ func (s *Server) CloneRepo(ctx context.Context, repo api.RepoName, opts CloneOpt return progress, nil } + // We may be attempting to clone a private repo so we need an internal actor. + ctx = actor.WithInternalActor(ctx) + // We always want to store whether there was an error cloning the repo, but only // after we checked if a clone is already in progress, otherwise we would race with // the actual running clone for the DB state of last_error. @@ -778,8 +772,7 @@ func (s *Server) CloneRepo(ctx context.Context, repo api.RepoName, opts CloneOpt return "", errors.Wrap(err, "get VCS syncer") } - // We may be attempting to clone a private repo so we need an internal actor. - remoteURL, err := s.getRemoteURL(actor.WithInternalActor(ctx), repo) + remoteURL, err := s.getRemoteURL(ctx, repo) if err != nil { return "", err } @@ -788,7 +781,7 @@ func (s *Server) CloneRepo(ctx context.Context, repo api.RepoName, opts CloneOpt return "", err } - if err := syncer.IsCloneable(ctx, repo, remoteURL); err != nil { + if err := syncer.IsCloneable(ctx, repo); err != nil { redactedErr := urlredactor.New(remoteURL).Redact(err.Error()) return "", errors.Errorf("error cloning repo: repo %s not cloneable: %s", repo, redactedErr) } @@ -816,7 +809,7 @@ func (s *Server) CloneRepo(ctx context.Context, repo api.RepoName, opts CloneOpt defer cancel() // We are blocking, so use the passed in context. - err = s.doClone(ctx, repo, dir, syncer, lock, remoteURL, opts) + err = s.doClone(ctx, repo, dir, syncer, lock, opts) err = errors.Wrapf(err, "failed to clone %s", repo) return "", err } @@ -851,7 +844,6 @@ func (s *Server) doClone( dir common.GitDir, syncer vcssyncer.VCSSyncer, lock RepositoryLock, - remoteURL *vcs.URL, opts CloneOptions, ) (err error) { logger := s.Logger.Scoped("doClone").With(log.String("repo", string(repo))) @@ -915,7 +907,7 @@ func (s *Server) doClone( output := &linebasedBufferedWriter{} eg := readCloneProgress(s.DB, logger, lock, io.TeeReader(progressReader, output), repo) - cloneErr := syncer.Clone(ctx, repo, remoteURL, dir, tmpPath, progressWriter) + cloneErr := syncer.Clone(ctx, repo, dir, tmpPath, progressWriter) progressWriter.Close() if err := eg.Wait(); err != nil { @@ -933,11 +925,7 @@ func (s *Server) doClone( return errors.Wrapf(cloneErr, "clone failed. Output: %s", output.String()) } - if testRepoCorrupter != nil { - testRepoCorrupter(ctx, common.GitDir(tmpPath)) - } - - if err := postRepoFetchActions(ctx, logger, s.DB, s.Hostname, s.RecordingCommandFactory, repo, common.GitDir(tmpPath), remoteURL, syncer); err != nil { + if err := postRepoFetchActions(ctx, logger, s.DB, s.Hostname, s.RecordingCommandFactory, repo, common.GitDir(tmpPath), syncer); err != nil { return err } @@ -1031,7 +1019,6 @@ func postRepoFetchActions( rcf *wrexec.RecordingCommandFactory, repo api.RepoName, dir common.GitDir, - remoteURL *vcs.URL, syncer vcssyncer.VCSSyncer, ) (errs error) { backend := gitcli.NewBackend(logger, rcf, dir, repo) @@ -1039,12 +1026,6 @@ func postRepoFetchActions( // Note: We use a multi error in this function to try to make as many of the // post repo fetch actions succeed. - // We run setHEAD first, because other commands further down can fail when no - // head exists. - if err := setHEAD(ctx, logger, rcf, repo, dir, syncer, remoteURL); err != nil { - errs = errors.Append(errs, errors.Wrapf(err, "failed to ensure HEAD exists for repo %q", repo)) - } - if err := git.RemoveBadRefs(ctx, dir); err != nil { errs = errors.Append(errs, errors.Wrapf(err, "failed to remove bad refs for repo %q", repo)) } @@ -1207,8 +1188,6 @@ var ( }) ) -var headBranchPattern = lazyregexp.New(`HEAD branch: (.+?)\n`) - func (s *Server) doRepoUpdate(ctx context.Context, repo api.RepoName, revspec string) (err error) { tr, ctx := trace.New(ctx, "doRepoUpdate", repo.Attr()) defer tr.EndWithErr(&err) @@ -1316,7 +1295,7 @@ func (s *Server) doBackgroundRepoUpdate(repo api.RepoName, revspec string) error // TODO: Should be done in janitor. defer git.CleanTmpPackFiles(s.Logger, dir) - output, err := syncer.Fetch(ctx, remoteURL, repo, dir, revspec) + output, err := syncer.Fetch(ctx, repo, dir, revspec) // TODO: Move the redaction also into the VCSSyncer layer here, to be in line // with what clone does. redactedOutput := urlredactor.New(remoteURL).Redact(string(output)) @@ -1333,70 +1312,7 @@ func (s *Server) doBackgroundRepoUpdate(repo api.RepoName, revspec string) error } } - return postRepoFetchActions(ctx, logger, s.DB, s.Hostname, s.RecordingCommandFactory, repo, dir, remoteURL, syncer) -} - -// setHEAD configures git repo defaults (such as what HEAD is) which are -// needed for git commands to work. -func setHEAD(ctx context.Context, logger log.Logger, rcf *wrexec.RecordingCommandFactory, repoName api.RepoName, dir common.GitDir, syncer vcssyncer.VCSSyncer, remoteURL *vcs.URL) error { - // Verify that there is a HEAD file within the repo, and that it is of - // non-zero length. - if err := git.EnsureHEAD(dir); err != nil { - logger.Error("failed to ensure HEAD exists", log.Error(err), log.String("repo", string(repoName))) - } - - // Fallback to git's default branch name if git remote show fails. - headBranch := "master" - - // try to fetch HEAD from origin - cmd, err := syncer.RemoteShowCommand(ctx, remoteURL) - if err != nil { - return errors.Wrap(err, "get remote show command") - } - dir.Set(cmd) - r := urlredactor.New(remoteURL) - output, err := executil.RunRemoteGitCommand(ctx, rcf.WrapWithRepoName(ctx, logger, repoName, cmd).WithRedactorFunc(r.Redact), true) - if err != nil { - logger.Error("Failed to fetch remote info", log.Error(err), log.String("output", string(output))) - return errors.Wrap(err, "failed to fetch remote info") - } - - submatches := headBranchPattern.FindSubmatch(output) - if len(submatches) == 2 { - submatch := string(submatches[1]) - if submatch != "(unknown)" { - headBranch = submatch - } - } - - // check if branch pointed to by HEAD exists - cmd = exec.CommandContext(ctx, "git", "rev-parse", headBranch, "--") - dir.Set(cmd) - if err := cmd.Run(); err != nil { - // branch does not exist, pick first branch - cmd := exec.CommandContext(ctx, "git", "branch") - dir.Set(cmd) - output, err := cmd.Output() - if err != nil { - logger.Error("Failed to list branches", log.Error(err), log.String("output", string(output))) - return errors.Wrap(err, "failed to list branches") - } - lines := strings.Split(string(output), "\n") - branch := strings.TrimPrefix(strings.TrimPrefix(lines[0], "* "), " ") - if branch != "" { - headBranch = branch - } - } - - // set HEAD - cmd = exec.CommandContext(ctx, "git", "symbolic-ref", "HEAD", "refs/heads/"+headBranch) - dir.Set(cmd) - if output, err := cmd.CombinedOutput(); err != nil { - logger.Error("Failed to set HEAD", log.Error(err), log.String("output", string(output))) - return errors.Wrap(err, "Failed to set HEAD") - } - - return nil + return postRepoFetchActions(ctx, logger, s.DB, s.Hostname, s.RecordingCommandFactory, repo, dir, syncer) } // setLastChanged discerns an approximate last-changed timestamp for a diff --git a/cmd/gitserver/internal/server_test.go b/cmd/gitserver/internal/server_test.go index 5731bd1c1f73..311fd2514e21 100644 --- a/cmd/gitserver/internal/server_test.go +++ b/cmd/gitserver/internal/server_test.go @@ -5,6 +5,7 @@ import ( "container/list" "context" "fmt" + "github.com/sourcegraph/sourcegraph/internal/actor" "io" "os" "os/exec" @@ -136,6 +137,10 @@ func TestExecRequest(t *testing.T) { }, } + getRemoteURLFunc := func(ctx context.Context, name api.RepoName) (string, error) { + return "https://" + string(name) + ".git", nil + } + db := dbmocks.NewMockDB() gr := dbmocks.NewMockGitserverRepoStore() db.GitserverReposFunc.SetDefaultReturn(gr) @@ -178,11 +183,22 @@ func TestExecRequest(t *testing.T) { }) return backend }, - GetRemoteURLFunc: func(ctx context.Context, name api.RepoName) (string, error) { - return "https://" + string(name) + ".git", nil - }, + GetRemoteURLFunc: getRemoteURLFunc, GetVCSSyncer: func(ctx context.Context, name api.RepoName) (vcssyncer.VCSSyncer, error) { - return vcssyncer.NewGitRepoSyncer(logtest.Scoped(t), wrexec.NewNoOpRecordingCommandFactory()), nil + + getRemoteURLSource := func(ctx context.Context, name api.RepoName) (vcssyncer.RemoteURLSource, error) { + return vcssyncer.RemoteURLSourceFunc(func(ctx context.Context) (*vcs.URL, error) { + raw := "https://" + string(name) + ".git" + u, err := vcs.ParseURL(raw) + if err != nil { + return nil, errors.Wrapf(err, "failed to parse URL %q", raw) + } + + return u, nil + }), nil + } + + return vcssyncer.NewGitRepoSyncer(logtest.Scoped(t), wrexec.NewNoOpRecordingCommandFactory(), getRemoteURLSource), nil }, DB: db, RecordingCommandFactory: wrexec.NewNoOpRecordingCommandFactory(), @@ -200,10 +216,11 @@ func TestExecRequest(t *testing.T) { } t.Cleanup(func() { repoCloned = origRepoCloned }) - vcssyncer.TestGitRepoExists = func(ctx context.Context, remoteURL *vcs.URL) error { - if remoteURL.String() == "https://github.com/nicksnyder/go-i18n.git" { + vcssyncer.TestGitRepoExists = func(ctx context.Context, repoName api.RepoName) error { + if strings.Contains(string(repoName), "nicksnyder/go-i18n") { return nil } + return errors.New("not cloneable") } t.Cleanup(func() { vcssyncer.TestGitRepoExists = nil }) @@ -302,6 +319,10 @@ func makeTestServer(ctx context.Context, t *testing.T, repoDir, remote string, d logger := logtest.Scoped(t) obctx := observation.TestContextTB(t) + getRemoteURLFunc := func(ctx context.Context, name api.RepoName) (string, error) { + return remote, nil + } + cloneQueue := NewCloneQueue(obctx, list.New()) s := &Server{ Logger: logger, @@ -310,12 +331,26 @@ func makeTestServer(ctx context.Context, t *testing.T, repoDir, remote string, d GetBackendFunc: func(dir common.GitDir, repoName api.RepoName) git.GitBackend { return gitcli.NewBackend(logtest.Scoped(t), wrexec.NewNoOpRecordingCommandFactory(), dir, repoName) }, - GetRemoteURLFunc: func(context.Context, api.RepoName) (string, error) { - return remote, nil - }, + GetRemoteURLFunc: getRemoteURLFunc, GetVCSSyncer: func(ctx context.Context, name api.RepoName) (vcssyncer.VCSSyncer, error) { + getRemoteURLSource := func(ctx context.Context, name api.RepoName) (vcssyncer.RemoteURLSource, error) { + return vcssyncer.RemoteURLSourceFunc(func(ctx context.Context) (*vcs.URL, error) { + raw, err := getRemoteURLFunc(ctx, name) + if err != nil { + return nil, errors.Wrapf(err, "failed to get remote URL for %q", name) + } + + u, err := vcs.ParseURL(raw) + if err != nil { + return nil, errors.Wrapf(err, "failed to parse URL %q", raw) + } + + return u, nil + }), nil + } + return vcssyncer.NewGitRepoSyncer(logtest.Scoped(t), wrexec. - NewNoOpRecordingCommandFactory()), nil + NewNoOpRecordingCommandFactory(), getRemoteURLSource), nil }, DB: db, CloneQueue: cloneQueue, @@ -488,7 +523,7 @@ func TestCloneRepoRecordsFailures(t *testing.T) { name: "Not cloneable", getVCSSyncer: func(ctx context.Context, name api.RepoName) (vcssyncer.VCSSyncer, error) { m := vcssyncer.NewMockVCSSyncer() - m.IsCloneableFunc.SetDefaultHook(func(context.Context, api.RepoName, *vcs.URL) error { + m.IsCloneableFunc.SetDefaultHook(func(context.Context, api.RepoName) error { return errors.New("not_cloneable") }) return m, nil @@ -499,7 +534,7 @@ func TestCloneRepoRecordsFailures(t *testing.T) { name: "Failing clone", getVCSSyncer: func(ctx context.Context, name api.RepoName) (vcssyncer.VCSSyncer, error) { m := vcssyncer.NewMockVCSSyncer() - m.CloneFunc.SetDefaultHook(func(_ context.Context, _ api.RepoName, _ *vcs.URL, _ common.GitDir, _ string, w io.Writer) error { + m.CloneFunc.SetDefaultHook(func(_ context.Context, _ api.RepoName, _ common.GitDir, _ string, w io.Writer) error { _, err := fmt.Fprint(w, "fatal: repository '/dev/null' does not exist") require.NoError(t, err) return &exec.ExitError{ProcessState: &os.ProcessState{}} @@ -565,9 +600,26 @@ func TestHandleRepoUpdate(t *testing.T) { // Confirm that failing to clone the repo stores the error oldRemoveURLFunc := s.GetRemoteURLFunc + oldVCSSyncer := s.GetVCSSyncer + + fakeURL := "https://invalid.example.com/" + s.GetRemoteURLFunc = func(ctx context.Context, name api.RepoName) (string, error) { - return "https://invalid.example.com/", nil + return fakeURL, nil + } + s.GetVCSSyncer = func(ctx context.Context, name api.RepoName) (vcssyncer.VCSSyncer, error) { + return vcssyncer.NewGitRepoSyncer(logtest.Scoped(t), wrexec.NewNoOpRecordingCommandFactory(), func(ctx context.Context, name api.RepoName) (vcssyncer.RemoteURLSource, error) { + return vcssyncer.RemoteURLSourceFunc(func(ctx context.Context) (*vcs.URL, error) { + u, err := vcs.ParseURL(fakeURL) + if err != nil { + return nil, errors.Wrapf(err, "failed to parse URL %q", fakeURL) + } + + return u, nil + }), nil + }), nil } + s.RepoUpdate(&protocol.RepoUpdateRequest{ Repo: repoName, }) @@ -599,6 +651,7 @@ func TestHandleRepoUpdate(t *testing.T) { // This will perform an initial clone s.GetRemoteURLFunc = oldRemoveURLFunc + s.GetVCSSyncer = oldVCSSyncer s.RepoUpdate(&protocol.RepoUpdateRequest{ Repo: repoName, }) @@ -732,12 +785,12 @@ func TestCloneRepo_EnsureValidity(t *testing.T) { _ = makeSingleCommitRepo(cmd) s := makeTestServer(ctx, t, reposDir, remote, nil) - testRepoCorrupter = func(_ context.Context, tmpDir common.GitDir) { + vcssyncer.TestRepositoryPostFetchCorruptionFunc = func(_ context.Context, tmpDir common.GitDir) { if err := os.Remove(tmpDir.Path("HEAD")); err != nil { t.Fatal(err) } } - t.Cleanup(func() { testRepoCorrupter = nil }) + t.Cleanup(func() { vcssyncer.TestRepositoryPostFetchCorruptionFunc = nil }) // Use block so we get clone errors right here and don't have to rely on the // clone queue. There's no other reason for blocking here, just convenience/simplicity. _, err := s.CloneRepo(ctx, repoName, CloneOptions{Block: true}) @@ -765,10 +818,10 @@ func TestCloneRepo_EnsureValidity(t *testing.T) { _ = makeSingleCommitRepo(cmd) s := makeTestServer(ctx, t, reposDir, remote, nil) - testRepoCorrupter = func(_ context.Context, tmpDir common.GitDir) { + vcssyncer.TestRepositoryPostFetchCorruptionFunc = func(_ context.Context, tmpDir common.GitDir) { cmd("sh", "-c", fmt.Sprintf(": > %s/HEAD", tmpDir)) } - t.Cleanup(func() { testRepoCorrupter = nil }) + t.Cleanup(func() { vcssyncer.TestRepositoryPostFetchCorruptionFunc = nil }) if _, err := s.CloneRepo(ctx, "example.com/foo/bar", CloneOptions{Block: true}); err != nil { t.Fatalf("expected no error, got %v", err) } @@ -1246,3 +1299,90 @@ func TestLinebasedBufferedWriter(t *testing.T) { }) } } + +func TestServer_IsRepoCloneable_InternalActor(t *testing.T) { + logger := logtest.Scoped(t) + db := database.NewDB(logger, dbtest.NewDB(t)) + + reposDir := t.TempDir() + + isCloneableCalled := false + + s := &Server{ + Logger: logtest.Scoped(t), + ReposDir: reposDir, + GetBackendFunc: func(dir common.GitDir, repoName api.RepoName) git.GitBackend { + return git.NewMockGitBackend() + }, + GetRemoteURLFunc: func(_ context.Context, _ api.RepoName) (string, error) { + return "", errors.New("unimplemented") + }, + GetVCSSyncer: func(ctx context.Context, name api.RepoName) (vcssyncer.VCSSyncer, error) { + return &mockVCSSyncer{ + mockIsCloneable: func(ctx context.Context, repoName api.RepoName) error { + isCloneableCalled = true + + a := actor.FromContext(ctx) + // We expect the actor to be internal since the repository might be private. + // See the comment in the implementation of IsRepoCloneable. + if !a.IsInternal() { + t.Fatalf("expected internal actor: %v", a) + } + + return nil + }, + }, nil + + }, + DB: db, + RecordingCommandFactory: wrexec.NewNoOpRecordingCommandFactory(), + Locker: NewRepositoryLocker(), + RPSLimiter: ratelimit.NewInstrumentedLimiter("GitserverTest", rate.NewLimiter(rate.Inf, 10)), + } + + _, err := s.IsRepoCloneable(context.Background(), "foo") + require.NoError(t, err) + require.True(t, isCloneableCalled) + +} + +type mockVCSSyncer struct { + mockTypeFunc func() string + mockIsCloneable func(ctx context.Context, repoName api.RepoName) error + mockClone func(ctx context.Context, repo api.RepoName, targetDir common.GitDir, tmpPath string, progressWriter io.Writer) error + mockFetch func(ctx context.Context, repoName api.RepoName, dir common.GitDir, revspec string) ([]byte, error) +} + +func (m *mockVCSSyncer) Type() string { + if m.mockTypeFunc != nil { + return m.mockTypeFunc() + } + + panic("no mock for Type() is set") +} + +func (m *mockVCSSyncer) IsCloneable(ctx context.Context, repoName api.RepoName) error { + if m.mockIsCloneable != nil { + return m.mockIsCloneable(ctx, repoName) + } + + return errors.New("no mock for IsCloneable() is set") +} + +func (m *mockVCSSyncer) Clone(ctx context.Context, repo api.RepoName, targetDir common.GitDir, tmpPath string, progressWriter io.Writer) error { + if m.mockClone != nil { + return m.mockClone(ctx, repo, targetDir, tmpPath, progressWriter) + } + + return errors.New("no mock for Clone() is set") +} + +func (m *mockVCSSyncer) Fetch(ctx context.Context, repoName api.RepoName, dir common.GitDir, revspec string) ([]byte, error) { + if m.mockFetch != nil { + return m.mockFetch(ctx, repoName, dir, revspec) + } + + return nil, errors.New("no mock for Fetch() is set") +} + +var _ vcssyncer.VCSSyncer = &mockVCSSyncer{} diff --git a/cmd/gitserver/internal/vcssyncer/BUILD.bazel b/cmd/gitserver/internal/vcssyncer/BUILD.bazel index 694f4a4de8b3..dba84aaf18f0 100644 --- a/cmd/gitserver/internal/vcssyncer/BUILD.bazel +++ b/cmd/gitserver/internal/vcssyncer/BUILD.bazel @@ -43,8 +43,10 @@ go_library( "//internal/extsvc/npm", "//internal/extsvc/pypi", "//internal/extsvc/rubygems", + "//internal/gitserver/protocol", "//internal/httpcli", "//internal/jsonc", + "//internal/lazyregexp", "//internal/observation", "//internal/unpack", "//internal/vcs", diff --git a/cmd/gitserver/internal/vcssyncer/git.go b/cmd/gitserver/internal/vcssyncer/git.go index 409c0f381cf6..e8f0bb791990 100644 --- a/cmd/gitserver/internal/vcssyncer/git.go +++ b/cmd/gitserver/internal/vcssyncer/git.go @@ -1,7 +1,10 @@ package vcssyncer import ( + "bytes" "context" + "github.com/sourcegraph/sourcegraph/internal/gitserver/protocol" + "github.com/sourcegraph/sourcegraph/internal/lazyregexp" "io" "os" "os/exec" @@ -16,7 +19,6 @@ import ( "github.com/sourcegraph/sourcegraph/cmd/gitserver/internal/executil" "github.com/sourcegraph/sourcegraph/cmd/gitserver/internal/git" "github.com/sourcegraph/sourcegraph/cmd/gitserver/internal/urlredactor" - "github.com/sourcegraph/sourcegraph/internal/vcs" "github.com/sourcegraph/sourcegraph/internal/wrexec" "github.com/sourcegraph/sourcegraph/lib/errors" ) @@ -25,10 +27,17 @@ import ( type gitRepoSyncer struct { logger log.Logger recordingCommandFactory *wrexec.RecordingCommandFactory + getRemoteURLSource func(ctx context.Context, name api.RepoName) (RemoteURLSource, error) } -func NewGitRepoSyncer(logger log.Logger, r *wrexec.RecordingCommandFactory) *gitRepoSyncer { - return &gitRepoSyncer{logger: logger.Scoped("GitRepoSyncer"), recordingCommandFactory: r} +func NewGitRepoSyncer( + logger log.Logger, + r *wrexec.RecordingCommandFactory, + getRemoteURLSource func(ctx context.Context, name api.RepoName) (RemoteURLSource, error)) *gitRepoSyncer { + return &gitRepoSyncer{ + logger: logger.Scoped("GitRepoSyncer"), + recordingCommandFactory: r, + getRemoteURLSource: getRemoteURLSource} } func (s *gitRepoSyncer) Type() string { @@ -37,97 +46,170 @@ func (s *gitRepoSyncer) Type() string { // TestGitRepoExists is a test fixture that overrides the return value for // GitRepoSyncer.IsCloneable when it is set. -var TestGitRepoExists func(ctx context.Context, remoteURL *vcs.URL) error +var TestGitRepoExists func(ctx context.Context, repoName api.RepoName) error // IsCloneable checks to see if the Git remote URL is cloneable. -func (s *gitRepoSyncer) IsCloneable(ctx context.Context, repoName api.RepoName, remoteURL *vcs.URL) error { - if isAlwaysCloningTestRemoteURL(remoteURL) { +func (s *gitRepoSyncer) IsCloneable(ctx context.Context, repoName api.RepoName) (err error) { + if isAlwaysCloningTest(repoName) { return nil } + if TestGitRepoExists != nil { - return TestGitRepoExists(ctx, remoteURL) + return TestGitRepoExists(ctx, repoName) } - args := []string{"ls-remote", remoteURL.String(), "HEAD"} - ctx, cancel := context.WithTimeout(ctx, 30*time.Second) - defer cancel() - - r := urlredactor.New(remoteURL) - cmd := exec.CommandContext(ctx, "git", args...) - out, err := executil.RunRemoteGitCommand(ctx, s.recordingCommandFactory.WrapWithRepoName(ctx, log.NoOp(), repoName, cmd).WithRedactorFunc(r.Redact), true) + source, err := s.getRemoteURLSource(ctx, repoName) if err != nil { - if ctxerr := ctx.Err(); ctxerr != nil { - err = ctxerr + return errors.Wrapf(err, "failed to get remote URL source for %s", repoName) + } + + { + remoteURL, err := source.RemoteURL(ctx) + if err != nil { + return errors.Wrapf(err, "failed to get remote URL for %s", repoName) } - if len(out) > 0 { - err = errors.Wrap(err, "failed to check remote access: "+string(out)) + + args := []string{"ls-remote", remoteURL.String(), "HEAD"} + ctx, cancel := context.WithTimeout(ctx, 30*time.Second) + defer cancel() + + r := urlredactor.New(remoteURL) + cmd := exec.CommandContext(ctx, "git", args...) + out, err := executil.RunRemoteGitCommand(ctx, s.recordingCommandFactory.WrapWithRepoName(ctx, log.NoOp(), repoName, cmd).WithRedactorFunc(r.Redact), true) + if err != nil { + if ctxerr := ctx.Err(); ctxerr != nil { + err = ctxerr + } + if len(out) > 0 { + redactedOutput := urlredactor.New(remoteURL).Redact(string(out)) + err = errors.Wrap(err, "failed to check remote access: "+redactedOutput) + } + return err } - return err } + return nil } +// TestRepositoryPostFetchCorruptionFunc is a test hook that, when set, overrides the +// behavior of the GitRepoSyncer immediately after a repository has been fetched or cloned. It is +// called with the context and the temporary directory of the repository that was fetched. +// +// This can be used by tests to disrupt a cloned repository (e.g. deleting +// +// HEAD, zeroing it out, etc.) +var TestRepositoryPostFetchCorruptionFunc func(ctx context.Context, dir common.GitDir) + // Clone clones a Git repository into tmpPath, reporting redacted progress logs // via the progressWriter. // We "clone" a repository by first creating a bare repo and then fetching the // configured refs into it from the remote. -func (s *gitRepoSyncer) Clone(ctx context.Context, repo api.RepoName, remoteURL *vcs.URL, targetDir common.GitDir, tmpPath string, progressWriter io.Writer) (err error) { +func (s *gitRepoSyncer) Clone(ctx context.Context, repo api.RepoName, _ common.GitDir, tmpPath string, progressWriter io.Writer) (err error) { + dir := common.GitDir(tmpPath) + // First, make sure the tmpPath exists. - if err := os.MkdirAll(tmpPath, os.ModePerm); err != nil { + if err := os.MkdirAll(string(dir), os.ModePerm); err != nil { return errors.Wrapf(err, "clone failed to create tmp dir") } // Next, initialize a bare repo in that tmp path. - tryWrite(s.logger, progressWriter, "Creating bare repo\n") - if err := git.MakeBareRepo(ctx, tmpPath); err != nil { - return err + { + tryWrite(s.logger, progressWriter, "Creating bare repo\n") + + if err := git.MakeBareRepo(ctx, string(dir)); err != nil { + return err + } + + tryWrite(s.logger, progressWriter, "Created bare repo at %s\n", string(dir)) + } + + source, err := s.getRemoteURLSource(ctx, repo) + if err != nil { + return errors.Wrapf(err, "failed to get remote URL source for %q", repo) } - tryWrite(s.logger, progressWriter, "Created bare repo at %s\n", tmpPath) // Now we build our fetch command. We don't actually clone, instead we init // a bare repository and fetch all refs from remote once into local refs. - cmd, _ := s.fetchCommand(ctx, remoteURL) - cmd.Dir = tmpPath - if cmd.Env == nil { - cmd.Env = os.Environ() + { + tryWrite(s.logger, progressWriter, "Fetching remote contents\n") + + exitCode, err := s.runFetchCommand(ctx, repo, source, progressWriter, dir) + if err != nil { + return errors.Wrapf(err, "failed to fetch: exit status %d", exitCode) + } + + tryWrite(s.logger, progressWriter, "Fetched remote contents\n") } - // see issue #7322: skip LFS content in repositories with Git LFS configured. - cmd.Env = append(cmd.Env, "GIT_LFS_SKIP_SMUDGE=1") - executil.ConfigureRemoteGitCommand(cmd) - tryWrite(s.logger, progressWriter, "Fetching remote contents\n") - redactor := urlredactor.New(remoteURL) - wrCmd := s.recordingCommandFactory.WrapWithRepoName(ctx, s.logger, repo, cmd).WithRedactorFunc(redactor.Redact) - // Note: Using RunCommandWriteOutput here does NOT store the output of the - // command as the command output of the wrexec command, because the pipes are - // already used. - exitCode, err := executil.RunCommandWriteOutput(ctx, wrCmd, progressWriter, redactor.Redact) - if err != nil { - return errors.Wrapf(err, "failed to fetch: exit status %d", exitCode) + if TestRepositoryPostFetchCorruptionFunc != nil { + TestRepositoryPostFetchCorruptionFunc(ctx, dir) + } + + // Finally, set the local HEAD to the remote HEAD. + { + tryWrite(s.logger, progressWriter, "Setting local HEAD to remote HEAD\n") + + err = s.setHEAD(ctx, repo, dir, source) + if err != nil { + return errors.Wrap(err, "failed to set local HEAD to remote HEAD") + } + + tryWrite(s.logger, progressWriter, "Finished setting local HEAD to remote HEAD\n") } return nil } // Fetch tries to fetch updates of a Git repository. -func (s *gitRepoSyncer) Fetch(ctx context.Context, remoteURL *vcs.URL, repoName api.RepoName, dir common.GitDir, _ string) ([]byte, error) { - cmd, configRemoteOpts := s.fetchCommand(ctx, remoteURL) - dir.Set(cmd) - r := urlredactor.New(remoteURL) - output, err := executil.RunRemoteGitCommand(ctx, s.recordingCommandFactory.WrapWithRepoName(ctx, log.NoOp(), repoName, cmd).WithRedactorFunc(r.Redact), configRemoteOpts) +func (s *gitRepoSyncer) Fetch(ctx context.Context, repoName api.RepoName, dir common.GitDir, _ string) ([]byte, error) { + source, err := s.getRemoteURLSource(ctx, repoName) if err != nil { - return nil, errors.Wrap(err, "failed to fetch from remote: "+string(output)) + return nil, errors.Wrapf(err, "failed to get remote URL source for %s", repoName) + } + + var output bytes.Buffer + + // Fetch the remote contents. + { + tryWrite(s.logger, &output, "Fetching remote contents\n") + + exitCode, err := s.runFetchCommand(ctx, repoName, source, &output, dir) + if err != nil { + return nil, errors.Wrapf(err, "exit code: %d, failed to fetch from remote: %s", exitCode, output.String()) + } + + tryWrite(s.logger, &output, "Fetched remote contents\n") + + } + + if TestRepositoryPostFetchCorruptionFunc != nil { + TestRepositoryPostFetchCorruptionFunc(ctx, dir) + } + + // Set the local HEAD to the remote HEAD. + { + tryWrite(s.logger, &output, "Setting local HEAD to remote HEAD\n") + + err := s.setHEAD(ctx, repoName, dir, source) + if err != nil { + return nil, errors.Wrap(err, "failed to set local HEAD to remote HEAD") + } + + tryWrite(s.logger, &output, "Finished setting local HEAD to remote HEAD\n") } - return output, nil -} -// RemoteShowCommand returns the command to be executed for showing remote of a Git repository. -func (s *gitRepoSyncer) RemoteShowCommand(ctx context.Context, remoteURL *vcs.URL) (cmd *exec.Cmd, err error) { - return exec.CommandContext(ctx, "git", "remote", "show", remoteURL.String()), nil + return output.Bytes(), nil } -func (s *gitRepoSyncer) fetchCommand(ctx context.Context, remoteURL *vcs.URL) (cmd *exec.Cmd, configRemoteOpts bool) { - configRemoteOpts = true +func (s *gitRepoSyncer) runFetchCommand(ctx context.Context, repoName api.RepoName, source RemoteURLSource, progressWriter io.Writer, dir common.GitDir) (exitCode int, err error) { + remoteURL, err := source.RemoteURL(ctx) + if err != nil { + return -1, errors.Wrap(err, "failed to get remote URL") + } + + var cmd *exec.Cmd + + configRemoteOpts := true if customCmd := customFetchCmd(ctx, remoteURL); customCmd != nil { cmd = customCmd configRemoteOpts = false @@ -149,10 +231,119 @@ func (s *gitRepoSyncer) fetchCommand(ctx context.Context, remoteURL *vcs.URL) (c // Possibly deprecated refs for sourcegraph zap experiment? "+refs/sourcegraph/*:refs/sourcegraph/*") } - return cmd, configRemoteOpts + + if cmd.Env == nil { + cmd.Env = os.Environ() + } + + // see issue #7322: skip LFS content in repositories with Git LFS configured. + cmd.Env = append(cmd.Env, "GIT_LFS_SKIP_SMUDGE=1") + + // Set the working directory for the command. + dir.Set(cmd) + + if configRemoteOpts { + // Configure the command to be able to talk to a remote. + executil.ConfigureRemoteGitCommand(cmd) + } + + redactor := urlredactor.New(remoteURL) + wrCmd := s.recordingCommandFactory.WrapWithRepoName(ctx, s.logger, repoName, cmd).WithRedactorFunc(redactor.Redact) + + // Note: Using RunCommandWriteOutput here does NOT store the output of the + // command as the command output of the wrexec command, because the pipes are + // already used. + + return executil.RunCommandWriteOutput(ctx, wrCmd, progressWriter, redactor.Redact) +} + +var headBranchPattern = lazyregexp.New(`HEAD branch: (.+?)\n`) + +// setHEAD configures git repo defaults (such as what HEAD is) which are +// needed for git commands to work. +func (s *gitRepoSyncer) setHEAD(ctx context.Context, repoName api.RepoName, dir common.GitDir, source RemoteURLSource) error { + // Verify that there is a HEAD file within the repo, and that it is of + // non-zero length. + if err := git.EnsureHEAD(dir); err != nil { + s.logger.Error("failed to ensure HEAD exists", log.Error(err), log.String("repo", string(repoName))) + } + + // Fallback to git's default branch name if git remote show fails. + headBranch := "master" + + // Get the default branch name from the remote. + { + remoteURL, err := source.RemoteURL(ctx) + if err != nil { + return errors.Wrapf(err, "failed to get remote URL for %s", repoName) + } + + // try to fetch HEAD from origin + cmd := exec.CommandContext(ctx, "git", "remote", "show", remoteURL.String()) + dir.Set(cmd) + r := urlredactor.New(remoteURL) + + // Configure the command to be able to talk to a remote. + executil.ConfigureRemoteGitCommand(cmd) + + output, err := s.recordingCommandFactory.WrapWithRepoName(ctx, s.logger, repoName, cmd).WithRedactorFunc(r.Redact).CombinedOutput() + if err != nil { + var exitErr *exec.ExitError + if errors.As(err, &exitErr) { + exitErr.Stderr = []byte(r.Redact(string(exitErr.Stderr))) // Ensure we redact the error message + + s.logger.Error("Failed to fetch remote info", log.Error(exitErr), log.String("output", string(output))) + return errors.Wrap(exitErr, "failed to fetch remote info") + } + + s.logger.Error("Failed to fetch remote info", log.Error(err), log.String("output", string(output))) + return errors.Wrap(err, "failed to fetch remote info") + } + + submatches := headBranchPattern.FindSubmatch(output) + if len(submatches) == 2 { + submatch := string(submatches[1]) + if submatch != "(unknown)" { + headBranch = submatch + } + } + } + + // check if branch pointed to by HEAD exists + { + cmd := exec.CommandContext(ctx, "git", "rev-parse", headBranch, "--") + dir.Set(cmd) + if err := cmd.Run(); err != nil { + // branch does not exist, pick first branch + cmd := exec.CommandContext(ctx, "git", "branch") + dir.Set(cmd) + output, err := cmd.Output() + if err != nil { + s.logger.Error("Failed to list branches", log.Error(err), log.String("output", string(output))) + return errors.Wrap(err, "failed to list branches") + } + lines := strings.Split(string(output), "\n") + branch := strings.TrimPrefix(strings.TrimPrefix(lines[0], "* "), " ") + if branch != "" { + headBranch = branch + } + } + } + + // set HEAD + { + cmd := exec.CommandContext(ctx, "git", "symbolic-ref", "HEAD", "refs/heads/"+headBranch) + dir.Set(cmd) + output, err := cmd.CombinedOutput() + if err != nil { + s.logger.Error("Failed to set HEAD", log.Error(err), log.String("output", string(output))) + return errors.Wrap(err, "Failed to set HEAD") + } + } + + return nil } -func isAlwaysCloningTestRemoteURL(remoteURL *vcs.URL) bool { - return strings.EqualFold(remoteURL.Host, "github.com") && - strings.EqualFold(remoteURL.Path, "sourcegraphtest/alwayscloningtest") +func isAlwaysCloningTest(name api.RepoName) bool { + return protocol.NormalizeRepo(name).Equal("github.com/sourcegraphtest/alwayscloningtest") } diff --git a/cmd/gitserver/internal/vcssyncer/go_modules.go b/cmd/gitserver/internal/vcssyncer/go_modules.go index d7db68147492..3b3471c1ed1a 100644 --- a/cmd/gitserver/internal/vcssyncer/go_modules.go +++ b/cmd/gitserver/internal/vcssyncer/go_modules.go @@ -27,6 +27,7 @@ func NewGoModulesSyncer( connection *schema.GoModulesConnection, svc *dependencies.Service, client *gomodproxy.Client, + getRemoteURLSource func(ctx context.Context, name api.RepoName) (RemoteURLSource, error), reposDir string, ) VCSSyncer { placeholder, err := reposource.ParseGoVersionedPackage("sourcegraph.com/placeholder@v0.0.0") @@ -35,14 +36,15 @@ func NewGoModulesSyncer( } return &vcsPackagesSyncer{ - logger: log.Scoped("GoModulesSyncer"), - typ: "go_modules", - scheme: dependencies.GoPackagesScheme, - placeholder: placeholder, - svc: svc, - configDeps: connection.Dependencies, - source: &goModulesSyncer{client: client, reposDir: reposDir}, - reposDir: reposDir, + logger: log.Scoped("GoModulesSyncer"), + typ: "go_modules", + scheme: dependencies.GoPackagesScheme, + placeholder: placeholder, + svc: svc, + configDeps: connection.Dependencies, + source: &goModulesSyncer{client: client, reposDir: reposDir}, + reposDir: reposDir, + getRemoteURLSource: getRemoteURLSource, } } diff --git a/cmd/gitserver/internal/vcssyncer/jvm_packages.go b/cmd/gitserver/internal/vcssyncer/jvm_packages.go index b03c19e3fb6b..cf4e3961e12d 100644 --- a/cmd/gitserver/internal/vcssyncer/jvm_packages.go +++ b/cmd/gitserver/internal/vcssyncer/jvm_packages.go @@ -36,7 +36,7 @@ const ( jvmMajorVersion0 = 44 ) -func NewJVMPackagesSyncer(connection *schema.JVMPackagesConnection, svc *dependencies.Service, cacheDir string, reposDir string) VCSSyncer { +func NewJVMPackagesSyncer(connection *schema.JVMPackagesConnection, svc *dependencies.Service, getRemoteURLSource func(ctx context.Context, name api.RepoName) (RemoteURLSource, error), cacheDir string, reposDir string) VCSSyncer { placeholder, err := reposource.ParseMavenVersionedPackage("com.sourcegraph:sourcegraph:1.0.0") if err != nil { panic(fmt.Sprintf("expected placeholder package to parse but got %v", err)) @@ -57,6 +57,7 @@ func NewJVMPackagesSyncer(connection *schema.JVMPackagesConnection, svc *depende config: connection, fetch: chandle.FetchSources, }, + getRemoteURLSource: getRemoteURLSource, } } diff --git a/cmd/gitserver/internal/vcssyncer/jvm_packages_test.go b/cmd/gitserver/internal/vcssyncer/jvm_packages_test.go index 30273110cab7..453c14f16ce0 100644 --- a/cmd/gitserver/internal/vcssyncer/jvm_packages_test.go +++ b/cmd/gitserver/internal/vcssyncer/jvm_packages_test.go @@ -4,6 +4,8 @@ import ( "archive/zip" "context" "fmt" + "github.com/sourcegraph/sourcegraph/internal/api" + "github.com/sourcegraph/sourcegraph/internal/vcs" "os" "os/exec" "path" @@ -201,12 +203,24 @@ func TestJVMCloneCommand(t *testing.T) { coursier.CoursierBinary = coursierScript(t, dir) + var testGetRemoteURLSource = func(ctx context.Context, name api.RepoName) (RemoteURLSource, error) { + return RemoteURLSourceFunc(func(ctx context.Context) (*vcs.URL, error) { + u, err := vcs.ParseURL(examplePackageUrl) + if err != nil { + return nil, errors.Wrapf(err, "failed to parse example package URL %q", examplePackageUrl) + } + + return u, nil + + }), nil + } + depsSvc := dependencies.TestService(database.NewDB(logger, dbtest.NewDB(t))) cacheDir := filepath.Join(dir, "cache") - s := NewJVMPackagesSyncer(&schema.JVMPackagesConnection{Maven: schema.Maven{Dependencies: []string{}}}, depsSvc, cacheDir, dir).(*vcsPackagesSyncer) + s := NewJVMPackagesSyncer(&schema.JVMPackagesConnection{Maven: schema.Maven{Dependencies: []string{}}}, depsSvc, testGetRemoteURLSource, cacheDir, dir).(*vcsPackagesSyncer) bareGitDirectory := path.Join(dir, "git") - s.runCloneCommand(t, examplePackageUrl, bareGitDirectory, []string{exampleVersionedPackage}) + s.runCloneCommand(t, bareGitDirectory, []string{exampleVersionedPackage}) assertCommandOutput(t, exec.Command("git", "tag", "--list"), bareGitDirectory, @@ -218,7 +232,7 @@ func TestJVMCloneCommand(t *testing.T) { exampleFileContents, ) - s.runCloneCommand(t, examplePackageUrl, bareGitDirectory, []string{exampleVersionedPackage, exampleVersionedPackage2}) + s.runCloneCommand(t, bareGitDirectory, []string{exampleVersionedPackage, exampleVersionedPackage2}) assertCommandOutput(t, exec.Command("git", "tag", "--list"), bareGitDirectory, @@ -250,7 +264,7 @@ func TestJVMCloneCommand(t *testing.T) { exampleFileContents2, ) - s.runCloneCommand(t, examplePackageUrl, bareGitDirectory, []string{exampleVersionedPackage}) + s.runCloneCommand(t, bareGitDirectory, []string{exampleVersionedPackage}) assertCommandOutput(t, exec.Command("git", "show", fmt.Sprintf("v%s:%s", exampleVersion, exampleFilePath)), bareGitDirectory, diff --git a/cmd/gitserver/internal/vcssyncer/mock.go b/cmd/gitserver/internal/vcssyncer/mock.go index 78010deb962d..21b7e718e326 100644 --- a/cmd/gitserver/internal/vcssyncer/mock.go +++ b/cmd/gitserver/internal/vcssyncer/mock.go @@ -9,12 +9,10 @@ package vcssyncer import ( "context" "io" - "os/exec" "sync" common "github.com/sourcegraph/sourcegraph/cmd/gitserver/internal/common" api "github.com/sourcegraph/sourcegraph/internal/api" - vcs "github.com/sourcegraph/sourcegraph/internal/vcs" ) // MockVCSSyncer is a mock implementation of the VCSSyncer interface (from @@ -31,9 +29,6 @@ type MockVCSSyncer struct { // IsCloneableFunc is an instance of a mock function object controlling // the behavior of the method IsCloneable. IsCloneableFunc *VCSSyncerIsCloneableFunc - // RemoteShowCommandFunc is an instance of a mock function object - // controlling the behavior of the method RemoteShowCommand. - RemoteShowCommandFunc *VCSSyncerRemoteShowCommandFunc // TypeFunc is an instance of a mock function object controlling the // behavior of the method Type. TypeFunc *VCSSyncerTypeFunc @@ -44,22 +39,17 @@ type MockVCSSyncer struct { func NewMockVCSSyncer() *MockVCSSyncer { return &MockVCSSyncer{ CloneFunc: &VCSSyncerCloneFunc{ - defaultHook: func(context.Context, api.RepoName, *vcs.URL, common.GitDir, string, io.Writer) (r0 error) { + defaultHook: func(context.Context, api.RepoName, common.GitDir, string, io.Writer) (r0 error) { return }, }, FetchFunc: &VCSSyncerFetchFunc{ - defaultHook: func(context.Context, *vcs.URL, api.RepoName, common.GitDir, string) (r0 []byte, r1 error) { + defaultHook: func(context.Context, api.RepoName, common.GitDir, string) (r0 []byte, r1 error) { return }, }, IsCloneableFunc: &VCSSyncerIsCloneableFunc{ - defaultHook: func(context.Context, api.RepoName, *vcs.URL) (r0 error) { - return - }, - }, - RemoteShowCommandFunc: &VCSSyncerRemoteShowCommandFunc{ - defaultHook: func(context.Context, *vcs.URL) (r0 *exec.Cmd, r1 error) { + defaultHook: func(context.Context, api.RepoName) (r0 error) { return }, }, @@ -76,25 +66,20 @@ func NewMockVCSSyncer() *MockVCSSyncer { func NewStrictMockVCSSyncer() *MockVCSSyncer { return &MockVCSSyncer{ CloneFunc: &VCSSyncerCloneFunc{ - defaultHook: func(context.Context, api.RepoName, *vcs.URL, common.GitDir, string, io.Writer) error { + defaultHook: func(context.Context, api.RepoName, common.GitDir, string, io.Writer) error { panic("unexpected invocation of MockVCSSyncer.Clone") }, }, FetchFunc: &VCSSyncerFetchFunc{ - defaultHook: func(context.Context, *vcs.URL, api.RepoName, common.GitDir, string) ([]byte, error) { + defaultHook: func(context.Context, api.RepoName, common.GitDir, string) ([]byte, error) { panic("unexpected invocation of MockVCSSyncer.Fetch") }, }, IsCloneableFunc: &VCSSyncerIsCloneableFunc{ - defaultHook: func(context.Context, api.RepoName, *vcs.URL) error { + defaultHook: func(context.Context, api.RepoName) error { panic("unexpected invocation of MockVCSSyncer.IsCloneable") }, }, - RemoteShowCommandFunc: &VCSSyncerRemoteShowCommandFunc{ - defaultHook: func(context.Context, *vcs.URL) (*exec.Cmd, error) { - panic("unexpected invocation of MockVCSSyncer.RemoteShowCommand") - }, - }, TypeFunc: &VCSSyncerTypeFunc{ defaultHook: func() string { panic("unexpected invocation of MockVCSSyncer.Type") @@ -116,9 +101,6 @@ func NewMockVCSSyncerFrom(i VCSSyncer) *MockVCSSyncer { IsCloneableFunc: &VCSSyncerIsCloneableFunc{ defaultHook: i.IsCloneable, }, - RemoteShowCommandFunc: &VCSSyncerRemoteShowCommandFunc{ - defaultHook: i.RemoteShowCommand, - }, TypeFunc: &VCSSyncerTypeFunc{ defaultHook: i.Type, }, @@ -128,23 +110,23 @@ func NewMockVCSSyncerFrom(i VCSSyncer) *MockVCSSyncer { // VCSSyncerCloneFunc describes the behavior when the Clone method of the // parent MockVCSSyncer instance is invoked. type VCSSyncerCloneFunc struct { - defaultHook func(context.Context, api.RepoName, *vcs.URL, common.GitDir, string, io.Writer) error - hooks []func(context.Context, api.RepoName, *vcs.URL, common.GitDir, string, io.Writer) error + defaultHook func(context.Context, api.RepoName, common.GitDir, string, io.Writer) error + hooks []func(context.Context, api.RepoName, common.GitDir, string, io.Writer) error history []VCSSyncerCloneFuncCall mutex sync.Mutex } // Clone delegates to the next hook function in the queue and stores the // parameter and result values of this invocation. -func (m *MockVCSSyncer) Clone(v0 context.Context, v1 api.RepoName, v2 *vcs.URL, v3 common.GitDir, v4 string, v5 io.Writer) error { - r0 := m.CloneFunc.nextHook()(v0, v1, v2, v3, v4, v5) - m.CloneFunc.appendCall(VCSSyncerCloneFuncCall{v0, v1, v2, v3, v4, v5, r0}) +func (m *MockVCSSyncer) Clone(v0 context.Context, v1 api.RepoName, v2 common.GitDir, v3 string, v4 io.Writer) error { + r0 := m.CloneFunc.nextHook()(v0, v1, v2, v3, v4) + m.CloneFunc.appendCall(VCSSyncerCloneFuncCall{v0, v1, v2, v3, v4, r0}) return r0 } // SetDefaultHook sets function that is called when the Clone method of the // parent MockVCSSyncer instance is invoked and the hook queue is empty. -func (f *VCSSyncerCloneFunc) SetDefaultHook(hook func(context.Context, api.RepoName, *vcs.URL, common.GitDir, string, io.Writer) error) { +func (f *VCSSyncerCloneFunc) SetDefaultHook(hook func(context.Context, api.RepoName, common.GitDir, string, io.Writer) error) { f.defaultHook = hook } @@ -152,7 +134,7 @@ func (f *VCSSyncerCloneFunc) SetDefaultHook(hook func(context.Context, api.RepoN // Clone method of the parent MockVCSSyncer instance invokes the hook at the // front of the queue and discards it. After the queue is empty, the default // hook function is invoked for any future action. -func (f *VCSSyncerCloneFunc) PushHook(hook func(context.Context, api.RepoName, *vcs.URL, common.GitDir, string, io.Writer) error) { +func (f *VCSSyncerCloneFunc) PushHook(hook func(context.Context, api.RepoName, common.GitDir, string, io.Writer) error) { f.mutex.Lock() f.hooks = append(f.hooks, hook) f.mutex.Unlock() @@ -161,19 +143,19 @@ func (f *VCSSyncerCloneFunc) PushHook(hook func(context.Context, api.RepoName, * // SetDefaultReturn calls SetDefaultHook with a function that returns the // given values. func (f *VCSSyncerCloneFunc) SetDefaultReturn(r0 error) { - f.SetDefaultHook(func(context.Context, api.RepoName, *vcs.URL, common.GitDir, string, io.Writer) error { + f.SetDefaultHook(func(context.Context, api.RepoName, common.GitDir, string, io.Writer) error { return r0 }) } // PushReturn calls PushHook with a function that returns the given values. func (f *VCSSyncerCloneFunc) PushReturn(r0 error) { - f.PushHook(func(context.Context, api.RepoName, *vcs.URL, common.GitDir, string, io.Writer) error { + f.PushHook(func(context.Context, api.RepoName, common.GitDir, string, io.Writer) error { return r0 }) } -func (f *VCSSyncerCloneFunc) nextHook() func(context.Context, api.RepoName, *vcs.URL, common.GitDir, string, io.Writer) error { +func (f *VCSSyncerCloneFunc) nextHook() func(context.Context, api.RepoName, common.GitDir, string, io.Writer) error { f.mutex.Lock() defer f.mutex.Unlock() @@ -214,16 +196,13 @@ type VCSSyncerCloneFuncCall struct { Arg1 api.RepoName // Arg2 is the value of the 3rd argument passed to this method // invocation. - Arg2 *vcs.URL + Arg2 common.GitDir // Arg3 is the value of the 4th argument passed to this method // invocation. - Arg3 common.GitDir + Arg3 string // Arg4 is the value of the 5th argument passed to this method // invocation. - Arg4 string - // Arg5 is the value of the 6th argument passed to this method - // invocation. - Arg5 io.Writer + Arg4 io.Writer // Result0 is the value of the 1st result returned from this method // invocation. Result0 error @@ -232,7 +211,7 @@ type VCSSyncerCloneFuncCall struct { // Args returns an interface slice containing the arguments of this // invocation. func (c VCSSyncerCloneFuncCall) Args() []interface{} { - return []interface{}{c.Arg0, c.Arg1, c.Arg2, c.Arg3, c.Arg4, c.Arg5} + return []interface{}{c.Arg0, c.Arg1, c.Arg2, c.Arg3, c.Arg4} } // Results returns an interface slice containing the results of this @@ -244,23 +223,23 @@ func (c VCSSyncerCloneFuncCall) Results() []interface{} { // VCSSyncerFetchFunc describes the behavior when the Fetch method of the // parent MockVCSSyncer instance is invoked. type VCSSyncerFetchFunc struct { - defaultHook func(context.Context, *vcs.URL, api.RepoName, common.GitDir, string) ([]byte, error) - hooks []func(context.Context, *vcs.URL, api.RepoName, common.GitDir, string) ([]byte, error) + defaultHook func(context.Context, api.RepoName, common.GitDir, string) ([]byte, error) + hooks []func(context.Context, api.RepoName, common.GitDir, string) ([]byte, error) history []VCSSyncerFetchFuncCall mutex sync.Mutex } // Fetch delegates to the next hook function in the queue and stores the // parameter and result values of this invocation. -func (m *MockVCSSyncer) Fetch(v0 context.Context, v1 *vcs.URL, v2 api.RepoName, v3 common.GitDir, v4 string) ([]byte, error) { - r0, r1 := m.FetchFunc.nextHook()(v0, v1, v2, v3, v4) - m.FetchFunc.appendCall(VCSSyncerFetchFuncCall{v0, v1, v2, v3, v4, r0, r1}) +func (m *MockVCSSyncer) Fetch(v0 context.Context, v1 api.RepoName, v2 common.GitDir, v3 string) ([]byte, error) { + r0, r1 := m.FetchFunc.nextHook()(v0, v1, v2, v3) + m.FetchFunc.appendCall(VCSSyncerFetchFuncCall{v0, v1, v2, v3, r0, r1}) return r0, r1 } // SetDefaultHook sets function that is called when the Fetch method of the // parent MockVCSSyncer instance is invoked and the hook queue is empty. -func (f *VCSSyncerFetchFunc) SetDefaultHook(hook func(context.Context, *vcs.URL, api.RepoName, common.GitDir, string) ([]byte, error)) { +func (f *VCSSyncerFetchFunc) SetDefaultHook(hook func(context.Context, api.RepoName, common.GitDir, string) ([]byte, error)) { f.defaultHook = hook } @@ -268,7 +247,7 @@ func (f *VCSSyncerFetchFunc) SetDefaultHook(hook func(context.Context, *vcs.URL, // Fetch method of the parent MockVCSSyncer instance invokes the hook at the // front of the queue and discards it. After the queue is empty, the default // hook function is invoked for any future action. -func (f *VCSSyncerFetchFunc) PushHook(hook func(context.Context, *vcs.URL, api.RepoName, common.GitDir, string) ([]byte, error)) { +func (f *VCSSyncerFetchFunc) PushHook(hook func(context.Context, api.RepoName, common.GitDir, string) ([]byte, error)) { f.mutex.Lock() f.hooks = append(f.hooks, hook) f.mutex.Unlock() @@ -277,19 +256,19 @@ func (f *VCSSyncerFetchFunc) PushHook(hook func(context.Context, *vcs.URL, api.R // SetDefaultReturn calls SetDefaultHook with a function that returns the // given values. func (f *VCSSyncerFetchFunc) SetDefaultReturn(r0 []byte, r1 error) { - f.SetDefaultHook(func(context.Context, *vcs.URL, api.RepoName, common.GitDir, string) ([]byte, error) { + f.SetDefaultHook(func(context.Context, api.RepoName, common.GitDir, string) ([]byte, error) { return r0, r1 }) } // PushReturn calls PushHook with a function that returns the given values. func (f *VCSSyncerFetchFunc) PushReturn(r0 []byte, r1 error) { - f.PushHook(func(context.Context, *vcs.URL, api.RepoName, common.GitDir, string) ([]byte, error) { + f.PushHook(func(context.Context, api.RepoName, common.GitDir, string) ([]byte, error) { return r0, r1 }) } -func (f *VCSSyncerFetchFunc) nextHook() func(context.Context, *vcs.URL, api.RepoName, common.GitDir, string) ([]byte, error) { +func (f *VCSSyncerFetchFunc) nextHook() func(context.Context, api.RepoName, common.GitDir, string) ([]byte, error) { f.mutex.Lock() defer f.mutex.Unlock() @@ -327,16 +306,13 @@ type VCSSyncerFetchFuncCall struct { Arg0 context.Context // Arg1 is the value of the 2nd argument passed to this method // invocation. - Arg1 *vcs.URL + Arg1 api.RepoName // Arg2 is the value of the 3rd argument passed to this method // invocation. - Arg2 api.RepoName + Arg2 common.GitDir // Arg3 is the value of the 4th argument passed to this method // invocation. - Arg3 common.GitDir - // Arg4 is the value of the 5th argument passed to this method - // invocation. - Arg4 string + Arg3 string // Result0 is the value of the 1st result returned from this method // invocation. Result0 []byte @@ -348,7 +324,7 @@ type VCSSyncerFetchFuncCall struct { // Args returns an interface slice containing the arguments of this // invocation. func (c VCSSyncerFetchFuncCall) Args() []interface{} { - return []interface{}{c.Arg0, c.Arg1, c.Arg2, c.Arg3, c.Arg4} + return []interface{}{c.Arg0, c.Arg1, c.Arg2, c.Arg3} } // Results returns an interface slice containing the results of this @@ -360,24 +336,24 @@ func (c VCSSyncerFetchFuncCall) Results() []interface{} { // VCSSyncerIsCloneableFunc describes the behavior when the IsCloneable // method of the parent MockVCSSyncer instance is invoked. type VCSSyncerIsCloneableFunc struct { - defaultHook func(context.Context, api.RepoName, *vcs.URL) error - hooks []func(context.Context, api.RepoName, *vcs.URL) error + defaultHook func(context.Context, api.RepoName) error + hooks []func(context.Context, api.RepoName) error history []VCSSyncerIsCloneableFuncCall mutex sync.Mutex } // IsCloneable delegates to the next hook function in the queue and stores // the parameter and result values of this invocation. -func (m *MockVCSSyncer) IsCloneable(v0 context.Context, v1 api.RepoName, v2 *vcs.URL) error { - r0 := m.IsCloneableFunc.nextHook()(v0, v1, v2) - m.IsCloneableFunc.appendCall(VCSSyncerIsCloneableFuncCall{v0, v1, v2, r0}) +func (m *MockVCSSyncer) IsCloneable(v0 context.Context, v1 api.RepoName) error { + r0 := m.IsCloneableFunc.nextHook()(v0, v1) + m.IsCloneableFunc.appendCall(VCSSyncerIsCloneableFuncCall{v0, v1, r0}) return r0 } // SetDefaultHook sets function that is called when the IsCloneable method // of the parent MockVCSSyncer instance is invoked and the hook queue is // empty. -func (f *VCSSyncerIsCloneableFunc) SetDefaultHook(hook func(context.Context, api.RepoName, *vcs.URL) error) { +func (f *VCSSyncerIsCloneableFunc) SetDefaultHook(hook func(context.Context, api.RepoName) error) { f.defaultHook = hook } @@ -385,7 +361,7 @@ func (f *VCSSyncerIsCloneableFunc) SetDefaultHook(hook func(context.Context, api // IsCloneable method of the parent MockVCSSyncer instance invokes the hook // at the front of the queue and discards it. After the queue is empty, the // default hook function is invoked for any future action. -func (f *VCSSyncerIsCloneableFunc) PushHook(hook func(context.Context, api.RepoName, *vcs.URL) error) { +func (f *VCSSyncerIsCloneableFunc) PushHook(hook func(context.Context, api.RepoName) error) { f.mutex.Lock() f.hooks = append(f.hooks, hook) f.mutex.Unlock() @@ -394,19 +370,19 @@ func (f *VCSSyncerIsCloneableFunc) PushHook(hook func(context.Context, api.RepoN // SetDefaultReturn calls SetDefaultHook with a function that returns the // given values. func (f *VCSSyncerIsCloneableFunc) SetDefaultReturn(r0 error) { - f.SetDefaultHook(func(context.Context, api.RepoName, *vcs.URL) error { + f.SetDefaultHook(func(context.Context, api.RepoName) error { return r0 }) } // PushReturn calls PushHook with a function that returns the given values. func (f *VCSSyncerIsCloneableFunc) PushReturn(r0 error) { - f.PushHook(func(context.Context, api.RepoName, *vcs.URL) error { + f.PushHook(func(context.Context, api.RepoName) error { return r0 }) } -func (f *VCSSyncerIsCloneableFunc) nextHook() func(context.Context, api.RepoName, *vcs.URL) error { +func (f *VCSSyncerIsCloneableFunc) nextHook() func(context.Context, api.RepoName) error { f.mutex.Lock() defer f.mutex.Unlock() @@ -445,9 +421,6 @@ type VCSSyncerIsCloneableFuncCall struct { // Arg1 is the value of the 2nd argument passed to this method // invocation. Arg1 api.RepoName - // Arg2 is the value of the 3rd argument passed to this method - // invocation. - Arg2 *vcs.URL // Result0 is the value of the 1st result returned from this method // invocation. Result0 error @@ -456,7 +429,7 @@ type VCSSyncerIsCloneableFuncCall struct { // Args returns an interface slice containing the arguments of this // invocation. func (c VCSSyncerIsCloneableFuncCall) Args() []interface{} { - return []interface{}{c.Arg0, c.Arg1, c.Arg2} + return []interface{}{c.Arg0, c.Arg1} } // Results returns an interface slice containing the results of this @@ -465,114 +438,6 @@ func (c VCSSyncerIsCloneableFuncCall) Results() []interface{} { return []interface{}{c.Result0} } -// VCSSyncerRemoteShowCommandFunc describes the behavior when the -// RemoteShowCommand method of the parent MockVCSSyncer instance is invoked. -type VCSSyncerRemoteShowCommandFunc struct { - defaultHook func(context.Context, *vcs.URL) (*exec.Cmd, error) - hooks []func(context.Context, *vcs.URL) (*exec.Cmd, error) - history []VCSSyncerRemoteShowCommandFuncCall - mutex sync.Mutex -} - -// RemoteShowCommand delegates to the next hook function in the queue and -// stores the parameter and result values of this invocation. -func (m *MockVCSSyncer) RemoteShowCommand(v0 context.Context, v1 *vcs.URL) (*exec.Cmd, error) { - r0, r1 := m.RemoteShowCommandFunc.nextHook()(v0, v1) - m.RemoteShowCommandFunc.appendCall(VCSSyncerRemoteShowCommandFuncCall{v0, v1, r0, r1}) - return r0, r1 -} - -// SetDefaultHook sets function that is called when the RemoteShowCommand -// method of the parent MockVCSSyncer instance is invoked and the hook queue -// is empty. -func (f *VCSSyncerRemoteShowCommandFunc) SetDefaultHook(hook func(context.Context, *vcs.URL) (*exec.Cmd, error)) { - f.defaultHook = hook -} - -// PushHook adds a function to the end of hook queue. Each invocation of the -// RemoteShowCommand method of the parent MockVCSSyncer instance invokes the -// hook at the front of the queue and discards it. After the queue is empty, -// the default hook function is invoked for any future action. -func (f *VCSSyncerRemoteShowCommandFunc) PushHook(hook func(context.Context, *vcs.URL) (*exec.Cmd, error)) { - f.mutex.Lock() - f.hooks = append(f.hooks, hook) - f.mutex.Unlock() -} - -// SetDefaultReturn calls SetDefaultHook with a function that returns the -// given values. -func (f *VCSSyncerRemoteShowCommandFunc) SetDefaultReturn(r0 *exec.Cmd, r1 error) { - f.SetDefaultHook(func(context.Context, *vcs.URL) (*exec.Cmd, error) { - return r0, r1 - }) -} - -// PushReturn calls PushHook with a function that returns the given values. -func (f *VCSSyncerRemoteShowCommandFunc) PushReturn(r0 *exec.Cmd, r1 error) { - f.PushHook(func(context.Context, *vcs.URL) (*exec.Cmd, error) { - return r0, r1 - }) -} - -func (f *VCSSyncerRemoteShowCommandFunc) nextHook() func(context.Context, *vcs.URL) (*exec.Cmd, error) { - f.mutex.Lock() - defer f.mutex.Unlock() - - if len(f.hooks) == 0 { - return f.defaultHook - } - - hook := f.hooks[0] - f.hooks = f.hooks[1:] - return hook -} - -func (f *VCSSyncerRemoteShowCommandFunc) appendCall(r0 VCSSyncerRemoteShowCommandFuncCall) { - f.mutex.Lock() - f.history = append(f.history, r0) - f.mutex.Unlock() -} - -// History returns a sequence of VCSSyncerRemoteShowCommandFuncCall objects -// describing the invocations of this function. -func (f *VCSSyncerRemoteShowCommandFunc) History() []VCSSyncerRemoteShowCommandFuncCall { - f.mutex.Lock() - history := make([]VCSSyncerRemoteShowCommandFuncCall, len(f.history)) - copy(history, f.history) - f.mutex.Unlock() - - return history -} - -// VCSSyncerRemoteShowCommandFuncCall is an object that describes an -// invocation of method RemoteShowCommand on an instance of MockVCSSyncer. -type VCSSyncerRemoteShowCommandFuncCall struct { - // Arg0 is the value of the 1st argument passed to this method - // invocation. - Arg0 context.Context - // Arg1 is the value of the 2nd argument passed to this method - // invocation. - Arg1 *vcs.URL - // Result0 is the value of the 1st result returned from this method - // invocation. - Result0 *exec.Cmd - // Result1 is the value of the 2nd result returned from this method - // invocation. - Result1 error -} - -// Args returns an interface slice containing the arguments of this -// invocation. -func (c VCSSyncerRemoteShowCommandFuncCall) Args() []interface{} { - return []interface{}{c.Arg0, c.Arg1} -} - -// Results returns an interface slice containing the results of this -// invocation. -func (c VCSSyncerRemoteShowCommandFuncCall) Results() []interface{} { - return []interface{}{c.Result0, c.Result1} -} - // VCSSyncerTypeFunc describes the behavior when the Type method of the // parent MockVCSSyncer instance is invoked. type VCSSyncerTypeFunc struct { diff --git a/cmd/gitserver/internal/vcssyncer/npm_packages.go b/cmd/gitserver/internal/vcssyncer/npm_packages.go index 81d21763e0b4..3d16faf5ea64 100644 --- a/cmd/gitserver/internal/vcssyncer/npm_packages.go +++ b/cmd/gitserver/internal/vcssyncer/npm_packages.go @@ -26,6 +26,7 @@ func NewNpmPackagesSyncer( connection schema.NpmPackagesConnection, svc *dependencies.Service, client npm.Client, + getRemoteURLSource func(ctx context.Context, name api.RepoName) (RemoteURLSource, error), reposDir string, ) VCSSyncer { placeholder, err := reposource.ParseNpmVersionedPackage("@sourcegraph/placeholder@1.0.0") @@ -34,14 +35,15 @@ func NewNpmPackagesSyncer( } return &vcsPackagesSyncer{ - logger: log.Scoped("NPMPackagesSyncer"), - typ: "npm_packages", - scheme: dependencies.NpmPackagesScheme, - placeholder: placeholder, - svc: svc, - configDeps: connection.Dependencies, - reposDir: reposDir, - source: &npmPackagesSyncer{client: client}, + logger: log.Scoped("NPMPackagesSyncer"), + typ: "npm_packages", + scheme: dependencies.NpmPackagesScheme, + placeholder: placeholder, + svc: svc, + configDeps: connection.Dependencies, + reposDir: reposDir, + source: &npmPackagesSyncer{client: client}, + getRemoteURLSource: getRemoteURLSource, } } diff --git a/cmd/gitserver/internal/vcssyncer/npm_packages_test.go b/cmd/gitserver/internal/vcssyncer/npm_packages_test.go index 7f198c72a9f5..f1fd5e733cb9 100644 --- a/cmd/gitserver/internal/vcssyncer/npm_packages_test.go +++ b/cmd/gitserver/internal/vcssyncer/npm_packages_test.go @@ -6,6 +6,8 @@ import ( "compress/gzip" "context" "fmt" + "github.com/sourcegraph/sourcegraph/internal/api" + "github.com/sourcegraph/sourcegraph/internal/vcs" "io" "io/fs" "os" @@ -104,17 +106,28 @@ func TestNpmCloneCommand(t *testing.T) { }, } + var testGetRemoteURLSource = func(ctx context.Context, repoName api.RepoName) (RemoteURLSource, error) { + return RemoteURLSourceFunc(func(ctx context.Context) (*vcs.URL, error) { + u, err := vcs.ParseURL(exampleNpmPackageURL) + if err != nil { + return nil, errors.Wrapf(err, "failed to parse example package URL %q", exampleNpmPackageURL) + } + return u, nil + }), nil + } + depsSvc := dependencies.TestService(database.NewDB(logger, dbtest.NewDB(t))) s := NewNpmPackagesSyncer( schema.NpmPackagesConnection{Dependencies: []string{}}, depsSvc, &client, + testGetRemoteURLSource, dir, ).(*vcsPackagesSyncer) bareGitDirectory := path.Join(dir, "git") - s.runCloneCommand(t, exampleNpmPackageURL, bareGitDirectory, []string{exampleNpmVersionedPackage}) + s.runCloneCommand(t, bareGitDirectory, []string{exampleNpmVersionedPackage}) checkSingleTag := func() { assertCommandOutput(t, exec.Command("git", "tag", "--list"), @@ -128,7 +141,7 @@ func TestNpmCloneCommand(t *testing.T) { } checkSingleTag() - s.runCloneCommand(t, exampleNpmPackageURL, bareGitDirectory, []string{exampleNpmVersionedPackage, exampleNpmVersionedPackage2}) + s.runCloneCommand(t, bareGitDirectory, []string{exampleNpmVersionedPackage, exampleNpmVersionedPackage2}) checkTagAdded := func() { assertCommandOutput(t, exec.Command("git", "tag", "--list"), @@ -148,7 +161,7 @@ func TestNpmCloneCommand(t *testing.T) { } checkTagAdded() - s.runCloneCommand(t, exampleNpmPackageURL, bareGitDirectory, []string{exampleNpmVersionedPackage}) + s.runCloneCommand(t, bareGitDirectory, []string{exampleNpmVersionedPackage}) assertCommandOutput(t, exec.Command("git", "show", fmt.Sprintf("v%s:%s", exampleNpmVersion, exampleJSFilepath)), bareGitDirectory, @@ -170,7 +183,7 @@ func TestNpmCloneCommand(t *testing.T) { }); err != nil { t.Fatalf(err.Error()) } - s.runCloneCommand(t, exampleNpmPackageURL, bareGitDirectory, []string{}) + s.runCloneCommand(t, bareGitDirectory, []string{}) checkSingleTag() if _, _, err := depsSvc.InsertPackageRepoRefs(context.Background(), []dependencies.MinimalPackageRepoRef{ @@ -182,13 +195,13 @@ func TestNpmCloneCommand(t *testing.T) { }); err != nil { t.Fatalf(err.Error()) } - s.runCloneCommand(t, exampleNpmPackageURL, bareGitDirectory, []string{}) + s.runCloneCommand(t, bareGitDirectory, []string{}) checkTagAdded() if err := depsSvc.DeletePackageRepoRefVersionsByID(context.Background(), 2); err != nil { t.Fatalf(err.Error()) } - s.runCloneCommand(t, exampleNpmPackageURL, bareGitDirectory, []string{}) + s.runCloneCommand(t, bareGitDirectory, []string{}) assertCommandOutput(t, exec.Command("git", "show", fmt.Sprintf("v%s:%s", exampleNpmVersion, exampleJSFilepath)), bareGitDirectory, diff --git a/cmd/gitserver/internal/vcssyncer/packages_syncer.go b/cmd/gitserver/internal/vcssyncer/packages_syncer.go index 9a5bb7855113..1ee6170c1099 100644 --- a/cmd/gitserver/internal/vcssyncer/packages_syncer.go +++ b/cmd/gitserver/internal/vcssyncer/packages_syncer.go @@ -20,7 +20,6 @@ import ( "github.com/sourcegraph/sourcegraph/internal/codeintel/dependencies" "github.com/sourcegraph/sourcegraph/internal/conf/reposource" "github.com/sourcegraph/sourcegraph/internal/errcode" - "github.com/sourcegraph/sourcegraph/internal/vcs" "github.com/sourcegraph/sourcegraph/internal/wrexec" "github.com/sourcegraph/sourcegraph/lib/errors" ) @@ -35,11 +34,12 @@ type vcsPackagesSyncer struct { // placeholder is used to set GIT_AUTHOR_NAME for git commands that don't create // commits or tags. The name of this dependency should never be publicly visible, // so it can have any random value. - placeholder reposource.VersionedPackage - configDeps []string - source packagesSource - svc dependenciesService - reposDir string + placeholder reposource.VersionedPackage + configDeps []string + source packagesSource + svc dependenciesService + reposDir string + getRemoteURLSource func(ctx context.Context, name api.RepoName) (RemoteURLSource, error) } var _ VCSSyncer = &vcsPackagesSyncer{} @@ -71,7 +71,7 @@ type dependenciesService interface { IsPackageRepoVersionAllowed(ctx context.Context, scheme string, pkg reposource.PackageName, version string) (allowed bool, err error) } -func (s *vcsPackagesSyncer) IsCloneable(_ context.Context, _ api.RepoName, _ *vcs.URL) error { +func (s *vcsPackagesSyncer) IsCloneable(_ context.Context, _ api.RepoName) error { return nil } @@ -79,14 +79,10 @@ func (s *vcsPackagesSyncer) Type() string { return s.typ } -func (s *vcsPackagesSyncer) RemoteShowCommand(ctx context.Context, remoteURL *vcs.URL) (cmd *exec.Cmd, err error) { - return exec.CommandContext(ctx, "git", "remote", "show", "./"), nil -} - // Clone writes a package and all requested versions of it into a synthetic git // repo at tmpPath by creating one head per version. // It reports redacted progress logs via the progressWriter. -func (s *vcsPackagesSyncer) Clone(ctx context.Context, repo api.RepoName, remoteURL *vcs.URL, targetDir common.GitDir, tmpPath string, progressWriter io.Writer) (err error) { +func (s *vcsPackagesSyncer) Clone(ctx context.Context, repo api.RepoName, _ common.GitDir, tmpPath string, progressWriter io.Writer) (err error) { // First, make sure the tmpPath exists. if err := os.MkdirAll(tmpPath, os.ModePerm); err != nil { return errors.Wrapf(err, "clone failed to create tmp dir") @@ -102,16 +98,26 @@ func (s *vcsPackagesSyncer) Clone(ctx context.Context, repo api.RepoName, remote // The Fetch method is responsible for cleaning up temporary directories. // TODO: We should have more fine-grained progress reporting here. tryWrite(s.logger, progressWriter, "Fetching package revisions\n") - if _, err := s.Fetch(ctx, remoteURL, "", common.GitDir(tmpPath), ""); err != nil { + if _, err := s.Fetch(ctx, repo, common.GitDir(tmpPath), ""); err != nil { return errors.Wrapf(err, "failed to fetch repo for %s", repo) } return nil } -func (s *vcsPackagesSyncer) Fetch(ctx context.Context, remoteURL *vcs.URL, _ api.RepoName, dir common.GitDir, revspec string) ([]byte, error) { +func (s *vcsPackagesSyncer) Fetch(ctx context.Context, repo api.RepoName, dir common.GitDir, revspec string) ([]byte, error) { + source, err := s.getRemoteURLSource(ctx, repo) + if err != nil { + return nil, errors.Wrap(err, "getting remote URL source") + } + + remoteURL, err := source.RemoteURL(ctx) + if err != nil { + return nil, errors.Wrap(err, "getting remote URL") // This should never happen for Perforce + } + var pkg reposource.Package - pkg, err := s.source.ParsePackageFromRepoName(api.RepoName(remoteURL.Path)) + pkg, err = s.source.ParsePackageFromRepoName(api.RepoName(remoteURL.Path)) if err != nil { return nil, err } diff --git a/cmd/gitserver/internal/vcssyncer/packages_syncer_test.go b/cmd/gitserver/internal/vcssyncer/packages_syncer_test.go index 94489ef153e1..918fc154c3db 100644 --- a/cmd/gitserver/internal/vcssyncer/packages_syncer_test.go +++ b/cmd/gitserver/internal/vcssyncer/packages_syncer_test.go @@ -36,6 +36,8 @@ func TestVcsDependenciesSyncer_Fetch(t *testing.T) { } depsService := &fakeDepsService{deps: map[reposource.PackageName]dependencies.PackageRepoReference{}} + remoteURL := &vcs.URL{URL: url.URL{Path: "fake/foo"}} + s := vcsPackagesSyncer{ logger: logtest.Scoped(t), typ: "fake", @@ -44,19 +46,22 @@ func TestVcsDependenciesSyncer_Fetch(t *testing.T) { source: depsSource, svc: depsService, reposDir: t.TempDir(), + getRemoteURLSource: func(ctx context.Context, name api.RepoName) (RemoteURLSource, error) { + return RemoteURLSourceFunc(func(_ context.Context) (*vcs.URL, error) { + return remoteURL, nil + }), nil + }, } - remoteURL := &vcs.URL{URL: url.URL{Path: "fake/foo"}} - dir := common.GitDir(t.TempDir()) - err := s.Clone(ctx, "repo", remoteURL, common.GitDir(dir), string(dir), io.Discard) + err := s.Clone(ctx, "repo", common.GitDir(dir), string(dir), io.Discard) require.NoError(t, err) depsService.Add("foo@0.0.1") depsSource.Add("foo@0.0.1") t.Run("one version from service", func(t *testing.T) { - _, err := s.Fetch(ctx, remoteURL, "", dir, "") + _, err := s.Fetch(ctx, "", dir, "") require.NoError(t, err) s.assertRefs(t, dir, map[string]string{ @@ -79,7 +84,7 @@ func TestVcsDependenciesSyncer_Fetch(t *testing.T) { oneVersionOneDownload := map[string]int{"foo@0.0.1": 1, "foo@0.0.2": 1} t.Run("two versions, service and config", func(t *testing.T) { - _, err := s.Fetch(ctx, remoteURL, "", dir, "") + _, err := s.Fetch(ctx, "", dir, "") require.NoError(t, err) s.assertRefs(t, dir, allVersionsHaveRefs) @@ -89,7 +94,7 @@ func TestVcsDependenciesSyncer_Fetch(t *testing.T) { depsSource.Delete("foo@0.0.2") t.Run("cached tag not re-downloaded (404 not found)", func(t *testing.T) { - _, err := s.Fetch(ctx, remoteURL, "", dir, "") + _, err := s.Fetch(ctx, "", dir, "") require.NoError(t, err) // v0.0.2 is still present in the git repo because we didn't send a second download request. @@ -101,7 +106,7 @@ func TestVcsDependenciesSyncer_Fetch(t *testing.T) { depsSource.download["foo@0.0.1"] = errors.New("401 unauthorized") t.Run("cached tag not re-downloaded (401 unauthorized)", func(t *testing.T) { - _, err := s.Fetch(ctx, remoteURL, "", dir, "") + _, err := s.Fetch(ctx, "", dir, "") // v0.0.1 is still present in the git repo because we didn't send a second download request. require.NoError(t, err) s.assertRefs(t, dir, allVersionsHaveRefs) @@ -116,7 +121,7 @@ func TestVcsDependenciesSyncer_Fetch(t *testing.T) { } t.Run("service version deleted", func(t *testing.T) { - _, err := s.Fetch(ctx, remoteURL, "", dir, "") + _, err := s.Fetch(ctx, "", dir, "") require.NoError(t, err) s.assertRefs(t, dir, onlyV2Refs) @@ -126,7 +131,7 @@ func TestVcsDependenciesSyncer_Fetch(t *testing.T) { s.configDeps = []string{} t.Run("all versions deleted", func(t *testing.T) { - _, err := s.Fetch(ctx, remoteURL, "", dir, "") + _, err := s.Fetch(ctx, "", dir, "") require.NoError(t, err) s.assertRefs(t, dir, map[string]string{}) @@ -138,7 +143,7 @@ func TestVcsDependenciesSyncer_Fetch(t *testing.T) { depsService.Add("foo@0.0.2") depsSource.Add("foo@0.0.2") t.Run("error aggregation", func(t *testing.T) { - _, err := s.Fetch(ctx, remoteURL, "", dir, "") + _, err := s.Fetch(ctx, "", dir, "") require.ErrorContains(t, err, "401 unauthorized") // The foo@0.0.1 tag was not created because of the 401 error. @@ -161,7 +166,7 @@ func TestVcsDependenciesSyncer_Fetch(t *testing.T) { t.Run("lazy-sync version via revspec", func(t *testing.T) { // the v0.0.3 tag should be created on-demand through the revspec parameter // For context, see https://github.com/sourcegraph/sourcegraph/pull/38811 - _, err := s.Fetch(ctx, remoteURL, "", dir, "v0.0.3^0") + _, err := s.Fetch(ctx, "", dir, "v0.0.3^0") require.ErrorContains(t, err, "401 unauthorized") // v0.0.1 is still erroring require.Equal(t, s.svc.(*fakeDepsService).upsertedDeps, []dependencies.MinimalPackageRepoRef{{ Scheme: fakeVersionedPackage{}.Scheme(), @@ -179,7 +184,7 @@ func TestVcsDependenciesSyncer_Fetch(t *testing.T) { t.Run("lazy-sync error version via revspec", func(t *testing.T) { // the v0.0.4 tag cannot be created on-demand because it returns a "0.0.4 not found" error - _, err := s.Fetch(ctx, remoteURL, "", dir, "v0.0.4^0") + _, err := s.Fetch(ctx, "", dir, "v0.0.4^0") require.Nil(t, err) // // the 0.0.4 error is silently ignored, we only return the error for v0.0.1. // require.Equal(t, fmt.Sprint(err.Error()), "error pushing dependency {\"foo\" \"0.0.1\"}: 401 unauthorized") @@ -371,12 +376,10 @@ func (f fakeVersionedPackage) Less(other reposource.VersionedPackage) bool { return f.VersionedPackageSyntax() > other.VersionedPackageSyntax() } -func (s *vcsPackagesSyncer) runCloneCommand(t *testing.T, examplePackageURL, bareGitDirectory string, dependencies []string) { - u := vcs.URL{ - URL: url.URL{Path: examplePackageURL}, - } +func (s *vcsPackagesSyncer) runCloneCommand(t *testing.T, bareGitDirectory string, dependencies []string) { + s.configDeps = dependencies - err := s.Clone(context.Background(), "repo", &u, common.GitDir(bareGitDirectory), string(bareGitDirectory), io.Discard) + err := s.Clone(context.Background(), "repo", common.GitDir(bareGitDirectory), string(bareGitDirectory), io.Discard) assert.Nil(t, err) } diff --git a/cmd/gitserver/internal/vcssyncer/perforce.go b/cmd/gitserver/internal/vcssyncer/perforce.go index f7c80b083f0e..5c1069fa6e06 100644 --- a/cmd/gitserver/internal/vcssyncer/perforce.go +++ b/cmd/gitserver/internal/vcssyncer/perforce.go @@ -17,7 +17,6 @@ import ( "github.com/sourcegraph/sourcegraph/cmd/gitserver/internal/executil" "github.com/sourcegraph/sourcegraph/cmd/gitserver/internal/perforce" "github.com/sourcegraph/sourcegraph/cmd/gitserver/internal/urlredactor" - "github.com/sourcegraph/sourcegraph/internal/vcs" "github.com/sourcegraph/sourcegraph/internal/wrexec" "github.com/sourcegraph/sourcegraph/lib/errors" ) @@ -43,9 +42,12 @@ type perforceDepotSyncer struct { // reposDir is the directory where repositories are cloned. reposDir string + + // getRemoteURLSource returns the RemoteURLSource for the given repository. + getRemoteURLSource func(ctx context.Context, name api.RepoName) (RemoteURLSource, error) } -func NewPerforceDepotSyncer(logger log.Logger, r *wrexec.RecordingCommandFactory, connection *schema.PerforceConnection, reposDir, p4Home string) VCSSyncer { +func NewPerforceDepotSyncer(logger log.Logger, r *wrexec.RecordingCommandFactory, connection *schema.PerforceConnection, getRemoteURLSource func(ctx context.Context, name api.RepoName) (RemoteURLSource, error), reposDir, p4Home string) VCSSyncer { return &perforceDepotSyncer{ logger: logger.Scoped("PerforceDepotSyncer"), recordingCommandFactory: r, @@ -54,6 +56,7 @@ func NewPerforceDepotSyncer(logger log.Logger, r *wrexec.RecordingCommandFactory FusionConfig: configureFusionClient(connection), reposDir: reposDir, P4Home: p4Home, + getRemoteURLSource: getRemoteURLSource, } } @@ -62,7 +65,17 @@ func (s *perforceDepotSyncer) Type() string { } // IsCloneable checks to see if the Perforce remote URL is cloneable. -func (s *perforceDepotSyncer) IsCloneable(ctx context.Context, _ api.RepoName, remoteURL *vcs.URL) error { +func (s *perforceDepotSyncer) IsCloneable(ctx context.Context, repoName api.RepoName) error { + source, err := s.getRemoteURLSource(ctx, repoName) + if err != nil { + return errors.Wrap(err, "getting remote URL source") + } + + remoteURL, err := source.RemoteURL(ctx) + if err != nil { + return errors.Wrap(err, "getting remote URL") // This should never happen for Perforce + } + username, password, host, path, err := perforce.DecomposePerforceRemoteURL(remoteURL) if err != nil { return errors.Wrap(err, "invalid perforce remote URL") @@ -82,7 +95,17 @@ func (s *perforceDepotSyncer) IsCloneable(ctx context.Context, _ api.RepoName, r // Clone writes a Perforce depot into tmpPath, using a Perforce-to-git-conversion. // It reports redacted progress logs via the progressWriter. -func (s *perforceDepotSyncer) Clone(ctx context.Context, repo api.RepoName, remoteURL *vcs.URL, _ common.GitDir, tmpPath string, progressWriter io.Writer) (err error) { +func (s *perforceDepotSyncer) Clone(ctx context.Context, repo api.RepoName, _ common.GitDir, tmpPath string, progressWriter io.Writer) (err error) { + source, err := s.getRemoteURLSource(ctx, repo) + if err != nil { + return errors.Wrap(err, "getting remote URL source") + } + + remoteURL, err := source.RemoteURL(ctx) + if err != nil { + return errors.Wrap(err, "getting remote URL") // This should never happen for Perforce + } + // First, make sure the tmpPath exists. if err := os.MkdirAll(tmpPath, os.ModePerm); err != nil { return errors.Wrapf(err, "clone failed to create tmp dir") @@ -193,7 +216,17 @@ func (s *perforceDepotSyncer) buildP4FusionCmd(ctx context.Context, depot, usern } // Fetch tries to fetch updates of a Perforce depot as a Git repository. -func (s *perforceDepotSyncer) Fetch(ctx context.Context, remoteURL *vcs.URL, _ api.RepoName, dir common.GitDir, _ string) ([]byte, error) { +func (s *perforceDepotSyncer) Fetch(ctx context.Context, repoName api.RepoName, dir common.GitDir, _ string) ([]byte, error) { + source, err := s.getRemoteURLSource(ctx, repoName) + if err != nil { + return nil, errors.Wrap(err, "getting remote URL source") + } + + remoteURL, err := source.RemoteURL(ctx) + if err != nil { + return nil, errors.Wrap(err, "getting remote URL") // This should never happen for Perforce + } + p4user, p4passwd, p4port, depot, err := perforce.DecomposePerforceRemoteURL(remoteURL) if err != nil { return nil, errors.Wrap(err, "invalid perforce remote URL") @@ -251,12 +284,6 @@ func (s *perforceDepotSyncer) Fetch(ctx context.Context, remoteURL *vcs.URL, _ a return output, nil } -// RemoteShowCommand returns the command to be executed for showing Git remote of a Perforce depot. -func (s *perforceDepotSyncer) RemoteShowCommand(ctx context.Context, _ *vcs.URL) (cmd *exec.Cmd, err error) { - // Remote info is encoded as in the current repository - return exec.CommandContext(ctx, "git", "remote", "show", "./"), nil -} - func (s *perforceDepotSyncer) p4CommandOptions() []string { flags := []string{} if s.MaxChanges > 0 { diff --git a/cmd/gitserver/internal/vcssyncer/python_packages.go b/cmd/gitserver/internal/vcssyncer/python_packages.go index 9512430563f7..c6abac23f540 100644 --- a/cmd/gitserver/internal/vcssyncer/python_packages.go +++ b/cmd/gitserver/internal/vcssyncer/python_packages.go @@ -25,17 +25,19 @@ func NewPythonPackagesSyncer( connection *schema.PythonPackagesConnection, svc *dependencies.Service, client *pypi.Client, + getRemoteURLSource func(ctx context.Context, name api.RepoName) (RemoteURLSource, error), reposDir string, ) VCSSyncer { return &vcsPackagesSyncer{ - logger: log.Scoped("PythonPackagesSyncer"), - typ: "python_packages", - scheme: dependencies.PythonPackagesScheme, - placeholder: reposource.ParseVersionedPackage("sourcegraph.com/placeholder@v0.0.0"), - svc: svc, - configDeps: connection.Dependencies, - source: &pythonPackagesSyncer{client: client, reposDir: reposDir}, - reposDir: reposDir, + logger: log.Scoped("PythonPackagesSyncer"), + typ: "python_packages", + scheme: dependencies.PythonPackagesScheme, + placeholder: reposource.ParseVersionedPackage("sourcegraph.com/placeholder@v0.0.0"), + svc: svc, + configDeps: connection.Dependencies, + source: &pythonPackagesSyncer{client: client, reposDir: reposDir}, + reposDir: reposDir, + getRemoteURLSource: getRemoteURLSource, } } diff --git a/cmd/gitserver/internal/vcssyncer/ruby_packages.go b/cmd/gitserver/internal/vcssyncer/ruby_packages.go index 0c495e3f9ada..50626501c228 100644 --- a/cmd/gitserver/internal/vcssyncer/ruby_packages.go +++ b/cmd/gitserver/internal/vcssyncer/ruby_packages.go @@ -24,17 +24,19 @@ func NewRubyPackagesSyncer( connection *schema.RubyPackagesConnection, svc *dependencies.Service, client *rubygems.Client, + getRemoteURLSource func(ctx context.Context, name api.RepoName) (RemoteURLSource, error), reposDir string, ) VCSSyncer { return &vcsPackagesSyncer{ - logger: log.Scoped("RubyPackagesSyncer"), - typ: "ruby_packages", - scheme: dependencies.RubyPackagesScheme, - placeholder: reposource.NewRubyVersionedPackage("sourcegraph/placeholder", "0.0.0"), - svc: svc, - configDeps: connection.Dependencies, - reposDir: reposDir, - source: &rubyDependencySource{client: client}, + logger: log.Scoped("RubyPackagesSyncer"), + typ: "ruby_packages", + scheme: dependencies.RubyPackagesScheme, + placeholder: reposource.NewRubyVersionedPackage("sourcegraph/placeholder", "0.0.0"), + svc: svc, + configDeps: connection.Dependencies, + reposDir: reposDir, + source: &rubyDependencySource{client: client}, + getRemoteURLSource: getRemoteURLSource, } } diff --git a/cmd/gitserver/internal/vcssyncer/rust_packages.go b/cmd/gitserver/internal/vcssyncer/rust_packages.go index 8d5fd32836d6..4c9596cebd0b 100644 --- a/cmd/gitserver/internal/vcssyncer/rust_packages.go +++ b/cmd/gitserver/internal/vcssyncer/rust_packages.go @@ -21,17 +21,19 @@ func NewRustPackagesSyncer( connection *schema.RustPackagesConnection, svc *dependencies.Service, client *crates.Client, + getRemoteURLSource func(ctx context.Context, name api.RepoName) (RemoteURLSource, error), reposDir string, ) VCSSyncer { return &vcsPackagesSyncer{ - logger: log.Scoped("RustPackagesSyncer"), - typ: "rust_packages", - scheme: dependencies.RustPackagesScheme, - placeholder: reposource.ParseRustVersionedPackage("sourcegraph.com/placeholder@0.0.0"), - svc: svc, - configDeps: connection.Dependencies, - reposDir: reposDir, - source: &rustDependencySource{client: client}, + logger: log.Scoped("RustPackagesSyncer"), + typ: "rust_packages", + scheme: dependencies.RustPackagesScheme, + placeholder: reposource.ParseRustVersionedPackage("sourcegraph.com/placeholder@0.0.0"), + svc: svc, + configDeps: connection.Dependencies, + reposDir: reposDir, + source: &rustDependencySource{client: client}, + getRemoteURLSource: getRemoteURLSource, } } diff --git a/cmd/gitserver/internal/vcssyncer/syncer.go b/cmd/gitserver/internal/vcssyncer/syncer.go index 173000f60cd9..9f2dcac92cb3 100644 --- a/cmd/gitserver/internal/vcssyncer/syncer.go +++ b/cmd/gitserver/internal/vcssyncer/syncer.go @@ -2,10 +2,9 @@ package vcssyncer import ( "context" - "io" - "os/exec" - jsoniter "github.com/json-iterator/go" + "github.com/sourcegraph/sourcegraph/internal/vcs" + "io" "github.com/sourcegraph/log" @@ -23,7 +22,6 @@ import ( "github.com/sourcegraph/sourcegraph/internal/extsvc/rubygems" "github.com/sourcegraph/sourcegraph/internal/httpcli" "github.com/sourcegraph/sourcegraph/internal/jsonc" - "github.com/sourcegraph/sourcegraph/internal/vcs" "github.com/sourcegraph/sourcegraph/internal/wrexec" "github.com/sourcegraph/sourcegraph/lib/errors" "github.com/sourcegraph/sourcegraph/schema" @@ -36,7 +34,10 @@ type VCSSyncer interface { Type() string // IsCloneable checks to see if the VCS remote URL is cloneable. Any non-nil // error indicates there is a problem. - IsCloneable(ctx context.Context, repoName api.RepoName, remoteURL *vcs.URL) error + // + // All implementations should redact any sensitive information from the + // error message. + IsCloneable(ctx context.Context, repoName api.RepoName) error // Clone should clone the repo onto disk into the given tmpPath. // // For now, regardless of the VCSSyncer implementation, the result that ends @@ -53,7 +54,7 @@ type VCSSyncer interface { // sensitive data like secrets. // Progress reported through the progressWriter will be streamed line-by-line // with both LF and CR being valid line terminators. - Clone(ctx context.Context, repo api.RepoName, remoteURL *vcs.URL, targetDir common.GitDir, tmpPath string, progressWriter io.Writer) error + Clone(ctx context.Context, repo api.RepoName, targetDir common.GitDir, tmpPath string, progressWriter io.Writer) error // Fetch tries to fetch updates from the remote to given directory. // The revspec parameter is optional and specifies that the client is specifically // interested in fetching the provided revspec (example "v2.3.4^0"). @@ -61,9 +62,7 @@ type VCSSyncer interface { // to lazily fetch package versions. More details at // https://github.com/sourcegraph/sourcegraph/issues/37921#issuecomment-1184301885 // Beware that the revspec parameter can be any random user-provided string. - Fetch(ctx context.Context, remoteURL *vcs.URL, repoName api.RepoName, dir common.GitDir, revspec string) ([]byte, error) - // RemoteShowCommand returns the command to be executed for showing remote. - RemoteShowCommand(ctx context.Context, remoteURL *vcs.URL) (cmd *exec.Cmd, err error) + Fetch(ctx context.Context, repoName api.RepoName, dir common.GitDir, revspec string) ([]byte, error) } type NewVCSSyncerOpts struct { @@ -75,6 +74,7 @@ type NewVCSSyncerOpts struct { CoursierCacheDir string RecordingCommandFactory *wrexec.RecordingCommandFactory Logger log.Logger + GetRemoteURLSource func(ctx context.Context, repo api.RepoName) (RemoteURLSource, error) } func NewVCSSyncer(ctx context.Context, opts *NewVCSSyncerOpts) (VCSSyncer, error) { @@ -120,13 +120,13 @@ func NewVCSSyncer(ctx context.Context, opts *NewVCSSyncerOpts) (VCSSyncer, error return nil, err } - return NewPerforceDepotSyncer(opts.Logger, opts.RecordingCommandFactory, &c, opts.ReposDir, p4Home), nil + return NewPerforceDepotSyncer(opts.Logger, opts.RecordingCommandFactory, &c, opts.GetRemoteURLSource, opts.ReposDir, p4Home), nil case extsvc.TypeJVMPackages: var c schema.JVMPackagesConnection if _, err := extractOptions(&c); err != nil { return nil, err } - return NewJVMPackagesSyncer(&c, opts.DepsSvc, opts.CoursierCacheDir, opts.ReposDir), nil + return NewJVMPackagesSyncer(&c, opts.DepsSvc, opts.GetRemoteURLSource, opts.CoursierCacheDir, opts.ReposDir), nil case extsvc.TypeNpmPackages: var c schema.NpmPackagesConnection urn, err := extractOptions(&c) @@ -137,7 +137,7 @@ func NewVCSSyncer(ctx context.Context, opts *NewVCSSyncerOpts) (VCSSyncer, error if err != nil { return nil, err } - return NewNpmPackagesSyncer(c, opts.DepsSvc, cli, opts.ReposDir), nil + return NewNpmPackagesSyncer(c, opts.DepsSvc, cli, opts.GetRemoteURLSource, opts.ReposDir), nil case extsvc.TypeGoModules: var c schema.GoModulesConnection urn, err := extractOptions(&c) @@ -145,7 +145,7 @@ func NewVCSSyncer(ctx context.Context, opts *NewVCSSyncerOpts) (VCSSyncer, error return nil, err } cli := gomodproxy.NewClient(urn, c.Urls, httpcli.ExternalClientFactory) - return NewGoModulesSyncer(&c, opts.DepsSvc, cli, opts.ReposDir), nil + return NewGoModulesSyncer(&c, opts.DepsSvc, cli, opts.GetRemoteURLSource, opts.ReposDir), nil case extsvc.TypePythonPackages: var c schema.PythonPackagesConnection urn, err := extractOptions(&c) @@ -156,7 +156,7 @@ func NewVCSSyncer(ctx context.Context, opts *NewVCSSyncerOpts) (VCSSyncer, error if err != nil { return nil, err } - return NewPythonPackagesSyncer(&c, opts.DepsSvc, cli, opts.ReposDir), nil + return NewPythonPackagesSyncer(&c, opts.DepsSvc, cli, opts.GetRemoteURLSource, opts.ReposDir), nil case extsvc.TypeRustPackages: var c schema.RustPackagesConnection urn, err := extractOptions(&c) @@ -167,7 +167,7 @@ func NewVCSSyncer(ctx context.Context, opts *NewVCSSyncerOpts) (VCSSyncer, error if err != nil { return nil, err } - return NewRustPackagesSyncer(&c, opts.DepsSvc, cli, opts.ReposDir), nil + return NewRustPackagesSyncer(&c, opts.DepsSvc, cli, opts.GetRemoteURLSource, opts.ReposDir), nil case extsvc.TypeRubyPackages: var c schema.RubyPackagesConnection urn, err := extractOptions(&c) @@ -178,12 +178,31 @@ func NewVCSSyncer(ctx context.Context, opts *NewVCSSyncerOpts) (VCSSyncer, error if err != nil { return nil, err } - return NewRubyPackagesSyncer(&c, opts.DepsSvc, cli, opts.ReposDir), nil + return NewRubyPackagesSyncer(&c, opts.DepsSvc, cli, opts.GetRemoteURLSource, opts.ReposDir), nil } - return NewGitRepoSyncer(opts.Logger, opts.RecordingCommandFactory), nil + return NewGitRepoSyncer(opts.Logger, opts.RecordingCommandFactory, opts.GetRemoteURLSource), nil } type notFoundError struct{ error } func (e notFoundError) NotFound() bool { return true } + +// RemoteURLSource is a source of a remote URL for a repository. +// +// The remote URL may change over time, so it's important to use this interface +// to get the remote URL every time instead of caching it at the call site. +type RemoteURLSource interface { + // RemoteURL returns the latest remote URL for a repository. + RemoteURL(ctx context.Context) (*vcs.URL, error) +} + +// RemoteURLSourceFunc is an adapter to allow the use of ordinary functions as +// RemoteURLSource. +type RemoteURLSourceFunc func(ctx context.Context) (*vcs.URL, error) + +func (f RemoteURLSourceFunc) RemoteURL(ctx context.Context) (*vcs.URL, error) { + return f(ctx) +} + +var _ RemoteURLSource = RemoteURLSourceFunc(nil) diff --git a/cmd/gitserver/shared/BUILD.bazel b/cmd/gitserver/shared/BUILD.bazel index c18b3a68efa7..9258a5face45 100644 --- a/cmd/gitserver/shared/BUILD.bazel +++ b/cmd/gitserver/shared/BUILD.bazel @@ -48,6 +48,7 @@ go_library( "//internal/requestinteraction", "//internal/service", "//internal/trace", + "//internal/vcs", "//internal/wrexec", "//lib/errors", "@com_github_sourcegraph_log//:log", diff --git a/cmd/gitserver/shared/shared.go b/cmd/gitserver/shared/shared.go index ffc84ab1a840..9014fc025aad 100644 --- a/cmd/gitserver/shared/shared.go +++ b/cmd/gitserver/shared/shared.go @@ -7,6 +7,7 @@ import ( "database/sql" "encoding/json" "fmt" + "github.com/sourcegraph/sourcegraph/internal/vcs" "net/http" "os/exec" "path/filepath" @@ -111,6 +112,26 @@ func Main(ctx context.Context, observationCtx *observation.Context, ready servic CoursierCacheDir: config.CoursierCacheDir, RecordingCommandFactory: recordingCommandFactory, Logger: logger, + GetRemoteURLSource: func(ctx context.Context, repo api.RepoName) (vcssyncer.RemoteURLSource, error) { + return vcssyncer.RemoteURLSourceFunc(func(ctx context.Context) (*vcs.URL, error) { + rawURL, err := getRemoteURLFunc(ctx, logger, db, repo) + if err != nil { + return nil, errors.Wrapf(err, "getting remote URL for %q", repo) + + } + + u, err := vcs.ParseURL(rawURL) + if err != nil { + // TODO@ggilmore: Note that we can't redact the URL here because we can't + // parse it to know where the sensitive information is. + return nil, errors.Wrapf(err, "parsing remote URL %q", rawURL) + } + + return u, nil + + }), nil + + }, }) }, Hostname: config.ExternalAddress,