diff --git a/prow/config/inrepoconfig.go b/prow/config/inrepoconfig.go index f978e311e59c..ab4cae4e7724 100644 --- a/prow/config/inrepoconfig.go +++ b/prow/config/inrepoconfig.go @@ -43,6 +43,25 @@ const ( inRepoConfigDirName = ".prow" ) +var inrepoconfigRepoOpts = git.RepoOpts{ + // Technically we only need inRepoConfigDirName (".prow") because the + // default "cone mode" of sparse checkouts already include files at the + // toplevel (which would include ".prow.yaml"). + // + // TODO (listx): The version of git shipped in kubekins-e2e (itself + // derived from the bootstrap image) uses git version 2.30.2, which does + // not populate files at the toplevel. So we have to also set a sparse + // checkout of ".prow.yaml". Later when that image is updated, we can + // remove the use of inRepoConfigFileName (".prow.yaml"), so that the + // unit tests in CI can pass. As for the Prow components themselves, + // they use a different version of Git based on alpine (see .ko.yaml in + // the root). + SparseCheckoutDirs: []string{inRepoConfigDirName, inRepoConfigFileName}, + // The sparse checkout would avoid creating another copy of Git objects + // from the mirror clone into the secondary clone. + ShareObjectsWithSourceRepo: true, +} + var inrepoconfigMetrics = struct { gitCloneDuration *prometheus.HistogramVec gitOtherDuration *prometheus.HistogramVec @@ -119,7 +138,7 @@ func prowYAMLGetter( } timeBeforeClone := time.Now() - repo, err := gc.ClientFor(orgRepo.Org, orgRepo.Repo) + repo, err := gc.ClientForWithRepoOpts(orgRepo.Org, orgRepo.Repo, inrepoconfigRepoOpts) inrepoconfigMetrics.gitCloneDuration.WithLabelValues(orgRepo.Org, orgRepo.Repo).Observe((float64(time.Since(timeBeforeClone).Seconds()))) if err != nil { return nil, fmt.Errorf("failed to clone repo for %q: %w", identifier, err) @@ -379,7 +398,7 @@ func (c *InRepoConfigGitCache) ClientFor(org, repo string) (git.RepoClient, erro if cached != nil || err != nil { return cached, err } - coreClient, err := c.ClientFactory.ClientFor(org, repo) + coreClient, err := c.ClientFactory.ClientForWithRepoOpts(org, repo, inrepoconfigRepoOpts) if err != nil { return nil, err } diff --git a/prow/config/inrepoconfig_test.go b/prow/config/inrepoconfig_test.go index 255934d77b69..54e37e4fcf07 100644 --- a/prow/config/inrepoconfig_test.go +++ b/prow/config/inrepoconfig_test.go @@ -681,6 +681,13 @@ func (cf *testClientFactory) ClientFor(org, repo string) (git.RepoClient, error) return &fetchOnlyNoCleanRepoClient{cf.rcMap[repo]}, nil } +func (cf *testClientFactory) ClientForWithRepoOpts(org, repo string, repoOpts git.RepoOpts) (git.RepoClient, error) { + cf.clientsCreated++ + // Returning this RepoClient ensures that only Fetch() is called and that Close() is not. + + return &fetchOnlyNoCleanRepoClient{cf.rcMap[repo]}, nil +} + type fetchOnlyNoCleanRepoClient struct { git.RepoClient // This will be nil during testing, we override the functions that are allowed to be used. } diff --git a/prow/git/v2/adapter.go b/prow/git/v2/adapter.go index e2715db70157..540b1afcc3c3 100644 --- a/prow/git/v2/adapter.go +++ b/prow/git/v2/adapter.go @@ -59,6 +59,12 @@ func (a *clientFactoryAdapter) ClientFor(org, repo string) (RepoClient, error) { return &repoClientAdapter{Repo: r}, err } +// v1 clients do not support customizing pre-repo options. +func (a *clientFactoryAdapter) ClientForWithRepoOpts(org, repo string, repoOpts RepoOpts) (RepoClient, error) { + r, err := a.Client.Clone(org, repo) + return &repoClientAdapter{Repo: r}, err +} + type repoClientAdapter struct { *git.Repo } diff --git a/prow/git/v2/client_factory.go b/prow/git/v2/client_factory.go index d44198ed27bb..11df3d7e49bf 100644 --- a/prow/git/v2/client_factory.go +++ b/prow/git/v2/client_factory.go @@ -35,6 +35,10 @@ type ClientFactory interface { ClientFromDir(org, repo, dir string) (RepoClient, error) // ClientFor creates a client that operates on a new clone of the repo. ClientFor(org, repo string) (RepoClient, error) + // ClientForWithRepoOpts is like ClientFor, but allows you to customize the + // setup of the cloned repo (such as sparse checkouts instead of using the + // default full clone). + ClientForWithRepoOpts(org, repo string, repoOpts RepoOpts) (RepoClient, error) // Clean removes the caches used to generate clients Clean() error @@ -75,6 +79,21 @@ type ClientFactoryOpts struct { Persist *bool } +// These options are scoped to the repo, not the ClientFactory level. The reason +// for the separation is to allow a single process to have for example repos +// that are both sparsely checked out and non-sparsely checked out. +type RepoOpts struct { + // sparseCheckoutDirs is the list of directories that the working tree + // should have. If non-nil and empty, then the working tree only has files + // reachable from the root. If non-nil and non-empty, then those additional + // directories from the root are also checked out (populated) in the working + // tree, recursively. + SparseCheckoutDirs []string + // This is the `--share` flag to `git clone`. For cloning from a local + // source, it allows bypassing the copying of all objects. + ShareObjectsWithSourceRepo bool +} + // Apply allows to use a ClientFactoryOpts as Opt func (cfo *ClientFactoryOpts) Apply(target *ClientFactoryOpts) { if cfo.Host != "" { @@ -278,15 +297,25 @@ func (c *clientFactory) ClientFromDir(org, repo, dir string) (RepoClient, error) return client, err } -// ClientFor returns a repository client for the specified repository. +// ClientFor wraps around ClientForWithRepoOpts using the default RepoOpts{} +// (empty value). Originally, ClientFor was not a wrapper at all and did the +// work inside ClientForWithRepoOpts itself, but it did this without RepoOpts. +// When RepoOpts was created, we made ClientFor wrap around +// ClientForWithRepoOpts to preserve behavior of existing callers of ClientFor. +func (c *clientFactory) ClientFor(org, repo string) (RepoClient, error) { + return c.ClientForWithRepoOpts(org, repo, RepoOpts{}) +} + +// ClientForWithRepoOpts returns a repository client for the specified repository. // This function may take a long time if it is the first time cloning the repo. // In that case, it must do a full git mirror clone. For large repos, this can -// take a while. Once that is done, it will do a git fetch instead of a clone, -// which will usually take at most a few seconds. +// take a while. Once that is done, it will do a git remote update (essentially +// git fetch) for the mirror clone, which will usually take at most a few +// seconds, before creating a secondary clone from this (updated) mirror. // // org and repo are used for determining where the repo is cloned, cloneURI // overrides org/repo for cloning. -func (c *clientFactory) ClientFor(org, repo string) (RepoClient, error) { +func (c *clientFactory) ClientForWithRepoOpts(org, repo string, repoOpts RepoOpts) (RepoClient, error) { cacheDir := path.Join(c.cacheDir, org, repo) c.logger.WithFields(logrus.Fields{"org": org, "repo": repo, "dir": cacheDir}).Debug("Creating a client from the cache.") cacheClientCacher, _, _, err := c.bootstrapClients(org, repo, cacheDir) @@ -324,7 +353,7 @@ func (c *clientFactory) ClientFor(org, repo string) (RepoClient, error) { } // initialize the new derivative repo from the cache - if err := repoClientCloner.Clone(cacheDir); err != nil { + if err := repoClientCloner.CloneWithRepoOpts(cacheDir, repoOpts); err != nil { return nil, err } diff --git a/prow/git/v2/interactor.go b/prow/git/v2/interactor.go index c82af6585d08..e0cf650820ce 100644 --- a/prow/git/v2/interactor.go +++ b/prow/git/v2/interactor.go @@ -88,6 +88,7 @@ type cacher interface { type cloner interface { // Clone clones the repository from a local path. Clone(from string) error + CloneWithRepoOpts(from string, repoOpts RepoOpts) error } // MergeOpt holds options for git merge operations. @@ -139,10 +140,46 @@ func (i *interactor) IsDirty() (bool, error) { // Clone clones the repository from a local path. func (i *interactor) Clone(from string) error { + return i.CloneWithRepoOpts(from, RepoOpts{}) +} + +// CloneWithRepoOpts clones the repository from a local path, but additionally +// use any repository options (RepoOpts) to customize the clone behavior. +func (i *interactor) CloneWithRepoOpts(from string, repoOpts RepoOpts) error { i.logger.Infof("Creating a clone of the repo at %s from %s", i.dir, from) - if out, err := i.executor.Run("clone", from, i.dir); err != nil { + cloneArgs := []string{"clone"} + + if repoOpts.ShareObjectsWithSourceRepo { + cloneArgs = append(cloneArgs, "--shared") + } + + // Handle sparse checkouts. + if repoOpts.SparseCheckoutDirs != nil { + cloneArgs = append(cloneArgs, "--sparse") + } + + cloneArgs = append(cloneArgs, []string{from, i.dir}...) + + if out, err := i.executor.Run(cloneArgs...); err != nil { return fmt.Errorf("error creating a clone: %w %v", err, string(out)) } + + // For sparse checkouts, we have to do some additional housekeeping after + // the clone is completed. We use Git's global "-C " flag to + // switch to that directory before running the "sparse-checkout" command, + // because otherwise the command will fail (because it will try to run the + // command in the $PWD, which is not the same as the just-created clone + // directory (i.dir)). + if repoOpts.SparseCheckoutDirs != nil { + if len(repoOpts.SparseCheckoutDirs) == 0 { + return nil + } + sparseCheckoutArgs := []string{"-C", i.dir, "sparse-checkout", "set"} + sparseCheckoutArgs = append(sparseCheckoutArgs, repoOpts.SparseCheckoutDirs...) + if out, err := i.executor.Run(sparseCheckoutArgs...); err != nil { + return fmt.Errorf("error setting it to a sparse checkout: %w %v", err, string(out)) + } + } return nil } diff --git a/prow/git/v2/interactor_test.go b/prow/git/v2/interactor_test.go index 687453c494d0..f4b07b961189 100644 --- a/prow/git/v2/interactor_test.go +++ b/prow/git/v2/interactor_test.go @@ -39,29 +39,29 @@ func TestInteractor_Clone(t *testing.T) { }{ { name: "happy case", - dir: "/else", - from: "/somewhere", + dir: "/secondaryclone", + from: "/mirrorclone", responses: map[string]execResponse{ - "clone /somewhere /else": { + "clone /mirrorclone /secondaryclone": { out: []byte(`ok`), }, }, expectedCalls: [][]string{ - {"clone", "/somewhere", "/else"}, + {"clone", "/mirrorclone", "/secondaryclone"}, }, expectedErr: false, }, { name: "clone fails", - dir: "/else", - from: "/somewhere", + dir: "/secondaryclone", + from: "/mirrorclone", responses: map[string]execResponse{ - "clone /somewhere /else": { + "clone /mirrorclone /secondaryclone": { err: errors.New("oops"), }, }, expectedCalls: [][]string{ - {"clone", "/somewhere", "/else"}, + {"clone", "/mirrorclone", "/secondaryclone"}, }, expectedErr: true, }, @@ -93,6 +93,118 @@ func TestInteractor_Clone(t *testing.T) { } } +func TestInteractor_CloneWithRepoOpts(t *testing.T) { + var testCases = []struct { + name string + dir string + from string + remote RemoteResolver + repoOpts RepoOpts + responses map[string]execResponse + expectedCalls [][]string + expectedErr bool + }{ + { + name: "blank RepoOpts", + dir: "/secondaryclone", + from: "/mirrorclone", + repoOpts: RepoOpts{}, + responses: map[string]execResponse{ + "clone /mirrorclone /secondaryclone": { + out: []byte(`ok`), + }, + }, + expectedCalls: [][]string{ + {"clone", "/mirrorclone", "/secondaryclone"}, + }, + expectedErr: false, + }, + { + name: "shared git objects", + dir: "/secondaryclone", + from: "/mirrorclone", + repoOpts: RepoOpts{ + SparseCheckoutDirs: nil, + ShareObjectsWithSourceRepo: true, + }, + responses: map[string]execResponse{ + "clone --shared /mirrorclone /secondaryclone": { + out: []byte(`ok`), + }, + }, + expectedCalls: [][]string{ + {"clone", "--shared", "/mirrorclone", "/secondaryclone"}, + }, + expectedErr: false, + }, + { + name: "shared git objects and sparse checkout (toplevel only)", + dir: "/secondaryclone", + from: "/mirrorclone", + repoOpts: RepoOpts{ + SparseCheckoutDirs: []string{}, + ShareObjectsWithSourceRepo: true, + }, + responses: map[string]execResponse{ + "clone --shared --sparse /mirrorclone /secondaryclone": { + out: []byte(`ok`), + }, + }, + expectedCalls: [][]string{ + {"clone", "--shared", "--sparse", "/mirrorclone", "/secondaryclone"}, + }, + expectedErr: false, + }, + { + name: "shared git objects and sparse checkout (toplevel+subdirs)", + dir: "/secondaryclone", + from: "/mirrorclone", + repoOpts: RepoOpts{ + SparseCheckoutDirs: []string{"a", "b"}, + ShareObjectsWithSourceRepo: true, + }, + responses: map[string]execResponse{ + "clone --shared --sparse /mirrorclone /secondaryclone": { + out: []byte(`ok`), + }, + "-C /secondaryclone sparse-checkout set a b": { + out: []byte(`ok`), + }, + }, + expectedCalls: [][]string{ + {"clone", "--shared", "--sparse", "/mirrorclone", "/secondaryclone"}, + {"-C", "/secondaryclone", "sparse-checkout", "set", "a", "b"}, + }, + expectedErr: false, + }, + } + + for _, testCase := range testCases { + t.Run(testCase.name, func(t *testing.T) { + e := fakeExecutor{ + records: [][]string{}, + responses: testCase.responses, + } + i := interactor{ + executor: &e, + remote: testCase.remote, + dir: testCase.dir, + logger: logrus.WithField("test", testCase.name), + } + actualErr := i.CloneWithRepoOpts(testCase.from, testCase.repoOpts) + if testCase.expectedErr && actualErr == nil { + t.Errorf("%s: expected an error but got none", testCase.name) + } + if !testCase.expectedErr && actualErr != nil { + t.Errorf("%s: expected no error but got one: %v", testCase.name, actualErr) + } + if actual, expected := e.records, testCase.expectedCalls; !reflect.DeepEqual(actual, expected) { + t.Errorf("%s: got incorrect git calls: %v", testCase.name, diff.ObjectReflectDiff(actual, expected)) + } + }) + } +} + func TestInteractor_MirrorClone(t *testing.T) { var testCases = []struct { name string