Skip to content

Commit

Permalink
Merge pull request kubernetes#30400 from listx/inrepoconfig-optimizat…
Browse files Browse the repository at this point in the history
…ions

inrepoconfig: optimize secondary clones
  • Loading branch information
k8s-ci-robot committed Aug 18, 2023
2 parents 1e81b5a + 8e5361f commit 916ed2b
Show file tree
Hide file tree
Showing 6 changed files with 226 additions and 16 deletions.
23 changes: 21 additions & 2 deletions prow/config/inrepoconfig.go
Expand Up @@ -43,6 +43,25 @@ const (
inRepoConfigDirName = ".prow"
)

var inrepoconfigRepoOpts = git.RepoOpts{
// Technically we only need inRepoConfigDirName (".prow") because the
// default "cone mode" of sparse checkouts already include files at the
// toplevel (which would include ".prow.yaml").
//
// TODO (listx): The version of git shipped in kubekins-e2e (itself
// derived from the bootstrap image) uses git version 2.30.2, which does
// not populate files at the toplevel. So we have to also set a sparse
// checkout of ".prow.yaml". Later when that image is updated, we can
// remove the use of inRepoConfigFileName (".prow.yaml"), so that the
// unit tests in CI can pass. As for the Prow components themselves,
// they use a different version of Git based on alpine (see .ko.yaml in
// the root).
SparseCheckoutDirs: []string{inRepoConfigDirName, inRepoConfigFileName},
// The sparse checkout would avoid creating another copy of Git objects
// from the mirror clone into the secondary clone.
ShareObjectsWithSourceRepo: true,
}

var inrepoconfigMetrics = struct {
gitCloneDuration *prometheus.HistogramVec
gitOtherDuration *prometheus.HistogramVec
Expand Down Expand Up @@ -119,7 +138,7 @@ func prowYAMLGetter(
}

timeBeforeClone := time.Now()
repo, err := gc.ClientFor(orgRepo.Org, orgRepo.Repo)
repo, err := gc.ClientForWithRepoOpts(orgRepo.Org, orgRepo.Repo, inrepoconfigRepoOpts)
inrepoconfigMetrics.gitCloneDuration.WithLabelValues(orgRepo.Org, orgRepo.Repo).Observe((float64(time.Since(timeBeforeClone).Seconds())))
if err != nil {
return nil, fmt.Errorf("failed to clone repo for %q: %w", identifier, err)
Expand Down Expand Up @@ -379,7 +398,7 @@ func (c *InRepoConfigGitCache) ClientFor(org, repo string) (git.RepoClient, erro
if cached != nil || err != nil {
return cached, err
}
coreClient, err := c.ClientFactory.ClientFor(org, repo)
coreClient, err := c.ClientFactory.ClientForWithRepoOpts(org, repo, inrepoconfigRepoOpts)
if err != nil {
return nil, err
}
Expand Down
7 changes: 7 additions & 0 deletions prow/config/inrepoconfig_test.go
Expand Up @@ -681,6 +681,13 @@ func (cf *testClientFactory) ClientFor(org, repo string) (git.RepoClient, error)
return &fetchOnlyNoCleanRepoClient{cf.rcMap[repo]}, nil
}

func (cf *testClientFactory) ClientForWithRepoOpts(org, repo string, repoOpts git.RepoOpts) (git.RepoClient, error) {
cf.clientsCreated++
// Returning this RepoClient ensures that only Fetch() is called and that Close() is not.

return &fetchOnlyNoCleanRepoClient{cf.rcMap[repo]}, nil
}

type fetchOnlyNoCleanRepoClient struct {
git.RepoClient // This will be nil during testing, we override the functions that are allowed to be used.
}
Expand Down
6 changes: 6 additions & 0 deletions prow/git/v2/adapter.go
Expand Up @@ -59,6 +59,12 @@ func (a *clientFactoryAdapter) ClientFor(org, repo string) (RepoClient, error) {
return &repoClientAdapter{Repo: r}, err
}

// v1 clients do not support customizing pre-repo options.
func (a *clientFactoryAdapter) ClientForWithRepoOpts(org, repo string, repoOpts RepoOpts) (RepoClient, error) {
r, err := a.Client.Clone(org, repo)
return &repoClientAdapter{Repo: r}, err
}

type repoClientAdapter struct {
*git.Repo
}
Expand Down
39 changes: 34 additions & 5 deletions prow/git/v2/client_factory.go
Expand Up @@ -35,6 +35,10 @@ type ClientFactory interface {
ClientFromDir(org, repo, dir string) (RepoClient, error)
// ClientFor creates a client that operates on a new clone of the repo.
ClientFor(org, repo string) (RepoClient, error)
// ClientForWithRepoOpts is like ClientFor, but allows you to customize the
// setup of the cloned repo (such as sparse checkouts instead of using the
// default full clone).
ClientForWithRepoOpts(org, repo string, repoOpts RepoOpts) (RepoClient, error)

// Clean removes the caches used to generate clients
Clean() error
Expand Down Expand Up @@ -75,6 +79,21 @@ type ClientFactoryOpts struct {
Persist *bool
}

// These options are scoped to the repo, not the ClientFactory level. The reason
// for the separation is to allow a single process to have for example repos
// that are both sparsely checked out and non-sparsely checked out.
type RepoOpts struct {
// sparseCheckoutDirs is the list of directories that the working tree
// should have. If non-nil and empty, then the working tree only has files
// reachable from the root. If non-nil and non-empty, then those additional
// directories from the root are also checked out (populated) in the working
// tree, recursively.
SparseCheckoutDirs []string
// This is the `--share` flag to `git clone`. For cloning from a local
// source, it allows bypassing the copying of all objects.
ShareObjectsWithSourceRepo bool
}

// Apply allows to use a ClientFactoryOpts as Opt
func (cfo *ClientFactoryOpts) Apply(target *ClientFactoryOpts) {
if cfo.Host != "" {
Expand Down Expand Up @@ -278,15 +297,25 @@ func (c *clientFactory) ClientFromDir(org, repo, dir string) (RepoClient, error)
return client, err
}

// ClientFor returns a repository client for the specified repository.
// ClientFor wraps around ClientForWithRepoOpts using the default RepoOpts{}
// (empty value). Originally, ClientFor was not a wrapper at all and did the
// work inside ClientForWithRepoOpts itself, but it did this without RepoOpts.
// When RepoOpts was created, we made ClientFor wrap around
// ClientForWithRepoOpts to preserve behavior of existing callers of ClientFor.
func (c *clientFactory) ClientFor(org, repo string) (RepoClient, error) {
return c.ClientForWithRepoOpts(org, repo, RepoOpts{})
}

// ClientForWithRepoOpts returns a repository client for the specified repository.
// This function may take a long time if it is the first time cloning the repo.
// In that case, it must do a full git mirror clone. For large repos, this can
// take a while. Once that is done, it will do a git fetch instead of a clone,
// which will usually take at most a few seconds.
// take a while. Once that is done, it will do a git remote update (essentially
// git fetch) for the mirror clone, which will usually take at most a few
// seconds, before creating a secondary clone from this (updated) mirror.
//
// org and repo are used for determining where the repo is cloned, cloneURI
// overrides org/repo for cloning.
func (c *clientFactory) ClientFor(org, repo string) (RepoClient, error) {
func (c *clientFactory) ClientForWithRepoOpts(org, repo string, repoOpts RepoOpts) (RepoClient, error) {
cacheDir := path.Join(c.cacheDir, org, repo)
c.logger.WithFields(logrus.Fields{"org": org, "repo": repo, "dir": cacheDir}).Debug("Creating a client from the cache.")
cacheClientCacher, _, _, err := c.bootstrapClients(org, repo, cacheDir)
Expand Down Expand Up @@ -324,7 +353,7 @@ func (c *clientFactory) ClientFor(org, repo string) (RepoClient, error) {
}

// initialize the new derivative repo from the cache
if err := repoClientCloner.Clone(cacheDir); err != nil {
if err := repoClientCloner.CloneWithRepoOpts(cacheDir, repoOpts); err != nil {
return nil, err
}

Expand Down
39 changes: 38 additions & 1 deletion prow/git/v2/interactor.go
Expand Up @@ -88,6 +88,7 @@ type cacher interface {
type cloner interface {
// Clone clones the repository from a local path.
Clone(from string) error
CloneWithRepoOpts(from string, repoOpts RepoOpts) error
}

// MergeOpt holds options for git merge operations.
Expand Down Expand Up @@ -139,10 +140,46 @@ func (i *interactor) IsDirty() (bool, error) {

// Clone clones the repository from a local path.
func (i *interactor) Clone(from string) error {
return i.CloneWithRepoOpts(from, RepoOpts{})
}

// CloneWithRepoOpts clones the repository from a local path, but additionally
// use any repository options (RepoOpts) to customize the clone behavior.
func (i *interactor) CloneWithRepoOpts(from string, repoOpts RepoOpts) error {
i.logger.Infof("Creating a clone of the repo at %s from %s", i.dir, from)
if out, err := i.executor.Run("clone", from, i.dir); err != nil {
cloneArgs := []string{"clone"}

if repoOpts.ShareObjectsWithSourceRepo {
cloneArgs = append(cloneArgs, "--shared")
}

// Handle sparse checkouts.
if repoOpts.SparseCheckoutDirs != nil {
cloneArgs = append(cloneArgs, "--sparse")
}

cloneArgs = append(cloneArgs, []string{from, i.dir}...)

if out, err := i.executor.Run(cloneArgs...); err != nil {
return fmt.Errorf("error creating a clone: %w %v", err, string(out))
}

// For sparse checkouts, we have to do some additional housekeeping after
// the clone is completed. We use Git's global "-C <directory>" flag to
// switch to that directory before running the "sparse-checkout" command,
// because otherwise the command will fail (because it will try to run the
// command in the $PWD, which is not the same as the just-created clone
// directory (i.dir)).
if repoOpts.SparseCheckoutDirs != nil {
if len(repoOpts.SparseCheckoutDirs) == 0 {
return nil
}
sparseCheckoutArgs := []string{"-C", i.dir, "sparse-checkout", "set"}
sparseCheckoutArgs = append(sparseCheckoutArgs, repoOpts.SparseCheckoutDirs...)
if out, err := i.executor.Run(sparseCheckoutArgs...); err != nil {
return fmt.Errorf("error setting it to a sparse checkout: %w %v", err, string(out))
}
}
return nil
}

Expand Down
128 changes: 120 additions & 8 deletions prow/git/v2/interactor_test.go
Expand Up @@ -39,29 +39,29 @@ func TestInteractor_Clone(t *testing.T) {
}{
{
name: "happy case",
dir: "/else",
from: "/somewhere",
dir: "/secondaryclone",
from: "/mirrorclone",
responses: map[string]execResponse{
"clone /somewhere /else": {
"clone /mirrorclone /secondaryclone": {
out: []byte(`ok`),
},
},
expectedCalls: [][]string{
{"clone", "/somewhere", "/else"},
{"clone", "/mirrorclone", "/secondaryclone"},
},
expectedErr: false,
},
{
name: "clone fails",
dir: "/else",
from: "/somewhere",
dir: "/secondaryclone",
from: "/mirrorclone",
responses: map[string]execResponse{
"clone /somewhere /else": {
"clone /mirrorclone /secondaryclone": {
err: errors.New("oops"),
},
},
expectedCalls: [][]string{
{"clone", "/somewhere", "/else"},
{"clone", "/mirrorclone", "/secondaryclone"},
},
expectedErr: true,
},
Expand Down Expand Up @@ -93,6 +93,118 @@ func TestInteractor_Clone(t *testing.T) {
}
}

func TestInteractor_CloneWithRepoOpts(t *testing.T) {
var testCases = []struct {
name string
dir string
from string
remote RemoteResolver
repoOpts RepoOpts
responses map[string]execResponse
expectedCalls [][]string
expectedErr bool
}{
{
name: "blank RepoOpts",
dir: "/secondaryclone",
from: "/mirrorclone",
repoOpts: RepoOpts{},
responses: map[string]execResponse{
"clone /mirrorclone /secondaryclone": {
out: []byte(`ok`),
},
},
expectedCalls: [][]string{
{"clone", "/mirrorclone", "/secondaryclone"},
},
expectedErr: false,
},
{
name: "shared git objects",
dir: "/secondaryclone",
from: "/mirrorclone",
repoOpts: RepoOpts{
SparseCheckoutDirs: nil,
ShareObjectsWithSourceRepo: true,
},
responses: map[string]execResponse{
"clone --shared /mirrorclone /secondaryclone": {
out: []byte(`ok`),
},
},
expectedCalls: [][]string{
{"clone", "--shared", "/mirrorclone", "/secondaryclone"},
},
expectedErr: false,
},
{
name: "shared git objects and sparse checkout (toplevel only)",
dir: "/secondaryclone",
from: "/mirrorclone",
repoOpts: RepoOpts{
SparseCheckoutDirs: []string{},
ShareObjectsWithSourceRepo: true,
},
responses: map[string]execResponse{
"clone --shared --sparse /mirrorclone /secondaryclone": {
out: []byte(`ok`),
},
},
expectedCalls: [][]string{
{"clone", "--shared", "--sparse", "/mirrorclone", "/secondaryclone"},
},
expectedErr: false,
},
{
name: "shared git objects and sparse checkout (toplevel+subdirs)",
dir: "/secondaryclone",
from: "/mirrorclone",
repoOpts: RepoOpts{
SparseCheckoutDirs: []string{"a", "b"},
ShareObjectsWithSourceRepo: true,
},
responses: map[string]execResponse{
"clone --shared --sparse /mirrorclone /secondaryclone": {
out: []byte(`ok`),
},
"-C /secondaryclone sparse-checkout set a b": {
out: []byte(`ok`),
},
},
expectedCalls: [][]string{
{"clone", "--shared", "--sparse", "/mirrorclone", "/secondaryclone"},
{"-C", "/secondaryclone", "sparse-checkout", "set", "a", "b"},
},
expectedErr: false,
},
}

for _, testCase := range testCases {
t.Run(testCase.name, func(t *testing.T) {
e := fakeExecutor{
records: [][]string{},
responses: testCase.responses,
}
i := interactor{
executor: &e,
remote: testCase.remote,
dir: testCase.dir,
logger: logrus.WithField("test", testCase.name),
}
actualErr := i.CloneWithRepoOpts(testCase.from, testCase.repoOpts)
if testCase.expectedErr && actualErr == nil {
t.Errorf("%s: expected an error but got none", testCase.name)
}
if !testCase.expectedErr && actualErr != nil {
t.Errorf("%s: expected no error but got one: %v", testCase.name, actualErr)
}
if actual, expected := e.records, testCase.expectedCalls; !reflect.DeepEqual(actual, expected) {
t.Errorf("%s: got incorrect git calls: %v", testCase.name, diff.ObjectReflectDiff(actual, expected))
}
})
}
}

func TestInteractor_MirrorClone(t *testing.T) {
var testCases = []struct {
name string
Expand Down

0 comments on commit 916ed2b

Please sign in to comment.