Skip to content

Commit

Permalink
git v2 client for inrepoconfig: allow targeted fetches (2nd attempt)
Browse files Browse the repository at this point in the history
This is a reintroduction of bc68007 (git v2 client for inrepoconfig:
allow targeted fetches, 2023-08-21), which was reverted in
fb82b69 (Revert "Merge pull request #30453 from listx/minimal-fetch",
2023-08-22).

This version includes a fix for the timing of the fetch. In bc68007
we called ensureCommits() with "repoClient", but _before_ it was
cloned (created)! This version fixes that (now obvious) bug. Below is
the original commit message of bc68007, because the semantics there
still apply as is.

Allow fetching a small number of commit SHAs directly if RepoOpts
specifies FetchCommits.  This avoids the costly RemoteUpdate call which
fetches every remote tracking branch (as well as all new ones that show
up on the remote).

Make tag fetching optional because they could be unnecessary (for
inrepoconfig, we already check out a working tree based on the baseSHA
and headSHAs in prowYAMLGetter).

Also, only perform a fetch once. Previously in ensureCommits we did at
least 2 separate fetch operations for every presubmit job, because a
presubmit has a baseSHA and a headSHA (PR HEAD). Now we combine them
into a single fetch command, so that we talk to the server once instead
of twice.
  • Loading branch information
Linus Arver committed Aug 23, 2023
1 parent 6406e6d commit bdd601c
Show file tree
Hide file tree
Showing 2 changed files with 65 additions and 27 deletions.
29 changes: 6 additions & 23 deletions prow/config/inrepoconfig.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,9 @@ var inrepoconfigRepoOpts = git.RepoOpts{
// The sparse checkout would avoid creating another copy of Git objects
// from the mirror clone into the secondary clone.
ShareObjectsWithSourceRepo: true,
// Disable fetching tag objects, because they are redundant and useless (we
// already have baseSHA and headSHAs).
NoFetchTags: true,
}

var inrepoconfigMetrics = struct {
Expand Down Expand Up @@ -137,7 +140,9 @@ func prowYAMLGetter(
}

timeBeforeClone := time.Now()
repo, err := gc.ClientForWithRepoOpts(orgRepo.Org, orgRepo.Repo, inrepoconfigRepoOpts)
repoOpts := inrepoconfigRepoOpts
repoOpts.FetchCommits = append(headSHAs, baseSHA)
repo, err := gc.ClientForWithRepoOpts(orgRepo.Org, orgRepo.Repo, repoOpts)
inrepoconfigMetrics.gitCloneDuration.WithLabelValues(orgRepo.Org, orgRepo.Repo).Observe((float64(time.Since(timeBeforeClone).Seconds())))
if err != nil {
return nil, fmt.Errorf("failed to clone repo for %q: %w", identifier, err)
Expand Down Expand Up @@ -170,35 +175,13 @@ func prowYAMLGetter(
}

log.WithField("merge-strategy", mergeMethod).Debug("Using merge strategy.")
if err := ensureCommits(repo, baseSHA, headSHAs...); err != nil {
return nil, fmt.Errorf("failed to fetch headSHAs: %v", err)
}
if err := repo.MergeAndCheckout(baseSHA, string(mergeMethod), headSHAs...); err != nil {
return nil, fmt.Errorf("failed to merge: %w", err)
}

return ReadProwYAML(log, repo.Directory(), false)
}

func ensureCommits(repo git.RepoClient, baseSHA string, headSHAs ...string) error {
//Ensure baseSHA exists.
if exists, _ := repo.ObjectExists(baseSHA); !exists {
if err := repo.Fetch(baseSHA); err != nil {
return fmt.Errorf("failed to fetch baseSHA: %s: %v", baseSHA, err)
}
}
//Ensure headSHAs exist
for _, sha := range headSHAs {
// This is best effort. No need to check for error
if exists, _ := repo.ObjectExists(sha); !exists {
if err := repo.Fetch(sha); err != nil {
return fmt.Errorf("failed to fetch headSHA: %s: %v", sha, err)
}
}
}
return nil
}

// ReadProwYAML parses the .prow.yaml file or .prow directory, no commit checkout or defaulting is included.
func ReadProwYAML(log *logrus.Entry, dir string, strict bool) (*ProwYAML, error) {
prowYAML := &ProwYAML{}
Expand Down
63 changes: 59 additions & 4 deletions prow/git/v2/client_factory.go
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,13 @@ type RepoOpts struct {
// This is the `--share` flag to `git clone`. For cloning from a local
// source, it allows bypassing the copying of all objects.
ShareObjectsWithSourceRepo bool
// FetchCommits list only those commit SHAs which are needed. If the commit
// already exists, it is not fetched to save network costs. If FetchCommits
// is set, we do not call RemoteUpdate() for the primary clone (git cache).
FetchCommits []string
// NoFetchTags determines whether we disable fetching tag objects). Defaults
// to false (tag objects are fetched).
NoFetchTags bool
}

// Apply allows to use a ClientFactoryOpts as Opt
Expand Down Expand Up @@ -339,21 +346,69 @@ func (c *clientFactory) ClientForWithRepoOpts(org, repo string, repoOpts RepoOpt
// something unexpected happened
return nil, err
} else {
// we have cloned the repo previously, but will refresh it
if err := cacheClientCacher.RemoteUpdate(); err != nil {
return nil, err
// We have cloned the repo previously, but will refresh it. By default
// we refresh all refs with a call to `git remote update`.
//
// This is the default behavior if FetchCommits is empty or nil (i.e.,
// when we don't define a targeted list of commits to fetch directly).
if len(repoOpts.FetchCommits) == 0 {
if err := cacheClientCacher.RemoteUpdate(); err != nil {
return nil, err
}
}
}

// initialize the new derivative repo from the cache
// Clone the secondary clone (local clone with checked out files) from the
// primary (bare repo).
if err := repoClientCloner.CloneWithRepoOpts(cacheDir, repoOpts); err != nil {
return nil, err
}

// Here we do a targeted fetch, if the repoOpts for the secondary clone
// asked for it.
//
// TODO(listx): Change ensureCommits() so that it takes a "cacher" interface
// instead of a generic RepoClient. However, if ShareObjectsWithSourceRepo
// was not set, then we cannot run ensureCommits() with the cacher because
// the cacher will run on the primary, not secondary (and without --shared,
// this will have no effect on the secondary); we have to handle that case
// gracefully (perhaps with some logging also).
if len(repoOpts.FetchCommits) > 0 {
// Targeted fetch. Only fetch those commits which we want, and only
// if they are missing.
if err := ensureCommits(repoClient, repoOpts); err != nil {
return nil, err
}
}

return repoClient, nil
}

// Clean removes the caches used to generate clients
func (c *clientFactory) Clean() error {
return os.RemoveAll(c.cacheDir)
}

func ensureCommits(repoClient RepoClient, repoOpts RepoOpts) error {
fetchArgs := []string{}

if repoOpts.NoFetchTags {
fetchArgs = append(fetchArgs, "--no-tags")
}

// For each commit SHA, check if it already exists. If so, don't bother
// fetching it.
for _, commitSHA := range repoOpts.FetchCommits {
if exists, _ := repoClient.ObjectExists(commitSHA); exists {
continue
}

fetchArgs = append(fetchArgs, commitSHA)
}

if err := repoClient.Fetch(fetchArgs...); err != nil {
return fmt.Errorf("failed to fetch %s: %v", fetchArgs, err)
}

return nil
}

0 comments on commit bdd601c

Please sign in to comment.