Skip to content

Commit

Permalink
Use identifier argument for .update(), add lots of comments for f…
Browse files Browse the repository at this point in the history
…uture readers
  • Loading branch information
benjaoming committed Jun 13, 2023
1 parent f8f35ac commit 9d616f0
Show file tree
Hide file tree
Showing 7 changed files with 52 additions and 31 deletions.
7 changes: 3 additions & 4 deletions readthedocs/doc_builder/director.py
Expand Up @@ -94,7 +94,6 @@ def setup_vcs(self):
environment=self.vcs_environment,
verbose_name=self.data.version.verbose_name,
version_type=self.data.version.type,
version_identifier=self.data.version.identifier,
)

# We can't do too much on ``pre_checkout`` because we haven't
Expand Down Expand Up @@ -214,10 +213,10 @@ def build(self):
def checkout(self):
"""Checkout Git repo and load build config file."""

log.info("Cloning repository.")
self.vcs_repository.update()

identifier = self.data.build_commit or self.data.version.identifier
log.info("Cloning and fetching.", identifier=identifier)
self.vcs_repository.update(identifier=identifier)

log.info("Checking out.", identifier=identifier)
self.vcs_repository.checkout(identifier)

Expand Down
6 changes: 5 additions & 1 deletion readthedocs/projects/models.py
Expand Up @@ -993,7 +993,11 @@ def has_htmlzip(self, version_slug=LATEST, version_type=None):
)

def vcs_repo(
self, environment, version=LATEST, verbose_name=None, version_type=None
self,
environment,
version=LATEST,
verbose_name=None,
version_type=None,
):
"""
Return a Backend object for this project able to handle VCS commands.
Expand Down
4 changes: 2 additions & 2 deletions readthedocs/vcs_support/backends/bzr.py
Expand Up @@ -16,8 +16,8 @@ class Backend(BaseVCS):
supports_tags = True
fallback_branch = ''

def update(self):
super().update()
def update(self, identifier=None):
super().update(identifier=identifier)
if self.repo_exists():
return self.up()
return self.clone()
Expand Down
56 changes: 37 additions & 19 deletions readthedocs/vcs_support/backends/git.py
Expand Up @@ -35,7 +35,6 @@ class Backend(BaseVCS):
repo_depth = 50

def __init__(self, *args, **kwargs):
self.version_identifier = kwargs.pop("version_identifier", None)
super().__init__(*args, **kwargs)
self.token = kwargs.get('token')
self.repo_url = self._get_clone_url()
Expand All @@ -56,9 +55,15 @@ def _get_clone_url(self):
def set_remote_url(self, url):
return self.run('git', 'remote', 'set-url', 'origin', url)

def update(self):
"""Clone or update the repository."""
super().update()
def update(self, identifier=None):
"""
Clone or update the repository.
:param identifier: This is the optional identifier for git fetch - a branch or tag name.
PR references are generated automatically for certain Git providers.
:return:
"""
super().update(identifier=identifier)

if self.use_clone_fetch_checkout_pattern():

Expand All @@ -67,7 +72,7 @@ def update(self):
self.clone_ng()
# New behavior: No confusing return value. We are not using return values
# in the callers.
self.fetch_ng()
self.fetch_ng(identifier=identifier)

else:
if self.repo_exists():
Expand All @@ -80,52 +85,65 @@ def update(self):
return self.fetch()
return self.clone()

def get_remote_reference(self):
def get_remote_fetch_reference(self, identifier):
"""
Gets a valid remote reference for the identifier.
:param identifier: Should be a branch or tag name when building branches or tags.
:return: A reference valid for fetch operation
"""
# Tags and branches have the tag/branch identifier set by the caller who instantiated the
# Git backend -- this means that the build process needs to know this from build data,
# essentially from an incoming webhook call.
if self.version_type in (BRANCH, TAG):
return self.version_identifier
return identifier
if self.version_type == EXTERNAL:

# TODO: We should be able to resolve this without looking up in oauth registry
git_provider_name = self.project.git_provider_name

# TODO: Why are these our only patterns?
if git_provider_name == GITHUB_BRAND:
return GITHUB_PR_PULL_PATTERN.format(id=self.verbose_name)
if self.project.git_provider_name == GITLAB_BRAND:
return GITLAB_MR_PULL_PATTERN
return GITLAB_MR_PULL_PATTERN.format(id=self.verbose_name)

# This seems to be the default behavior when we don't know the remote
# reference for a PR/MR: Just fetch everything
# TODO: Provide more information about fetch operations without references
return None

def clone_ng(self):
# If the repository is already cloned, we don't do anything.
# TODO: Why is it already cloned?
# This is likely legacy, but we may want to be able to call .update()
# several times in the same build
if self.repo_exists():
return

# --no-checkout: Makes it explicit what we are doing here. Nothing is checked out
# until it's explcitly done.
# until it's explicitly done.
# --depth 1: Shallow clone, fetch as little data as possible.
cmd = ["git", "clone", "--no-checkout", "--depth", "1"]
cmd = ["git", "clone", "--no-checkout", "--depth", "1", self.repo_url, "."]

code, stdout, stderr = self.run(*cmd)
return code, stdout, stderr

def fetch_ng(self):
def fetch_ng(self, identifier):
"""Implementation for new clone+fetch+checkout pattern."""

# --force: ?
# --force: Likely legacy, it seems to be irrelevant to this usage
# --tags: We need to fetch tags in order to resolve these references in the checkout
# --prune: ?
# --prune-tags: ?
# --prune: Likely legacy, we don't expect a previous fetch command to have run
# --prune-tags: Likely legacy, we don't expect a previous fetch command to have run
# --tags: This flag was used in the previous approach such that all tags were fetched
# in order to checkout a tag afterwards.
cmd = ["git", "fetch", "origin", "--force", "--tags", "--prune", "--prune-tags"]
remote_reference = self.get_remote_reference()
remote_reference = self.get_remote_fetch_reference(identifier)

if remote_reference:
# TODO: If we are fetching just one reference, what should the depth be?
# TODO: We are still fetching the latest 50 commits.
# A PR might have another commit added after the build has started...
cmd.append("--depth 1")
cmd.append(remote_reference)
cmd.extend(["--depth", self.repo_depth, remote_reference])

code, stdout, stderr = self.run(*cmd)
return code, stdout, stderr
Expand Down
4 changes: 2 additions & 2 deletions readthedocs/vcs_support/backends/hg.py
Expand Up @@ -12,8 +12,8 @@ class Backend(BaseVCS):
supports_branches = True
fallback_branch = 'default'

def update(self):
super().update()
def update(self, identifier=None):
super().update(identifier=identifier)
if self.repo_exists():
return self.pull()
return self.clone()
Expand Down
4 changes: 2 additions & 2 deletions readthedocs/vcs_support/backends/svn.py
Expand Up @@ -26,8 +26,8 @@ def __init__(self, *args, **kwargs):
else:
self.base_url = self.repo_url

def update(self):
super().update()
def update(self, identifier=None):
super().update(identifier=identifier)
if self.repo_exists():
return self.up()
return self.co()
Expand Down
2 changes: 1 addition & 1 deletion readthedocs/vcs_support/base.py
Expand Up @@ -83,7 +83,7 @@ def make_clean_working_dir(self):
safe_rmtree(self.working_dir, ignore_errors=True)
self.check_working_dir()

def update(self):
def update(self, identifier=None):
"""
Update a local copy of the repository in self.working_dir.
Expand Down

0 comments on commit 9d616f0

Please sign in to comment.