Skip to content

Commit

Permalink
Merge 72bd76b into ec44150
Browse files Browse the repository at this point in the history
  • Loading branch information
brunoapimentel committed Dec 29, 2021
2 parents ec44150 + 72bd76b commit 0a71376
Show file tree
Hide file tree
Showing 6 changed files with 78 additions and 28 deletions.
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -594,6 +594,8 @@ with the `mod_auth_gssapi` module.

* `include-git-dir` - when used, `.git` file objects are not removed from the source bundle created
by Cachito. This is useful when the git history is important to the build process.
If this flag is absent, Cachito will use a shallow clone (depth=1) of the repository in order to save
resources.

* `cgo-disable` - use this flag to make Cachito set `CGO_ENABLED=0` while processing gomod packages.
This environment variable will only be used internally by Cachito, it will *not* be set in the
Expand Down
6 changes: 5 additions & 1 deletion cachito/web/api_v1.py
Original file line number Diff line number Diff line change
Expand Up @@ -346,7 +346,11 @@ def create_request():
error_callback = tasks.failed_request_callback.s(request.id)
chain_tasks = [
tasks.fetch_app_source.s(
request.repo, request.ref, request.id, "git-submodule" in pkg_manager_names
request.repo,
request.ref,
request.id,
"git-submodule" in pkg_manager_names,
"include-git-dir" in request.flags,
).on_error(error_callback)
]

Expand Down
41 changes: 28 additions & 13 deletions cachito/workers/scm.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ def _create_archive(self, from_dir):
os.unlink(self.sources_dir.archive_path)
raise

def clone_and_archive(self, gitsubmodule=False):
def clone_and_archive(self, gitsubmodule=False, include_git_dir=False):
"""
Clone the git repository and create the compressed source archive.
Expand All @@ -153,14 +153,24 @@ def clone_and_archive(self, gitsubmodule=False):
with tempfile.TemporaryDirectory(prefix="cachito-") as temp_dir:
log.debug("Cloning the Git repository from %s", self.url)
clone_path = os.path.join(temp_dir, "repo")

kwargs = {
"no_checkout": True,
# Don't allow git to prompt for a username if we don't have access
"env": {"GIT_TERMINAL_PROMPT": "0"},
}

if include_git_dir:
# In case we don't need to keep the Git history, clone with depth 1 to save
# resources on large repos
kwargs["depth"] = 1

try:
repo = git.repo.Repo.clone_from(
self.url,
clone_path,
no_checkout=True,
# Don't allow git to prompt for a username if we don't have access
env={"GIT_TERMINAL_PROMPT": "0"},
)
repo = git.repo.Repo.clone_from(self.url, clone_path, **kwargs)

if include_git_dir:
# Fetch the exact commit we need
repo.remote().fetch(refspec=self.ref, depth=1)
except: # noqa E722
log.exception("Cloning the Git repository from %s failed", self.url)
raise CachitoError("Cloning the Git repository failed")
Expand All @@ -173,7 +183,7 @@ def clone_and_archive(self, gitsubmodule=False):
repo.git.gc("--prune=now")
self._create_archive(repo.working_dir)

def update_and_archive(self, previous_archive, gitsubmodule=False):
def update_and_archive(self, previous_archive, gitsubmodule=False, include_git_dir=False):
"""
Update the existing Git history and create a source archive.
Expand All @@ -187,10 +197,13 @@ def update_and_archive(self, previous_archive, gitsubmodule=False):
tar.extractall(temp_dir)

repo = git.Repo(os.path.join(temp_dir, "app"))

kwargs = {"depth": 1} if include_git_dir else {}

try:
# The reference must be specified to handle commits which are not part
# of a branch.
repo.remote().fetch(refspec=self.ref)
repo.remote().fetch(refspec=self.ref, **kwargs)
except: # noqa E722
log.exception("Failed to fetch from remote %s", self.url)
raise CachitoError("Failed to fetch from the remote Git repository")
Expand All @@ -202,7 +215,7 @@ def update_and_archive(self, previous_archive, gitsubmodule=False):
repo.git.gc("--prune=now")
self._create_archive(repo.working_dir)

def fetch_source(self, gitsubmodule=False):
def fetch_source(self, gitsubmodule=False, include_git_dir=False):
"""Fetch the repo, create a compressed tar file, and put it in long-term storage.
:param bool gitsubmodule: a bool to determine whether git submodules need to be processed.
Expand Down Expand Up @@ -235,7 +248,9 @@ def fetch_source(self, gitsubmodule=False):
if "-with-submodules" in str(previous_archive):
continue
try:
self.update_and_archive(previous_archive, gitsubmodule=gitsubmodule)
self.update_and_archive(
previous_archive, gitsubmodule=gitsubmodule, include_git_dir=include_git_dir,
)
return
except (
git.exc.InvalidGitRepositoryError,
Expand All @@ -253,7 +268,7 @@ def fetch_source(self, gitsubmodule=False):
previous_archive,
)

self.clone_and_archive(gitsubmodule=gitsubmodule)
self.clone_and_archive(gitsubmodule=gitsubmodule, include_git_dir=include_git_dir)

def update_git_submodules(self, repo):
"""Update git submodules.
Expand Down
4 changes: 2 additions & 2 deletions cachito/workers/tasks/general.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@

@app.task(priority=0)
@runs_if_request_in_progress
def fetch_app_source(url, ref, request_id, gitsubmodule=False):
def fetch_app_source(url, ref, request_id, gitsubmodule=False, include_git_dir=False):
"""
Fetch the application source code that was requested and put it in long-term storage.
Expand All @@ -51,7 +51,7 @@ def fetch_app_source(url, ref, request_id, gitsubmodule=False):
try:
# Default to Git for now
scm = Git(url, ref)
scm.fetch_source(gitsubmodule=gitsubmodule)
scm.fetch_source(gitsubmodule=gitsubmodule, include_git_dir=include_git_dir)
except requests.Timeout:
raise CachitoError("The connection timed out while downloading the source")
except CachitoError:
Expand Down
6 changes: 6 additions & 0 deletions tests/test_api_v1.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,7 @@ def test_create_and_fetch_request(
"c50b93a32df1c9d700e3e80996845bc2e13be848",
1,
"git-submodule" in expected_pkg_managers,
"include-git-dir" in flags if flags is not None else False,
).on_error(error_callback)
]
if "gomod" in expected_pkg_managers:
Expand Down Expand Up @@ -220,6 +221,7 @@ def test_create_request_with_gomod_package_configs(
"c50b93a32df1c9d700e3e80996845bc2e13be848",
1,
False,
False,
).on_error(error_callback),
fetch_gomod_source.si(1, [], package_value["gomod"]).on_error(error_callback),
process_fetched_sources.si(1).on_error(error_callback),
Expand Down Expand Up @@ -250,6 +252,7 @@ def test_create_request_with_npm_package_configs(
"c50b93a32df1c9d700e3e80996845bc2e13be848",
1,
False,
False,
).on_error(error_callback),
fetch_npm_source.si(1, package_value["npm"]).on_error(error_callback),
process_fetched_sources.si(1).on_error(error_callback),
Expand Down Expand Up @@ -290,6 +293,7 @@ def test_create_request_with_pip_package_configs(mock_chain, app, auth_env, clie
"c50b93a32df1c9d700e3e80996845bc2e13be848",
1,
False,
False,
).on_error(error_callback),
fetch_pip_source.si(1, package_value["pip"]).on_error(error_callback),
process_fetched_sources.si(1).on_error(error_callback),
Expand Down Expand Up @@ -320,6 +324,7 @@ def test_create_request_with_yarn_package_configs(
"c50b93a32df1c9d700e3e80996845bc2e13be848",
1,
False,
False,
).on_error(error_callback),
fetch_yarn_source.si(1, package_value["yarn"]).on_error(error_callback),
process_fetched_sources.si(1).on_error(error_callback),
Expand Down Expand Up @@ -372,6 +377,7 @@ def test_create_and_fetch_request_with_flag(mock_chain, app, auth_env, client, d
"c50b93a32df1c9d700e3e80996845bc2e13be848",
1,
False,
False,
).on_error(error_callback),
fetch_gomod_source.si(1, [], []).on_error(error_callback),
process_fetched_sources.si(1).on_error(error_callback),
Expand Down
47 changes: 35 additions & 12 deletions tests/test_workers/test_scm.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,9 @@ def test_repo_name():
assert git_obj.repo_name == "release-engineering/retrodep"


@pytest.mark.parametrize("gitsubmodule", [True, False])
@pytest.mark.parametrize(
"gitsubmodule, include_git_dir", [(True, False), (False, False), (True, True), (False, True)]
)
@mock.patch("tarfile.open")
@mock.patch("tempfile.TemporaryDirectory")
@mock.patch("tempfile.NamedTemporaryFile")
Expand All @@ -51,6 +53,7 @@ def test_clone_and_archive(
mock_temp_dir,
mock_tarfile_open,
gitsubmodule,
include_git_dir,
):
# Mock the archive being created
mock_exists.return_value = True
Expand All @@ -69,14 +72,16 @@ def test_clone_and_archive(
git_obj = scm.Git(url, ref)

with mock.patch.object(git_obj.sources_dir, "archive_path", new=archive_path):
git_obj.clone_and_archive(gitsubmodule)
git_obj.clone_and_archive(gitsubmodule, include_git_dir)

kwargs = {"depth": 1} if include_git_dir else {}

# Verify the tempfile.TemporaryDirectory context manager was used twice:
# once for _clone_and_archive and once for _verify_archive
assert mock_temp_dir.return_value.__enter__.call_count == 2
# Verify the repo was cloned and checked out properly
mock_clone.assert_called_once_with(
url, "/tmp/cachito-temp/repo", no_checkout=True, env={"GIT_TERMINAL_PROMPT": "0"}
url, "/tmp/cachito-temp/repo", no_checkout=True, env={"GIT_TERMINAL_PROMPT": "0"}, **kwargs
)
assert mock_clone.return_value.head.reference == mock_commit
mock_clone.return_value.head.reset.assert_called_once_with(index=True, working_tree=True)
Expand All @@ -89,6 +94,9 @@ def test_clone_and_archive(
mock_ugs.assert_called_once_with(mock_clone.return_value)
else:
mock_ugs.assert_not_called()
#
if include_git_dir:
mock_clone.return_value.remote().fetch.assert_called_once_with(refspec=ref, **kwargs)

mock_clone.return_value.git.gc.assert_called_once_with("--prune=now")

Expand Down Expand Up @@ -155,7 +163,7 @@ def test_fetch_source_clone_if_no_archive_yet(mock_clone_and_archive, gitsubmodu
with po(scm_git.sources_dir.package_dir, "glob", return_value=[]):
scm_git.fetch_source(gitsubmodule)

mock_clone_and_archive.assert_called_once_with(gitsubmodule=gitsubmodule)
mock_clone_and_archive.assert_called_once_with(gitsubmodule=gitsubmodule, include_git_dir=False)


@pytest.mark.parametrize("gitsubmodule", [True, False])
Expand Down Expand Up @@ -194,7 +202,9 @@ def test_fetch_source_by_pull(mock_update_and_archive, mock_getctime, gitsubmodu
return_value=["29eh2a.tar.gz", "a8c2d2.tar.gz", "a8c2d2-with-submodules.tar.gz"],
):
scm_git.fetch_source(gitsubmodule)
mock_update_and_archive.assert_called_once_with("a8c2d2.tar.gz", gitsubmodule=gitsubmodule)
mock_update_and_archive.assert_called_once_with(
"a8c2d2.tar.gz", gitsubmodule=gitsubmodule, include_git_dir=False
)


@pytest.mark.parametrize(
Expand Down Expand Up @@ -242,25 +252,36 @@ def test_fetch_source_by_pull_corrupt_archive(

assert mock_update_and_archive.call_count == 2
calls = [
mock.call("a8c2d2.tar.gz", gitsubmodule=gitsubmodule),
mock.call("29eh2a.tar.gz", gitsubmodule=gitsubmodule),
mock.call("a8c2d2.tar.gz", gitsubmodule=gitsubmodule, include_git_dir=False),
mock.call("29eh2a.tar.gz", gitsubmodule=gitsubmodule, include_git_dir=False),
]
mock_update_and_archive.assert_has_calls(calls)
if all_corrupt:
mock_clone_and_archive.assert_called_once_with(gitsubmodule=gitsubmodule)
mock_clone_and_archive.assert_called_once_with(
gitsubmodule=gitsubmodule, include_git_dir=False
)
else:
mock_clone_and_archive.assert_not_called()


@pytest.mark.parametrize("gitsubmodule", [True, False])
@pytest.mark.parametrize(
"gitsubmodule, include_git_dir", [(True, False), (False, False), (True, True), (False, True)]
)
@mock.patch("tarfile.open")
@mock.patch("tempfile.TemporaryDirectory")
@mock.patch("git.Repo")
@mock.patch("cachito.workers.scm.run_cmd")
@mock.patch("os.path.exists")
@mock.patch("cachito.workers.scm.Git.update_git_submodules")
def test_update_and_archive(
mock_ugs, mock_exists, mock_fsck, mock_repo, mock_temp_dir, mock_tarfile_open, gitsubmodule
mock_ugs,
mock_exists,
mock_fsck,
mock_repo,
mock_temp_dir,
mock_tarfile_open,
gitsubmodule,
include_git_dir,
):
# Mock the archive being created
mock_exists.return_value = True
Expand All @@ -270,16 +291,18 @@ def test_update_and_archive(
# Mock the tempfile.TemporaryDirectory context manager
mock_temp_dir.return_value.__enter__.return_value = "/tmp/cachito-temp"

kwargs = {"depth": 1} if include_git_dir else {}

# Test does not really extract this archive file. The filename could be arbitrary.
scm.Git(url, ref).update_and_archive("/tmp/1234567.tar.gz", gitsubmodule)
scm.Git(url, ref).update_and_archive("/tmp/1234567.tar.gz", gitsubmodule, include_git_dir)

# Verify the tempfile.TemporaryDirectory context manager was used twice:
# once for _update_and_archive and once for _verify_archive
assert mock_temp_dir.return_value.__enter__.call_count == 2

repo = mock_repo.return_value
# Verify the changes are pulled.
repo.remote.return_value.fetch.assert_called_once_with(refspec=ref)
repo.remote.return_value.fetch.assert_called_once_with(refspec=ref, **kwargs)
# Verify the repo is reset to specific ref
repo.commit.assert_called_once_with(ref)
assert repo.commit.return_value == repo.head.reference
Expand Down

0 comments on commit 0a71376

Please sign in to comment.