Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use git's partial clone feature to speed up pip #9086

Merged
merged 1 commit into from
Aug 15, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions news/9086.feature.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
When a revision is specified in a Git URL, use git's partial clone feature to speed up source retrieval.
17 changes: 16 additions & 1 deletion src/pip/_internal/vcs/git.py
Original file line number Diff line number Diff line change
Expand Up @@ -262,12 +262,27 @@ def fetch_new(self, dest, url, rev_options):
# type: (str, HiddenText, RevOptions) -> None
rev_display = rev_options.to_display()
logger.info("Cloning %s%s to %s", url, rev_display, display_path(dest))
self.run_command(make_command("clone", "-q", url, dest))
if self.get_git_version() >= (2, 17):
# Git added support for partial clone in 2.17
# https://git-scm.com/docs/partial-clone
# Speeds up cloning by functioning without a complete copy of repository
self.run_command(
make_command(
"clone",
"--filter=blob:none",
"-q",
url,
dest,
)
)
else:
self.run_command(make_command("clone", "-q", url, dest))

if rev_options.rev:
# Then a specific revision was requested.
rev_options = self.resolve_revision(dest, url, rev_options)
branch_name = getattr(rev_options, "branch_name", None)
logger.debug("Rev options %s, branch_name %s", rev_options, branch_name)
if branch_name is None:
# Only do a checkout if the current commit id doesn't match
# the requested revision.
Expand Down
105 changes: 105 additions & 0 deletions tests/functional/test_vcs_git.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
"""

import os
from unittest.mock import patch

import pytest

Expand Down Expand Up @@ -282,3 +283,107 @@ def test_resolve_commit_not_on_branch(script, tmp_path):
# check we can fetch our commit
rev_options = Git.make_rev_options(commit)
Git().fetch_new(str(clone_path), repo_path.as_uri(), rev_options)


def _initialize_clonetest_server(repo_path, script, enable_partial_clone):
repo_path.mkdir()
script.run("git", "init", cwd=str(repo_path))
repo_file = repo_path / "file.txt"
repo_file.write_text(u".")
script.run("git", "add", "file.txt", cwd=str(repo_path))
script.run("git", "commit", "-m", "initial commit", cwd=str(repo_path))

# Enable filtering support on server
if enable_partial_clone:
script.run("git", "config", "uploadpack.allowFilter", "true", cwd=repo_path)
script.run(
"git", "config", "uploadpack.allowanysha1inwant", "true", cwd=repo_path
)

return repo_file


@pytest.mark.skipif(Git().get_git_version() < (2, 17), reason="git too old")
def test_partial_clone(script, tmp_path):
"""Test partial clone w/ a git-server that supports it"""
repo_path = tmp_path / "repo"
repo_file = _initialize_clonetest_server(
repo_path, script, enable_partial_clone=True
)
clone_path1 = repo_path / "clone1"
clone_path2 = repo_path / "clone2"

commit = script.run("git", "rev-parse", "HEAD", cwd=str(repo_path)).stdout.strip()

# Check that we can clone at HEAD
Git().fetch_new(str(clone_path1), repo_path.as_uri(), Git.make_rev_options())
# Check that we can clone to commit
Git().fetch_new(str(clone_path2), repo_path.as_uri(), Git.make_rev_options(commit))

# Write some additional stuff to git pull
repo_file.write_text(u"..")
script.run("git", "commit", "-am", "second commit", cwd=str(repo_path))

# Make sure git pull works - with server supporting filtering
assert (
"warning: filtering not recognized by server, ignoring"
not in script.run("git", "pull", cwd=clone_path1).stderr
)
assert (
"warning: filtering not recognized by server, ignoring"
not in script.run("git", "pull", cwd=clone_path2).stderr
)


@pytest.mark.skipif(Git().get_git_version() < (2, 17), reason="git too old")
def test_partial_clone_without_server_support(script, tmp_path):
"""Test partial clone w/ a git-server that does not support it"""
repo_path = tmp_path / "repo"
repo_file = _initialize_clonetest_server(
repo_path, script, enable_partial_clone=False
)
clone_path1 = repo_path / "clone1"
clone_path2 = repo_path / "clone2"

commit = script.run("git", "rev-parse", "HEAD", cwd=str(repo_path)).stdout.strip()

# Check that we can clone at HEAD
Git().fetch_new(str(clone_path1), repo_path.as_uri(), Git.make_rev_options())
# Check that we can clone to commit
Git().fetch_new(str(clone_path2), repo_path.as_uri(), Git.make_rev_options(commit))

# Write some additional stuff to git pull
repo_file.write_text(u"..")
script.run("git", "commit", "-am", "second commit", cwd=str(repo_path))

# Make sure git pull works - even though server doesn't support filtering
assert (
"warning: filtering not recognized by server, ignoring"
in script.run("git", "pull", cwd=clone_path1).stderr
)
assert (
"warning: filtering not recognized by server, ignoring"
in script.run("git", "pull", cwd=clone_path2).stderr
)


def test_clone_without_partial_clone_support(script, tmp_path):
"""Older git clients don't support partial clone. Test the fallback path"""
repo_path = tmp_path / "repo"
repo_file = _initialize_clonetest_server(
repo_path, script, enable_partial_clone=True
)
clone_path = repo_path / "clone1"

# Check that we can clone w/ old version of git w/o --filter
with patch("pip._internal.vcs.git.Git.get_git_version", return_value=(2, 16)):
Git().fetch_new(str(clone_path), repo_path.as_uri(), Git.make_rev_options())

repo_file.write_text(u"...")
script.run("git", "commit", "-am", "third commit", cwd=str(repo_path))

# Should work fine w/o attempting to use `--filter` args
assert (
"warning: filtering not recognized by server, ignoring"
not in script.run("git", "pull", cwd=clone_path).stderr
)