Skip to content

Commit

Permalink
Use git's partial clone feature to speed up pip
Browse files Browse the repository at this point in the history
Clone with --filter=blob:none, which fetches all of the repository's
metadata up front but fetches blobs only on demand, as checkout
needs them. Since pip typically needs the blobs for only
a single revision, this can be a big improvement, especially
when fetching from repositories with a lot of history, and
particularly on slower network connections.

Added unit test for the rev-less path. Confirmed that both
of the if/else paths are tested by the unit tests.
  • Loading branch information
nipunn1313 committed Aug 9, 2021
1 parent b400ee3 commit 745ca16
Show file tree
Hide file tree
Showing 3 changed files with 108 additions and 1 deletion.
1 change: 1 addition & 0 deletions news/9086.feature.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
When a revision is specified in a Git URL, use git's partial clone feature to speed up source retrieval.
11 changes: 10 additions & 1 deletion src/pip/_internal/vcs/git.py
Original file line number Diff line number Diff line change
Expand Up @@ -259,12 +259,21 @@ def fetch_new(self, dest, url, rev_options):
# type: (str, HiddenText, RevOptions) -> None
rev_display = rev_options.to_display()
logger.info('Cloning %s%s to %s', url, rev_display, display_path(dest))
self.run_command(make_command('clone', '-q', url, dest))
if self.get_git_version() >= (2, 17):
# Git added support for partial clone in 2.17
# https://git-scm.com/docs/partial-clone
# Speeds up cloning by functioning without a complete copy of repository
self.run_command(make_command(
'clone', '--filter=blob:none', '-q', url, dest,
))
else:
self.run_command(make_command('clone', '-q', url, dest))

if rev_options.rev:
# Then a specific revision was requested.
rev_options = self.resolve_revision(dest, url, rev_options)
branch_name = getattr(rev_options, 'branch_name', None)
logger.debug('Rev options %s, branch_name %s', rev_options, branch_name)
if branch_name is None:
# Only do a checkout if the current commit id doesn't match
# the requested revision.
Expand Down
97 changes: 97 additions & 0 deletions tests/functional/test_vcs_git.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
"""

import os
from unittest.mock import patch

import pytest

Expand Down Expand Up @@ -282,3 +283,99 @@ def test_resolve_commit_not_on_branch(script, tmp_path):
# check we can fetch our commit
rev_options = Git.make_rev_options(commit)
Git().fetch_new(str(clone_path), repo_path.as_uri(), rev_options)


def _initialize_clonetest_server(repo_path, script, enable_partial_clone):
    """Create a one-commit git repository to serve as a clone source.

    :param repo_path: Directory to create and initialise as a git repository.
    :param script: Test environment whose ``run()`` executes the git commands.
    :param enable_partial_clone: When true, set the ``uploadpack`` options
        that let the repository honour ``--filter`` requests from clients.
    :return: Path of the single committed file, ``file.txt``.
    """
    # Every git invocation runs inside the repository; convert the path once
    # so all calls pass ``cwd`` in the same (string) form.
    cwd = str(repo_path)

    repo_path.mkdir()
    script.run("git", "init", cwd=cwd)
    repo_file = repo_path / "file.txt"
    repo_file.write_text(".")
    script.run("git", "add", "file.txt", cwd=cwd)
    script.run("git", "commit", "-m", "initial commit", cwd=cwd)

    # Enable filtering support on server
    if enable_partial_clone:
        filter_options = (
            "uploadpack.allowFilter",
            "uploadpack.allowanysha1inwant",
        )
        for option in filter_options:
            script.run("git", "config", option, "true", cwd=cwd)

    return repo_file


@pytest.mark.skipif(Git().get_git_version() < (2, 17), reason="git too old")
def test_partial_clone(script, tmp_path):
    """Test partial clone w/ a git-server that supports it"""
    repo_path = tmp_path / "repo"
    repo_file = _initialize_clonetest_server(
        repo_path, script, enable_partial_clone=True
    )
    # NOTE: the clones are created inside the origin's working tree. They are
    # untracked there, so the `commit -am` below (which only stages tracked
    # files) leaves them alone.
    clone_path1 = repo_path / "clone1"
    clone_path2 = repo_path / "clone2"

    commit = script.run(
        "git", "rev-parse", "HEAD", cwd=str(repo_path)
    ).stdout.strip()

    # Check that we can clone at HEAD
    Git().fetch_new(str(clone_path1), repo_path.as_uri(), Git.make_rev_options())
    # Check that we can clone to commit
    Git().fetch_new(
        str(clone_path2), repo_path.as_uri(), Git.make_rev_options(commit)
    )

    # Write some additional stuff to git pull
    repo_file.write_text("..")
    script.run("git", "commit", "-am", "second commit", cwd=str(repo_path))

    # Make sure git pull works - with server supporting filtering.
    # The server accepted the filter, so git must not warn about ignoring it.
    unsupported_warning = "warning: filtering not recognized by server, ignoring"
    for clone_path in (clone_path1, clone_path2):
        pull_result = script.run("git", "pull", cwd=str(clone_path))
        assert unsupported_warning not in pull_result.stderr


@pytest.mark.skipif(Git().get_git_version() < (2, 17), reason="git too old")
def test_partial_clone_without_server_support(script, tmp_path):
    """Test partial clone w/ a git-server that does not support it"""
    repo_path = tmp_path / "repo"
    repo_file = _initialize_clonetest_server(
        repo_path, script, enable_partial_clone=False
    )
    # NOTE: the clones are created inside the origin's working tree. They are
    # untracked there, so the `commit -am` below (which only stages tracked
    # files) leaves them alone.
    clone_path1 = repo_path / "clone1"
    clone_path2 = repo_path / "clone2"

    commit = script.run(
        "git", "rev-parse", "HEAD", cwd=str(repo_path)
    ).stdout.strip()

    # Check that we can clone at HEAD
    Git().fetch_new(str(clone_path1), repo_path.as_uri(), Git.make_rev_options())
    # Check that we can clone to commit
    Git().fetch_new(
        str(clone_path2), repo_path.as_uri(), Git.make_rev_options(commit)
    )

    # Write some additional stuff to git pull
    repo_file.write_text("..")
    script.run("git", "commit", "-am", "second commit", cwd=str(repo_path))

    # Make sure git pull works - even though server doesn't support filtering.
    # The pull is expected to succeed while git warns that the filter request
    # was ignored by the server.
    unsupported_warning = "warning: filtering not recognized by server, ignoring"
    for clone_path in (clone_path1, clone_path2):
        pull_result = script.run("git", "pull", cwd=str(clone_path))
        assert unsupported_warning in pull_result.stderr


def test_clone_without_partial_clone_support(script, tmp_path):
    """Older git clients don't support partial clone. Test the fallback path"""
    repo_path = tmp_path / "repo"
    repo_file = _initialize_clonetest_server(
        repo_path, script, enable_partial_clone=True
    )
    # NOTE: the clone is created inside the origin's working tree. It is
    # untracked there, so the `commit -am` below leaves it alone.
    clone_path = repo_path / "clone1"

    # Check that we can clone w/ old version of git w/o --filter.
    # Reporting 2.16 puts the client just below the 2.17 threshold checked by
    # the skipif markers of the partial-clone tests in this module.
    with patch("pip._internal.vcs.git.Git.get_git_version", return_value=(2, 16)):
        Git().fetch_new(str(clone_path), repo_path.as_uri(), Git.make_rev_options())

    repo_file.write_text("...")
    script.run("git", "commit", "-am", "third commit", cwd=str(repo_path))

    # Should work fine w/o attempting to use `--filter` args
    pull_result = script.run("git", "pull", cwd=str(clone_path))
    assert (
        "warning: filtering not recognized by server, ignoring"
        not in pull_result.stderr
    )

0 comments on commit 745ca16

Please sign in to comment.