Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[MRG] Add caching of already built repositories #511

Merged
merged 1 commit into from Dec 22, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
7 changes: 4 additions & 3 deletions repo2docker/__main__.py
Expand Up @@ -196,7 +196,6 @@ def get_argparser():
'--cache-from',
action='append',
default=[],
#help=self.traits()['cache_from'].help
)

return argparser
Expand All @@ -208,7 +207,6 @@ def make_r2d(argv=None):
if argv is None:
argv = sys.argv[1:]


# version must be checked before parse, as repo/cmd are required and
# will spit out an error if allowed to be parsed first.
if '--version' in argv:
Expand Down Expand Up @@ -244,9 +242,11 @@ def make_r2d(argv=None):
extra=dict(phase='failed'))
sys.exit(1)


if args.image_name:
r2d.output_image_spec = args.image_name
else:
# we will pick a name after fetching the repository
r2d.output_image_spec = ""

r2d.json_logs = args.json_logs

Expand Down Expand Up @@ -343,5 +343,6 @@ def main():
r2d.log.exception(e)
sys.exit(1)


if __name__ == '__main__':
main()
65 changes: 47 additions & 18 deletions repo2docker/app.py
Expand Up @@ -13,7 +13,6 @@
import logging
import os
import pwd
import subprocess
import shutil
import tempfile
import time
Expand All @@ -31,8 +30,7 @@
from . import __version__
from .buildpacks import (
PythonBuildPack, DockerBuildPack, LegacyBinderDockerBuildPack,
CondaBuildPack, JuliaBuildPack, BaseImage,
RBuildPack, NixBuildPack
CondaBuildPack, JuliaBuildPack, RBuildPack, NixBuildPack
)
from . import contentproviders
from .utils import (
Expand Down Expand Up @@ -364,7 +362,21 @@ def fetch(self, url, ref, checkout_path):
spec, checkout_path, yield_output=self.json_logs):
self.log.info(log_line, extra=dict(phase='fetching'))


if not self.output_image_spec:
self.output_image_spec = (
'r2d' + escapism.escape(self.repo, escape_char='-').lower()
)
# if we are building from a subdirectory include that in the
# image name so we can tell builds from different sub-directories
# apart.
if self.subdir:
self.output_image_spec += (
escapism.escape(self.subdir, escape_char='-').lower()
)
if picked_content_provider.content_id is not None:
self.output_image_spec += picked_content_provider.content_id
else:
self.output_image_spec += str(int(time.time()))

def json_excepthook(self, etype, evalue, traceback):
"""Called on an uncaught exception when using json logging
Expand Down Expand Up @@ -399,15 +411,6 @@ def initialize(self):
fmt='%(message)s'
)

if self.output_image_spec == "":
# Attempt to set a sane default!
# HACK: Provide something more descriptive?
self.output_image_spec = (
'r2d' +
escapism.escape(self.repo, escape_char='-').lower() +
str(int(time.time()))
)

if self.dry_run and (self.run or self.push):
raise ValueError("Cannot push or run image if we are not building it")

Expand Down Expand Up @@ -546,15 +549,29 @@ def _get_free_port(self):
s.close()
return port

def find_image(self):
    """Return True if an image named output_image_spec already exists.

    Dry runs never contact the docker daemon (it is allowed to be
    unreachable), so they always report that no image exists.
    """
    if self.dry_run:
        return False
    # ask the local docker daemon whether an image with our tag exists
    client = docker.APIClient(version='auto', **kwargs_from_env())
    wanted_tag = self.output_image_spec + ":latest"
    for image in client.images():
        tags = image['RepoTags'] or []
        if wanted_tag in tags:
            return True
    return False

def build(self):
"""
Build docker image
"""
# Check if r2d can connect to docker daemon
if not self.dry_run:
try:
api_client = docker.APIClient(version='auto',
**kwargs_from_env())
docker_client = docker.APIClient(version='auto',
**kwargs_from_env())
except DockerException as e:
self.log.exception(e)
raise
Expand All @@ -574,6 +591,14 @@ def build(self):
try:
self.fetch(self.repo, self.ref, checkout_path)

if self.find_image():
self.log.info("Reusing existing image ({}), not "
"building.".format(self.output_image_spec))
# no need to build, so skip to the end by `return`ing here
# this will still execute the finally clause and lets us
# avoid having to indent the build code by an extra level
return

if self.subdir:
checkout_path = os.path.join(checkout_path, self.subdir)
if not os.path.isdir(checkout_path):
Expand Down Expand Up @@ -610,8 +635,11 @@ def build(self):
self.log.info('Using %s builder\n', bp.__class__.__name__,
extra=dict(phase='building'))

for l in picked_buildpack.build(api_client, self.output_image_spec,
self.build_memory_limit, build_args, self.cache_from):
for l in picked_buildpack.build(docker_client,
self.output_image_spec,
self.build_memory_limit,
build_args,
self.cache_from):
if 'stream' in l:
self.log.info(l['stream'],
extra=dict(phase='building'))
Expand All @@ -624,8 +652,9 @@ def build(self):
else:
self.log.info(json.dumps(l),
extra=dict(phase='building'))

finally:
# Cleanup checkout if necessary
# Cleanup checkout if necessary
if self.cleanup_checkout:
shutil.rmtree(checkout_path, ignore_errors=True)

Expand Down
14 changes: 14 additions & 0 deletions repo2docker/contentproviders/base.py
Expand Up @@ -17,6 +17,20 @@ class ContentProvider:
def __init__(self):
    # All content providers log through the shared repo2docker logger.
    self.log = logging.getLogger("repo2docker")

@property
def content_id(self):
    """A unique ID to represent the version of the content.

    This ID is used to name the built images. If the ID is the same
    between two runs of repo2docker we will reuse an existing image
    (if it exists). By providing an ID that summarizes the content we
    can reuse existing images and speed up build times. A good ID is
    the revision of a Git repository or a hash computed from all of
    the content.

    The content ID can be any string.

    To disable this behaviour set this property to `None`, in which
    case a fresh image will always be built.
    """
    return None

def detect(self, repo, ref=None, extra_args=None):
"""Determine compatibility between source and this provider.

Expand Down
11 changes: 11 additions & 0 deletions repo2docker/contentproviders/git.py
Expand Up @@ -44,3 +44,14 @@ def fetch(self, spec, output_dir, yield_output=False):
cwd=output_dir,
capture=yield_output):
yield line

cmd = ['git', 'rev-parse', 'HEAD']
sha1 = subprocess.Popen(cmd, stdout=subprocess.PIPE, cwd=output_dir)
self._sha1 = sha1.stdout.read().decode().strip()

@property
def content_id(self):
    """Content ID of a Git repository.

    Uses the abbreviated (first seven characters) commit SHA1 recorded
    during fetch.
    """
    short_sha1 = self._sha1[:7]
    return short_sha1
32 changes: 32 additions & 0 deletions tests/conftest.py
Expand Up @@ -12,8 +12,11 @@
import pipes
import shlex
import requests
import subprocess
import time

from tempfile import TemporaryDirectory

import pytest
import yaml

Expand Down Expand Up @@ -77,6 +80,35 @@ def run_test(args):
return run_test


@pytest.fixture()
def git_repo():
    """
    Pytest fixture yielding the path of a fresh, empty git repository.

    The repository lives inside a temporary directory which is removed
    automatically when the test using the fixture finishes.
    """
    with TemporaryDirectory() as gitdir:
        subprocess.check_call(['git', 'init'], cwd=gitdir)
        yield gitdir


@pytest.fixture()
def repo_with_content(git_repo):
    """Pytest fixture yielding a git repository with one committed file.

    Yields a tuple of (repository path, full SHA1 of the commit).
    """
    with open(os.path.join(git_repo, 'test'), 'w') as f:
        f.write("Hello")

    subprocess.check_call(['git', 'add', 'test'], cwd=git_repo)
    subprocess.check_call(['git', 'commit', '-m', 'Test commit'],
                          cwd=git_repo)
    # check_output waits for the process, closes the pipe and raises on
    # failure -- unlike a bare Popen whose pipe was never reaped.
    sha1 = subprocess.check_output(['git', 'rev-parse', 'HEAD'],
                                   cwd=git_repo).decode().strip()

    yield git_repo, sha1


class Repo2DockerTest(pytest.Function):
"""A pytest.Item for running repo2docker"""
def __init__(self, name, parent, args):
Expand Down
52 changes: 18 additions & 34 deletions tests/unit/contentproviders/test_git.py
@@ -1,50 +1,34 @@
from contextlib import contextmanager
import os
import subprocess
import pytest
from tempfile import TemporaryDirectory
from repo2docker.contentproviders import Git


@contextmanager
def git_repo():
"""
Makes a dummy git repo in which user can perform git operations

Should be used as a contextmanager, it will delete directory when done
"""

with TemporaryDirectory() as gitdir:
subprocess.check_call(['git', 'init'], cwd=gitdir)
yield gitdir


def test_clone(repo_with_content):
    """Test simple git clone to a target dir"""
    upstream, sha1 = repo_with_content

    with TemporaryDirectory() as clone_dir:
        spec = {'repo': upstream}
        git_content = Git()
        for _ in git_content.fetch(spec, clone_dir):
            pass
        # the committed file must be present in the checkout
        assert os.path.exists(os.path.join(clone_dir, 'test'))

        # content_id is the abbreviated SHA1 of the fetched commit
        assert git_content.content_id == sha1[:7]


def test_bad_ref():
def test_bad_ref(repo_with_content):
"""
Test trying to checkout a ref that doesn't exist
"""
with git_repo() as upstream:
with TemporaryDirectory() as clone_dir:
spec = {'repo': upstream, 'ref': 'does-not-exist'}
with pytest.raises(ValueError):
for _ in Git().fetch(spec, clone_dir):
pass
upstream, sha1 = repo_with_content
with TemporaryDirectory() as clone_dir:
spec = {'repo': upstream, 'ref': 'does-not-exist'}
with pytest.raises(ValueError):
for _ in Git().fetch(spec, clone_dir):
pass


def test_always_accept():
# The git content provider should always accept a spec
Expand Down
13 changes: 12 additions & 1 deletion tests/unit/contentproviders/test_local.py
Expand Up @@ -24,14 +24,25 @@ def test_not_detect_local_file():
assert spec is None, spec


def test_content_id_is_None():
    # The local content provider relies on docker's own caching, so it
    # must not advertise a content id of its own.
    provider = Local()
    assert provider.content_id is None


def test_content_available():
    # create a directory with files, check they are available in the output
    # directory
    with TemporaryDirectory() as d:
        with open(os.path.join(d, 'test'), 'w') as f:
            f.write("Hello")

        local = Local()
        spec = {'path': d}
        for _ in local.fetch(spec, d):
            pass
        assert os.path.exists(os.path.join(d, 'test'))
        # content_id property should always be None for local content
        # provider as we rely on the caching done by docker
        assert local.content_id is None