diff --git a/repo2docker/__main__.py b/repo2docker/__main__.py index 58caaf64d..0e427221f 100644 --- a/repo2docker/__main__.py +++ b/repo2docker/__main__.py @@ -196,7 +196,6 @@ def get_argparser(): '--cache-from', action='append', default=[], - #help=self.traits()['cache_from'].help ) return argparser @@ -208,7 +207,6 @@ def make_r2d(argv=None): if argv is None: argv = sys.argv[1:] - # version must be checked before parse, as repo/cmd are required and # will spit out an error if allowed to be parsed first. if '--version' in argv: @@ -244,9 +242,11 @@ def make_r2d(argv=None): extra=dict(phase='failed')) sys.exit(1) - if args.image_name: r2d.output_image_spec = args.image_name + else: + # we will pick a name after fetching the repository + r2d.output_image_spec = "" r2d.json_logs = args.json_logs @@ -343,5 +343,6 @@ def main(): r2d.log.exception(e) sys.exit(1) + if __name__ == '__main__': main() diff --git a/repo2docker/app.py b/repo2docker/app.py index 12f9ace08..9e664ccda 100644 --- a/repo2docker/app.py +++ b/repo2docker/app.py @@ -13,7 +13,6 @@ import logging import os import pwd -import subprocess import shutil import tempfile import time @@ -31,8 +30,7 @@ from . import __version__ from .buildpacks import ( PythonBuildPack, DockerBuildPack, LegacyBinderDockerBuildPack, - CondaBuildPack, JuliaBuildPack, BaseImage, - RBuildPack, NixBuildPack + CondaBuildPack, JuliaBuildPack, RBuildPack, NixBuildPack ) from . import contentproviders from .utils import ( @@ -364,7 +362,21 @@ def fetch(self, url, ref, checkout_path): spec, checkout_path, yield_output=self.json_logs): self.log.info(log_line, extra=dict(phase='fetching')) - + if not self.output_image_spec: + self.output_image_spec = ( + 'r2d' + escapism.escape(self.repo, escape_char='-').lower() + ) + # if we are building from a subdirectory include that in the + # image name so we can tell builds from different sub-directories + # apart. + if self.subdir: + self.output_image_spec += ( + escapism.escape(self.subdir, escape_char='-').lower() + ) + if picked_content_provider.content_id is not None: + self.output_image_spec += picked_content_provider.content_id + else: + self.output_image_spec += str(int(time.time())) def json_excepthook(self, etype, evalue, traceback): """Called on an uncaught exception when using json logging @@ -399,15 +411,6 @@ def initialize(self): fmt='%(message)s' ) - if self.output_image_spec == "": - # Attempt to set a sane default! - # HACK: Provide something more descriptive? - self.output_image_spec = ( - 'r2d' + - escapism.escape(self.repo, escape_char='-').lower() + - str(int(time.time())) - ) - if self.dry_run and (self.run or self.push): raise ValueError("Cannot push or run image if we are not building it") @@ -546,6 +549,20 @@ def _get_free_port(self): s.close() return port + def find_image(self): + # if this is a dry run it is Ok for dockerd to be unreachable so we + # always return False for dry runs. + if self.dry_run: + return False + # check if we already have an image for this content + client = docker.APIClient(version='auto', **kwargs_from_env()) + for image in client.images(): + if image['RepoTags'] is not None: + for tag in image['RepoTags']: + if tag == self.output_image_spec + ":latest": + return True + return False + def build(self): """ Build docker image @@ -553,8 +570,8 @@ def build(self): # Check if r2d can connect to docker daemon if not self.dry_run: try: - api_client = docker.APIClient(version='auto', - **kwargs_from_env()) + docker_client = docker.APIClient(version='auto', + **kwargs_from_env()) except DockerException as e: self.log.exception(e) raise @@ -574,6 +591,14 @@ def build(self): try: self.fetch(self.repo, self.ref, checkout_path) + if self.find_image(): + self.log.info("Reusing existing image ({}), not " + "building.".format(self.output_image_spec)) + # no need to build, so skip to the end by `return`ing here + # this will still execute the finally clause and let's us + # avoid having to indent the build code by an extra level + return + if self.subdir: checkout_path = os.path.join(checkout_path, self.subdir) if not os.path.isdir(checkout_path): @@ -610,8 +635,11 @@ def build(self): self.log.info('Using %s builder\n', bp.__class__.__name__, extra=dict(phase='building')) - for l in picked_buildpack.build(api_client, self.output_image_spec, - self.build_memory_limit, build_args, self.cache_from): + for l in picked_buildpack.build(docker_client, + self.output_image_spec, + self.build_memory_limit, + build_args, + self.cache_from): if 'stream' in l: self.log.info(l['stream'], extra=dict(phase='building')) @@ -624,8 +652,9 @@ def build(self): else: self.log.info(json.dumps(l), extra=dict(phase='building')) + finally: - # Cheanup checkout if necessary + # Cleanup checkout if necessary if self.cleanup_checkout: shutil.rmtree(checkout_path, ignore_errors=True) diff --git a/repo2docker/contentproviders/base.py b/repo2docker/contentproviders/base.py index 9b9863258..7cdf0d5b0 100644 --- a/repo2docker/contentproviders/base.py +++ b/repo2docker/contentproviders/base.py @@ -17,6 +17,20 @@ class ContentProvider: def __init__(self): self.log = logging.getLogger("repo2docker") + @property + def content_id(self): + """A unique ID to represent the version of the content. + This ID is used to name the built images. If the ID is the same between + two runs of repo2docker we will reuse an existing image (if it exists). + By providing an ID that summarizes the content we can reuse existing + images and speed up build times. A good ID is the revision of a Git + repository or a hash computed from all the content. + The type content ID can be any string. + To disable this behaviour set this property to `None` in which case + a fresh image will always be built. + """ + return None + def detect(self, repo, ref=None, extra_args=None): """Determine compatibility between source and this provider. diff --git a/repo2docker/contentproviders/git.py b/repo2docker/contentproviders/git.py index 1060a670c..fdade7d0e 100644 --- a/repo2docker/contentproviders/git.py +++ b/repo2docker/contentproviders/git.py @@ -44,3 +44,14 @@ def fetch(self, spec, output_dir, yield_output=False): cwd=output_dir, capture=yield_output): yield line + + cmd = ['git', 'rev-parse', 'HEAD'] + sha1 = subprocess.Popen(cmd, stdout=subprocess.PIPE, cwd=output_dir) + self._sha1 = sha1.stdout.read().decode().strip() + + @property + def content_id(self): + """A unique ID to represent the version of the content. + Uses the first seven characters of the git commit ID of the repository. + """ + return self._sha1[:7] diff --git a/tests/conftest.py b/tests/conftest.py index 36112b5bd..8d0fa5e5e 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -12,8 +12,11 @@ import pipes import shlex import requests +import subprocess import time +from tempfile import TemporaryDirectory + import pytest import yaml @@ -77,6 +80,35 @@ def run_test(args): return run_test +@pytest.fixture() +def git_repo(): + """ + Make a dummy git repo in which user can perform git operations + + Should be used as a contextmanager, it will delete directory when done + """ + with TemporaryDirectory() as gitdir: + subprocess.check_call(['git', 'init'], cwd=gitdir) + yield gitdir + + +@pytest.fixture() +def repo_with_content(git_repo): + """Create a git repository with content""" + with open(os.path.join(git_repo, 'test'), 'w') as f: + f.write("Hello") + + subprocess.check_call(['git', 'add', 'test'], cwd=git_repo) + subprocess.check_call(['git', 'commit', '-m', 'Test commit'], + cwd=git_repo) + # get the commit's SHA1 + sha1 = subprocess.Popen(['git', 'rev-parse', 'HEAD'], + stdout=subprocess.PIPE, cwd=git_repo) + sha1 = sha1.stdout.read().decode().strip() + + yield git_repo, sha1 + + class Repo2DockerTest(pytest.Function): """A pytest.Item for running repo2docker""" def __init__(self, name, parent, args): diff --git a/tests/unit/contentproviders/test_git.py b/tests/unit/contentproviders/test_git.py index 023e2e490..e377f5621 100644 --- a/tests/unit/contentproviders/test_git.py +++ b/tests/unit/contentproviders/test_git.py @@ -1,50 +1,34 @@ -from contextlib import contextmanager import os -import subprocess import pytest from tempfile import TemporaryDirectory from repo2docker.contentproviders import Git -@contextmanager -def git_repo(): - """ - Makes a dummy git repo in which user can perform git operations - - Should be used as a contextmanager, it will delete directory when done - """ - - with TemporaryDirectory() as gitdir: - subprocess.check_call(['git', 'init'], cwd=gitdir) - yield gitdir - - -def test_clone(): +def test_clone(repo_with_content): """Test simple git clone to a target dir""" - with git_repo() as upstream: - with open(os.path.join(upstream, 'test'), 'w') as f: - f.write("Hello") + upstream, sha1 = repo_with_content - subprocess.check_call(['git', 'add', 'test'], cwd=upstream) - subprocess.check_call(['git', 'commit', '-m', 'Test commit'], - cwd=upstream) + with TemporaryDirectory() as clone_dir: + spec = {'repo': upstream} + git_content = Git() + for _ in git_content.fetch(spec, clone_dir): + pass + assert os.path.exists(os.path.join(clone_dir, 'test')) + + assert git_content.content_id == sha1[:7] - with TemporaryDirectory() as clone_dir: - spec = {'repo': upstream} - for _ in Git().fetch(spec, clone_dir): - pass - assert os.path.exists(os.path.join(clone_dir, 'test')) -def test_bad_ref(): +def test_bad_ref(repo_with_content): """ Test trying to checkout a ref that doesn't exist """ - with git_repo() as upstream: - with TemporaryDirectory() as clone_dir: - spec = {'repo': upstream, 'ref': 'does-not-exist'} - with pytest.raises(ValueError): - for _ in Git().fetch(spec, clone_dir): - pass + upstream, sha1 = repo_with_content + with TemporaryDirectory() as clone_dir: + spec = {'repo': upstream, 'ref': 'does-not-exist'} + with pytest.raises(ValueError): + for _ in Git().fetch(spec, clone_dir): + pass + def test_always_accept(): # The git content provider should always accept a spec diff --git a/tests/unit/contentproviders/test_local.py b/tests/unit/contentproviders/test_local.py index 8212adef9..62b5eddb8 100644 --- a/tests/unit/contentproviders/test_local.py +++ b/tests/unit/contentproviders/test_local.py @@ -24,6 +24,13 @@ def test_not_detect_local_file(): assert spec is None, spec +def test_content_id_is_None(): + # content_id property should always be None for local content provider + # as we rely on the caching done by docker + local = Local() + assert local.content_id is None + + def test_content_available(): # create a directory with files, check they are available in the output # directory @@ -31,7 +38,11 @@ def test_content_available(): with open(os.path.join(d, 'test'), 'w') as f: f.write("Hello") + local = Local() spec = {'path': d} - for _ in Local().fetch(spec, d): + for _ in local.fetch(spec, d): pass assert os.path.exists(os.path.join(d, 'test')) + # content_id property should always be None for local content provider + # as we rely on the caching done by docker + assert local.content_id is None diff --git a/tests/unit/test_app.py b/tests/unit/test_app.py new file mode 100644 index 000000000..e828ed9ce --- /dev/null +++ b/tests/unit/test_app.py @@ -0,0 +1,74 @@ +from tempfile import TemporaryDirectory +from unittest.mock import patch + +import escapism + +from repo2docker.app import Repo2Docker +from repo2docker.__main__ import make_r2d + + +def test_find_image(): + images = [{'RepoTags': ['some-org/some-repo:latest']}] + + with patch('repo2docker.app.docker.APIClient') as FakeDockerClient: + instance = FakeDockerClient.return_value + instance.images.return_value = images + + r2d = Repo2Docker() + r2d.output_image_spec = 'some-org/some-repo' + assert r2d.find_image() + + instance.images.assert_called_with() + + +def test_dont_find_image(): + images = [{'RepoTags': ['some-org/some-image-name:latest']}] + + with patch('repo2docker.app.docker.APIClient') as FakeDockerClient: + instance = FakeDockerClient.return_value + instance.images.return_value = images + + r2d = Repo2Docker() + r2d.output_image_spec = 'some-org/some-other-image-name' + assert not r2d.find_image() + + instance.images.assert_called_with() + + +def test_image_name_remains_unchanged(): + # if we specify an image name, it should remain unmodified + with TemporaryDirectory() as src: + app = Repo2Docker() + argv = ['--image-name', 'a-special-name', '--no-build', src] + app = make_r2d(argv) + + app.start() + + assert app.output_image_spec == 'a-special-name' + + +def test_image_name_contains_sha1(repo_with_content): + upstream, sha1 = repo_with_content + app = Repo2Docker() + # force selection of the git content provider by prefixing path with + # file://. This is important as the Local content provider does not + # store the SHA1 in the repo spec + argv = ['--no-build', 'file://' + upstream] + app = make_r2d(argv) + + app.start() + + assert app.output_image_spec.endswith(sha1[:7]) + + +def test_local_dir_image_name(repo_with_content): + upstream, sha1 = repo_with_content + app = Repo2Docker() + argv = ['--no-build', upstream] + app = make_r2d(argv) + + app.start() + + assert app.output_image_spec.startswith( + 'r2d' + escapism.escape(upstream, escape_char='-').lower() + ) diff --git a/tests/unit/test_memlimit.py b/tests/unit/test_memlimit.py index b8c53c6c3..f636da7ce 100644 --- a/tests/unit/test_memlimit.py +++ b/tests/unit/test_memlimit.py @@ -66,7 +66,6 @@ def test_memlimit_nondockerfile(tmpdir, test, mem_limit, mem_allocate_mb, expect assert success == expected - def test_memlimit_same_postbuild(): """ Validate that the postBuild files for dockerfile & nondockerfile are same diff --git a/tests/unit/test_subdir.py b/tests/unit/test_subdir.py index 89f0884b3..b33e91278 100644 --- a/tests/unit/test_subdir.py +++ b/tests/unit/test_subdir.py @@ -2,7 +2,8 @@ Test if the subdirectory is correctly navigated to """ import os -import logging + +import escapism import pytest from repo2docker.app import Repo2Docker @@ -23,10 +24,20 @@ def test_subdir(run_repo2docker): assert cwd == os.getcwd(), "We should be back in %s" % cwd +def test_subdir_in_image_name(): + app = Repo2Docker( + repo=TEST_REPO, + subdir='a directory', + ) + app.initialize() + app.build() + + escaped_dirname = escapism.escape('a directory', escape_char='-').lower() + assert escaped_dirname in app.output_image_spec + + def test_subdir_invalid(caplog): # test an error is raised when requesting a non existent subdir - #caplog.set_level(logging.INFO, logger='Repo2Docker') - app = Repo2Docker( repo=TEST_REPO, subdir='invalid-sub-dir',