diff --git a/repo2docker/buildpacks/base.py b/repo2docker/buildpacks/base.py index 0bb9703bb..140a1d331 100644 --- a/repo2docker/buildpacks/base.py +++ b/repo2docker/buildpacks/base.py @@ -132,6 +132,11 @@ {% endfor -%} {% endif -%} +{% if preassemble_script_directives -%} +USER root +RUN chown -R ${NB_USER}:${NB_USER} ${REPO_DIR} +{% endif -%} + {% for sd in preassemble_script_directives -%} {{ sd }} {% endfor %} @@ -711,12 +716,8 @@ def get_preassemble_scripts(self): except FileNotFoundError: pass - return scripts - - def get_assemble_scripts(self): - assemble_scripts = [] if "py" in self.stencila_contexts: - assemble_scripts.extend( + scripts.extend( [ ( "${NB_USER}", @@ -728,7 +729,7 @@ def get_assemble_scripts(self): ] ) if self.stencila_manifest_dir: - assemble_scripts.extend( + scripts.extend( [ ( "${NB_USER}", @@ -741,7 +742,11 @@ def get_assemble_scripts(self): ) ] ) - return assemble_scripts + return scripts + + def get_assemble_scripts(self): + """Return directives to run after the entire repository has been added to the image""" + return [] def get_post_build_scripts(self): post_build = self.binder_path("postBuild") diff --git a/repo2docker/buildpacks/conda/__init__.py b/repo2docker/buildpacks/conda/__init__.py index 8e818bc4d..c781f00a0 100644 --- a/repo2docker/buildpacks/conda/__init__.py +++ b/repo2docker/buildpacks/conda/__init__.py @@ -6,6 +6,7 @@ from ruamel.yaml import YAML from ..base import BaseImage +from ...utils import is_local_pip_requirement # pattern for parsing conda dependency line PYTHON_REGEX = re.compile(r"python\s*=+\s*([\d\.]*)") @@ -127,6 +128,50 @@ def get_build_script_files(self): files.update(super().get_build_script_files()) return files + _environment_yaml = None + + @property + def environment_yaml(self): + if self._environment_yaml is not None: + return self._environment_yaml + + environment_yml = self.binder_path("environment.yml") + if not os.path.exists(environment_yml): + self._environment_yaml = {} + return self._environment_yaml + + with open(environment_yml) as f: + env = YAML().load(f) + # check if the env file is empty, if so instantiate an empty dictionary. + if env is None: + env = {} + # check if the env file provided a dict-like thing not a list or other data structure. + if not isinstance(env, Mapping): + raise TypeError( + "environment.yml should contain a dictionary. Got %r" % type(env) + ) + self._environment_yaml = env + + return self._environment_yaml + + @property + def _should_preassemble_env(self): + """Check for local pip requirements in environment.yaml + + If there are any local references, e.g. `-e .`, + stage the whole repo prior to installation. + """ + dependencies = self.environment_yaml.get("dependencies", []) + pip_requirements = None + for dep in dependencies: + if isinstance(dep, dict) and dep.get("pip"): + pip_requirements = dep["pip"] + if isinstance(pip_requirements, list): + for line in pip_requirements: + if is_local_pip_requirement(line): + return False + return True + @property def python_version(self): """Detect the Python version for a given `environment.yml` @@ -135,31 +180,17 @@ def python_version(self): or a Falsy empty string '' if not found. """ - environment_yml = self.binder_path("environment.yml") - if not os.path.exists(environment_yml): - return "" - if not hasattr(self, "_python_version"): py_version = None - with open(environment_yml) as f: - env = YAML().load(f) - # check if the env file is empty, if so instantiate an empty dictionary. - if env is None: - env = {} - # check if the env file provided a dick-like thing not a list or other data structure. - if not isinstance(env, Mapping): - raise TypeError( - "environment.yml should contain a dictionary. Got %r" - % type(env) - ) - for dep in env.get("dependencies", []): - if not isinstance(dep, str): - continue - match = PYTHON_REGEX.match(dep) - if not match: - continue - py_version = match.group(1) - break + env = self.environment_yaml + for dep in env.get("dependencies", []): + if not isinstance(dep, str): + continue + match = PYTHON_REGEX.match(dep) + if not match: + continue + py_version = match.group(1) + break # extract major.minor if py_version: @@ -178,14 +209,27 @@ def py2(self): """Am I building a Python 2 kernel environment?""" return self.python_version and self.python_version.split(".")[0] == "2" - def get_assemble_scripts(self): + def get_preassemble_script_files(self): + """preassembly only requires environment.yml + + enables caching assembly result even when + repo contents change + """ + assemble_files = super().get_preassemble_script_files() + if self._should_preassemble_env: + environment_yml = self.binder_path("environment.yml") + if os.path.exists(environment_yml): + assemble_files[environment_yml] = environment_yml + return assemble_files + + def get_env_scripts(self): """Return series of build-steps specific to this source repository. """ - assembly_scripts = [] + scripts = [] environment_yml = self.binder_path("environment.yml") env_prefix = "${KERNEL_PYTHON_PREFIX}" if self.py2 else "${NB_PYTHON_PREFIX}" if os.path.exists(environment_yml): - assembly_scripts.append( + scripts.append( ( "${NB_USER}", r""" @@ -197,7 +241,19 @@ def get_assemble_scripts(self): ), ) ) - return super().get_assemble_scripts() + assembly_scripts + return scripts + + def get_preassemble_scripts(self): + scripts = super().get_preassemble_scripts() + if self._should_preassemble_env: + scripts.extend(self.get_env_scripts()) + return scripts + + def get_assemble_scripts(self): + scripts = super().get_assemble_scripts() + if not self._should_preassemble_env: + scripts.extend(self.get_env_scripts()) + return scripts def detect(self): """Check if current repo should be built with the Conda BuildPack. diff --git a/repo2docker/buildpacks/pipfile/__init__.py b/repo2docker/buildpacks/pipfile/__init__.py index e91e63aa5..b38db08a8 100644 --- a/repo2docker/buildpacks/pipfile/__init__.py +++ b/repo2docker/buildpacks/pipfile/__init__.py @@ -65,6 +65,24 @@ def python_version(self): self._python_version = self.major_pythons["3"] return self._python_version + def get_preassemble_script_files(self): + """Return files needed for preassembly""" + files = super().get_preassemble_script_files() + for name in ("requirements3.txt", "Pipfile", "Pipfile.lock"): + path = self.binder_path(name) + if os.path.exists(path): + files[path] = path + return files + + def get_preassemble_scripts(self): + """scripts to run prior to staging the repo contents""" + scripts = super().get_preassemble_scripts() + # install pipenv to install dependencies within Pipfile.lock or Pipfile + scripts.append( + ("${NB_USER}", "${KERNEL_PYTHON_PREFIX}/bin/pip install pipenv==2018.11.26") + ) + return scripts + def get_assemble_scripts(self): """Return series of build-steps specific to this repository. """ @@ -104,11 +122,6 @@ def get_assemble_scripts(self): # my_package_example = {path=".", editable=true} working_directory = self.binder_dir or "." - # install pipenv to install dependencies within Pipfile.lock or Pipfile - assemble_scripts.append( - ("${NB_USER}", "${KERNEL_PYTHON_PREFIX}/bin/pip install pipenv==2018.11.26") - ) - # NOTES: # - Without prioritizing the PATH to KERNEL_PYTHON_PREFIX over # NB_SERVER_PYTHON_PREFIX, 'pipenv' draws the wrong conclusion about diff --git a/repo2docker/buildpacks/python/__init__.py b/repo2docker/buildpacks/python/__init__.py index 4db5c72d8..72029d78b 100644 --- a/repo2docker/buildpacks/python/__init__.py +++ b/repo2docker/buildpacks/python/__init__.py @@ -2,6 +2,7 @@ import os from ..conda import CondaBuildPack +from ...utils import is_local_pip_requirement class PythonBuildPack(CondaBuildPack): @@ -34,24 +35,22 @@ def python_version(self): self._python_version = py_version return self._python_version - def get_assemble_scripts(self): - """Return series of build-steps specific to this repository. + def _get_pip_scripts(self): + """Get pip install scripts + + added to preassemble unless local references are found, + in which case this happens in assemble. """ - # If we have a runtime.txt & that's set to python-2.7, - # requirements.txt will be installed in the *kernel* env - # and requirements3.txt (if it exists) - # will be installed in the python 3 notebook server env. - assemble_scripts = super().get_assemble_scripts() - setup_py = "setup.py" # KERNEL_PYTHON_PREFIX is the env with the kernel, # whether it's distinct from the notebook or the same. pip = "${KERNEL_PYTHON_PREFIX}/bin/pip" + scripts = [] if self.py2: # using python 2 kernel, # requirements3.txt allows installation in the notebook server env nb_requirements_file = self.binder_path("requirements3.txt") if os.path.exists(nb_requirements_file): - assemble_scripts.append( + scripts.append( ( "${NB_USER}", # want the $NB_PYHTON_PREFIX environment variable, not for @@ -65,12 +64,65 @@ def get_assemble_scripts(self): # install requirements.txt in the kernel env requirements_file = self.binder_path("requirements.txt") if os.path.exists(requirements_file): - assemble_scripts.append( + scripts.append( ( "${NB_USER}", '{} install --no-cache-dir -r "{}"'.format(pip, requirements_file), ) ) + return scripts + + @property + def _should_preassemble_pip(self): + """Peek in requirements.txt to determine if we can assemble from only env files + + If there are any local references, e.g. `-e .`, + stage the whole repo prior to installation. + """ + if not os.path.exists("binder") and os.path.exists("setup.py"): + # can't install from subset if we're using setup.py + return False + for name in ("requirements.txt", "requirements3.txt"): + requirements_txt = self.binder_path(name) + if not os.path.exists(requirements_txt): + continue + with open(requirements_txt) as f: + for line in f: + if is_local_pip_requirement(line): + return False + + # didn't find any local references, + # allow assembly from subset + return True + + def get_preassemble_script_files(self): + assemble_files = super().get_preassemble_script_files() + for name in ("requirements.txt", "requirements3.txt"): + requirements_txt = self.binder_path(name) + if os.path.exists(requirements_txt): + assemble_files[requirements_txt] = requirements_txt + return assemble_files + + def get_preassemble_scripts(self): + """Return scripts to run before adding the full repository""" + scripts = super().get_preassemble_scripts() + if self._should_preassemble_pip: + scripts.extend(self._get_pip_scripts()) + return scripts + + def get_assemble_scripts(self): + """Return series of build steps that require the full repository""" + # If we have a runtime.txt & that's set to python-2.7, + # requirements.txt will be installed in the *kernel* env + # and requirements3.txt (if it exists) + # will be installed in the python 3 notebook server env. + assemble_scripts = super().get_assemble_scripts() + setup_py = "setup.py" + # KERNEL_PYTHON_PREFIX is the env with the kernel, + # whether it's distinct from the notebook or the same. + pip = "${KERNEL_PYTHON_PREFIX}/bin/pip" + if not self._should_preassemble_pip: + assemble_scripts.extend(self._get_pip_scripts()) # setup.py exists *and* binder dir is not used if not self.binder_dir and os.path.exists(setup_py): diff --git a/repo2docker/utils.py b/repo2docker/utils.py index a53ba5ef1..1c0a43341 100644 --- a/repo2docker/utils.py +++ b/repo2docker/utils.py @@ -431,3 +431,29 @@ def normalize_doi(val): (e.g. https://doi.org/10.1234/jshd123)""" m = doi_regexp.match(val) return m.group(2) + + +def is_local_pip_requirement(line): + """Return whether a pip requirement (e.g. in requirements.txt file) references a local file""" + # trim comments and skip empty lines + line = line.split("#", 1)[0].strip() + if not line: + return False + if line.startswith(("-r", "-c")): + # local -r or -c references break isolation + return True + # strip off `-e, etc.` + if line.startswith("-"): + line = line.split(None, 1)[1] + if "file://" in line: + # file references break isolation + return True + if "://" in line: + # handle git://../local/file + path = line.split("://", 1)[1] + else: + path = line + if path.startswith("."): + # references a local file + return True + return False diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py index 6ad1ff281..951f2085d 100644 --- a/tests/unit/test_utils.py +++ b/tests/unit/test_utils.py @@ -110,3 +110,20 @@ def test_normalize_doi(): assert utils.normalize_doi("http://doi.org/10.1234/jshd123") == "10.1234/jshd123" assert utils.normalize_doi("https://doi.org/10.1234/jshd123") == "10.1234/jshd123" assert utils.normalize_doi("http://dx.doi.org/10.1234/jshd123") == "10.1234/jshd123" + + +@pytest.mark.parametrize( + "req, is_local", + [ + ("-r requirements.txt", True), + ("-e .", True), + ("file://subdir", True), + ("file://./subdir", True), + ("git://github.com/jupyter/repo2docker", False), + ("git+https://github.com/jupyter/repo2docker", False), + ("numpy", False), + ("# -e .", False), + ], +) +def test_local_pip_requirement(req, is_local): + assert utils.is_local_pip_requirement(req) == is_local