Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

[MRG] preassembly for conda/python #743

Merged
merged 12 commits into from Jul 19, 2019
19 changes: 12 additions & 7 deletions repo2docker/buildpacks/base.py
Expand Up @@ -132,6 +132,11 @@
{% endfor -%}
{% endif -%}

{% if preassemble_script_directives -%}
USER root
RUN chown -R ${NB_USER}:${NB_USER} ${REPO_DIR}
{% endif -%}

{% for sd in preassemble_script_directives -%}
{{ sd }}
{% endfor %}
Expand Down Expand Up @@ -711,12 +716,8 @@ def get_preassemble_scripts(self):
except FileNotFoundError:
pass

return scripts

def get_assemble_scripts(self):
assemble_scripts = []
if "py" in self.stencila_contexts:
assemble_scripts.extend(
scripts.extend(
[
(
"${NB_USER}",
Expand All @@ -728,7 +729,7 @@ def get_assemble_scripts(self):
]
)
if self.stencila_manifest_dir:
assemble_scripts.extend(
scripts.extend(
[
(
"${NB_USER}",
Expand All @@ -741,7 +742,11 @@ def get_assemble_scripts(self):
)
]
)
return assemble_scripts
return scripts

def get_assemble_scripts(self):
    """Return directives to run after the entire repository has been added to the image"""
    # The base image has no repo-dependent assembly steps; buildpack
    # subclasses override this to add their own directives.
    return []

def get_post_build_scripts(self):
post_build = self.binder_path("postBuild")
Expand Down
110 changes: 83 additions & 27 deletions repo2docker/buildpacks/conda/__init__.py
Expand Up @@ -6,6 +6,7 @@
from ruamel.yaml import YAML

from ..base import BaseImage
from ...utils import is_local_pip_requirement

# pattern for parsing conda dependency line
PYTHON_REGEX = re.compile(r"python\s*=+\s*([\d\.]*)")
Expand Down Expand Up @@ -127,6 +128,50 @@ def get_build_script_files(self):
files.update(super().get_build_script_files())
return files

# cache for the parsed environment.yml; None means "not loaded yet"
_environment_yaml = None

@property
def environment_yaml(self):
    """Parsed contents of environment.yml as a mapping (cached).

    Returns an empty dict when environment.yml is absent or empty.
    Raises TypeError when the file's top level is not a mapping.
    """
    if self._environment_yaml is not None:
        return self._environment_yaml

    environment_yml = self.binder_path("environment.yml")
    if not os.path.exists(environment_yml):
        # no env file: cache and return an empty mapping
        self._environment_yaml = {}
    else:
        with open(environment_yml) as f:
            env = YAML().load(f)
        if env is None:
            # an empty file parses to None; normalize to an empty dict
            env = {}
        if not isinstance(env, Mapping):
            # reject lists or scalars at the top level
            raise TypeError(
                "environment.yml should contain a dictionary. Got %r" % type(env)
            )
        self._environment_yaml = env

    return self._environment_yaml

@property
def _should_preassemble_env(self):
    """Check for local pip requirements in environment.yml

    If there are any local references, e.g. `-e .`,
    stage the whole repo prior to installation.

    Returns True when the conda/pip environment can be installed from
    environment.yml alone (before the repo contents are added).
    """
    dependencies = self.environment_yaml.get("dependencies", [])
    for dep in dependencies:
        # pip requirements appear as a dict entry: {"pip": [...]}
        # FIX: inspect every pip section; previously only the last
        # dict entry in the list was checked.
        if not isinstance(dep, dict):
            continue
        pip_requirements = dep.get("pip")
        if not isinstance(pip_requirements, list):
            continue
        for line in pip_requirements:
            if is_local_pip_requirement(line):
                return False
    return True

@property
def python_version(self):
"""Detect the Python version for a given `environment.yml`
Expand All @@ -135,31 +180,17 @@ def python_version(self):
or a Falsy empty string '' if not found.

"""
environment_yml = self.binder_path("environment.yml")
if not os.path.exists(environment_yml):
return ""

if not hasattr(self, "_python_version"):
py_version = None
with open(environment_yml) as f:
env = YAML().load(f)
# check if the env file is empty, if so instantiate an empty dictionary.
if env is None:
env = {}
# check if the env file provided a dick-like thing not a list or other data structure.
if not isinstance(env, Mapping):
raise TypeError(
"environment.yml should contain a dictionary. Got %r"
% type(env)
)
for dep in env.get("dependencies", []):
if not isinstance(dep, str):
continue
match = PYTHON_REGEX.match(dep)
if not match:
continue
py_version = match.group(1)
break
env = self.environment_yaml
for dep in env.get("dependencies", []):
if not isinstance(dep, str):
continue
match = PYTHON_REGEX.match(dep)
if not match:
continue
py_version = match.group(1)
break

# extract major.minor
if py_version:
Expand All @@ -178,14 +209,27 @@ def py2(self):
"""Am I building a Python 2 kernel environment?"""
return self.python_version and self.python_version.split(".")[0] == "2"

def get_assemble_scripts(self):
def get_preassemble_script_files(self):
    """preassembly only requires environment.yml

    enables caching assembly result even when
    repo contents change
    """
    files = super().get_preassemble_script_files()
    if not self._should_preassemble_env:
        # local references in the env file: everything happens in assembly
        return files
    env_path = self.binder_path("environment.yml")
    if os.path.exists(env_path):
        files[env_path] = env_path
    return files

def get_env_scripts(self):
"""Return series of build-steps specific to this source repository.
"""
assembly_scripts = []
scripts = []
environment_yml = self.binder_path("environment.yml")
env_prefix = "${KERNEL_PYTHON_PREFIX}" if self.py2 else "${NB_PYTHON_PREFIX}"
if os.path.exists(environment_yml):
assembly_scripts.append(
scripts.append(
(
"${NB_USER}",
r"""
Expand All @@ -197,7 +241,19 @@ def get_assemble_scripts(self):
),
)
)
return super().get_assemble_scripts() + assembly_scripts
return scripts

def get_preassemble_scripts(self):
    """Build steps that run before the full repository is staged."""
    scripts = super().get_preassemble_scripts()
    if not self._should_preassemble_env:
        return scripts
    # env file has no local references: install the env up front for caching
    return scripts + self.get_env_scripts()

def get_assemble_scripts(self):
    """Build steps that run after the full repository is staged."""
    scripts = super().get_assemble_scripts()
    if self._should_preassemble_env:
        # environment was already installed during preassembly
        return scripts
    return scripts + self.get_env_scripts()

def detect(self):
"""Check if current repo should be built with the Conda BuildPack.
Expand Down
23 changes: 18 additions & 5 deletions repo2docker/buildpacks/pipfile/__init__.py
Expand Up @@ -65,6 +65,24 @@ def python_version(self):
self._python_version = self.major_pythons["3"]
return self._python_version

def get_preassemble_script_files(self):
    """Return files needed for preassembly"""
    files = super().get_preassemble_script_files()
    candidates = ("requirements3.txt", "Pipfile", "Pipfile.lock")
    # stage only the dependency files that actually exist in the repo
    files.update(
        {p: p for p in map(self.binder_path, candidates) if os.path.exists(p)}
    )
    return files

def get_preassemble_scripts(self):
    """scripts to run prior to staging the repo contents"""
    # install pipenv to install dependencies within Pipfile.lock or Pipfile
    pipenv_install = (
        "${NB_USER}",
        "${KERNEL_PYTHON_PREFIX}/bin/pip install pipenv==2018.11.26",
    )
    return super().get_preassemble_scripts() + [pipenv_install]

def get_assemble_scripts(self):
"""Return series of build-steps specific to this repository.
"""
Expand Down Expand Up @@ -104,11 +122,6 @@ def get_assemble_scripts(self):
# my_package_example = {path=".", editable=true}
working_directory = self.binder_dir or "."

# install pipenv to install dependencies within Pipfile.lock or Pipfile
assemble_scripts.append(
("${NB_USER}", "${KERNEL_PYTHON_PREFIX}/bin/pip install pipenv==2018.11.26")
)

# NOTES:
# - Without prioritizing the PATH to KERNEL_PYTHON_PREFIX over
# NB_SERVER_PYTHON_PREFIX, 'pipenv' draws the wrong conclusion about
Expand Down
72 changes: 62 additions & 10 deletions repo2docker/buildpacks/python/__init__.py
Expand Up @@ -2,6 +2,7 @@
import os

from ..conda import CondaBuildPack
from ...utils import is_local_pip_requirement


class PythonBuildPack(CondaBuildPack):
Expand Down Expand Up @@ -34,24 +35,22 @@ def python_version(self):
self._python_version = py_version
return self._python_version

def get_assemble_scripts(self):
"""Return series of build-steps specific to this repository.
def _get_pip_scripts(self):
"""Get pip install scripts

added to preassemble unless local references are found,
in which case this happens in assemble.
"""
# If we have a runtime.txt & that's set to python-2.7,
# requirements.txt will be installed in the *kernel* env
# and requirements3.txt (if it exists)
# will be installed in the python 3 notebook server env.
assemble_scripts = super().get_assemble_scripts()
setup_py = "setup.py"
# KERNEL_PYTHON_PREFIX is the env with the kernel,
# whether it's distinct from the notebook or the same.
pip = "${KERNEL_PYTHON_PREFIX}/bin/pip"
scripts = []
if self.py2:
# using python 2 kernel,
# requirements3.txt allows installation in the notebook server env
nb_requirements_file = self.binder_path("requirements3.txt")
if os.path.exists(nb_requirements_file):
assemble_scripts.append(
scripts.append(
(
"${NB_USER}",
# want the $NB_PYHTON_PREFIX environment variable, not for
Expand All @@ -65,12 +64,65 @@ def get_assemble_scripts(self):
# install requirements.txt in the kernel env
requirements_file = self.binder_path("requirements.txt")
if os.path.exists(requirements_file):
assemble_scripts.append(
scripts.append(
(
"${NB_USER}",
'{} install --no-cache-dir -r "{}"'.format(pip, requirements_file),
)
)
return scripts

@property
def _should_preassemble_pip(self):
    """Peek in requirements.txt to determine if we can assemble from only env files

    If there are any local references, e.g. `-e .`,
    stage the whole repo prior to installation.
    """
    # a root-level setup.py (no binder/ dir) requires the full repo
    if os.path.exists("setup.py") and not os.path.exists("binder"):
        return False

    for name in ("requirements.txt", "requirements3.txt"):
        req_path = self.binder_path(name)
        if not os.path.exists(req_path):
            continue
        with open(req_path) as f:
            if any(is_local_pip_requirement(line) for line in f):
                # a local reference means install must wait for assembly
                return False

    # no local references found; safe to install from the requirements
    # files alone, before the repo contents are added
    return True

def get_preassemble_script_files(self):
    """Stage requirements files (when present) for preassembly."""
    files = super().get_preassemble_script_files()
    existing = (
        path
        for path in (
            self.binder_path(name)
            for name in ("requirements.txt", "requirements3.txt")
        )
        if os.path.exists(path)
    )
    for path in existing:
        files[path] = path
    return files

def get_preassemble_scripts(self):
    """Return scripts to run before adding the full repository"""
    scripts = super().get_preassemble_scripts()
    if not self._should_preassemble_pip:
        # local references: pip install is deferred to assembly
        return scripts
    return scripts + self._get_pip_scripts()

def get_assemble_scripts(self):
"""Return series of build steps that require the full repository"""
# If we have a runtime.txt & that's set to python-2.7,
# requirements.txt will be installed in the *kernel* env
# and requirements3.txt (if it exists)
# will be installed in the python 3 notebook server env.
assemble_scripts = super().get_assemble_scripts()
setup_py = "setup.py"
# KERNEL_PYTHON_PREFIX is the env with the kernel,
# whether it's distinct from the notebook or the same.
pip = "${KERNEL_PYTHON_PREFIX}/bin/pip"
if not self._should_preassemble_pip:
assemble_scripts.extend(self._get_pip_scripts())

# setup.py exists *and* binder dir is not used
if not self.binder_dir and os.path.exists(setup_py):
Expand Down
26 changes: 26 additions & 0 deletions repo2docker/utils.py
Expand Up @@ -431,3 +431,29 @@ def normalize_doi(val):
(e.g. https://doi.org/10.1234/jshd123)"""
m = doi_regexp.match(val)
return m.group(2)


def is_local_pip_requirement(line):
    """Return whether a pip requirement (e.g. in requirements.txt file) references a local file

    Local references (editable installs of `.`, `file://` URLs, nested
    `-r`/`-c` files) mean installation cannot run from a staged subset
    of the repository.

    FIX: previously `line.split(None, 1)[1]` raised IndexError for
    `=`-style options (e.g. `--editable=.`) and for bare flags with no
    value (e.g. `--no-cache-dir`).
    """
    # trim comments and skip empty lines
    line = line.split("#", 1)[0].strip()
    if not line:
        return False
    if line.startswith(("-r", "-c")):
        # local -r or -c references break isolation
        return True
    # strip off option flags such as `-e` / `--editable`
    if line.startswith("-"):
        if "=" in line.split(None, 1)[0]:
            # `--editable=.` style: the value follows `=`
            line = line.split("=", 1)[1]
        else:
            parts = line.split(None, 1)
            if len(parts) == 1:
                # a bare flag with no value references nothing local
                return False
            line = parts[1]
    if "file://" in line:
        # file references break isolation
        return True
    if "://" in line:
        # handle git://../local/file
        path = line.split("://", 1)[1]
    else:
        path = line
    if path.startswith("."):
        # references a local file
        return True
    return False
17 changes: 17 additions & 0 deletions tests/unit/test_utils.py
Expand Up @@ -110,3 +110,20 @@ def test_normalize_doi():
assert utils.normalize_doi("http://doi.org/10.1234/jshd123") == "10.1234/jshd123"
assert utils.normalize_doi("https://doi.org/10.1234/jshd123") == "10.1234/jshd123"
assert utils.normalize_doi("http://dx.doi.org/10.1234/jshd123") == "10.1234/jshd123"


@pytest.mark.parametrize(
    "req, is_local",
    [
        # nested requirement files are local references
        ("-r requirements.txt", True),
        # editable install of the current directory
        ("-e .", True),
        # file:// URLs always reference the local filesystem
        ("file://subdir", True),
        ("file://./subdir", True),
        # remote VCS references are not local
        ("git://github.com/jupyter/repo2docker", False),
        ("git+https://github.com/jupyter/repo2docker", False),
        # plain package names are not local
        ("numpy", False),
        # commented-out lines are ignored
        ("# -e .", False),
    ],
)
def test_local_pip_requirement(req, is_local):
    # each requirement line must be classified as local / not-local
    assert utils.is_local_pip_requirement(req) == is_local