Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Breaking: Default to traitlets based Build class #1521

Merged
merged 11 commits into from Jan 9, 2023
22 changes: 22 additions & 0 deletions CHANGES.md
Expand Up @@ -33,6 +33,28 @@ If you were previously disabling the image cleaner replace:

- `imageCleaner.host.enabled: false` ➡️ `imageCleaner.enabled: false`

### `binderhub.build.Build` class replaced by `binderhub.build.KubernetesBuildExecutor`

The `binderhub.build.Build` class is replaced by the Traitlets-based `binderhub.build.KubernetesBuildExecutor` class
([#1518](https://github.com/jupyterhub/binderhub/pull/1518),
[#1521](https://github.com/jupyterhub/binderhub/pull/1521)).

The following build configuration properties have moved from `BinderHub` to the build executor classes, and should now be set there using Traitlets in the BinderHub configuration:

- `c.BinderHub.appendix` ➡️ `c.BuildExecutor.appendix`
- `c.BinderHub.sticky_builds` ➡️ `c.KubernetesBuildExecutor.sticky_builds`
- `c.BinderHub.log_tail_lines` ➡️ `c.KubernetesBuildExecutor.log_tail_lines`
- `c.BinderHub.push_secret` ➡️ `c.BuildExecutor.push_secret`
- `c.BinderHub.build_memory_request` ➡️ `c.KubernetesBuildExecutor.memory_request`
- `c.BinderHub.build_memory_limit` ➡️ `c.BuildExecutor.memory_limit`
- `c.BinderHub.build_docker_host` ➡️ `c.KubernetesBuildExecutor.docker_host`
- `c.BinderHub.build_namespace` ➡️ `c.KubernetesBuildExecutor.namespace`
- `c.BinderHub.build_image` ➡️ `c.KubernetesBuildExecutor.build_image`
- `c.BinderHub.build_node_selector` ➡️ `c.KubernetesBuildExecutor.node_selector`

If you have subclassed `binderhub.build.Build` you must update your subclass (including `__init__()` if defined) to inherit from `binderhub.build.KubernetesBuildExecutor`.
The behaviour of the class is otherwise unchanged.

# 0.2.0

# master@{2019-07-01}...master@{2019-10-01}
Expand Down
97 changes: 69 additions & 28 deletions binderhub/app.py
Expand Up @@ -8,6 +8,7 @@
import os
import re
import secrets
import warnings
from binascii import a2b_hex
from concurrent.futures import ThreadPoolExecutor
from glob import glob
Expand Down Expand Up @@ -41,7 +42,7 @@
from traitlets.config import Application

from .base import AboutHandler, Custom404, VersionHandler
from .build import Build, BuildExecutor, KubernetesBuildExecutor
from .build import BuildExecutor, KubernetesBuildExecutor, KubernetesCleaner
from .builder import BuildHandler
from .config import ConfigHandler
from .events import EventLog
Expand Down Expand Up @@ -229,6 +230,8 @@ def _valid_badge_base_url(self, proposal):

appendix = Unicode(
help="""
DEPRECATED: Use c.BuildExecutor.appendix

Appendix to pass to repo2docker

A multi-line string of Docker directives to run.
Expand All @@ -248,6 +251,8 @@ def _valid_badge_base_url(self, proposal):
sticky_builds = Bool(
False,
help="""
DEPRECATED: Use c.KubernetesBuildExecutor.sticky_builds

Attempt to assign builds for the same repository to the same node.

In order to speed up re-builds of a repository all its builds will
Expand All @@ -270,7 +275,7 @@ def _valid_badge_base_url(self, proposal):
)

build_class = Type(
Build,
KubernetesBuildExecutor,
klass=BuildExecutor,
help="""
The class used to build repo2docker images.
Expand All @@ -280,6 +285,15 @@ def _valid_badge_base_url(self, proposal):
config=True,
)

build_cleaner_class = Type(
KubernetesCleaner,
allow_none=True,
help="""
The class used to cleanup builders.
""",
config=True,
)

registry_class = Type(
DockerRegistry,
help="""
Expand Down Expand Up @@ -369,6 +383,8 @@ def _pod_quota_deprecated(self, change):
log_tail_lines = Integer(
100,
help="""
DEPRECATED: Use c.KubernetesBuildExecutor.log_tail_lines

Limit number of log lines to show when connecting to an already running build.
""",
config=True,
Expand All @@ -378,6 +394,8 @@ def _pod_quota_deprecated(self, change):
"binder-build-docker-config",
allow_none=True,
help="""
DEPRECATED: Use c.BuildExecutor.push_secret

A kubernetes secret object that provides credentials for pushing built images.
""",
config=True,
Expand All @@ -401,6 +419,8 @@ def _pod_quota_deprecated(self, change):
build_memory_request = ByteSpecification(
0,
help="""
DEPRECATED: Use c.KubernetesBuildExecutor.memory_request

Amount of memory to request when scheduling a build

0 reserves no memory.
Expand All @@ -416,6 +436,8 @@ def _pod_quota_deprecated(self, change):
build_memory_limit = ByteSpecification(
0,
help="""
DEPRECATED: Use c.BuildExecutor.memory_limit

Max amount of memory allocated for each image build process.

0 sets no limit.
Expand All @@ -440,6 +462,8 @@ def _pod_quota_deprecated(self, change):
"/var/run/docker.sock",
config=True,
help="""
DEPRECATED: Use c.KubernetesBuildExecutor.docker_host

The docker URL repo2docker should use to build the images.

Currently, only paths are supported, and they are expected to be available on
Expand Down Expand Up @@ -518,6 +542,8 @@ def _add_slash(self, proposal):

build_namespace = Unicode(
help="""
DEPRECATED: Use c.KubernetesBuildExecutor.namespace

Kubernetes namespace to spawn build pods in.

Note that the push_secret must refer to a secret in this namespace.
Expand All @@ -532,6 +558,8 @@ def _default_build_namespace(self):
build_image = Unicode(
"quay.io/jupyterhub/repo2docker:2022.10.0",
help="""
DEPRECATED: Use c.KubernetesBuildExecutor.build_image

The repo2docker image to be used for doing builds
""",
config=True,
Expand All @@ -541,6 +569,8 @@ def _default_build_namespace(self):
{},
config=True,
help="""
DEPRECATED: Use c.KubernetesBuildExecutor.node_selector

Select the node where build pod runs on.
""",
)
Expand Down Expand Up @@ -737,6 +767,27 @@ def _template_path_default(self):
help="Origin to use when emitting events. Defaults to hostname of request when empty",
)

_build_config_deprecated_map = {
"appendix": ("BuildExecutor", "appendix"),
"push_secret": ("BuildExecutor", "push_secret"),
"build_memory_limit": ("BuildExecutor", "memory_limit"),
"sticky_builds": ("KubernetesBuildExecutor", "sticky_builds"),
"log_tail_lines": ("KubernetesBuildExecutor", "log_tail_lines"),
"build_memory_request": ("KubernetesBuildExecutor", "memory_request"),
"build_docker_host": ("KubernetesBuildExecutor", "docker_host"),
"build_namespace": ("KubernetesBuildExecutor", "namespace"),
"build_image": ("KubernetesBuildExecutor", "build_image"),
"build_node_selector": ("KubernetesBuildExecutor", "node_selector"),
}

@observe(*_build_config_deprecated_map)
def _build_config_deprecated(self, change):
dest_cls, dest_name = self._build_config_deprecated_map[change.name]
self.log.warning(
"BinderHub.%s is deprecated, use %s.%s", change.name, dest_cls, dest_name
)
self.config[dest_cls][dest_name] = change.new

@staticmethod
def add_url_prefix(prefix, handlers):
"""add a url prefix to handlers"""
Expand Down Expand Up @@ -830,25 +881,22 @@ def initialize(self, *args, **kwargs):

launch_quota = self.launch_quota_class(parent=self, executor=self.executor)

# Construct a Builder so that we can extract parameters such as the
# configuration or the version string to pass to /version and /health handlers
example_builder = self.build_class(parent=self)
Comment on lines +884 to +886
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not confident about this situation, but I'm wondering if it would make practical sense to store this information as class state on the build class itself, instead of as object instance state.

If so, we wouldn't have to create a dummy object, but instead just read the relevant information from the class.

What do you think?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I suspect this relates to the identifier now being made a configurable traitlet. If we go for class state, the identifier field should probably not be a traitlet. It doesn't seem like something that makes sense for users to configure themselves, but rather like something for the author of the BuildExecutor class to declare.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's based on a traitlets value, which AFAIK can't be static. I've renamed the variable where it's used to make it clearer, see e38480a

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If it makes sense for this to be configurable, let's go with this as it is! Can you imagine a plausible situation where it would make sense to configure this?

# is this relevant/sensible to support?
c.KubernetesBuildExecutor.builder_info = <something>

I'm currently understanding builder_info as something inherent to the builder class providing it, like a __version__ field, which I don't think should be configurable. So I guess there are two parts to my question about this:

  1. Should this be made configurable by using a trait with `.tag(config=True)`?
  2. Should this be class state or object state? (I also think the use of class state prohibits the use of a trait to store it.)

Copy link
Member Author

@manics manics Jan 3, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For KubernetesBuildExecutor the default value includes build_image, which is a user-configurable traitlet, so unless traitlets properties can be static, builder_info can't be a static class property, e.g.:

{
  "builder_info": {
    "build_image": "quay.io/manics/repo2docker:2023-01-02-12-10-arm64"
  },
  "binderhub": "0.2.0+1243.gf5f8c33.dirty",
  "builder": "quay.io/manics/repo2docker:2023-01-02-12-10-arm64"
}

Compare with the current output: https://mybinder.org/versions

As for config=True/False: that's debatable. /versions is a public endpoint, including on authenticated BinderHubs, so one use could be to deliberately hide the builder_info. I'm not sure whether that's useful. What do you prefer?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, well, I think it's not worth thinking about further if we haven't come up with a clean alternative strategy by now.


PR LGTM ready for merge in my mind!

Thank you for amazing efforts in binderhub Simon!!!

self.tornado_settings.update(
{
"log_function": log_request,
"push_secret": self.push_secret,
"image_prefix": self.image_prefix,
"debug": self.debug,
"launcher": self.launcher,
"appendix": self.appendix,
"ban_networks": self.ban_networks,
"ban_networks_min_prefix_len": self.ban_networks_min_prefix_len,
"build_namespace": self.build_namespace,
"build_image": self.build_image,
"build_node_selector": self.build_node_selector,
"build_pool": self.build_pool,
"build_token_check_origin": self.build_token_check_origin,
"build_token_secret": self.build_token_secret,
"build_token_expires_seconds": self.build_token_expires_seconds,
"sticky_builds": self.sticky_builds,
"log_tail_lines": self.log_tail_lines,
"example_builder": example_builder,
"pod_quota": self.pod_quota,
"per_repo_quota": self.per_repo_quota,
"per_repo_quota_higher": self.per_repo_quota_higher,
Expand All @@ -866,9 +914,6 @@ def initialize(self, *args, **kwargs):
"banner_message": self.banner_message,
"extra_footer_scripts": self.extra_footer_scripts,
"jinja2_env": jinja_env,
"build_memory_limit": self.build_memory_limit,
"build_memory_request": self.build_memory_request,
"build_docker_host": self.build_docker_host,
"build_docker_config": self.build_docker_config,
"base_url": self.base_url,
"badge_base_url": self.badge_base_url,
Expand Down Expand Up @@ -969,25 +1014,21 @@ def stop(self):
self.build_pool.shutdown()

async def watch_build_pods(self):
"""Watch build pods
warnings.warn(
"watch_build_pods() is deprecated, use watch_builders()", DeprecationWarning
)
await self.watch_builders()

Every build_cleanup_interval:
- delete stopped build pods
- delete running build pods older than build_max_age
async def watch_builders(self):
"""
Watch builders, run a cleanup function every build_cleanup_interval
"""
while True:
while self.build_cleaner_class:
cleaner = self.build_cleaner_class()
try:
await asyncio.wrap_future(
self.executor.submit(
lambda: Build.cleanup_builds(
self.kube_client,
self.build_namespace,
self.build_max_age,
)
)
)
await asyncio.wrap_future(self.executor.submit(cleaner.cleanup))
except Exception:
app_log.exception("Failed to cleanup build pods")
app_log.exception("Failed to cleanup builders")
await asyncio.sleep(self.build_cleanup_interval)

def start(self, run_loop=True):
Expand All @@ -998,7 +1039,7 @@ def start(self, run_loop=True):
)
self.http_server.listen(self.port)
if self.builder_required:
asyncio.ensure_future(self.watch_build_pods())
asyncio.ensure_future(self.watch_builders())
if run_loop:
tornado.ioloop.IOLoop.current().start()

Expand Down
16 changes: 8 additions & 8 deletions binderhub/base.py
Expand Up @@ -244,11 +244,11 @@ class VersionHandler(BaseHandler):

async def get(self):
self.set_header("Content-type", "application/json")
self.write(
json.dumps(
{
"builder": self.settings["build_image"],
"binderhub": binder_version,
}
)
)
r = {
"builder_info": self.settings["example_builder"].builder_info,
"binderhub": binder_version,
}
# Backwards compatibility
if "build_image" in r["builder_info"]:
r["builder"] = r["builder_info"]["build_image"]
self.write(json.dumps(r))