Skip to content

Commit

Permalink
Pass example builder instance to /version /health handlers, /health handler is customisable
Browse files Browse the repository at this point in the history
  • Loading branch information
manics committed Dec 24, 2022
1 parent 9066873 commit 79a21a7
Show file tree
Hide file tree
Showing 3 changed files with 85 additions and 57 deletions.
24 changes: 18 additions & 6 deletions binderhub/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@
from .builder import BuildHandler
from .config import ConfigHandler
from .events import EventLog
from .health import HealthHandler
from .health import HealthHandler, KubernetesHealthHandler
from .launcher import Launcher
from .log import log_request
from .main import LegacyRedirectHandler, MainHandler, ParameterizedMainHandler
Expand Down Expand Up @@ -304,6 +304,18 @@ def _valid_badge_base_url(self, proposal):
config=True,
)

# Handler class that gets mounted on the /health route. It is a configurable
# trait (config=True), so the class serving /health can be replaced.
health_handler_class = Type(
HealthHandler,
help="The Tornado /health handler class",
config=True,
)

@default("health_handler_class")
def _default_health_handler_class(self):
    """Pick the default /health handler that matches the configured build class.

    Returns KubernetesHealthHandler when ``build_class`` is a subclass of
    KubernetesBuildExecutor, and the plain HealthHandler otherwise.
    """
    builds_on_kubernetes = issubclass(self.build_class, KubernetesBuildExecutor)
    return KubernetesHealthHandler if builds_on_kubernetes else HealthHandler

per_repo_quota = Integer(
0,
help="""
Expand Down Expand Up @@ -869,9 +881,9 @@ def initialize(self, *args, **kwargs):

launch_quota = self.launch_quota_class(parent=self, executor=self.executor)

# Construct a Builder so that we can extract the version string to pass to the
# /version handler
temporary_builder = self.build_class(parent=self)
# Construct a Builder so that we can extract parameters such as the
# configuration or the version string to pass to /version and /health handlers
example_builder = self.build_class(parent=self)
self.tornado_settings.update(
{
"log_function": log_request,
Expand All @@ -884,7 +896,7 @@ def initialize(self, *args, **kwargs):
"build_token_check_origin": self.build_token_check_origin,
"build_token_secret": self.build_token_secret,
"build_token_expires_seconds": self.build_token_expires_seconds,
"builder_identifier": temporary_builder.identifier,
"example_builder": example_builder,
"pod_quota": self.pod_quota,
"per_repo_quota": self.per_repo_quota,
"per_repo_quota_higher": self.per_repo_quota_higher,
Expand Down Expand Up @@ -969,7 +981,7 @@ def initialize(self, *args, **kwargs):
{"path": os.path.join(self.tornado_settings["static_path"], "images")},
),
(r"/about", AboutHandler),
(r"/health", HealthHandler, {"hub_url": self.hub_url_local}),
(r"/health", self.health_handler_class, {"hub_url": self.hub_url_local}),
(r"/_config", ConfigHandler),
(r"/", MainHandler),
(r".*", Custom404),
Expand Down
2 changes: 1 addition & 1 deletion binderhub/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,7 @@ async def get(self):
self.write(
json.dumps(
{
"builder": self.settings["builder_identifier"],
"builder": self.settings["example_builder"].identifier,
"binderhub": binder_version,
}
)
Expand Down
116 changes: 66 additions & 50 deletions binderhub/health.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,34 +104,7 @@ class HealthHandler(BaseHandler):

# Receives the hub_url keyword supplied with the /health route definition and
# keeps it on the handler; check_all passes it to the JupyterHub API check.
def initialize(self, hub_url=None):
self.hub_url = hub_url

# NOTE(review): at_most_every is defined outside this view; judging by its name
# it presumably limits how often the wrapped coroutine really runs — confirm.
@at_most_every
async def _get_pods(self):
"""Get information about build and user pods"""
# Namespace, Kubernetes client and executor are all read from the handler settings.
namespace = self.settings["build_namespace"]
k8s = self.settings["kubernetes_client"]
pool = self.settings["executor"]

app_log.info(f"Getting pod statistics for {namespace}")

# One query per selector; the order here fixes the order of the returned
# lists (user pods first, build pods second).
label_selectors = [
"app=jupyterhub,component=singleuser-server",
"component=binderhub-build",
]
# Each client call is submitted to the executor, and asyncio.wrap_future
# turns the resulting future into something that can be awaited.
requests = [
asyncio.wrap_future(
pool.submit(
k8s.list_namespaced_pod,
namespace,
label_selector=label_selector,
_preload_content=False,
_request_timeout=KUBE_REQUEST_TIMEOUT,
)
)
for label_selector in label_selectors
]
# gather() returns the results in the same order as the requests.
responses = await asyncio.gather(*requests)
# With _preload_content=False the raw response body is read and decoded
# here; only the "items" list of each response is returned.
return [json.loads(resp.read())["items"] for resp in responses]
self.ignored_checks = set()

@false_if_raises
@retry
Expand All @@ -155,23 +128,9 @@ async def check_docker_registry(self):
)
return True

# Returns a dict (pod counts, the configured quota and an "ok" flag) rather
# than a bare bool.
async def check_pod_quota(self):
"""Compare number of active pods to available quota"""
# _get_pods returns one list per label selector: user pods, then build pods.
user_pods, build_pods = await self._get_pods()

n_user_pods = len(user_pods)
n_build_pods = len(build_pods)

quota = self.settings["pod_quota"]
total_pods = n_user_pods + n_build_pods
usage = {
"total_pods": total_pods,
"build_pods": n_build_pods,
"user_pods": n_user_pods,
"quota": quota,
# When the quota is None the check always reports ok.
"ok": total_pods <= quota if quota is not None else True,
}
return usage
async def check_quotas(self):
    """Report quota status; this implementation always reports ok."""
    status = {"ok": True}
    return status

async def check_all(self):
"""Runs all health checks and returns a tuple (overall, checks).
Expand All @@ -189,19 +148,20 @@ async def check_all(self):
check_futures.append(self.check_jupyterhub_api(self.hub_url))
checks.append({"service": "JupyterHub API", "ok": False})

check_futures.append(self.check_pod_quota())
checks.append({"service": "Pod quota", "ok": False})
check_futures.append(self.check_quotas())
checks.append({"service": "Quotas", "ok": False})

for result, check in zip(await asyncio.gather(*check_futures), checks):
if isinstance(result, bool):
check["ok"] = result
else:
check.update(result)

# The pod quota is treated as a soft quota this means being above
# quota doesn't mean the service is unhealthy
# Some checks are for information but do not count as a health failure
overall = all(
check["ok"] for check in checks if check["service"] != "Pod quota"
check["ok"]
for check in checks
if check["service"] not in self.ignored_checks
)
if not overall:
unhealthy = [check for check in checks if not check["ok"]]
Expand All @@ -218,3 +178,59 @@ async def head(self):
overall, checks = await self.check_all()
if not overall:
self.set_status(503)


class KubernetesHealthHandler(HealthHandler):
    """Health handler that additionally reports Kubernetes pod usage."""

    def initialize(self, **args):
        """Run the base initialisation, then exempt the quota check from the verdict."""
        super().initialize(**args)
        # The pod quota is a soft limit: being above it is reported, but it
        # must not make the service count as unhealthy.
        self.ignored_checks.add("Quotas")

    @at_most_every
    async def _get_pods(self):
        """Fetch the user pods and the build pods.

        Returns a list with two entries, in this order: the ``items`` of the
        single-user server pod listing and the ``items`` of the build pod
        listing.
        """
        namespace = self.settings["example_builder"].namespace
        k8s = self.settings["example_builder"].api
        pool = self.settings["executor"]

        app_log.info(f"Getting pod statistics for {namespace}")

        def submit_listing(selector):
            # Run the client call on the executor and make its future awaitable.
            return asyncio.wrap_future(
                pool.submit(
                    k8s.list_namespaced_pod,
                    namespace,
                    label_selector=selector,
                    _preload_content=False,
                    _request_timeout=KUBE_REQUEST_TIMEOUT,
                )
            )

        pending = [
            submit_listing(selector)
            for selector in (
                "app=jupyterhub,component=singleuser-server",
                "component=binderhub-build",
            )
        ]
        # gather() keeps the order of the submitted requests.
        raw_responses = await asyncio.gather(*pending)
        return [json.loads(raw.read())["items"] for raw in raw_responses]

    async def check_quotas(self):
        """Compare the number of active pods with the configured pod quota.

        Returns a dict with the pod counts, the quota and an ``ok`` flag; with
        a quota of None the flag is always True.
        """
        user_pods, build_pods = await self._get_pods()
        user_count, build_count = len(user_pods), len(build_pods)

        quota = self.settings["pod_quota"]
        pod_total = user_count + build_count
        within_quota = True if quota is None else pod_total <= quota

        return {
            "total_pods": pod_total,
            "build_pods": build_count,
            "user_pods": user_count,
            "quota": quota,
            "ok": within_quota,
        }

0 comments on commit 79a21a7

Please sign in to comment.