Skip to content

Commit

Permalink
Merge pull request #64 from jupyterhub/kubeproxy
Browse files Browse the repository at this point in the history
Add a Kubernetes Ingress based Proxy implementation
  • Loading branch information
yuvipanda committed Dec 21, 2017
2 parents 1725c88 + 8bba55e commit 5c9bf38
Show file tree
Hide file tree
Showing 7 changed files with 478 additions and 208 deletions.
6 changes: 0 additions & 6 deletions docs/source/spawner.rst
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,6 @@ Module: :mod:`kubespawner.spawner`

.. currentmodule:: kubespawner.spawner

:class:`SingletonExecutor`
--------------------------

.. autoconfigurable:: SingletonExecutor
:members:

:class:`KubeSpawner`
--------------------

Expand Down
113 changes: 96 additions & 17 deletions kubespawner/objects.py
Original file line number Diff line number Diff line change
@@ -1,24 +1,24 @@
"""
Helper methods for generating k8s API objects.
"""
import json
from urllib.parse import urlparse
import escapism
import string

from kubernetes.client.models.v1_pod import V1Pod
from kubernetes.client.models.v1_pod_spec import V1PodSpec
from kubernetes.client.models.v1_object_meta import V1ObjectMeta
from kubernetes.client.models.v1_pod_security_context import V1PodSecurityContext
from kubernetes.client.models.v1_local_object_reference import V1LocalObjectReference
from kubernetes.client.models.v1_volume import V1Volume
from kubernetes.client.models.v1_volume_mount import V1VolumeMount

from kubernetes.client.models.v1_container import V1Container
from kubernetes.client.models.v1_security_context import V1SecurityContext
from kubernetes.client.models.v1_container_port import V1ContainerPort
from kubernetes.client.models.v1_env_var import V1EnvVar
from kubernetes.client.models.v1_resource_requirements import V1ResourceRequirements

from kubernetes.client.models.v1_persistent_volume_claim import V1PersistentVolumeClaim
from kubernetes.client.models.v1_persistent_volume_claim_spec import V1PersistentVolumeClaimSpec

from kubernetes.client.models import (
V1Pod, V1PodSpec, V1PodSecurityContext,
V1ObjectMeta,
V1LocalObjectReference,
V1Volume, V1VolumeMount,
V1Container, V1ContainerPort, V1SecurityContext, V1EnvVar, V1ResourceRequirements,
V1PersistentVolumeClaim, V1PersistentVolumeClaimSpec,
V1Endpoints, V1EndpointSubset, V1EndpointAddress, V1EndpointPort,
V1Service, V1ServiceSpec, V1ServicePort,
V1beta1Ingress, V1beta1IngressSpec, V1beta1IngressRule,
V1beta1HTTPIngressRuleValue, V1beta1HTTPIngressPath,
V1beta1IngressBackend
)

def make_pod(
name,
Expand Down Expand Up @@ -283,3 +283,82 @@ def make_pvc(
pvc.spec.resources.requests = {"storage": storage}

return pvc

def make_ingress(
    name,
    routespec,
    target,
    data
):
    """
    Build the (Endpoints, Service, Ingress) triplet routing `routespec` to `target`.

    All three objects share one metadata block: the same `name`, a set of
    labels the proxy's reflectors watch for, and annotations carrying the
    original routespec, the target URL, and the JSON-encoded `data` blob so
    routes can be reconstructed without extra bookkeeping.

    Returns a tuple (endpoint, service, ingress).
    """
    annotations = {
        'hub.jupyter.org/proxy-data': json.dumps(data),
        'hub.jupyter.org/proxy-routespec': routespec,
        'hub.jupyter.org/proxy-target': target
    }
    labels = {
        'heritage': 'jupyterhub',
        'component': 'singleuser-server',
        'hub.jupyter.org/proxy-route': 'true'
    }
    common_metadata = V1ObjectMeta(name=name, annotations=annotations, labels=labels)

    # A routespec starting with '/' is path-only; otherwise the part before
    # the first '/' is a hostname to match on.
    # NOTE(review): in the host case `path` loses its leading '/'
    # (e.g. 'host/base' -> 'base') — confirm the ingress controller
    # tolerates that.
    if not routespec.startswith('/'):
        host, path = routespec.split('/', 1)
    else:
        host, path = None, routespec

    parsed_target = urlparse(target)
    backend_ip = parsed_target.hostname
    backend_port = parsed_target.port

    # Endpoints object pointing straight at the target pod's IP/port.
    endpoint = V1Endpoints(
        kind='Endpoints',
        metadata=common_metadata,
        subsets=[
            V1EndpointSubset(
                addresses=[V1EndpointAddress(ip=backend_ip)],
                ports=[V1EndpointPort(port=backend_port)]
            )
        ]
    )

    # Selector-less Service; it is backed by the manual Endpoints above.
    service = V1Service(
        kind='Service',
        metadata=common_metadata,
        spec=V1ServiceSpec(
            ports=[V1ServicePort(port=backend_port, target_port=backend_port)]
        )
    )

    # Ingress with a single rule sending host/path to the Service.
    backend = V1beta1IngressBackend(
        service_name=name,
        service_port=backend_port
    )
    rule = V1beta1IngressRule(
        host=host,
        http=V1beta1HTTPIngressRuleValue(
            paths=[
                V1beta1HTTPIngressPath(path=path, backend=backend)
            ]
        )
    )
    ingress = V1beta1Ingress(
        kind='Ingress',
        metadata=common_metadata,
        spec=V1beta1IngressSpec(rules=[rule])
    )

    return endpoint, service, ingress
250 changes: 250 additions & 0 deletions kubespawner/proxy.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,250 @@
import os
import string
import escapism
import json
from kubernetes import client

from jupyterhub.proxy import Proxy
from jupyterhub.utils import exponential_backoff

from kubespawner.objects import make_ingress
from kubespawner.utils import generate_hashed_slug
from kubespawner.reflector import NamespacedResourceReflector
from concurrent.futures import ThreadPoolExecutor
from traitlets import Unicode
from tornado import gen
from tornado.concurrent import run_on_executor


class IngressReflector(NamespacedResourceReflector):
    """Maintains an in-memory cache of the ingress objects this proxy manages."""
    kind = 'ingresses'
    # Watch only objects carrying the labels set by make_ingress in
    # kubespawner/objects.py, so unrelated ingresses are ignored.
    labels = {
        'heritage': 'jupyterhub',
        'component': 'singleuser-server',
        'hub.jupyter.org/proxy-route': 'true'
    }

    list_method_name = 'list_namespaced_ingress'
    # Ingresses live in the extensions/v1beta1 API group, unlike the
    # core-v1 services/endpoints reflectors which use the default group.
    api_group_name = 'ExtensionsV1beta1Api'

    @property
    def ingresses(self):
        # Readable alias for the reflector's cached resources dict.
        return self.resources

class ServiceReflector(NamespacedResourceReflector):
    """Maintains an in-memory cache of the service objects this proxy manages."""
    kind = 'services'
    # Watch only objects carrying the labels set by make_ingress in
    # kubespawner/objects.py, so unrelated services are ignored.
    labels = {
        'heritage': 'jupyterhub',
        'component': 'singleuser-server',
        'hub.jupyter.org/proxy-route': 'true'
    }

    list_method_name = 'list_namespaced_service'

    @property
    def services(self):
        # Readable alias for the reflector's cached resources dict.
        return self.resources

class EndpointsReflector(NamespacedResourceReflector):
    """Maintains an in-memory cache of the endpoints objects this proxy manages."""
    kind = 'endpoints'
    # Watch only objects carrying the labels set by make_ingress in
    # kubespawner/objects.py, so unrelated endpoints are ignored.
    labels = {
        'heritage': 'jupyterhub',
        'component': 'singleuser-server',
        'hub.jupyter.org/proxy-route': 'true'
    }

    list_method_name = 'list_namespaced_endpoints'

    @property
    def endpoints(self):
        # Readable alias for the reflector's cached resources dict.
        return self.resources

class KubeIngressProxy(Proxy):
    """
    JupyterHub Proxy implementation backed by Kubernetes Ingress objects.

    Each route is realized as a matched triplet of Endpoints / Service /
    Ingress objects (see make_ingress); the cluster's ingress controller
    performs the actual traffic routing. Routespec and data are stored in
    object annotations, so get_all_routes needs no separate store.
    """

    namespace = Unicode(
        config=True,
        help="""
        Kubernetes namespace to spawn ingresses for single-user servers in.
        If running inside a kubernetes cluster with service accounts enabled,
        defaults to the current namespace. If not, defaults to 'default'
        """
    )

    def _namespace_default(self):
        """
        Set namespace default to current namespace if running in a k8s cluster

        If not in a k8s cluster with service accounts enabled, default to
        'default'
        """
        ns_path = '/var/run/secrets/kubernetes.io/serviceaccount/namespace'
        if os.path.exists(ns_path):
            with open(ns_path) as f:
                return f.read().strip()
        return 'default'

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        # We use the maximum number of concurrent user server starts (and thus proxy adds)
        # as our threadpool maximum. This ensures that contention here does not become
        # an accidental bottleneck. Since we serialize our create operations, we only
        # need 1x concurrent_spawn_limit, not 3x.
        self.executor = ThreadPoolExecutor(max_workers=self.app.concurrent_spawn_limit)

        # Reflectors keep a local cache of our proxy objects, so add_route
        # can wait for creations to be visible and get_all_routes never has
        # to hit the API server directly.
        self.ingress_reflector = IngressReflector(parent=self, namespace=self.namespace)
        self.service_reflector = ServiceReflector(parent=self, namespace=self.namespace)
        self.endpoint_reflector = EndpointsReflector(parent=self, namespace=self.namespace)

        self.core_api = client.CoreV1Api()
        self.extension_api = client.ExtensionsV1beta1Api()

    @run_on_executor
    def asynchronize(self, method, *args, **kwargs):
        """Run a blocking kubernetes-client call on our threadpool, returning a Future."""
        return method(*args, **kwargs)

    def safe_name_for_routespec(self, routespec):
        """Return a deterministic, DNS-label-safe k8s object name for routespec."""
        safe_chars = set(string.ascii_lowercase + string.digits)
        safe_name = generate_hashed_slug(
            'jupyter-' + escapism.escape(routespec, safe=safe_chars, escape_char='-') + '-route'
        )
        return safe_name

    @gen.coroutine
    def add_route(self, routespec, target, data):
        """Create (or update) the endpoints/service/ingress triplet for a route."""
        # Create a route with the name being escaped routespec
        # Use full routespec in label
        # 'data' is JSON encoded and put in an annotation - we don't need to query for it
        safe_name = self.safe_name_for_routespec(routespec).lower()
        endpoint, service, ingress = make_ingress(
            safe_name,
            routespec,
            target,
            data
        )

        @gen.coroutine
        def ensure_object(create_func, patch_func, body, kind):
            """Create the object; on a 409 conflict, patch the existing one instead."""
            try:
                resp = yield self.asynchronize(
                    create_func,
                    namespace=self.namespace,
                    body=body
                )
                self.log.info('Created %s/%s', kind, safe_name)
            except client.rest.ApiException as e:
                if e.status == 409:
                    # This object already exists, we should patch it to make it be what we want
                    self.log.warning("Trying to patch %s/%s, it already exists", kind, safe_name)
                    resp = yield self.asynchronize(
                        patch_func,
                        namespace=self.namespace,
                        body=body,
                        name=body.metadata.name
                    )
                else:
                    raise

        # Create serially, and wait until each object shows up in the local
        # reflector cache before moving on, so the route is fully usable
        # when this coroutine returns.
        yield ensure_object(
            self.core_api.create_namespaced_endpoints,
            self.core_api.patch_namespaced_endpoints,
            body=endpoint,
            kind='endpoints'
        )

        yield exponential_backoff(
            lambda: safe_name in self.endpoint_reflector.endpoints,
            'Could not find endpoints/%s after creating it' % safe_name
        )

        yield ensure_object(
            self.core_api.create_namespaced_service,
            self.core_api.patch_namespaced_service,
            body=service,
            kind='service'
        )

        yield exponential_backoff(
            lambda: safe_name in self.service_reflector.services,
            'Could not find service/%s after creating it' % safe_name
        )

        yield ensure_object(
            self.extension_api.create_namespaced_ingress,
            self.extension_api.patch_namespaced_ingress,
            body=ingress,
            kind='ingress'
        )

        yield exponential_backoff(
            lambda: safe_name in self.ingress_reflector.ingresses,
            'Could not find ingress/%s after creating it' % safe_name
        )

    @gen.coroutine
    def delete_route(self, routespec):
        """Delete the endpoints/service/ingress triplet for a route, ignoring 404s."""
        # We just ensure that these objects are deleted.
        # This means if some of them are already deleted, we just let it
        # be.
        safe_name = self.safe_name_for_routespec(routespec).lower()

        delete_options = client.V1DeleteOptions(grace_period_seconds=0)

        # Kick off all three deletes concurrently; we await them below.
        delete_endpoint = self.asynchronize(
            self.core_api.delete_namespaced_endpoints,
            name=safe_name,
            namespace=self.namespace,
        )

        delete_service = self.asynchronize(
            self.core_api.delete_namespaced_service,
            name=safe_name,
            namespace=self.namespace,
        )

        delete_ingress = self.asynchronize(
            self.extension_api.delete_namespaced_ingress,
            name=safe_name,
            namespace=self.namespace,
            body=delete_options,
            grace_period_seconds=0
        )

        # This seems like cleanest way to parallelize all three of these while
        # also making sure we only ignore the exception when it's a 404.
        # NOTE: this must be a coroutine and be yielded below; a plain
        # function containing `yield` is a generator that never runs, so the
        # deletes would never be awaited and errors never surfaced.
        @gen.coroutine
        def delete_if_exists(kind, future):
            try:
                yield future
            except client.rest.ApiException as e:
                if e.status != 404:
                    raise
                self.log.warning("Could not delete %s %s: does not exist", kind, safe_name)

        # The order matters for endpoint & service - deleting the service deletes
        # the endpoint in the background. This can be racy however, so we do so
        # explicitly ourselves as well. In the future, we can probably try a
        # foreground cascading deletion (https://kubernetes.io/docs/concepts/workloads/controllers/garbage-collection/#foreground-cascading-deletion)
        # instead, but for now this works well enough.
        yield delete_if_exists('endpoint', delete_endpoint)
        yield delete_if_exists('service', delete_service)
        yield delete_if_exists('ingress', delete_ingress)

    @gen.coroutine
    def get_all_routes(self):
        """Reconstruct the route table from the annotations on cached ingresses."""
        # copy everything, because iterating over this directly is not threadsafe
        # FIXME: is this performance intensive? It could be! Measure?
        # FIXME: Validate that this shallow copy *is* thread safe
        ingress_copy = dict(self.ingress_reflector.ingresses)
        routes = {
            ingress.metadata.annotations['hub.jupyter.org/proxy-routespec']:
            {
                'routespec': ingress.metadata.annotations['hub.jupyter.org/proxy-routespec'],
                'target': ingress.metadata.annotations['hub.jupyter.org/proxy-target'],
                'data': json.loads(ingress.metadata.annotations['hub.jupyter.org/proxy-data'])
            }
            for ingress in ingress_copy.values()
        }

        return routes

0 comments on commit 5c9bf38

Please sign in to comment.