jupyterhub · yuvipanda · Oct 2, 2017 · Oct 2, 2017
diff --git a/kubespawner/reflector.py b/kubespawner/reflector.py
@@ -2,9 +2,9 @@
 import threading
 
 from traitlets.config import SingletonConfigurable
-from traitlets import Dict, Unicode
+from traitlets import Any, Dict, Unicode
 from kubernetes import client, config, watch
-
+from tornado.ioloop import IOLoop
 
 class PodReflector(SingletonConfigurable):
     """
@@ -38,6 +38,8 @@ class PodReflector(SingletonConfigurable):
         """
     )
 
+    on_failure = Any(help="""Function to be called when the reflector gives up.""")
+
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
         # Load kubernetes config here, since this is a Singleton and
@@ -108,12 +110,15 @@ def _watch_and_update(self):
                     else:
                         # This is an atomic operation on the dictionary!
                         self.pods[pod.metadata.name] = pod
-            except:
+            except Exception:
                 cur_delay = cur_delay * 2
+                if cur_delay > 30:
+                    self.log.exception("Watching pods never recovered, giving up")
+                    if self.on_failure:
+                        self.on_failure()
+                    return
                 self.log.exception("Error when watching pods, retrying in %ss", cur_delay)
                 time.sleep(cur_delay)
-                if cur_delay > 30:
-                    raise
                 continue
             finally:
                 w.stop()

diff --git a/kubespawner/spawner.py b/kubespawner/spawner.py
@@ -16,6 +16,7 @@
 
 
 from tornado import gen
+from tornado.ioloop import IOLoop
 from tornado.concurrent import run_on_executor
 from traitlets.config import SingletonConfigurable
 from traitlets import Type, Unicode, List, Integer, Union, Dict, Bool, Any
@@ -50,9 +51,15 @@ def __init__(self, *args, **kwargs):
         # other attributes
         self.executor = SingletonExecutor.instance(max_workers=self.k8s_api_threadpool_workers)
 
+        main_loop = IOLoop.current()
+        def on_reflector_failure():  # handed to PodReflector below as its on_failure callback
+            self.log.critical("Pod reflector failed, halting Hub.")
+            main_loop.add_callback(main_loop.stop)  # may be invoked off the loop's own thread (TODO confirm); add_callback is the thread-safe way in
+
         # This will start watching in __init__, so it'll start the first
         # time any spawner object is created. Not ideal but works!
-        self.pod_reflector = PodReflector.instance(parent=self, namespace=self.namespace)
+        self.pod_reflector = PodReflector.instance(parent=self, namespace=self.namespace,
+            on_failure=on_reflector_failure)
 
         self.api = client.CoreV1Api()