From 0f4537e473f019101f6d1733e9ea140ec8a6a7c0 Mon Sep 17 00:00:00 2001 From: MinRK Date: Tue, 19 Aug 2014 17:20:07 -0700 Subject: [PATCH] cleanup pass - logging - process cleanup - docstrings - COPYING.md --- COPYING.md | 59 ++++++++++++++++++++++++++++++ README.md | 12 +++---- multiuser/app.py | 80 +++++++++++++++++++++++++++++++---------- multiuser/db.py | 80 +++++++++++++++++++++++------------------ multiuser/handlers.py | 78 +++++++++++++--------------------------- multiuser/singleuser.py | 16 +++++---- multiuser/spawner.py | 32 +++++++++-------- multiuser/utils.py | 2 +- 8 files changed, 225 insertions(+), 134 deletions(-) create mode 100644 COPYING.md diff --git a/COPYING.md b/COPYING.md new file mode 100644 index 0000000000..d62c372469 --- /dev/null +++ b/COPYING.md @@ -0,0 +1,59 @@ +# The Jupyter multi-user notebook server licensing terms + +Jupyter multi-user notebook server is licensed under the terms of the Modified BSD License +(also known as New or Revised or 3-Clause BSD), as follows: + +- Copyright (c) 2014-, Jupyter Development Team + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +Redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. + +Redistributions in binary form must reproduce the above copyright notice, this +list of conditions and the following disclaimer in the documentation and/or +other materials provided with the distribution. + +Neither the name of the Jupyter Development Team nor the names of its +contributors may be used to endorse or promote products derived from this +software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +## About the Jupyter Development Team + +The Jupyter Development Team is the set of all contributors to the Jupyter project. +This includes all of the Jupyter subprojects. + +The core team that coordinates development on GitHub can be found here: +https://github.com/jupyter/. + +## Our Copyright Policy + +Jupyter uses a shared copyright model. Each contributor maintains copyright +over their contributions to Jupyter. But, it is important to note that these +contributions are typically only changes to the repositories. Thus, the Jupyter +source code, in its entirety is not the copyright of any single person or +institution. Instead, it is the collective copyright of the entire Jupyter +Development Team. If individual contributors want to maintain a record of what +changes/contributions they have specific copyright on, they should indicate +their copyright in the commit message of the change, when they commit the +change to one of the Jupyter repositories. + +With this in mind, the following banner should be used in any source code file +to indicate the copyright and license terms: + + # Copyright (c) Jupyter Development Team. + # Distributed under the terms of the Modified BSD License. 
diff --git a/README.md b/README.md index 2417ca8d82..3a1570f742 100644 --- a/README.md +++ b/README.md @@ -4,16 +4,16 @@ This repo hosts the development of a multi-user server to manage and proxy multi Three actors: -- multi-user server (tornado process) +- multi-user Hub (tornado process) - configurable http proxy (node-http-proxy) -- multiple single-user IPython notbeook servers (Python/IPython/tornado) +- multiple single-user IPython notebook servers (Python/IPython/tornado) Basic principals: -- MUS spawns proxy -- proxy forwards ~all requests to MUS by default -- MUS handles login, and spawns single-user servers on demand -- MUS configures proxy to forward url prefixes to single-user servers +- Hub spawns proxy +- Proxy forwards ~all requests to hub by default +- Hub handles login, and spawns single-user servers on demand +- Hub configures proxy to forward url prefixes to single-user servers ## dependencies diff --git a/multiuser/app.py b/multiuser/app.py index 387d746cbf..c704d60314 100644 --- a/multiuser/app.py +++ b/multiuser/app.py @@ -1,11 +1,14 @@ #!/usr/bin/env python +"""The multi-user notebook application""" +import logging import os from subprocess import Popen import tornado.httpserver import tornado.ioloop import tornado.options +from tornado.log import LogFormatter from tornado import web from IPython.utils.traitlets import ( @@ -13,7 +16,6 @@ DottedObjectName, ) from IPython.config import Application -from IPython.html.utils import url_path_join from IPython.utils.importstring import import_item here = os.path.dirname(__file__) @@ -27,11 +29,10 @@ ) from . import db - -# from .user import UserManager +from .utils import url_path_join class MultiUserApp(Application): - + """An Application for starting the Multi-User Notebook server.""" ip = Unicode('localhost', config=True, help="The public facing ip of the proxy" ) @@ -86,15 +87,8 @@ def _hub_prefix_changed(self, name, old, new): def _cookie_secret_default(self): return b'secret!' 
- # spawning subprocesses - spawner_class = DottedObjectName("multiuser.spawner.ProcessSpawner") - def _spawner_class_changed(self, name, old, new): - self.spawner = import_item(new) - - spawner = Any() - def _spawner_default(self): - return import_item(self.spawner_class) - + # class for spawning single-user servers + spawner_class = DottedObjectName("multiuser.spawner.LocalProcessSpawner") db_url = Unicode('sqlite:///:memory:', config=True) debug_db = Bool(False) @@ -104,7 +98,35 @@ def _spawner_default(self): handlers = List() - def add_url_prefix(self, prefix, handlers): + + _log_formatter_cls = LogFormatter + + def _log_level_default(self): + return logging.INFO + + def _log_datefmt_default(self): + """Exclude date from default date format""" + return "%H:%M:%S" + + def _log_format_default(self): + """override default log format to include time""" + return u"%(color)s[%(levelname)1.1s %(asctime)s.%(msecs).03d %(name)s]%(end_color)s %(message)s" + + def init_logging(self): + # This prevents double log messages because tornado uses a root logger that + # self.log is a child of. The logging module dispatches log messages to a log + # and all of its ancestors until propagate is set to False. 
+ self.log.propagate = False + + # hook up tornado 3's loggers to our app handlers + logger = logging.getLogger('tornado') + logger.propagate = True + logger.parent = self.log + logger.setLevel(self.log.level) + + + @staticmethod + def add_url_prefix(prefix, handlers): """add a url prefix to handlers""" for i, tup in enumerate(handlers): lis = list(tup) @@ -127,9 +149,11 @@ def init_handlers(self): def init_db(self): # TODO: load state from db for resume + # TODO: if not resuming, clear existing db contents self.db = db.new_session(self.db_url, echo=self.debug_db) def init_hub(self): + """Load the Hub config into the database""" self.hub = db.Hub( server=db.Server( ip=self.hub_ip, @@ -143,6 +167,7 @@ def init_hub(self): self.db.commit() def init_proxy(self): + """Load the Proxy config into the database""" self.proxy = db.Proxy( public_server=db.Server( ip=self.ip, @@ -159,6 +184,7 @@ def init_proxy(self): self.db.commit() def start_proxy(self): + """Actually start the configurable-http-proxy""" env = os.environ.copy() env['CONFIGPROXY_AUTH_TOKEN'] = self.proxy.auth_token self.proxy = Popen(["node", os.path.join(here, 'js', 'main.js'), @@ -168,17 +194,24 @@ def start_proxy(self): ], env=env) def init_tornado_settings(self): + """Set up the tornado settings dict.""" base_url = self.base_url - self.tornado_settings.update( + settings = dict( + config=self.config, db=self.db, hub=self.hub, + spawner_class=import_item(self.spawner_class), base_url=base_url, cookie_secret=self.cookie_secret, login_url=url_path_join(self.hub.server.base_url, 'login'), template_path=os.path.join(here, 'templates'), ) + # allow configured settings to have priority + settings.update(self.tornado_settings) + self.tornado_settings = settings def init_tornado_application(self): + """Instantiate the tornado Application object""" self.tornado_application = web.Application(self.handlers, **self.tornado_settings) def initialize(self, *args, **kwargs): @@ -190,8 +223,21 @@ def initialize(self, 
*args, **kwargs): self.init_tornado_settings() self.init_tornado_application() + def cleanup(self): + self.log.info("Cleaning up proxy...") + self.proxy.terminate() + self.log.info("Cleaning up single-user servers...") + Spawner = import_item(self.spawner_class) + for user in self.db.query(db.User): + if user.spawner is not None: + user.spawner.stop() + self.log.info("...done") + def start(self): + """Start the whole thing""" + # start the proxy self.start_proxy() + # start the webserver http_server = tornado.httpserver.HTTPServer(self.tornado_application) http_server.listen(self.hub_port) try: @@ -199,9 +245,7 @@ def start(self): except KeyboardInterrupt: print("\nInterrupted") finally: - pass - # self.proxy.terminate() - # self.user_manager.cleanup() + self.cleanup() main = MultiUserApp.launch_instance diff --git a/multiuser/db.py b/multiuser/db.py index a539aba158..861e3a2f1b 100644 --- a/multiuser/db.py +++ b/multiuser/db.py @@ -1,4 +1,4 @@ -"""sqlalchemy ORM tools for the user database""" +"""sqlalchemy ORM tools for the state of the constellation of processes""" # Copyright (c) Jupyter Development Team. # Distributed under the terms of the Modified BSD License. 
@@ -11,14 +11,12 @@ Column, Integer, String, ForeignKey, Unicode, Binary, ) from sqlalchemy.ext.declarative import declarative_base, declared_attr - from sqlalchemy.orm import sessionmaker, relationship, backref - from sqlalchemy import create_engine from IPython.utils.py3compat import str_to_unicode -from .utils import random_port +from .utils import random_port, url_path_join def new_token(*args, **kwargs): @@ -56,30 +54,44 @@ def process_result_value(self, value, dialect): class Server(Base): + """The basic state of a server + + connection and cookie info + """ __tablename__ = 'servers' id = Column(Integer, primary_key=True) proto = Column(Unicode, default=u'http') ip = Column(Unicode, default=u'localhost') port = Column(Integer, default=random_port) - cookie_secret = Column(Binary) - cookie_name = Column(Unicode) base_url = Column(Unicode, default=u'/') + cookie_secret = Column(Binary, default=b'secret') + cookie_name = Column(Unicode, default=u'cookie') def __repr__(self): return "" % (self.ip, self.port) @property - def url(self): - return "{proto}://{ip}:{port}{url}".format( + def host(self): + return "{proto}://{ip}:{port}".format( proto=self.proto, ip=self.ip, port=self.port, - url=self.base_url, + ) + + @property + def url(self): + return "{host}{uri}".format( + host=self.host, + uri=self.base_url, ) class Proxy(Base): - """A configurable-http-proxy instance""" + """A configurable-http-proxy instance. + + A proxy consists of the API server info and the public-facing server info, + plus an auth token for configuring the proxy table. + """ __tablename__ = 'proxies' id = Column(Integer, primary_key=True) auth_token = Column(Unicode, default=new_token) @@ -98,22 +110,22 @@ def __repr__(self): class Hub(Base): - """Bring it all together at the hub""" + """Bring it all together at the hub. + + The Hub is a server, plus its API path suffix + + the api_url is the full URL plus the api_path suffix on the end + of the server base_url. 
+ """ __tablename__ = 'hubs' id = Column(Integer, primary_key=True) _server_id = Column(Integer, ForeignKey('servers.id')) server = relationship(Server, primaryjoin=_server_id == Server.id) - api_url = Column(Unicode, default=u'/hub/api/') @property - def api_host_url(self): + def api_url(self): """return the full API url (with proto://host...)""" - return "{proto}://{ip}:{port}{url}".format( - proto=self.server.proto, - ip=self.server.ip, - port=self.server.port, - url=self.api_url, - ) + return url_path_join(self.server.url, 'api') def __repr__(self): if self.server: @@ -125,16 +137,30 @@ def __repr__(self): class User(Base): - """The User table""" + """The User table + + Each user has a single server, + and multiple tokens used for authorization. + + API tokens grant access to the Hub's REST API. + These are used by single-user servers to authenticate requests. + + Cookie tokens are used to authenticate browser sessions. + + A `state` column contains a JSON dict, + used for restoring state of a Spawner. + """ __tablename__ = 'users' id = Column(Integer, primary_key=True) name = Column(Unicode) + # should we allow multiple servers per user? 
_server_id = Column(Integer, ForeignKey('servers.id')) server = relationship(Server, primaryjoin=_server_id == Server.id) api_tokens = relationship("APIToken", backref="user") cookie_tokens = relationship("CookieToken", backref="user") state = Column(JSONDict) + spawner = None def __repr__(self): if self.server: @@ -197,17 +223,3 @@ def new_session(url="sqlite:///:memory:", **kwargs): return session -if __name__ == '__main__': - engine = create_engine('sqlite:///:memory:', echo=True) - Session = sessionmaker(bind=engine) - session = Session() - Base.metadata.create_all(engine) - - hub = Hub() - session.add(hub) - session.commit() - - minrk = User(name="minrk") - session.add(minrk) - session.commit() - \ No newline at end of file diff --git a/multiuser/handlers.py b/multiuser/handlers.py index 94ab98065d..e13d9a2163 100644 --- a/multiuser/handlers.py +++ b/multiuser/handlers.py @@ -14,11 +14,9 @@ from tornado.web import RequestHandler from tornado import web -from IPython.html.utils import url_path_join - from . 
import db -from .spawner import PopenSpawner -from .utils import random_port, wait_for_server +from .spawner import LocalProcessSpawner +from .utils import random_port, wait_for_server, url_path_join class BaseHandler(RequestHandler): @@ -33,6 +31,10 @@ def log(self): def config(self): return self.settings.get('config', None) + @property + def base_url(self): + return self.settings.get('base_url', '/') + @property def db(self): return self.settings['db'] @@ -41,53 +43,30 @@ def db(self): def hub(self): return self.settings['hub'] - @property - def cookie_name(self): - return self.settings.get('cookie_name', 'cookie') - - @property - def hub_url(self): - return self.settings.get('hub_url', '') - - @property - def hub_prefix(self): - return self.settings.get('hub_prefix', '/hub/') - def get_current_user(self): if 'get_current_user' in self.settings: - return self.settings['get_current_user']() + return self.settings['get_current_user'](self) - token = self.get_cookie(self.cookie_name, '') + token = self.get_cookie(self.hub.server.cookie_name, None) if token: - session = self.user_manager.user_for_cookie_token(token) - if session: - return session.user - - @property - def base_url(self): - return self.settings.setdefault('base_url', '/') + cookie_token = self.db.query(db.CookieToken).filter(db.CookieToken.token==token).first() + if cookie_token: + return cookie_token.user.name + else: + # have cookie, but it's not valid. Clear it and start over. 
+ self.clear_cookie(self.hub.server.cookie_name) def clear_login_cookie(self): - self.clear_cookie(self.cookie_name) + username = self.get_current_user() + if username is not None: + user = self.db.query(User).filter(name=username).first() + if user is not None: + self.clear_cookie(user.server.cookie_name, path=user.server.base_url) + self.clear_cookie(self.cookie_name, path=self.hub.base_url) @property def spawner_class(self): - return self.settings.get('spawner_class', PopenSpawner) - - # def spawn_single_user(self, user): - # spawner = self.spawner_class( - # user=user, - # cookie_secret=self.settings['cookie_secret'], - # hub_api_url=self.settings['hub_api_url'], - # hub_prefix=self.settings['hub_prefix'], - # ) - # session = self.user_manager.get_session(user, - # cookie_secret=self.settings['cookie_secret'], - # hub_api_url=self.settings['hub_api_url'], - # hub_prefix=self.settings['hub_prefix'], - # ) - # self.user_manager.spawn(user) - # return session + return self.settings.get('spawner_class', LocalProcessSpawner) class RootHandler(BaseHandler): @@ -104,14 +83,14 @@ class UserHandler(BaseHandler): """ @web.authenticated def get(self, user): - self.log.debug("hub at single-user url: %s", user) + self.log.warn("Hub caught serving single-user url: %s", user) if self.get_current_user() == user: self.spawn_single_user(user) self.redirect('') else: self.clear_login_cookie() self.redirect(url_concat(self.settings['login_url'], { - 'next' : '/user/%s/' % user + 'next' : self.request.path, })) @@ -169,7 +148,7 @@ def spawn_single_user(self, name): self.db.add(api_token) self.db.commit() - spawner = self.spawner_class( + spawner = user.spawner = self.spawner_class( config=self.config, user=user, hub=self.hub, @@ -189,8 +168,6 @@ def post(self): pwd = self.get_argument('password', default=u'') next_url = self.get_argument('next', default='') or '/user/%s/' % name if name and pwd == 'password': - import IPython - # IPython.embed() user = 
self.db.query(db.User).filter(db.User.name == name).first() if user is None: user = self.spawn_single_user(name) @@ -231,7 +208,7 @@ def post(self): auth_header_pat = re.compile(r'^token\s+([^\s]+)$') def token_authorized(method): - """decorator for a method authorized by the Authorization header""" + """decorator for a method authorized by the Authorization token header""" def check_token(self, *args, **kwargs): auth_header = self.request.headers.get('Authorization', '') match = auth_header_pat.match(auth_header) @@ -252,12 +229,7 @@ class AuthorizationsHandler(BaseHandler): @token_authorized def get(self, token): db_token = self.db.query(db.CookieToken).filter(db.CookieToken.token == token).first() - import IPython - IPython.embed() if db_token is None: - # app_log.debug('cookie tokens: %r', - # { user:s.cookie_token for user,s in self.user_manager.users.items() } - # ) raise web.HTTPError(404) self.write(json.dumps({ 'user' : db_token.user.name, diff --git a/multiuser/singleuser.py b/multiuser/singleuser.py index 54540c729e..72ba422653 100644 --- a/multiuser/singleuser.py +++ b/multiuser/singleuser.py @@ -5,13 +5,15 @@ import requests +from tornado import ioloop from tornado import web from IPython.utils.traitlets import Unicode -from IPython.html import utils from IPython.html.notebookapp import NotebookApp +from .utils import url_path_join + # Define two methods to attach to AuthenticatedHandler, # which authenticate via the central auth server. 
@@ -26,7 +28,7 @@ def verify_token(self, token): hub_api_url = self.settings['hub_api_url'] hub_api_key = self.settings['hub_api_key'] - r = requests.get(utils.url_path_join( + r = requests.get(url_path_join( hub_api_url, "authorizations", token, ), headers = {'Authorization' : 'token %s' % hub_api_key} @@ -53,11 +55,7 @@ def get_current_user(self): if user == my_user: return user else: - # import IPython - # IPython.embed() return None - # imoprt - # raise web.HTTPError(403, "User %s does not have access to %s" % (user, my_user)) else: self.log.debug("No token cookie") return None @@ -83,6 +81,10 @@ class SingleUserNotebookApp(NotebookApp): aliases = aliases browser = False + def _confirm_exit(self): + # disable the exit confirmation for background notebook processes + ioloop.IOLoop.instance().stop() + def init_webapp(self): # monkeypatch authentication to use the hub from IPython.html.base.handlers import AuthenticatedHandler @@ -97,7 +99,7 @@ def init_webapp(self): s['hub_api_key'] = env.get('IPY_API_TOKEN', '') s['cookie_secret'] = env.get('IPY_COOKIE_SECRET', '') s['cookie_name'] = self.cookie_name - s['login_url'] = utils.url_path_join(self.hub_prefix, 'login') + s['login_url'] = url_path_join(self.hub_prefix, 'login') s['hub_api_url'] = self.hub_api_url super(SingleUserNotebookApp, self).init_webapp() diff --git a/multiuser/spawner.py b/multiuser/spawner.py index 7735d38ec5..6ab2db7e6d 100644 --- a/multiuser/spawner.py +++ b/multiuser/spawner.py @@ -48,6 +48,13 @@ def _env_default(self): self._env_key(env, 'API_TOKEN', self.api_token) return env + cmd = List(Unicode, config=True, + help="""The command used for starting notebooks.""" + ) + def _cmd_default(self): + # should have sudo -u self.user + return [sys.executable, '-m', 'multiuser.singleuser'] + @classmethod def fromJSON(cls, state, **kwargs): """Create a new instance, and load its JSON state @@ -94,7 +101,7 @@ def get_args(self): '--base-url=%s' % self.user.server.base_url, '--hub-prefix=%s' % 
self.hub.server.base_url, - '--hub-api-url=%s' % self.hub.api_host_url, + '--hub-api-url=%s' % self.hub.api_url, ] def start(self): @@ -107,15 +114,8 @@ def poll(self): raise NotImplementedError("Override in subclass") -class PopenSpawner(Spawner): +class LocalProcessSpawner(Spawner): """A Spawner that just uses Popen to start local processes.""" - cmd = List(Unicode, config=True, - help="""The command used for starting notebooks.""" - ) - def _cmd_default(self): - # should have sudo -u self.user - return [sys.executable, '-m', 'multiuser.singleuser'] - proc = Instance(Popen) pid = Integer() @@ -144,13 +144,12 @@ def poll(self): # if we resumed from stored state, # we don't have the Popen handle anymore - # this doesn't work on Windows. - # multi-user doesn't support Windows. + # this doesn't work on Windows, but that's okay because we don't support Windows. try: os.kill(self.pid, 0) except OSError as e: if e.errno == errno.ESRCH: - # no such process, return exitcode == 0, since we don't know + # no such process, return exitcode == 0, since we don't know the exit status return 0 else: # None indicates the process is running @@ -159,7 +158,7 @@ def poll(self): def _wait_for_death(self, timeout=10): """wait for the process to die, up to timeout seconds""" for i in range(int(timeout * 10)): - if self.poll() is None: + if self.poll() is not None: break else: time.sleep(0.1) @@ -181,5 +180,8 @@ def stop(self, now=False): if self.poll() is None: os.kill(self.pid, signal.SIGKILL) self._wait_for_death(5) - - # it all failed, zombie process + + if self.poll() is None: + # it all failed, zombie process + self.log.warn("Process %i never died", self.pid) + diff --git a/multiuser/utils.py b/multiuser/utils.py index a589e5e49a..a2a4d54967 100644 --- a/multiuser/utils.py +++ b/multiuser/utils.py @@ -6,7 +6,7 @@ import socket import time - +from IPython.html.utils import url_path_join def random_port(): """get a single random port"""