From 1cfc2110bb529220ce8aacfc78acd39b62e178ae Mon Sep 17 00:00:00 2001 From: Mariko Wakabayashi Date: Fri, 30 Oct 2020 08:54:12 -0600 Subject: [PATCH 01/15] [DRAFT] Asynchronous Contents API --- .../services/contents/checkpoints.py | 51 +++ jupyter_server/services/contents/manager.py | 303 ++++++++++++++++-- tests/services/contents/test_config.py | 12 +- 3 files changed, 346 insertions(+), 20 deletions(-) diff --git a/jupyter_server/services/contents/checkpoints.py b/jupyter_server/services/contents/checkpoints.py index c29a669c2..16460d520 100644 --- a/jupyter_server/services/contents/checkpoints.py +++ b/jupyter_server/services/contents/checkpoints.py @@ -140,3 +140,54 @@ def get_notebook_checkpoint(self, checkpoint_id, path): } """ raise NotImplementedError("must be implemented in a subclass") + + +class AsyncCheckpoints(Checkpoints): + """ + Base class for managing checkpoints for a ContentsManager asynchronously. + """ + + async def rename_all_checkpoints(self, old_path, new_path): + """Rename all checkpoints for old_path to new_path.""" + for cp in (await self.list_checkpoints(old_path)): + await self.rename_checkpoint(cp['id'], old_path, new_path) + + async def delete_all_checkpoints(self, path): + """Delete all checkpoints for the given path.""" + for checkpoint in (await self.list_checkpoints(path)): + await self.delete_checkpoint(checkpoint['id'], path) + + +class AsyncGenericCheckpointsMixin(GenericCheckpointsMixin): + """ + Helper for creating Asynchronous Checkpoints subclasses that can be used with any + ContentsManager. + """ + + async def create_checkpoint(self, contents_mgr, path): + model = await contents_mgr.get(path, content=True) + type = model['type'] + if type == 'notebook': + return await self.create_notebook_checkpoint( + model['content'], + path, + ) + elif type == 'file': + return await self.create_file_checkpoint( + model['content'], + model['format'], + path, + ) + else: + raise HTTPError(500, u'Unexpected type %s' % type) + + async def restore_checkpoint(self, contents_mgr, checkpoint_id, path): + """Restore a checkpoint.""" + type = await contents_mgr.get(path, content=False)['type'] + if type == 'notebook': + model = await self.get_notebook_checkpoint(checkpoint_id, path) + elif type == 'file': + model = await self.get_file_checkpoint(checkpoint_id, path) + else: + raise HTTPError(500, u'Unexpected type %s' % type) + await contents_mgr.save(model, path) diff --git a/jupyter_server/services/contents/manager.py b/jupyter_server/services/contents/manager.py index 28ac63fcc..0aaf69a98 100644 --- a/jupyter_server/services/contents/manager.py +++ b/jupyter_server/services/contents/manager.py @@ -12,7 +12,7 @@ from tornado.web import HTTPError, RequestHandler from ...files.handlers import FilesHandler -from .checkpoints import Checkpoints +from .checkpoints import Checkpoints, AsyncCheckpoints from traitlets.config.configurable import LoggingConfigurable from nbformat import sign, validate as validate_nb, ValidationError from nbformat.v4 import new_notebook @@ -334,7 +334,7 @@ def increment_filename(self, filename, path='', insert=''): basename, dot, ext = filename.rpartition('.') if ext != 'ipynb': basename, dot, ext = filename.partition('.') - + suffix = dot + ext for i in itertools.count(): @@ -357,29 +357,29 @@ def validate_notebook_model(self, model): e.message, json.dumps(e.instance, indent=1, default=lambda obj: ''), ) return model - + def new_untitled(self, path='', type='', ext=''): """Create a new untitled file or directory in path - + path must be a directory - + File extension can be specified. - + Use `new` to create files with a fully specified path (including filename). """ path = path.strip('/') if not self.dir_exists(path): raise HTTPError(404, 'No such directory: %s' % path) - + model = {} if type: model['type'] = type - + if ext == '.ipynb': model.setdefault('type', 'notebook') else: model.setdefault('type', 'file') - + insert = '' if model['type'] == 'directory': untitled = self.untitled_directory @@ -391,25 +391,25 @@ def new_untitled(self, path='', type='', ext=''): untitled = self.untitled_file else: raise HTTPError(400, "Unexpected model type: %r" % model['type']) - + name = self.increment_filename(untitled + ext, path, insert=insert) path = u'{0}/{1}'.format(path, name) return self.new(model, path) - + def new(self, model=None, path=''): """Create a new file or directory and return its model with no content. - + To create a new untitled entity in a directory, use `new_untitled`. """ path = path.strip('/') if model is None: model = {} - + if path.endswith('.ipynb'): model.setdefault('type', 'notebook') else: model.setdefault('type', 'file') - + # no content, not a directory, so fill out new-file model if 'content' not in model and model['type'] != 'directory': if model['type'] == 'notebook': @@ -419,7 +419,7 @@ def new(self, model=None, path=''): model['content'] = '' model['type'] = 'file' model['format'] = 'text' - + model = self.save(model, path) return model @@ -429,7 +429,7 @@ def copy(self, from_path, to_path=None): If to_path not specified, it will be the parent directory of from_path. If to_path is a directory, filename will increment `from_path-Copy#.ext`. Considering multi-part extensions, the Copy# part will be placed before the first dot for all the extensions except `ipynb`. - For easier manual searching in case of notebooks, the Copy# part will be placed before the last dot. + For easier manual searching in case of notebooks, the Copy# part will be placed before the last dot. from_path must be a full path to a file. """ @@ -442,20 +442,20 @@ def copy(self, from_path, to_path=None): else: from_dir = '' from_name = path - + model = self.get(path) model.pop('path', None) model.pop('name', None) if model['type'] == 'directory': raise HTTPError(400, "Can't copy directories") - + if to_path is None: to_path = from_dir if self.dir_exists(to_path): name = copy_pat.sub(u'.', from_name) to_name = self.increment_filename(name, to_path, insert='-Copy') to_path = u'{0}/{1}'.format(to_path, to_name) - + model = self.save(model, to_path) return model @@ -530,3 +530,268 @@ def list_checkpoints(self, path): def delete_checkpoint(self, checkpoint_id, path): return self.checkpoints.delete_checkpoint(checkpoint_id, path) + + +class AsyncContentsManager(ContentsManager): + """Base class for serving files and directories asynchronously.""" + + checkpoints_class = Type(AsyncCheckpoints, config=True) + checkpoints = Instance(AsyncCheckpoints, config=True) + + # ContentsManager API part 1: methods that must be + # implemented in subclasses. + + async def dir_exists(self, path): + """Does a directory exist at the given path? + + Like os.path.isdir + + Override this method in subclasses. + + Parameters + ---------- + path : string + The path to check + + Returns + ------- + exists : bool + Whether the path does indeed exist. + """ + raise NotImplementedError + + async def is_hidden(self, path): + """Is path a hidden directory or file? + + Parameters + ---------- + path : string + The path to check. This is an API path (`/` separated, + relative to root dir). + + Returns + ------- + hidden : bool + Whether the path is hidden. + + """ + raise NotImplementedError + + async def file_exists(self, path=''): + """Does a file exist at the given path? + + Like os.path.isfile + + Override this method in subclasses. + + Parameters + ---------- + path : string + The API path of a file to check for. + + Returns + ------- + exists : bool + Whether the file exists. + """ + raise NotImplementedError('must be implemented in a subclass') + + async def exists(self, path): + """Does a file or directory exist at the given path? + + Like os.path.exists + + Parameters + ---------- + path : string + The API path of a file or directory to check for. + + Returns + ------- + exists : bool + Whether the target exists. + """ + return await (self.file_exists(path) or self.dir_exists(path)) + + async def get(self, path, content=True, type=None, format=None): + """Get a file or directory model.""" + raise NotImplementedError('must be implemented in a subclass') + + async def save(self, model, path): + """ + Save a file or directory model to path. + + Should return the saved model with no content. Save implementations + should call self.run_pre_save_hook(model=model, path=path) prior to + writing any data. + """ + raise NotImplementedError('must be implemented in a subclass') + + async def delete_file(self, path): + """Delete the file or directory at path.""" + raise NotImplementedError('must be implemented in a subclass') + + async def rename_file(self, old_path, new_path): + """Rename a file or directory.""" + raise NotImplementedError('must be implemented in a subclass') + + # ContentsManager API part 2: methods that have useable default + # implementations, but can be overridden in subclasses. + + async def delete(self, path): + """Delete a file/directory and any associated checkpoints.""" + path = path.strip('/') + if not path: + raise HTTPError(400, "Can't delete root") + + await self.delete_file(path) + await self.checkpoints.delete_all_checkpoints(path) + + async def rename(self, old_path, new_path): + """Rename a file and any checkpoints associated with that file.""" + await self.rename_file(old_path, new_path) + await self.checkpoints.rename_all_checkpoints(old_path, new_path) + + async def update(self, model, path): + """Update the file's path + + For use in PATCH requests, to enable renaming a file without + re-uploading its contents. Only used for renaming at the moment. + """ + path = path.strip('/') + new_path = await model.get('path', path).strip('/') + if path != new_path: + await self.rename(path, new_path) + model = await self.get(new_path, content=False) + return model + + async def new_untitled(self, path='', type='', ext=''): + """Create a new untitled file or directory in path + + path must be a directory + + File extension can be specified. + + Use `new` to create files with a fully specified path (including filename). + """ + path = path.strip('/') + if not (await self.dir_exists(path)): + raise HTTPError(404, 'No such directory: %s' % path) + + model = {} + if type: + model['type'] = type + + if ext == '.ipynb': + model.setdefault('type', 'notebook') + else: + model.setdefault('type', 'file') + + insert = '' + if model['type'] == 'directory': + untitled = self.untitled_directory + insert = ' ' + elif model['type'] == 'notebook': + untitled = self.untitled_notebook + ext = '.ipynb' + elif model['type'] == 'file': + untitled = self.untitled_file + else: + raise HTTPError(400, "Unexpected model type: %r" % model['type']) + + name = self.increment_filename(untitled + ext, path, insert=insert) + path = u'{0}/{1}'.format(path, name) + return await self.new(model, path) + + async def new(self, model=None, path=''): + """Create a new file or directory and return its model with no content. + + To create a new untitled entity in a directory, use `new_untitled`. + """ + path = path.strip('/') + if model is None: + model = {} + + if path.endswith('.ipynb'): + model.setdefault('type', 'notebook') + else: + model.setdefault('type', 'file') + + # no content, not a directory, so fill out new-file model + if 'content' not in model and model['type'] != 'directory': + if model['type'] == 'notebook': + model['content'] = new_notebook() + model['format'] = 'json' + else: + model['content'] = '' + model['type'] = 'file' + model['format'] = 'text' + + model = await self.save(model, path) + return model + + async def copy(self, from_path, to_path=None): + """Copy an existing file and return its new model. + + If to_path not specified, it will be the parent directory of from_path. + If to_path is a directory, filename will increment `from_path-Copy#.ext`. + Considering multi-part extensions, the Copy# part will be placed before the first dot for all the extensions except `ipynb`. + For easier manual searching in case of notebooks, the Copy# part will be placed before the last dot. + + from_path must be a full path to a file. + """ + path = from_path.strip('/') + if to_path is not None: + to_path = to_path.strip('/') + + if '/' in path: + from_dir, from_name = path.rsplit('/', 1) + else: + from_dir = '' + from_name = path + + model = await self.get(path) + model.pop('path', None) + model.pop('name', None) + if model['type'] == 'directory': + raise HTTPError(400, "Can't copy directories") + if to_path is None: + to_path = from_dir + if (await self.dir_exists(to_path)) : + name = copy_pat.sub(u'.', from_name) + to_name = self.increment_filename(name, to_path, insert='-Copy') + to_path = u'{0}/{1}'.format(to_path, to_name) + + model = await self.save(model, to_path) + return model + + async def trust_notebook(self, path): + """Explicitly trust a notebook + + Parameters + ---------- + path : string + The path of a notebook + """ + model = await self.get(path) + nb = model['content'] + self.log.warning("Trusting notebook %s", path) + self.notary.mark_cells(nb, True) + self.check_and_sign(nb, path) + + # Part 3: Checkpoints API + async def create_checkpoint(self, path): + """Create a checkpoint.""" + return await self.checkpoints.create_checkpoint(self, path) + + async def restore_checkpoint(self, checkpoint_id, path): + """ + Restore a checkpoint. + """ + await self.checkpoints.restore_checkpoint(self, checkpoint_id, path) + + async def list_checkpoints(self, path): + return await self.checkpoints.list_checkpoints(path) + + async def delete_checkpoint(self, checkpoint_id, path): + return await self.checkpoints.delete_checkpoint(checkpoint_id, path) diff --git a/tests/services/contents/test_config.py b/tests/services/contents/test_config.py index 9b4c862de..3625f781b 100644 --- a/tests/services/contents/test_config.py +++ b/tests/services/contents/test_config.py @@ -1,7 +1,9 @@ import pytest from traitlets.config import Config +from jupyter_server.services.contents.checkpoints import AsyncCheckpoints from jupyter_server.services.contents.filecheckpoints import GenericFileCheckpoints +from jupyter_server.services.contents.manager import AsyncContentsManager @pytest.fixture @@ -10,4 +12,12 @@ def jp_server_config(): def test_config_did_something(jp_serverapp): - assert isinstance(jp_serverapp.contents_manager.checkpoints, GenericFileCheckpoints) \ No newline at end of file + assert isinstance(jp_serverapp.contents_manager.checkpoints, GenericFileCheckpoints) + + +async def test_async_contents_manager(jp_configurable_serverapp): + config = {'ContentsManager': {'checkpoints_class': AsyncCheckpoints}} + argv = ['--ServerApp.contents_manager_class=jupyter_server.services.contents.manager.AsyncContentsManager'] + app = jp_configurable_serverapp(config=config, argv=argv) + assert isinstance(app.contents_manager, AsyncContentsManager) + From ca60bcf84245b3ff2b672f5a2a0a3c84596296a0 Mon Sep 17 00:00:00 2001 From: Mariko Wakabayashi Date: Thu, 12 Nov 2020 11:46:20 -0700 Subject: [PATCH 02/15] Add asynchronous support section --- docs/source/developers/contents.rst | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/docs/source/developers/contents.rst b/docs/source/developers/contents.rst index cd0f7f7aa..21472d91d 100644 --- a/docs/source/developers/contents.rst +++ b/docs/source/developers/contents.rst @@ -192,19 +192,19 @@ methods: ContentsManager.is_hidden You may be required to specify a Checkpoints object, as the default one, -``FileCheckpoints``, could be incompatible with your custom +``FileCheckpoints``, could be incompatible with your custom ContentsManager. Customizing Checkpoints ----------------------- .. currentmodule:: jupyter_server.services.contents.checkpoints -Customized Checkpoint definitions allows behavior to be +Customized Checkpoint definitions allows behavior to be altered and extended. The ``Checkpoints`` and ``GenericCheckpointsMixin`` classes (from :mod:`jupyter_server.services.contents.checkpoints`) -have reusable code and are intended to be used together, +have reusable code and are intended to be used together, but require the following methods to be implemented. .. autosummary:: @@ -220,7 +220,7 @@ No-op example ~~~~~~~~~~~~~ Here is an example of a no-op checkpoints object - note the mixin -comes first. The docstrings indicate what each method should do or +comes first. The docstrings indicate what each method should do or return for a more complete implementation. .. code-block:: python @@ -238,7 +238,7 @@ return for a more complete implementation. def delete_checkpoint(self, checkpoint_id, path): """deletes a checkpoint for a file""" def list_checkpoints(self, path): - """returns a list of checkpoint models for a given file, + """returns a list of checkpoint models for a given file, default just does one per file """ return [] @@ -267,3 +267,13 @@ ContentsManager. .. _NBFormat: https://nbformat.readthedocs.io/en/latest/index.html .. _PGContents: https://github.com/quantopian/pgcontents .. _PostgreSQL: https://www.postgresql.org/ + +Asynchronous Support +~~~~~~~~~~~~~~~~~~~~ + +To execute file operations asynchronously in a virtual filesystem, the following classes are available. + +- :class:`~manager.AsyncContentsManager` +- :class:`~filemanager.AsyncFileContentsManager` +- :class:`~checkpoints.AsyncCheckpoints` + From 3a97a6ab0bd2e57c7ee663c91359e731a7942034 Mon Sep 17 00:00:00 2001 From: Mariko Wakabayashi Date: Thu, 12 Nov 2020 11:46:49 -0700 Subject: [PATCH 03/15] Create AsyncFileContentsManager --- jupyter_server/serverapp.py | 4 +- .../services/contents/filecheckpoints.py | 69 +++- jupyter_server/services/contents/fileio.py | 117 ++++++- .../services/contents/filemanager.py | 305 +++++++++++++++++- jupyter_server/services/contents/handlers.py | 21 +- jupyter_server/services/contents/manager.py | 61 +++- setup.py | 1 + tests/services/contents/test_api.py | 8 + tests/services/contents/test_manager.py | 235 +++++++------- tests/services/kernels/test_api.py | 1 + 10 files changed, 679 insertions(+), 143 deletions(-) diff --git a/jupyter_server/serverapp.py b/jupyter_server/serverapp.py index 6e6b9aafa..f6f203772 100755 --- a/jupyter_server/serverapp.py +++ b/jupyter_server/serverapp.py @@ -68,8 +68,8 @@ from .log import log_request from .services.kernels.kernelmanager import MappingKernelManager, AsyncMappingKernelManager from .services.config import ConfigManager -from .services.contents.manager import ContentsManager -from .services.contents.filemanager import FileContentsManager +from .services.contents.manager import AsyncContentsManager, ContentsManager +from .services.contents.filemanager import AsyncFileContentsManager, FileContentsManager from .services.contents.largefilemanager import LargeFileManager from .services.sessions.sessionmanager import SessionManager from .gateway.managers import GatewayKernelManager, GatewayKernelSpecManager, GatewaySessionManager, GatewayClient diff --git a/jupyter_server/services/contents/filecheckpoints.py b/jupyter_server/services/contents/filecheckpoints.py index a8a795da6..5ddf8885e 100644 --- a/jupyter_server/services/contents/filecheckpoints.py +++ b/jupyter_server/services/contents/filecheckpoints.py @@ -7,12 +7,15 @@ from tornado.web import HTTPError from .checkpoints import ( + AsyncCheckpoints, Checkpoints, GenericCheckpointsMixin, ) -from .fileio import FileManagerMixin +from .fileio import AsyncFileManagerMixin, FileManagerMixin +from anyio import run_sync_in_worker_thread from jupyter_core.utils import ensure_dir_exists +from jupyter_server.utils import ensure_async from ipython_genutils.py3compat import getcwd from traitlets import Unicode @@ -137,6 +140,70 @@ def no_such_checkpoint(self, path, checkpoint_id): ) +class AsyncFileCheckpoints(FileCheckpoints, AsyncFileManagerMixin, AsyncCheckpoints): + async def create_checkpoint(self, contents_mgr, path): + """Create a checkpoint.""" + checkpoint_id = u'checkpoint' + src_path = contents_mgr._get_os_path(path) + dest_path = self.checkpoint_path(checkpoint_id, path) + await self._copy(src_path, dest_path) + return (await self.checkpoint_model(checkpoint_id, dest_path)) + + async def restore_checkpoint(self, contents_mgr, checkpoint_id, path): + """Restore a checkpoint.""" + src_path = self.checkpoint_path(checkpoint_id, path) + dest_path = contents_mgr._get_os_path(path) + await self._copy(src_path, dest_path) + + async def checkpoint_model(self, checkpoint_id, os_path): + """construct the info dict for a given checkpoint""" + stats = await run_sync_in_worker_thread(os.stat, os_path) + last_modified = tz.utcfromtimestamp(stats.st_mtime) + info = dict( + id=checkpoint_id, + last_modified=last_modified, + ) + return info + + # ContentsManager-independent checkpoint API + async def rename_checkpoint(self, checkpoint_id, old_path, new_path): + """Rename a checkpoint from old_path to new_path.""" + old_cp_path = self.checkpoint_path(checkpoint_id, old_path) + new_cp_path = self.checkpoint_path(checkpoint_id, new_path) + if os.path.isfile(old_cp_path): + self.log.debug( + "Renaming checkpoint %s -> %s", + old_cp_path, + new_cp_path, + ) + with self.perm_to_403(): + await run_sync_in_worker_thread(shutil.move, old_cp_path, new_cp_path) + + async def delete_checkpoint(self, checkpoint_id, path): + """delete a file's checkpoint""" + path = path.strip('/') + cp_path = self.checkpoint_path(checkpoint_id, path) + if not os.path.isfile(cp_path): + self.no_such_checkpoint(path, checkpoint_id) + + self.log.debug("unlinking %s", cp_path) + with self.perm_to_403(): + await run_sync_in_worker_thread(os.unlink, cp_path) + + async def list_checkpoints(self, path): + """list the checkpoints for a given file + + This contents manager currently only supports one checkpoint per file. + """ + path = path.strip('/') + checkpoint_id = "checkpoint" + os_path = self.checkpoint_path(checkpoint_id, path) + if not os.path.isfile(os_path): + return [] + else: + return [await self.checkpoint_model(checkpoint_id, os_path)] + + class GenericFileCheckpoints(GenericCheckpointsMixin, FileCheckpoints): """ Local filesystem Checkpoints that works with any conforming diff --git a/jupyter_server/services/contents/fileio.py b/jupyter_server/services/contents/fileio.py index d1fdaefa2..91e095729 100644 --- a/jupyter_server/services/contents/fileio.py +++ b/jupyter_server/services/contents/fileio.py @@ -7,10 +7,12 @@ from contextlib import contextmanager import errno +from functools import partial import io import os import shutil +from anyio import open_file, run_sync_in_worker_thread from tornado.web import HTTPError from jupyter_server.utils import ( @@ -32,6 +34,11 @@ def replace_file(src, dst): """ os.replace(src, dst) +async def async_replace_file(src, dst): + """ replace dst with src asynchronously + """ + await run_sync_in_worker_thread(os.replace, src, dst) + def copy2_safe(src, dst, log=None): """copy src to dst @@ -44,6 +51,18 @@ def copy2_safe(src, dst, log=None): if log: log.debug("copystat on %s failed", dst, exc_info=True) +async def async_copy2_safe(src, dst, log=None): + """copy src to dst asynchronously + + like shutil.copy2, but log errors in copystat instead of raising + """ + await run_sync_in_worker_thread(shutil.copyfile, src, dst) + try: + await run_sync_in_worker_thread(shutil.copystat, src, dst) + except OSError: + if log: + log.debug("copystat on %s failed", dst, exc_info=True) + def path_to_intermediate(path): '''Name of the intermediate file used in atomic writes. @@ -116,11 +135,10 @@ def atomic_writing(path, text=True, encoding='utf-8', log=None, **kwargs): os.remove(tmp_path) - @contextmanager def _simple_writing(path, text=True, encoding='utf-8', log=None, **kwargs): """Context manager to write file without doing atomic writing - ( for weird filesystem eg: nfs). + (for weird filesystem eg: nfs). Parameters ---------- @@ -159,8 +177,6 @@ def _simple_writing(path, text=True, encoding='utf-8', log=None, **kwargs): fileobj.close() - - class FileManagerMixin(Configurable): """ Mixin for ContentsAPI classes that interact with the filesystem. @@ -186,7 +202,7 @@ class FileManagerMixin(Configurable): @contextmanager def open(self, os_path, *args, **kwargs): - """wrapper around io.open that turns permission errors into 403""" + """wrapper around open that turns permission errors into 403""" with self.perm_to_403(os_path): with io.open(os_path, *args, **kwargs) as f: yield f @@ -330,3 +346,94 @@ def _save_file(self, os_path, content, format): with self.atomic_writing(os_path, text=False) as f: f.write(bcontent) + +class AsyncFileManagerMixin(FileManagerMixin): + """ + Mixin for ContentsAPI classes that interact with the filesystem asynchronously. + """ + async def _copy(self, src, dest): + """copy src to dest + + like shutil.copy2, but log errors in copystat + """ + await async_copy2_safe(src, dest, log=self.log) + + async def _read_notebook(self, os_path, as_version=4): + """Read a notebook from an os path.""" + with self.open(os_path, 'r', encoding='utf-8') as f: + try: + return await run_sync_in_worker_thread(partial(nbformat.read, as_version=as_version), f) + except Exception as e: + e_orig = e + + # If use_atomic_writing is enabled, we'll guess that it was also + # enabled when this notebook was written and look for a valid + # atomic intermediate. + tmp_path = path_to_intermediate(os_path) + + if not self.use_atomic_writing or not os.path.exists(tmp_path): + raise HTTPError( + 400, + u"Unreadable Notebook: %s %r" % (os_path, e_orig), + ) + + # Move the bad file aside, restore the intermediate, and try again. + invalid_file = path_to_invalid(os_path) + async_replace_file(os_path, invalid_file) + async_replace_file(tmp_path, os_path) + return await self._read_notebook(os_path, as_version) + + async def _save_notebook(self, os_path, nb): + """Save a notebook to an os_path.""" + with self.atomic_writing(os_path, encoding='utf-8') as f: + await run_sync_in_worker_thread(partial(nbformat.write, version=nbformat.NO_CONVERT), nb, f) + + async def _read_file(self, os_path, format): + """Read a non-notebook file. + + os_path: The path to be read. + format: + If 'text', the contents will be decoded as UTF-8. + If 'base64', the raw bytes contents will be encoded as base64. + If not specified, try to decode as UTF-8, and fall back to base64 + """ + if not os.path.isfile(os_path): + raise HTTPError(400, "Cannot read non-file %s" % os_path) + + with self.open(os_path, 'rb') as f: + bcontent = await run_sync_in_worker_thread(f.read) + + if format is None or format == 'text': + # Try to interpret as unicode if format is unknown or if unicode + # was explicitly requested. + try: + return bcontent.decode('utf8'), 'text' + except UnicodeError as e: + if format == 'text': + raise HTTPError( + 400, + "%s is not UTF-8 encoded" % os_path, + reason='bad format', + ) from e + return encodebytes(bcontent).decode('ascii'), 'base64' + + async def _save_file(self, os_path, content, format): + """Save content of a generic file.""" + if format not in {'text', 'base64'}: + raise HTTPError( + 400, + "Must specify format of file contents as 'text' or 'base64'", + ) + try: + if format == 'text': + bcontent = content.encode('utf8') + else: + b64_bytes = content.encode('ascii') + bcontent = decodebytes(b64_bytes) + except Exception as e: + raise HTTPError( + 400, u'Encoding error saving %s: %s' % (os_path, e) + ) from e + + with self.atomic_writing(os_path, text=False) as f: + await run_sync_in_worker_thread(f.write, bcontent) diff --git a/jupyter_server/services/contents/filemanager.py b/jupyter_server/services/contents/filemanager.py index 32bc0389c..a46f0b64b 100644 --- a/jupyter_server/services/contents/filemanager.py +++ b/jupyter_server/services/contents/filemanager.py @@ -14,12 +14,13 @@ import mimetypes import nbformat +from anyio import run_sync_in_worker_thread from send2trash import send2trash from tornado import web -from .filecheckpoints import FileCheckpoints -from .fileio import FileManagerMixin -from .manager import ContentsManager +from .filecheckpoints import AsyncFileCheckpoints, FileCheckpoints +from .fileio import AsyncFileManagerMixin, FileManagerMixin +from .manager import AsyncContentsManager, ContentsManager from ...utils import exists from ipython_genutils.importstring import import_item @@ -548,3 +549,301 @@ def get_kernel_path(self, path, model=None): parent_dir = '' return parent_dir +class AsyncFileContentsManager(FileContentsManager, AsyncFileManagerMixin, AsyncContentsManager): + @default('checkpoints_class') + def _checkpoints_class_default(self): + return AsyncFileCheckpoints + + async def _dir_model(self, path, content=True): + """Build a model for a directory + + if content is requested, will include a listing of the directory + """ + os_path = self._get_os_path(path) + + four_o_four = u'directory does not exist: %r' % path + + if not os.path.isdir(os_path): + raise web.HTTPError(404, four_o_four) + elif is_hidden(os_path, self.root_dir) and not self.allow_hidden: + self.log.info("Refusing to serve hidden directory %r, via 404 Error", + os_path + ) + raise web.HTTPError(404, four_o_four) + + model = self._base_model(path) + model['type'] = 'directory' + model['size'] = None + if content: + model['content'] = contents = [] + os_dir = self._get_os_path(path) + dir_contents = await run_sync_in_worker_thread(os.listdir, os_dir) + for name in dir_contents: + try: + os_path = os.path.join(os_dir, name) + except UnicodeDecodeError as e: + self.log.warning( + "failed to decode filename '%s': %s", name, e) + continue + + try: + st = await run_sync_in_worker_thread(os.lstat, os_path) + except OSError as e: + # skip over broken symlinks in listing + if e.errno == errno.ENOENT: + self.log.warning("%s doesn't exist", os_path) + else: + self.log.warning("Error stat-ing %s: %s", os_path, e) + continue + + if (not stat.S_ISLNK(st.st_mode) + and not stat.S_ISREG(st.st_mode) + and not stat.S_ISDIR(st.st_mode)): + self.log.debug("%s not a regular file", os_path) + continue + + if self.should_list(name): + if self.allow_hidden or not is_file_hidden(os_path, stat_res=st): + contents.append( + await self.get(path='%s/%s' % (path, name), content=False) + ) + + model['format'] = 'json' + + return model + + async def _file_model(self, path, content=True, format=None): + """Build a model for a file + + if content is requested, include the file contents. + + format: + If 'text', the contents will be decoded as UTF-8. + If 'base64', the raw bytes contents will be encoded as base64. + If not specified, try to decode as UTF-8, and fall back to base64 + """ + model = self._base_model(path) + model['type'] = 'file' + + os_path = self._get_os_path(path) + model['mimetype'] = mimetypes.guess_type(os_path)[0] + + if content: + content, format = await self._read_file(os_path, format) + if model['mimetype'] is None: + default_mime = { + 'text': 'text/plain', + 'base64': 'application/octet-stream' + }[format] + model['mimetype'] = default_mime + + model.update( + content=content, + format=format, + ) + + return model + + async def _notebook_model(self, path, content=True): + """Build a notebook model + + if content is requested, the notebook content will be populated + as a JSON structure (not double-serialized) + """ + model = self._base_model(path) + model['type'] = 'notebook' + os_path = self._get_os_path(path) + + if content: + nb = await self._read_notebook(os_path, as_version=4) + self.mark_trusted_cells(nb, path) + model['content'] = nb + model['format'] = 'json' + self.validate_notebook_model(model) + + return model + + async def get(self, path, content=True, type=None, format=None): + """ Takes a path for an entity and returns its model + + Parameters + ---------- + path : str + the API path that describes the relative path for the target + content : bool + Whether to include the contents in the reply + type : str, optional + The requested type - 'file', 'notebook', or 'directory'. + Will raise HTTPError 400 if the content doesn't match. + format : str, optional + The requested format for file contents. 'text' or 'base64'. + Ignored if this returns a notebook or directory model. + + Returns + ------- + model : dict + the contents model. If content=True, returns the contents + of the file or directory as well. + """ + path = path.strip('/') + + if not self.exists(path): + raise web.HTTPError(404, u'No such file or directory: %s' % path) + + os_path = self._get_os_path(path) + if os.path.isdir(os_path): + if type not in (None, 'directory'): + raise web.HTTPError(400, + u'%s is a directory, not a %s' % (path, type), reason='bad type') + model = await self._dir_model(path, content=content) + elif type == 'notebook' or (type is None and path.endswith('.ipynb')): + model = await self._notebook_model(path, content=content) + else: + if type == 'directory': + raise web.HTTPError(400, + u'%s is not a directory' % path, reason='bad type') + model = await self._file_model(path, content=content, format=format) + return model + + async def _save_directory(self, os_path, model, path=''): + """create a directory""" + if is_hidden(os_path, self.root_dir) and not self.allow_hidden: + raise web.HTTPError(400, u'Cannot create hidden directory %r' % os_path) + if not os.path.exists(os_path): + with self.perm_to_403(): + await run_sync_in_worker_thread(os.mkdir, os_path) + elif not os.path.isdir(os_path): + raise web.HTTPError(400, u'Not a directory: %s' % (os_path)) + else: + self.log.debug("Directory %r already exists", os_path) + + async def save(self, model, path=''): + """Save the file model and return the model with no content.""" + path = path.strip('/') + + if 'type' not in model: + raise web.HTTPError(400, u'No file type provided') + if 'content' not in model and model['type'] != 'directory': + raise web.HTTPError(400, u'No file content provided') + + os_path = self._get_os_path(path) + self.log.debug("Saving %s", os_path) + + self.run_pre_save_hook(model=model, path=path) + + try: + if model['type'] == 'notebook': + nb = nbformat.from_dict(model['content']) + self.check_and_sign(nb, path) + await self._save_notebook(os_path, nb) + # One checkpoint should always exist for notebooks. + if not (await self.checkpoints.list_checkpoints(path)): + await self.create_checkpoint(path) + elif model['type'] == 'file': + # Missing format will be handled internally by _save_file. + await self._save_file(os_path, model['content'], model.get('format')) + elif model['type'] == 'directory': + await self._save_directory(os_path, model, path) + else: + raise web.HTTPError(400, "Unhandled contents type: %s" % model['type']) + except web.HTTPError: + raise + except Exception as e: + self.log.error(u'Error while saving file: %s %s', path, e, exc_info=True) + raise web.HTTPError(500, u'Unexpected error while saving file: %s %s' + % (path, e)) from e + + validation_message = None + if model['type'] == 'notebook': + self.validate_notebook_model(model) + validation_message = model.get('message', None) + + model = await self.get(path, content=False) + if validation_message: + model['message'] = validation_message + + self.run_post_save_hook(model=model, os_path=os_path) + + return model + + async def delete_file(self, path): + """Delete file at path.""" + path = path.strip('/') + os_path = self._get_os_path(path) + rm = os.unlink + if not os.path.exists(os_path): + raise web.HTTPError(404, u'File or directory does not exist: %s' % os_path) + + async def _check_trash(os_path): + if sys.platform in {'win32', 'darwin'}: + return True + + # It's a bit more nuanced than this, but until we can better + # distinguish errors from send2trash, assume that we can only trash + # files on the same partition as the home directory. + file_dev = (await run_sync_in_worker_thread(os.stat, os_path)).st_dev + home_dev = (await run_sync_in_worker_thread(os.stat, os.path.expanduser('~'))).st_dev + return file_dev == home_dev + + async def is_non_empty_dir(os_path): + if os.path.isdir(os_path): + # A directory containing only leftover checkpoints is + # considered empty. + cp_dir = getattr(self.checkpoints, 'checkpoint_dir', None) + dir_contents = set(await run_sync_in_worker_thread(os.listdir, os_path)) + if dir_contents - {cp_dir}: + return True + + return False + + if self.delete_to_trash: + if sys.platform == 'win32' and await is_non_empty_dir(os_path): + # send2trash can really delete files on Windows, so disallow + # deleting non-empty files. See Github issue 3631. + raise web.HTTPError(400, u'Directory %s not empty' % os_path) + if await _check_trash(os_path): + self.log.debug("Sending %s to trash", os_path) + # Looking at the code in send2trash, I don't think the errors it + # raises let us distinguish permission errors from other errors in + # code. So for now, just let them all get logged as server errors. + send2trash(os_path) + return + else: + self.log.warning("Skipping trash for %s, on different device " + "to home directory", os_path) + + if os.path.isdir(os_path): + # Don't permanently delete non-empty directories. + if await is_non_empty_dir(os_path): + raise web.HTTPError(400, u'Directory %s not empty' % os_path) + self.log.debug("Removing directory %s", os_path) + with self.perm_to_403(): + await run_sync_in_worker_thread(shutil.rmtree, os_path) + else: + self.log.debug("Unlinking file %s", os_path) + with self.perm_to_403(): + await run_sync_in_worker_thread(rm, os_path) + + async def rename_file(self, old_path, new_path): + """Rename a file.""" + old_path = old_path.strip('/') + new_path = new_path.strip('/') + if new_path == old_path: + return + + new_os_path = self._get_os_path(new_path) + old_os_path = self._get_os_path(old_path) + + # Should we proceed with the move? + if os.path.exists(new_os_path) and not samefile(old_os_path, new_os_path): + raise web.HTTPError(409, u'File already exists: %s' % new_path) + + # Move the file + try: + with self.perm_to_403(): + await run_sync_in_worker_thread(shutil.move, old_os_path, new_os_path) + except web.HTTPError: + raise + except Exception as e: + raise web.HTTPError(500, u'Unknown error renaming file: %s %s' % + (old_path, e)) from e \ No newline at end of file diff --git a/jupyter_server/services/contents/handlers.py b/jupyter_server/services/contents/handlers.py index 53aff0907..70e11366b 100644 --- a/jupyter_server/services/contents/handlers.py +++ b/jupyter_server/services/contents/handlers.py @@ -120,7 +120,7 @@ async def patch(self, path=''): model = self.get_json_body() if model is None: raise web.HTTPError(400, u'JSON body missing') - model = cm.update(model, path) + model = await ensure_async(cm.update(model, path)) validate_model(model, expect_content=False) self._finish_model(model) @@ -130,7 +130,7 @@ async def _copy(self, copy_from, copy_to=None): copy_from=copy_from, copy_to=copy_to or '', )) - model = self.contents_manager.copy(copy_from, copy_to) + model = await ensure_async(self.contents_manager.copy(copy_from, copy_to)) self.set_status(201) validate_model(model, expect_content=False) self._finish_model(model) @@ -138,7 +138,7 @@ async def _copy(self, copy_from, copy_to=None): async def _upload(self, model, path): """Handle upload of a new file to path""" self.log.info(u"Uploading file to %s", path) - model = self.contents_manager.new(model, path) + model = await ensure_async(self.contents_manager.new(model, path)) self.set_status(201) validate_model(model, expect_content=False) self._finish_model(model) @@ -146,7 +146,8 @@ async def _upload(self, model, path): async def _new_untitled(self, path, type='', ext=''): """Create a new, empty untitled entity""" self.log.info(u"Creating new %s in %s", type or 'file', path) - model = self.contents_manager.new_untitled(path=path, type=type, ext=ext) + model = await ensure_async(self.contents_manager.new_untitled( + path=path, type=type, ext=ext)) self.set_status(201) validate_model(model, expect_content=False) self._finish_model(model) @@ -156,7 +157,7 @@ async def _save(self, model, path): chunk = model.get("chunk", None) if not chunk or chunk == -1: # Avoid tedious log information self.log.info(u"Saving file at %s", path) - model = self.contents_manager.save(model, path) + model = await ensure_async(self.contents_manager.save(model, path)) validate_model(model, expect_content=False) self._finish_model(model) @@ -225,7 +226,7 @@ async def delete(self, path=''): """delete a file in the given path""" cm = self.contents_manager self.log.warning('delete %s', path) - cm.delete(path) + await ensure_async(cm.delete(path)) self.set_status(204) self.finish() @@ -236,7 +237,7 @@ class CheckpointsHandler(APIHandler): async def get(self, path=''): """get lists checkpoints for a file""" cm = self.contents_manager - checkpoints = cm.list_checkpoints(path) + checkpoints = await ensure_async(cm.list_checkpoints(path)) data = json.dumps(checkpoints, default=date_default) self.finish(data) @@ -244,7 +245,7 @@ async def get(self, path=''): async def post(self, path=''): """post creates a new checkpoint""" cm = self.contents_manager - checkpoint = cm.create_checkpoint(path) + checkpoint = await ensure_async(cm.create_checkpoint(path)) data = json.dumps(checkpoint, default=date_default) location = url_path_join(self.base_url, 'api/contents', url_escape(path), 'checkpoints', url_escape(checkpoint['id'])) @@ -259,7 +260,7 @@ class ModifyCheckpointsHandler(APIHandler): async def post(self, path, checkpoint_id): """post restores a file from a checkpoint""" cm = self.contents_manager - cm.restore_checkpoint(checkpoint_id, path) + await ensure_async(cm.restore_checkpoint(checkpoint_id, path)) self.set_status(204) self.finish() @@ -267,7 +268,7 @@ async def post(self, path, checkpoint_id): async def delete(self, path, checkpoint_id): """delete clears a checkpoint for a given file""" cm = self.contents_manager - cm.delete_checkpoint(checkpoint_id, path) + await ensure_async(cm.delete_checkpoint(checkpoint_id, path)) self.set_status(204) self.finish() diff --git a/jupyter_server/services/contents/manager.py b/jupyter_server/services/contents/manager.py index 0aaf69a98..a7e3c61a0 100644 --- a/jupyter_server/services/contents/manager.py +++ b/jupyter_server/services/contents/manager.py @@ -32,6 +32,7 @@ from ipython_genutils.py3compat import string_types from jupyter_server.base.handlers import JupyterHandler from jupyter_server.transutils import _ +from jupyter_server.utils import ensure_async copy_pat = re.compile(r'\-Copy\d*\.') @@ -537,6 +538,18 @@ class AsyncContentsManager(ContentsManager): checkpoints_class = Type(AsyncCheckpoints, config=True) checkpoints = Instance(AsyncCheckpoints, config=True) + checkpoints_kwargs = Dict(config=True) + + @default('checkpoints') + def _default_checkpoints(self): + return self.checkpoints_class(**self.checkpoints_kwargs) + + @default('checkpoints_kwargs') + def _default_checkpoints_kwargs(self): + return dict( + parent=self, + log=self.log, + ) # ContentsManager API part 1: methods that must be # implemented in subclasses. @@ -659,12 +672,49 @@ async def update(self, model, path): re-uploading its contents. Only used for renaming at the moment. """ path = path.strip('/') - new_path = await model.get('path', path).strip('/') + new_path = model.get('path', path).strip('/') if path != new_path: await self.rename(path, new_path) model = await self.get(new_path, content=False) return model + async def increment_filename(self, filename, path='', insert=''): + """Increment a filename until it is unique. + + Parameters + ---------- + filename : unicode + The name of a file, including extension + path : unicode + The API path of the target's directory + insert: unicode + The characters to insert after the base filename + + Returns + ------- + name : unicode + A filename that is unique, based on the input filename. + """ + # Extract the full suffix from the filename (e.g. .tar.gz) + path = path.strip('/') + basename, dot, ext = filename.rpartition('.') + if ext != 'ipynb': + basename, dot, ext = filename.partition('.') + + suffix = dot + ext + + for i in itertools.count(): + if i: + insert_i = '{}{}'.format(insert, i) + else: + insert_i = '' + name = u'{basename}{insert}{suffix}'.format(basename=basename, + insert=insert_i, suffix=suffix) + file_exists = await ensure_async(self.exists(u'{}/{}'.format(path, name))) + if not file_exists: + break + return name + async def new_untitled(self, path='', type='', ext=''): """Create a new untitled file or directory in path @@ -675,7 +725,8 @@ async def new_untitled(self, path='', type='', ext=''): Use `new` to create files with a fully specified path (including filename). """ path = path.strip('/') - if not (await self.dir_exists(path)): + dir_exists = await ensure_async(self.dir_exists(path)) + if not dir_exists: raise HTTPError(404, 'No such directory: %s' % path) model = {} @@ -699,7 +750,7 @@ async def new_untitled(self, path='', type='', ext=''): else: raise HTTPError(400, "Unexpected model type: %r" % model['type']) - name = self.increment_filename(untitled + ext, path, insert=insert) + name = await self.increment_filename(untitled + ext, path, insert=insert) path = u'{0}/{1}'.format(path, name) return await self.new(model, path) @@ -757,9 +808,9 @@ async def copy(self, from_path, to_path=None): raise HTTPError(400, "Can't copy directories") if to_path is None: to_path = from_dir - if (await self.dir_exists(to_path)) : + if self.dir_exists(to_path): name = copy_pat.sub(u'.', from_name) - to_name = self.increment_filename(name, to_path, insert='-Copy') + to_name = await self.increment_filename(name, to_path, insert='-Copy') to_path = u'{0}/{1}'.format(to_path, to_name) model = await self.save(model, to_path) diff --git a/setup.py b/setup.py index 063764d2a..96279432b 100644 --- a/setup.py +++ b/setup.py @@ -49,6 +49,7 @@ 'terminado>=0.8.3', 'prometheus_client', "pywin32>=1.0 ; sys_platform == 'win32'", + "anyio>=2.0.2", ], extras_require = { 'test': ['coverage', 'requests', diff --git a/tests/services/contents/test_api.py b/tests/services/contents/test_api.py index fea04db7b..623c2e15e 100644 --- a/tests/services/contents/test_api.py +++ b/tests/services/contents/test_api.py @@ -12,6 +12,7 @@ ) from jupyter_server.utils import url_path_join +from jupyter_server.services.contents.filecheckpoints import AsyncFileCheckpoints, FileCheckpoints from base64 import encodebytes, decodebytes @@ -41,6 +42,13 @@ def dirs_only(dir_model): ] +@pytest.fixture(params=["FileContentsManager", "AsyncFileContentsManager"]) +def argv(request): + if request.param == "AsyncFileContentsManager" and sys.version_info < (3, 6): + pytest.skip("Kernel manager is AsyncFileContentsManager, Python version < 3.6") + return ["--ServerApp.contents_manager_class=jupyter_server.services.contents.filemanager." + request.param] + + @pytest.fixture def contents_dir(tmp_path, jp_serverapp): return tmp_path / jp_serverapp.root_dir diff --git a/tests/services/contents/test_manager.py b/tests/services/contents/test_manager.py index 4d3d04f8f..7357611d9 100644 --- a/tests/services/contents/test_manager.py +++ b/tests/services/contents/test_manager.py @@ -10,7 +10,8 @@ from nbformat import v4 as nbformat -from jupyter_server.services.contents.filemanager import FileContentsManager +from jupyter_server.services.contents.filecheckpoints import AsyncFileCheckpoints, FileCheckpoints +from jupyter_server.utils import ensure_async from ...utils import expected_http_error # -------------- Functions ---------------------------- @@ -43,30 +44,30 @@ def add_code_cell(notebook): notebook.cells.append(cell) -def new_notebook(jp_contents_manager): +async def new_notebook(jp_contents_manager): cm = jp_contents_manager - model = cm.new_untitled(type='notebook') + model = await ensure_async(cm.new_untitled(type='notebook')) name = model['name'] path = model['path'] - full_model = cm.get(path) + full_model = await ensure_async(cm.get(path)) nb = full_model['content'] nb['metadata']['counter'] = int(1e6 * time.time()) add_code_cell(nb) - cm.save(full_model, path) + await ensure_async(cm.save(full_model, path)) return nb, name, path -def make_populated_dir(jp_contents_manager, api_path): +async def make_populated_dir(jp_contents_manager, api_path): cm = jp_contents_manager _make_dir(cm, api_path) - cm.new(path="/".join([api_path, "nb.ipynb"])) - cm.new(path="/".join([api_path, "file.txt"])) + await ensure_async(cm.new(path="/".join([api_path, "nb.ipynb"]))) + await ensure_async(cm.new(path="/".join([api_path, "file.txt"]))) -def check_populated_dir_files(jp_contents_manager, api_path): - dir_model = jp_contents_manager.get(api_path) +async def check_populated_dir_files(jp_contents_manager, api_path): + dir_model = await ensure_async(jp_contents_manager.get(api_path)) assert dir_model['path'] == api_path assert dir_model['type'] == "directory" @@ -85,45 +86,45 @@ def check_populated_dir_files(jp_contents_manager, api_path): # ----------------- Tests ---------------------------------- -def test_root_dir(tmp_path): - fm = FileContentsManager(root_dir=str(tmp_path)) +def test_root_dir(file_contents_manager_class, tmp_path): + fm = file_contents_manager_class(root_dir=str(tmp_path)) assert fm.root_dir == str(tmp_path) -def test_missing_root_dir(tmp_path): +def test_missing_root_dir(file_contents_manager_class, tmp_path): root = tmp_path / 'notebook' / 'dir' / 'is' / 'missing' with pytest.raises(TraitError): - FileContentsManager(root_dir=str(root)) + file_contents_manager_class(root_dir=str(root)) -def test_invalid_root_dir(tmp_path): +def test_invalid_root_dir(file_contents_manager_class, tmp_path): temp_file = tmp_path / 'file.txt' temp_file.write_text('') with pytest.raises(TraitError): - FileContentsManager(root_dir=str(temp_file)) + file_contents_manager_class(root_dir=str(temp_file)) -def test_get_os_path(tmp_path): - fm = FileContentsManager(root_dir=str(tmp_path)) +def test_get_os_path(file_contents_manager_class, tmp_path): + fm = file_contents_manager_class(root_dir=str(tmp_path)) path = fm._get_os_path('/path/to/notebook/test.ipynb') rel_path_list = '/path/to/notebook/test.ipynb'.split('/') fs_path = os.path.join(fm.root_dir, *rel_path_list) assert path == fs_path - fm = FileContentsManager(root_dir=str(tmp_path)) + fm = file_contents_manager_class(root_dir=str(tmp_path)) path = fm._get_os_path('test.ipynb') fs_path = os.path.join(fm.root_dir, 'test.ipynb') assert path == fs_path - fm = FileContentsManager(root_dir=str(tmp_path)) + fm = file_contents_manager_class(root_dir=str(tmp_path)) path = fm._get_os_path('////test.ipynb') fs_path = os.path.join(fm.root_dir, 'test.ipynb') assert path == fs_path -def test_checkpoint_subdir(tmp_path): +def test_checkpoint_subdir(file_contents_manager_class, tmp_path): subd = 'sub ∂ir' cp_name = 'test-cp.ipynb' - fm = FileContentsManager(root_dir=str(tmp_path)) + fm = file_contents_manager_class(root_dir=str(tmp_path)) tmp_path.joinpath(subd).mkdir() cpm = fm.checkpoints cp_dir = cpm.checkpoint_path('cp', 'test.ipynb') @@ -136,18 +137,18 @@ def test_checkpoint_subdir(tmp_path): sys.platform == 'win32' and sys.version_info[0] < 3, reason="System platform is Windows, version < 3" ) -def test_bad_symlink(tmp_path): +async def test_bad_symlink(file_contents_manager_class, tmp_path): td = str(tmp_path) - cm = FileContentsManager(root_dir=td) + cm = file_contents_manager_class(root_dir=td) path = 'test bad symlink' _make_dir(cm, path) - file_model = cm.new_untitled(path=path, ext='.txt') + file_model = await ensure_async(cm.new_untitled(path=path, ext='.txt')) # create a broken symlink symlink(cm, "target", '%s/%s' % (path, 'bad symlink')) - model = cm.get(path) + model = await ensure_async(cm.get(path)) contents = { content['name']: content for content in model['content'] @@ -161,24 +162,24 @@ def test_bad_symlink(tmp_path): sys.platform == 'win32' and sys.version_info[0] < 3, reason="System platform is Windows, version < 3" ) -def test_good_symlink(tmp_path): +async def test_good_symlink(file_contents_manager_class, tmp_path): td = str(tmp_path) - cm = FileContentsManager(root_dir=td) + cm = file_contents_manager_class(root_dir=td) parent = 'test good symlink' name = 'good symlink' path = '{0}/{1}'.format(parent, name) _make_dir(cm, parent) - file_model = cm.new(path=parent + '/zfoo.txt') + file_model = await ensure_async(cm.new(path=parent + '/zfoo.txt')) # create a good symlink symlink(cm, file_model['path'], path) - symlink_model = cm.get(path, content=False) - dir_model = cm.get(parent) + symlink_model = await ensure_async(cm.get(path, content=False)) + dir_model = await ensure_async(cm.get(parent)) assert sorted(dir_model['content'], key=lambda x: x['name']) == [symlink_model, file_model] -def test_403(tmp_path): +async def test_403(file_contents_manager_class, tmp_path): if hasattr(os, 'getuid'): if os.getuid() == 0: raise pytest.skip("Can't test permissions as root") @@ -186,8 +187,8 @@ def test_403(tmp_path): raise pytest.skip("Can't test permissions on Windows") td = str(tmp_path) - cm = FileContentsManager(root_dir=td) - model = cm.new_untitled(type='file') + cm = file_contents_manager_class(root_dir=td) + model = await ensure_async(cm.new_untitled(type='file')) os_path = cm._get_os_path(model['path']) os.chmod(os_path, 0o400) @@ -197,9 +198,9 @@ def test_403(tmp_path): except HTTPError as e: assert e.status_code == 403 -def test_escape_root(tmp_path): +async def test_escape_root(file_contents_manager_class, tmp_path): td = str(tmp_path) - cm = FileContentsManager(root_dir=td) + cm = file_contents_manager_class(root_dir=td) # make foo, bar next to root with open(os.path.join(cm.root_dir, '..', 'foo'), 'w') as f: f.write('foo') @@ -207,34 +208,34 @@ def test_escape_root(tmp_path): f.write('bar') with pytest.raises(HTTPError) as e: - cm.get('..') + await ensure_async(cm.get('..')) expected_http_error(e, 404) with pytest.raises(HTTPError) as e: - cm.get('foo/../../../bar') + await ensure_async(cm.get('foo/../../../bar')) expected_http_error(e, 404) with pytest.raises(HTTPError) as e: - cm.delete('../foo') + await ensure_async(cm.delete('../foo')) expected_http_error(e, 404) with pytest.raises(HTTPError) as e: - cm.rename('../foo', '../bar') + await ensure_async(cm.rename('../foo', '../bar')) expected_http_error(e, 404) with pytest.raises(HTTPError) as e: - cm.save(model={ + await ensure_async(cm.save(model={ 'type': 'file', 'content': u'', 'format': 'text', - }, path='../foo') + }, path='../foo')) expected_http_error(e, 404) -def test_new_untitled(jp_contents_manager): +async def test_new_untitled(jp_contents_manager): cm = jp_contents_manager # Test in root directory - model = cm.new_untitled(type='notebook') + model = await ensure_async(cm.new_untitled(type='notebook')) assert isinstance(model, dict) assert 'name' in model assert 'path' in model @@ -244,7 +245,7 @@ def test_new_untitled(jp_contents_manager): assert model['path'] == 'Untitled.ipynb' # Test in sub-directory - model = cm.new_untitled(type='directory') + model = await ensure_async(cm.new_untitled(type='directory')) assert isinstance(model, dict) assert 'name' in model assert 'path' in model @@ -254,7 +255,7 @@ def test_new_untitled(jp_contents_manager): assert model['path'] == 'Untitled Folder' sub_dir = model['path'] - model = cm.new_untitled(path=sub_dir) + model = await ensure_async(cm.new_untitled(path=sub_dir)) assert isinstance(model, dict) assert 'name' in model assert 'path' in model @@ -264,64 +265,64 @@ def test_new_untitled(jp_contents_manager): assert model['path'] == '%s/untitled' % sub_dir # Test with a compound extension - model = cm.new_untitled(path=sub_dir, ext='.foo.bar') + model = await ensure_async(cm.new_untitled(path=sub_dir, ext='.foo.bar')) assert model['name'] == 'untitled.foo.bar' - model = cm.new_untitled(path=sub_dir, ext='.foo.bar') + model = await ensure_async(cm.new_untitled(path=sub_dir, ext='.foo.bar')) assert model['name'] == 'untitled1.foo.bar' -def test_modified_date(jp_contents_manager): +async def test_modified_date(jp_contents_manager): cm = jp_contents_manager # Create a new notebook. - nb, name, path = new_notebook(cm) - model = cm.get(path) + nb, name, path = await new_notebook(cm) + model = await ensure_async(cm.get(path)) # Add a cell and save. add_code_cell(model['content']) - cm.save(model, path) + await ensure_async(cm.save(model, path)) # Reload notebook and verify that last_modified incremented. - saved = cm.get(path) + saved = await ensure_async(cm.get(path)) assert saved['last_modified'] >= model['last_modified'] # Move the notebook and verify that last_modified stayed the same. # (The frontend fires a warning if last_modified increases on the # renamed file.) new_path = 'renamed.ipynb' - cm.rename(path, new_path) - renamed = cm.get(new_path) + await ensure_async(cm.rename(path, new_path)) + renamed = await ensure_async(cm.get(new_path)) assert renamed['last_modified'] >= saved['last_modified'] -def test_get(jp_contents_manager): +async def test_get(jp_contents_manager): cm = jp_contents_manager # Create a notebook - model = cm.new_untitled(type='notebook') + model = await ensure_async(cm.new_untitled(type='notebook')) name = model['name'] path = model['path'] # Check that we 'get' on the notebook we just created - model2 = cm.get(path) + model2 = await ensure_async(cm.get(path)) assert isinstance(model2, dict) assert 'name' in model2 assert 'path' in model2 assert model['name'] == name assert model['path'] == path - nb_as_file = cm.get(path, content=True, type='file') + nb_as_file = await ensure_async(cm.get(path, content=True, type='file')) assert nb_as_file['path'] == path assert nb_as_file['type'] == 'file' assert nb_as_file['format'] == 'text' assert not isinstance(nb_as_file['content'], dict) - nb_as_bin_file = cm.get(path, content=True, type='file', format='base64') + nb_as_bin_file = await ensure_async(cm.get(path, content=True, type='file', format='base64')) assert nb_as_bin_file['format'] == 'base64' # Test in sub-directory sub_dir = '/foo/' _make_dir(cm, 'foo') - model = cm.new_untitled(path=sub_dir, ext='.ipynb') - model2 = cm.get(sub_dir + name) + model = await ensure_async(cm.new_untitled(path=sub_dir, ext='.ipynb')) + model2 = await ensure_async(cm.get(sub_dir + name)) assert isinstance(model2, dict) assert 'name' in model2 assert 'path' in model2 @@ -331,8 +332,8 @@ def test_get(jp_contents_manager): # Test with a regular file. - file_model_path = cm.new_untitled(path=sub_dir, ext='.txt')['path'] - file_model = cm.get(file_model_path) + file_model_path = (await ensure_async(cm.new_untitled(path=sub_dir, ext='.txt')))['path'] + file_model = await ensure_async(cm.get(file_model_path)) expected_model = { 'content': u'', 'format': u'text', @@ -351,7 +352,7 @@ def test_get(jp_contents_manager): # Create a sub-sub directory to test getting directory contents with a # subdir. _make_dir(cm, 'foo/bar') - dirmodel = cm.get('foo') + dirmodel = await ensure_async(cm.get('foo')) assert dirmodel['type'] == 'directory' assert isinstance(dirmodel['content'], list) assert len(dirmodel['content']) == 3 @@ -360,9 +361,9 @@ def test_get(jp_contents_manager): # Directory contents should match the contents of each individual entry # when requested with content=False. - model2_no_content = cm.get(sub_dir + name, content=False) - file_model_no_content = cm.get(u'foo/untitled.txt', content=False) - sub_sub_dir_no_content = cm.get('foo/bar', content=False) + model2_no_content = await ensure_async(cm.get(sub_dir + name, content=False)) + file_model_no_content = await ensure_async(cm.get(u'foo/untitled.txt', content=False)) + sub_sub_dir_no_content = await ensure_async(cm.get('foo/bar', content=False)) assert sub_sub_dir_no_content['path'] == 'foo/bar' assert sub_sub_dir_no_content['name'] == 'bar' @@ -379,19 +380,19 @@ def test_get(jp_contents_manager): assert False, "Unexpected directory entry: %s" % entry() with pytest.raises(HTTPError): - cm.get('foo', type='file') + await ensure_async(cm.get('foo', type='file')) -def test_update(jp_contents_manager): +async def test_update(jp_contents_manager): cm = jp_contents_manager # Create a notebook. - model = cm.new_untitled(type='notebook') + model = await ensure_async(cm.new_untitled(type='notebook')) name = model['name'] path = model['path'] # Change the name in the model for rename model['path'] = 'test.ipynb' - model = cm.update(model, path) + model = await ensure_async(cm.update(model, path)) assert isinstance(model, dict) assert 'name' in model assert 'path' in model @@ -399,19 +400,19 @@ def test_update(jp_contents_manager): # Make sure the old name is gone with pytest.raises(HTTPError): - cm.get(path) + await ensure_async(cm.get(path)) # Test in sub-directory # Create a directory and notebook in that directory sub_dir = '/foo/' _make_dir(cm, 'foo') - model = cm.new_untitled(path=sub_dir, type='notebook') + model = await ensure_async(cm.new_untitled(path=sub_dir, type='notebook')) path = model['path'] # Change the name in the model for rename d = path.rsplit('/', 1)[0] new_path = model['path'] = d + '/test_in_sub.ipynb' - model = cm.update(model, path) + model = await ensure_async(cm.update(model, path)) assert isinstance(model, dict) assert 'name' in model assert 'path' in model @@ -420,21 +421,21 @@ def test_update(jp_contents_manager): # Make sure the old name is gone with pytest.raises(HTTPError): - cm.get(path) + await ensure_async(cm.get(path)) -def test_save(jp_contents_manager): +async def test_save(jp_contents_manager): cm = jp_contents_manager # Create a notebook - model = cm.new_untitled(type='notebook') + model = await ensure_async(cm.new_untitled(type='notebook')) name = model['name'] path = model['path'] # Get the model with 'content' - full_model = cm.get(path) + full_model = await ensure_async(cm.get(path)) # Save the notebook - model = cm.save(full_model, path) + model = await ensure_async(cm.save(full_model, path)) assert isinstance(model, dict) assert 'name' in model assert 'path' in model @@ -445,13 +446,13 @@ def test_save(jp_contents_manager): # Create a directory and notebook in that directory sub_dir = '/foo/' _make_dir(cm, 'foo') - model = cm.new_untitled(path=sub_dir, type='notebook') + model = await ensure_async(cm.new_untitled(path=sub_dir, type='notebook')) name = model['name'] path = model['path'] - model = cm.get(path) + model = await ensure_async(cm.get(path)) # Change the name in the model for rename - model = cm.save(model, path) + model = await ensure_async(cm.save(model, path)) assert isinstance(model, dict) assert 'name' in model assert 'path' in model @@ -459,36 +460,36 @@ def test_save(jp_contents_manager): assert model['path'] == 'foo/Untitled.ipynb' -def test_delete(jp_contents_manager): +async def test_delete(jp_contents_manager): cm = jp_contents_manager # Create a notebook - nb, name, path = new_notebook(cm) + nb, name, path = await new_notebook(cm) # Delete the notebook - cm.delete(path) + await ensure_async(cm.delete(path)) # Check that deleting a non-existent path raises an error. with pytest.raises(HTTPError): - cm.delete(path) + await ensure_async(cm.delete(path)) # Check that a 'get' on the deleted notebook raises and error with pytest.raises(HTTPError): - cm.get(path) + await ensure_async(cm.get(path)) -def test_rename(jp_contents_manager): +async def test_rename(jp_contents_manager): cm = jp_contents_manager # Create a new notebook - nb, name, path = new_notebook(cm) + nb, name, path = await new_notebook(cm) # Rename the notebook - cm.rename(path, "changed_path") + await ensure_async(cm.rename(path, "changed_path")) # Attempting to get the notebook under the old name raises an error with pytest.raises(HTTPError): - cm.get(path) + await ensure_async(cm.get(path)) # Fetching the notebook under the new name is successful - assert isinstance(cm.get("changed_path"), dict) + assert isinstance(await ensure_async(cm.get("changed_path")), dict) # Ported tests on nested directory renaming from pgcontents all_dirs = ['foo', 'bar', 'foo/bar', 'foo/bar/foo', 'foo/bar/foo/bar'] @@ -496,93 +497,93 @@ def test_rename(jp_contents_manager): changed_dirs = all_dirs[2:] for _dir in all_dirs: - make_populated_dir(cm, _dir) - check_populated_dir_files(cm, _dir) + await make_populated_dir(cm, _dir) + await check_populated_dir_files(cm, _dir) # Renaming to an existing directory should fail for src, dest in combinations(all_dirs, 2): with pytest.raises(HTTPError) as e: - cm.rename(src, dest) + await ensure_async(cm.rename(src, dest)) assert expected_http_error(e, 409) # Creating a notebook in a non_existant directory should fail with pytest.raises(HTTPError) as e: - cm.new_untitled("foo/bar_diff", ext=".ipynb") + await ensure_async(cm.new_untitled("foo/bar_diff", ext=".ipynb")) assert expected_http_error(e, 404) - cm.rename("foo/bar", "foo/bar_diff") + await ensure_async(cm.rename("foo/bar", "foo/bar_diff")) # Assert that unchanged directories remain so for unchanged in unchanged_dirs: - check_populated_dir_files(cm, unchanged) + await check_populated_dir_files(cm, unchanged) # Assert changed directories can no longer be accessed under old names for changed_dirname in changed_dirs: with pytest.raises(HTTPError) as e: - cm.get(changed_dirname) + await ensure_async(cm.get(changed_dirname)) assert expected_http_error(e, 404) new_dirname = changed_dirname.replace("foo/bar", "foo/bar_diff", 1) - check_populated_dir_files(cm, new_dirname) + await check_populated_dir_files(cm, new_dirname) # Created a notebook in the renamed directory should work - cm.new_untitled("foo/bar_diff", ext=".ipynb") + await ensure_async(cm.new_untitled("foo/bar_diff", ext=".ipynb")) -def test_delete_root(jp_contents_manager): +async def test_delete_root(jp_contents_manager): cm = jp_contents_manager with pytest.raises(HTTPError) as e: - cm.delete('') + await ensure_async(cm.delete('')) assert expected_http_error(e, 400) -def test_copy(jp_contents_manager): +async def test_copy(jp_contents_manager): cm = jp_contents_manager parent = u'å b' name = u'nb √.ipynb' path = u'{0}/{1}'.format(parent, name) _make_dir(cm, parent) - orig = cm.new(path=path) + orig = await ensure_async(cm.new(path=path)) # copy with unspecified name - copy = cm.copy(path) + copy = await ensure_async(cm.copy(path)) assert copy['name'] == orig['name'].replace('.ipynb', '-Copy1.ipynb') # copy with specified name - copy2 = cm.copy(path, u'å b/copy 2.ipynb') + copy2 = await ensure_async(cm.copy(path, u'å b/copy 2.ipynb')) assert copy2['name'] == u'copy 2.ipynb' assert copy2['path'] == u'å b/copy 2.ipynb' # copy with specified path - copy2 = cm.copy(path, u'/') + copy2 = await ensure_async(cm.copy(path, u'/')) assert copy2['name'] == name assert copy2['path'] == name -def test_mark_trusted_cells(jp_contents_manager): +async def test_mark_trusted_cells(jp_contents_manager): cm = jp_contents_manager - nb, name, path = new_notebook(cm) + nb, name, path = await new_notebook(cm) cm.mark_trusted_cells(nb, path) for cell in nb.cells: if cell.cell_type == 'code': assert not cell.metadata.trusted - cm.trust_notebook(path) - nb = cm.get(path)['content'] + await ensure_async(cm.trust_notebook(path)) + nb = (await ensure_async(cm.get(path)))['content'] for cell in nb.cells: if cell.cell_type == 'code': assert cell.metadata.trusted -def test_check_and_sign(jp_contents_manager): +async def test_check_and_sign(jp_contents_manager): cm = jp_contents_manager - nb, name, path = new_notebook(cm) + nb, name, path = await new_notebook(cm) cm.mark_trusted_cells(nb, path) cm.check_and_sign(nb, path) assert not cm.notary.check_signature(nb) - cm.trust_notebook(path) - nb = cm.get(path)['content'] + await ensure_async(cm.trust_notebook(path)) + nb = (await ensure_async(cm.get(path)))['content'] cm.mark_trusted_cells(nb, path) cm.check_and_sign(nb, path) assert cm.notary.check_signature(nb) diff --git a/tests/services/kernels/test_api.py b/tests/services/kernels/test_api.py index bc2ade403..44c815891 100644 --- a/tests/services/kernels/test_api.py +++ b/tests/services/kernels/test_api.py @@ -4,6 +4,7 @@ import pytest + import tornado import urllib.parse from tornado.escape import url_escape From 24dd61839f8629f28a6185362b73b9b4aed61fef Mon Sep 17 00:00:00 2001 From: Mariko Wakabayashi Date: Tue, 17 Nov 2020 11:55:16 -0700 Subject: [PATCH 04/15] Use jp prefix for fixtures --- jupyter_server/pytest_plugin.py | 3 +++ tests/services/contents/test_manager.py | 14 ++++++++++++++ 2 files changed, 17 insertions(+) diff --git a/jupyter_server/pytest_plugin.py b/jupyter_server/pytest_plugin.py index dee6d3f0d..b8454a6c4 100644 --- a/jupyter_server/pytest_plugin.py +++ b/jupyter_server/pytest_plugin.py @@ -19,6 +19,7 @@ from jupyter_server.extension import serverextension from jupyter_server.serverapp import ServerApp from jupyter_server.utils import url_path_join +<<<<<<< HEAD from jupyter_server.services.contents.filemanager import FileContentsManager from jupyter_server.services.contents.largefilemanager import LargeFileManager @@ -44,6 +45,8 @@ def mkdir(tmp_path, *parts): def jp_home_dir(tmp_path): """Provides a temporary HOME directory value.""" return mkdir(tmp_path, "home") +======= +>>>>>>> Use jp prefix for fixtures @pytest.fixture diff --git a/tests/services/contents/test_manager.py b/tests/services/contents/test_manager.py index 7357611d9..cb2009567 100644 --- a/tests/services/contents/test_manager.py +++ b/tests/services/contents/test_manager.py @@ -11,9 +11,23 @@ from nbformat import v4 as nbformat from jupyter_server.services.contents.filecheckpoints import AsyncFileCheckpoints, FileCheckpoints +from jupyter_server.services.contents.filemanager import AsyncFileContentsManager, FileContentsManager from jupyter_server.utils import ensure_async from ...utils import expected_http_error +@pytest.fixture(params=[(FileContentsManager, True), + (FileContentsManager, False), + (AsyncFileContentsManager, True), + (AsyncFileContentsManager, False)]) +def jp_contents_manager(request, tmp_path): + contents_manager, use_atomic_writing = request.param + return contents_manager(root_dir=str(tmp_path), use_atomic_writing=use_atomic_writing) + + +@pytest.fixture(params=[FileContentsManager, AsyncFileContentsManager]) +def file_contents_manager_class(request, tmp_path): + return request.param + # -------------- Functions ---------------------------- def _make_dir(jp_contents_manager, api_path): From 6bd874d5365a0ad51f2b01a53555a97a255d1d52 Mon Sep 17 00:00:00 2001 From: Mariko Wakabayashi Date: Tue, 17 Nov 2020 11:56:25 -0700 Subject: [PATCH 05/15] Remove condition for testing python 3.5 --- tests/services/contents/test_api.py | 2 -- tests/services/kernels/test_api.py | 2 -- tests/services/sessions/test_api.py | 2 -- 3 files changed, 6 deletions(-) diff --git a/tests/services/contents/test_api.py b/tests/services/contents/test_api.py index 623c2e15e..8a8bacbb4 100644 --- a/tests/services/contents/test_api.py +++ b/tests/services/contents/test_api.py @@ -44,8 +44,6 @@ def dirs_only(dir_model): @pytest.fixture(params=["FileContentsManager", "AsyncFileContentsManager"]) def argv(request): - if request.param == "AsyncFileContentsManager" and sys.version_info < (3, 6): - pytest.skip("Kernel manager is AsyncFileContentsManager, Python version < 3.6") return ["--ServerApp.contents_manager_class=jupyter_server.services.contents.filemanager." + request.param] diff --git a/tests/services/kernels/test_api.py b/tests/services/kernels/test_api.py index 44c815891..e0d28ddb1 100644 --- a/tests/services/kernels/test_api.py +++ b/tests/services/kernels/test_api.py @@ -18,8 +18,6 @@ @pytest.fixture(params=["MappingKernelManager", "AsyncMappingKernelManager"]) def argv(request): - if request.param == "AsyncMappingKernelManager" and sys.version_info < (3, 6): - pytest.skip("Kernel manager is AsyncMappingKernelManager, Python version < 3.6") return ["--ServerApp.kernel_manager_class=jupyter_server.services.kernels.kernelmanager." + request.param] diff --git a/tests/services/sessions/test_api.py b/tests/services/sessions/test_api.py index 065e392a6..fd650d810 100644 --- a/tests/services/sessions/test_api.py +++ b/tests/services/sessions/test_api.py @@ -18,8 +18,6 @@ @pytest.fixture(params=["MappingKernelManager", "AsyncMappingKernelManager"]) def argv(request): - if request.param == "AsyncMappingKernelManager" and sys.version_info < (3, 6): - pytest.skip("Kernel manager is AsyncMappingKernelManager, Python version < 3.6") return ["--ServerApp.kernel_manager_class=jupyter_server.services.kernels.kernelmanager." + request.param] From 5a521e305bd8c3faa6fae61bf7e470dfc05e9d52 Mon Sep 17 00:00:00 2001 From: Mariko Wakabayashi Date: Tue, 17 Nov 2020 11:59:50 -0700 Subject: [PATCH 06/15] Add new async classes to serverapp --- jupyter_server/serverapp.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/jupyter_server/serverapp.py b/jupyter_server/serverapp.py index f6f203772..308946588 100755 --- a/jupyter_server/serverapp.py +++ b/jupyter_server/serverapp.py @@ -553,8 +553,8 @@ class ServerApp(JupyterApp): aliases = Dict(aliases) classes = [ - KernelManager, Session, MappingKernelManager, KernelSpecManager, - ContentsManager, FileContentsManager, NotebookNotary, + KernelManager, Session, MappingKernelManager, KernelSpecManager, AsyncMappingKernelManager, + ContentsManager, FileContentsManager, AsyncContentsManager, AsyncFileContentsManager, NotebookNotary, GatewayKernelManager, GatewayKernelSpecManager, GatewaySessionManager, GatewayClient ] From 0afe5b196a5be6a91215f8a3448ad3b52656d233 Mon Sep 17 00:00:00 2001 From: Mariko Wakabayashi Date: Tue, 17 Nov 2020 12:04:58 -0700 Subject: [PATCH 07/15] Fix lint errors --- jupyter_server/services/contents/filecheckpoints.py | 1 - jupyter_server/services/contents/fileio.py | 2 +- jupyter_server/services/contents/filemanager.py | 1 - jupyter_server/services/contents/manager.py | 1 - tests/services/contents/test_api.py | 1 - tests/services/contents/test_config.py | 1 - tests/services/contents/test_manager.py | 4 +--- 7 files changed, 2 insertions(+), 9 deletions(-) diff --git a/jupyter_server/services/contents/filecheckpoints.py b/jupyter_server/services/contents/filecheckpoints.py index 5ddf8885e..5910b6076 100644 --- a/jupyter_server/services/contents/filecheckpoints.py +++ b/jupyter_server/services/contents/filecheckpoints.py @@ -15,7 +15,6 @@ from anyio import run_sync_in_worker_thread from jupyter_core.utils import ensure_dir_exists -from jupyter_server.utils import ensure_async from ipython_genutils.py3compat import getcwd from traitlets import Unicode diff --git a/jupyter_server/services/contents/fileio.py b/jupyter_server/services/contents/fileio.py index 91e095729..531715eb6 100644 --- a/jupyter_server/services/contents/fileio.py +++ b/jupyter_server/services/contents/fileio.py @@ -12,7 +12,7 @@ import os import shutil -from anyio import open_file, run_sync_in_worker_thread +from anyio import run_sync_in_worker_thread from tornado.web import HTTPError from jupyter_server.utils import ( diff --git a/jupyter_server/services/contents/filemanager.py b/jupyter_server/services/contents/filemanager.py index a46f0b64b..3bc590523 100644 --- a/jupyter_server/services/contents/filemanager.py +++ b/jupyter_server/services/contents/filemanager.py @@ -10,7 +10,6 @@ import shutil import stat import sys -import warnings import mimetypes import nbformat diff --git a/jupyter_server/services/contents/manager.py b/jupyter_server/services/contents/manager.py index a7e3c61a0..4868c2b96 100644 --- a/jupyter_server/services/contents/manager.py +++ b/jupyter_server/services/contents/manager.py @@ -30,7 +30,6 @@ default, ) from ipython_genutils.py3compat import string_types -from jupyter_server.base.handlers import JupyterHandler from jupyter_server.transutils import _ from jupyter_server.utils import ensure_async diff --git a/tests/services/contents/test_api.py b/tests/services/contents/test_api.py index 8a8bacbb4..30471d0ab 100644 --- a/tests/services/contents/test_api.py +++ b/tests/services/contents/test_api.py @@ -12,7 +12,6 @@ ) from jupyter_server.utils import url_path_join -from jupyter_server.services.contents.filecheckpoints import AsyncFileCheckpoints, FileCheckpoints from base64 import encodebytes, decodebytes diff --git a/tests/services/contents/test_config.py b/tests/services/contents/test_config.py index 3625f781b..06159d0b5 100644 --- a/tests/services/contents/test_config.py +++ b/tests/services/contents/test_config.py @@ -1,6 +1,5 @@ import pytest -from traitlets.config import Config from jupyter_server.services.contents.checkpoints import AsyncCheckpoints from jupyter_server.services.contents.filecheckpoints import GenericFileCheckpoints from jupyter_server.services.contents.manager import AsyncContentsManager diff --git a/tests/services/contents/test_manager.py b/tests/services/contents/test_manager.py index cb2009567..5bedb7977 100644 --- a/tests/services/contents/test_manager.py +++ b/tests/services/contents/test_manager.py @@ -10,7 +10,6 @@ from nbformat import v4 as nbformat -from jupyter_server.services.contents.filecheckpoints import AsyncFileCheckpoints, FileCheckpoints from jupyter_server.services.contents.filemanager import AsyncFileContentsManager, FileContentsManager from jupyter_server.utils import ensure_async from ...utils import expected_http_error @@ -335,7 +334,7 @@ async def test_get(jp_contents_manager): # Test in sub-directory sub_dir = '/foo/' _make_dir(cm, 'foo') - model = await ensure_async(cm.new_untitled(path=sub_dir, ext='.ipynb')) + await ensure_async(cm.new_untitled(path=sub_dir, ext='.ipynb')) model2 = await ensure_async(cm.get(sub_dir + name)) assert isinstance(model2, dict) assert 'name' in model2 @@ -461,7 +460,6 @@ async def test_save(jp_contents_manager): sub_dir = '/foo/' _make_dir(cm, 'foo') model = await ensure_async(cm.new_untitled(path=sub_dir, type='notebook')) - name = model['name'] path = model['path'] model = await ensure_async(cm.get(path)) From 3bc0febc8d8df19dcf660e7ddfeb36406bdbc4f0 Mon Sep 17 00:00:00 2001 From: Mariko Wakabayashi Date: Wed, 18 Nov 2020 18:45:58 -0700 Subject: [PATCH 08/15] Skip test for windows --- tests/services/contents/test_manager.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/services/contents/test_manager.py b/tests/services/contents/test_manager.py index 5bedb7977..27534dd8f 100644 --- a/tests/services/contents/test_manager.py +++ b/tests/services/contents/test_manager.py @@ -192,12 +192,14 @@ async def test_good_symlink(file_contents_manager_class, tmp_path): assert sorted(dir_model['content'], key=lambda x: x['name']) == [symlink_model, file_model] +@pytest.mark.skipif( + sys.platform.startswith('win'), + reason="Can't test permissions on Windows" +) async def test_403(file_contents_manager_class, tmp_path): if hasattr(os, 'getuid'): if os.getuid() == 0: raise pytest.skip("Can't test permissions as root") - if sys.platform.startswith('win'): - raise pytest.skip("Can't test permissions on Windows") td = str(tmp_path) cm = file_contents_manager_class(root_dir=td) From 9be92d81c402fe0fd31df571e45e71647330076a Mon Sep 17 00:00:00 2001 From: Mariko Wakabayashi Date: Fri, 20 Nov 2020 10:10:05 -0700 Subject: [PATCH 09/15] Create AsyncLargeFileManager --- .../services/contents/largefilemanager.py | 70 ++++++++++++++++++- .../contents/test_largefilemanager.py | 37 ++++++---- 2 files changed, 92 insertions(+), 15 deletions(-) diff --git a/jupyter_server/services/contents/largefilemanager.py b/jupyter_server/services/contents/largefilemanager.py index 7359659ad..e48eeae91 100644 --- a/jupyter_server/services/contents/largefilemanager.py +++ b/jupyter_server/services/contents/largefilemanager.py @@ -1,10 +1,12 @@ -from jupyter_server.services.contents.filemanager import FileContentsManager +from anyio import run_sync_in_worker_thread from contextlib import contextmanager from tornado import web import nbformat import base64 import os, io +from jupyter_server.services.contents.filemanager import AsyncFileContentsManager, FileContentsManager + class LargeFileManager(FileContentsManager): """Handle large file upload.""" @@ -71,3 +73,69 @@ def _save_large_file(self, os_path, content, format): with io.open(os_path, 'ab') as f: f.write(bcontent) + +class AsyncLargeFileManager(AsyncFileContentsManager): + """Handle large file upload asynchronously""" + + async def save(self, model, path=''): + """Save the file model and return the model with no content.""" + chunk = model.get('chunk', None) + if chunk is not None: + path = path.strip('/') + + if 'type' not in model: + raise web.HTTPError(400, u'No file type provided') + if model['type'] != 'file': + raise web.HTTPError(400, u'File type "{}" is not supported for large file transfer'.format(model['type'])) + if 'content' not in model and model['type'] != 'directory': + raise web.HTTPError(400, u'No file content provided') + + os_path = self._get_os_path(path) + + try: + if chunk == 1: + self.log.debug("Saving %s", os_path) + self.run_pre_save_hook(model=model, path=path) + await super(AsyncLargeFileManager, self)._save_file(os_path, model['content'], model.get('format')) + else: + await self._save_large_file(os_path, model['content'], model.get('format')) + except web.HTTPError: + raise + except Exception as e: + self.log.error(u'Error while saving file: %s %s', path, e, exc_info=True) + raise web.HTTPError(500, u'Unexpected error while saving file: %s %s' % + (path, e)) from e + + model = await self.get(path, content=False) + + # Last chunk + if chunk == -1: + self.run_post_save_hook(model=model, os_path=os_path) + return model + else: + return await super(AsyncLargeFileManager, self).save(model, path) + + async def _save_large_file(self, os_path, content, format): + """Save content of a generic file.""" + if format not in {'text', 'base64'}: + raise web.HTTPError( + 400, + "Must specify format of file contents as 'text' or 'base64'", + ) + try: + if format == 'text': + bcontent = content.encode('utf8') + else: + b64_bytes = content.encode('ascii') + bcontent = base64.b64decode(b64_bytes) + except Exception as e: + raise web.HTTPError( + 400, u'Encoding error saving %s: %s' % (os_path, e) + ) from e + + with self.perm_to_403(os_path): + if os.path.islink(os_path): + os_path = os.path.join(os.path.dirname(os_path), os.readlink(os_path)) + with io.open(os_path, 'ab') as f: + await run_sync_in_worker_thread(f.write, bcontent) + diff --git a/tests/services/contents/test_largefilemanager.py b/tests/services/contents/test_largefilemanager.py index f06f31dfb..bd3dfdd7a 100644 --- a/tests/services/contents/test_largefilemanager.py +++ b/tests/services/contents/test_largefilemanager.py @@ -1,19 +1,28 @@ import pytest import tornado +from jupyter_server.services.contents.largefilemanager import AsyncLargeFileManager, LargeFileManager +from jupyter_server.utils import ensure_async from ...utils import expected_http_error -def test_save(jp_large_contents_manager): +@pytest.fixture(params=[LargeFileManager, AsyncLargeFileManager]) +def jp_large_contents_manager(request, tmp_path): + """Returns a LargeFileManager instance.""" + file_manager = request.param + return file_manager(root_dir=str(tmp_path)) + + +async def test_save(jp_large_contents_manager): cm = jp_large_contents_manager - model = cm.new_untitled(type='notebook') + model = await ensure_async(cm.new_untitled(type='notebook')) name = model['name'] path = model['path'] # Get the model with 'content' - full_model = cm.get(path) + full_model = await ensure_async(cm.get(path)) # Save the notebook - model = cm.save(full_model, path) + model = await ensure_async(cm.save(full_model, path)) assert isinstance(model, dict) assert 'name' in model assert 'path' in model @@ -43,26 +52,26 @@ def test_save(jp_large_contents_manager): ) ] ) -def test_bad_save(jp_large_contents_manager, model, err_message): +async def test_bad_save(jp_large_contents_manager, model, err_message): with pytest.raises(tornado.web.HTTPError) as e: - jp_large_contents_manager.save(model, model['path']) + await ensure_async(jp_large_contents_manager.save(model, model['path'])) assert expected_http_error(e, 400, expected_message=err_message) -def test_saving_different_chunks(jp_large_contents_manager): +async def test_saving_different_chunks(jp_large_contents_manager): cm = jp_large_contents_manager model = {'name': 'test', 'path': 'test', 'type': 'file', 'content': u'test==', 'format': 'text'} name = model['name'] path = model['path'] - cm.save(model, path) + await ensure_async(cm.save(model, path)) for chunk in (1, 2, -1): for fm in ('text', 'base64'): - full_model = cm.get(path) + full_model = await ensure_async(cm.get(path)) full_model['chunk'] = chunk full_model['format'] = fm - model_res = cm.save(full_model, path) + model_res = await ensure_async(cm.save(full_model, path)) assert isinstance(model_res, dict) assert 'name' in model_res assert 'path' in model_res @@ -71,16 +80,16 @@ def test_saving_different_chunks(jp_large_contents_manager): assert model_res['path'] == path -def test_save_in_subdirectory(jp_large_contents_manager, tmp_path): +async def test_save_in_subdirectory(jp_large_contents_manager, tmp_path): cm = jp_large_contents_manager sub_dir = tmp_path / 'foo' sub_dir.mkdir() - model = cm.new_untitled(path='/foo/', type='notebook') + model = await ensure_async(cm.new_untitled(path='/foo/', type='notebook')) path = model['path'] - model = cm.get(path) + model = await ensure_async(cm.get(path)) # Change the name in the model for rename - model = cm.save(model, path) + model = await ensure_async(cm.save(model, path)) assert isinstance(model, dict) assert 'name' in model assert 'path' in model From 564325e9ba048c203c1cde83a47390328166821f Mon Sep 17 00:00:00 2001 From: Mariko Wakabayashi Date: Fri, 20 Nov 2020 10:47:51 -0700 Subject: [PATCH 10/15] Create AsyncGenericFileCheckpoints --- .../services/contents/filecheckpoints.py | 66 +++++++++++++++++++ tests/services/contents/test_config.py | 13 ++-- 2 files changed, 73 insertions(+), 6 deletions(-) diff --git a/jupyter_server/services/contents/filecheckpoints.py b/jupyter_server/services/contents/filecheckpoints.py index 5910b6076..757429a21 100644 --- a/jupyter_server/services/contents/filecheckpoints.py +++ b/jupyter_server/services/contents/filecheckpoints.py @@ -9,6 +9,7 @@ from .checkpoints import ( AsyncCheckpoints, Checkpoints, + AsyncGenericCheckpointsMixin, GenericCheckpointsMixin, ) from .fileio import AsyncFileManagerMixin, FileManagerMixin @@ -266,3 +267,68 @@ def get_file_checkpoint(self, checkpoint_id, path): 'content': content, 'format': format, } + + +class AsyncGenericFileCheckpoints(AsyncGenericCheckpointsMixin, AsyncFileCheckpoints): + """ + Asynchronous Local filesystem Checkpoints that works with any conforming + ContentsManager. + """ + async def create_file_checkpoint(self, content, format, path): + """Create a checkpoint from the current content of a file.""" + path = path.strip('/') + # only the one checkpoint ID: + checkpoint_id = u"checkpoint" + os_checkpoint_path = self.checkpoint_path(checkpoint_id, path) + self.log.debug("creating checkpoint for %s", path) + with self.perm_to_403(): + await self._save_file(os_checkpoint_path, content, format=format) + + # return the checkpoint info + return await self.checkpoint_model(checkpoint_id, os_checkpoint_path) + + async def create_notebook_checkpoint(self, nb, path): + """Create a checkpoint from the current content of a notebook.""" + path = path.strip('/') + # only the one checkpoint ID: + checkpoint_id = u"checkpoint" + os_checkpoint_path = self.checkpoint_path(checkpoint_id, path) + self.log.debug("creating checkpoint for %s", path) + with self.perm_to_403(): + await self._save_notebook(os_checkpoint_path, nb) + + # return the checkpoint info + return await self.checkpoint_model(checkpoint_id, os_checkpoint_path) + + async def get_notebook_checkpoint(self, checkpoint_id, path): + """Get a checkpoint for a notebook.""" + path = path.strip('/') + self.log.info("restoring %s from checkpoint %s", path, checkpoint_id) + os_checkpoint_path = self.checkpoint_path(checkpoint_id, path) + + if not os.path.isfile(os_checkpoint_path): + self.no_such_checkpoint(path, checkpoint_id) + + return { + 'type': 'notebook', + 'content': await self._read_notebook( + os_checkpoint_path, + as_version=4, + ), + } + + async def get_file_checkpoint(self, checkpoint_id, path): + """Get a checkpoint for a file.""" + path = path.strip('/') + self.log.info("restoring %s from checkpoint %s", path, checkpoint_id) + os_checkpoint_path = self.checkpoint_path(checkpoint_id, path) + + if not os.path.isfile(os_checkpoint_path): + self.no_such_checkpoint(path, checkpoint_id) + + content, format = await self._read_file(os_checkpoint_path, format=None) + return { + 'type': 'file', + 'content': content, + 'format': format, + } diff --git a/tests/services/contents/test_config.py b/tests/services/contents/test_config.py index 06159d0b5..7fb2289ea 100644 --- a/tests/services/contents/test_config.py +++ b/tests/services/contents/test_config.py @@ -1,17 +1,18 @@ import pytest from jupyter_server.services.contents.checkpoints import AsyncCheckpoints -from jupyter_server.services.contents.filecheckpoints import GenericFileCheckpoints +from jupyter_server.services.contents.filecheckpoints import AsyncGenericFileCheckpoints, GenericFileCheckpoints from jupyter_server.services.contents.manager import AsyncContentsManager -@pytest.fixture -def jp_server_config(): - return {'FileContentsManager': {'checkpoints_class': GenericFileCheckpoints}} +@pytest.fixture(params=[AsyncGenericFileCheckpoints, GenericFileCheckpoints]) +def jp_server_config(request): + return {'FileContentsManager': {'checkpoints_class': request.param}} -def test_config_did_something(jp_serverapp): - assert isinstance(jp_serverapp.contents_manager.checkpoints, GenericFileCheckpoints) +def test_config_did_something(jp_server_config, jp_serverapp): + assert isinstance(jp_serverapp.contents_manager.checkpoints, + jp_server_config['FileContentsManager']['checkpoints_class']) async def test_async_contents_manager(jp_configurable_serverapp): From 188fc3c1e2c505ce6e1ce66887eb002979d7f68b Mon Sep 17 00:00:00 2001 From: Mariko Wakabayashi Date: Thu, 3 Dec 2020 10:26:22 -0700 Subject: [PATCH 11/15] Make handlers compatible with async CMs --- jupyter_server/base/handlers.py | 10 +++++----- jupyter_server/files/handlers.py | 6 +++--- jupyter_server/nbconvert/handlers.py | 5 +++-- jupyter_server/services/contents/filemanager.py | 1 - jupyter_server/services/contents/handlers.py | 4 ++-- jupyter_server/services/contents/largefilemanager.py | 3 +-- jupyter_server/services/contents/manager.py | 4 ++-- jupyter_server/view/handlers.py | 6 +++--- 8 files changed, 19 insertions(+), 20 deletions(-) diff --git a/jupyter_server/base/handlers.py b/jupyter_server/base/handlers.py index 32a348607..57a37bda8 100755 --- a/jupyter_server/base/handlers.py +++ b/jupyter_server/base/handlers.py @@ -31,7 +31,7 @@ import jupyter_server from jupyter_server._tz import utcnow from jupyter_server.i18n import combine_translations -from jupyter_server.utils import is_hidden, url_path_join, url_is_absolute, url_escape +from jupyter_server.utils import ensure_async, is_hidden, url_path_join, url_is_absolute, url_escape from jupyter_server.services.security import csp_report_uri #----------------------------------------------------------------------------- @@ -800,13 +800,13 @@ class FilesRedirectHandler(JupyterHandler): """Handler for redirecting relative URLs to the /files/ handler""" @staticmethod - def redirect_to_files(self, path): + async def redirect_to_files(self, path): """make redirect logic a reusable static method so it can be called from other handlers. """ cm = self.contents_manager - if cm.dir_exists(path): + if await ensure_async(cm.dir_exists(path)): # it's a *directory*, redirect to /tree url = url_path_join(self.base_url, 'tree', url_escape(path)) else: @@ -814,14 +814,14 @@ def redirect_to_files(self, path): # otherwise, redirect to /files parts = path.split('/') - if not cm.file_exists(path=path) and 'files' in parts: + if not await ensure_async(cm.file_exists(path=path)) and 'files' in parts: # redirect without files/ iff it would 404 # this preserves pre-2.0-style 'files/' links self.log.warning("Deprecated files/ URL: %s", orig_path) parts.remove('files') path = '/'.join(parts) - if not cm.file_exists(path=path): + if not await ensure_async(cm.file_exists(path=path)): raise web.HTTPError(404) url = url_path_join(self.base_url, 'files', url_escape(path)) diff --git a/jupyter_server/files/handlers.py b/jupyter_server/files/handlers.py index 51b990f23..e73c445c6 100644 --- a/jupyter_server/files/handlers.py +++ b/jupyter_server/files/handlers.py @@ -8,7 +8,7 @@ from base64 import decodebytes from tornado import web from jupyter_server.base.handlers import JupyterHandler - +from jupyter_server.utils import ensure_async class FilesHandler(JupyterHandler): """serve files via ContentsManager @@ -34,7 +34,7 @@ def head(self, path): async def get(self, path, include_body=True): cm = self.contents_manager - if cm.is_hidden(path) and not cm.allow_hidden: + if await ensure_async(cm.is_hidden(path)) and not cm.allow_hidden: self.log.info("Refusing to serve hidden file, via 404 Error") raise web.HTTPError(404) @@ -44,7 +44,7 @@ async def get(self, path, include_body=True): else: name = path - model = await cm.get(path, type='file', content=include_body) + model = await ensure_async(cm.get(path, type='file', content=include_body)) if self.get_argument("download", False): self.set_attachment_header(name) diff --git a/jupyter_server/nbconvert/handlers.py b/jupyter_server/nbconvert/handlers.py index 0c962e3c1..550d7bace 100644 --- a/jupyter_server/nbconvert/handlers.py +++ b/jupyter_server/nbconvert/handlers.py @@ -14,6 +14,7 @@ JupyterHandler, FilesRedirectHandler, path_regex, ) +from jupyter_server.utils import ensure_async from nbformat import from_dict from ipython_genutils.py3compat import cast_bytes @@ -80,7 +81,7 @@ class NbconvertFileHandler(JupyterHandler): SUPPORTED_METHODS = ('GET',) @web.authenticated - def get(self, format, path): + async def get(self, format, path): exporter = get_exporter(format, config=self.config, log=self.log) @@ -93,7 +94,7 @@ def get(self, format, path): else: ext_resources_dir = None - model = self.contents_manager.get(path=path) + model = await ensure_async(self.contents_manager.get(path=path)) name = model['name'] if model['type'] != 'notebook': # not a notebook, redirect to files diff --git a/jupyter_server/services/contents/filemanager.py b/jupyter_server/services/contents/filemanager.py index 3bc590523..e6fc1e53c 100644 --- a/jupyter_server/services/contents/filemanager.py +++ b/jupyter_server/services/contents/filemanager.py @@ -5,7 +5,6 @@ from datetime import datetime import errno -import io import os import shutil import stat diff --git a/jupyter_server/services/contents/handlers.py b/jupyter_server/services/contents/handlers.py index 70e11366b..7ba6bd2d5 100644 --- a/jupyter_server/services/contents/handlers.py +++ b/jupyter_server/services/contents/handlers.py @@ -176,7 +176,7 @@ async def post(self, path=''): cm = self.contents_manager - file_exists = cm.file_exists(path) + file_exists = await ensure_async(cm.file_exists(path)) if file_exists: raise web.HTTPError(400, "Cannot POST to files, use PUT instead.") @@ -213,7 +213,7 @@ async def put(self, path=''): if model: if model.get('copy_from'): raise web.HTTPError(400, "Cannot copy with PUT, only POST") - exists = self.contents_manager.file_exists(path) + exists = await ensure_async(self.contents_manager.file_exists(path)) if exists: await self._save(model, path) else: diff --git a/jupyter_server/services/contents/largefilemanager.py b/jupyter_server/services/contents/largefilemanager.py index e48eeae91..b2c7a2fd7 100644 --- a/jupyter_server/services/contents/largefilemanager.py +++ b/jupyter_server/services/contents/largefilemanager.py @@ -1,7 +1,5 @@ from anyio import run_sync_in_worker_thread -from contextlib import contextmanager from tornado import web -import nbformat import base64 import os, io @@ -139,3 +137,4 @@ async def _save_large_file(self, os_path, content, format): with io.open(os_path, 'ab') as f: await run_sync_in_worker_thread(f.write, bcontent) + diff --git a/jupyter_server/services/contents/manager.py b/jupyter_server/services/contents/manager.py index 4868c2b96..c91c4493c 100644 --- a/jupyter_server/services/contents/manager.py +++ b/jupyter_server/services/contents/manager.py @@ -623,7 +623,7 @@ async def exists(self, path): exists : bool Whether the target exists. """ - return await (self.file_exists(path) or self.dir_exists(path)) + return await (ensure_async(self.file_exists(path)) or ensure_async(self.dir_exists(path))) async def get(self, path, content=True, type=None, format=None): """Get a file or directory model.""" @@ -807,7 +807,7 @@ async def copy(self, from_path, to_path=None): raise HTTPError(400, "Can't copy directories") if to_path is None: to_path = from_dir - if self.dir_exists(to_path): + if await ensure_async(self.dir_exists(to_path)): name = copy_pat.sub(u'.', from_name) to_name = await self.increment_filename(name, to_path, insert='-Copy') to_path = u'{0}/{1}'.format(to_path, to_name) diff --git a/jupyter_server/view/handlers.py b/jupyter_server/view/handlers.py index 5663d4db3..76f5a65b2 100644 --- a/jupyter_server/view/handlers.py +++ b/jupyter_server/view/handlers.py @@ -6,15 +6,15 @@ from tornado import web from ..base.handlers import JupyterHandler, path_regex -from ..utils import url_escape, url_path_join +from ..utils import ensure_async, url_escape, url_path_join class ViewHandler(JupyterHandler): """Render HTML files within an iframe.""" @web.authenticated - def get(self, path): + async def get(self, path): path = path.strip('/') - if not self.contents_manager.file_exists(path): + if not await ensure_async(self.contents_manager.file_exists(path)): raise web.HTTPError(404, u'File does not exist: %s' % path) basename = path.rsplit('/', 1)[-1] From b19edc00708cbda23311212d5f04b71262fedf27 Mon Sep 17 00:00:00 2001 From: Mariko Wakabayashi Date: Fri, 4 Dec 2020 10:21:57 -0700 Subject: [PATCH 12/15] update docs and configs --- docs/source/developers/contents.rst | 15 +- docs/source/other/full-config.rst | 364 +++++++++++++++++++++++++++- 2 files changed, 373 insertions(+), 6 deletions(-) diff --git a/docs/source/developers/contents.rst b/docs/source/developers/contents.rst index 21472d91d..49429b447 100644 --- a/docs/source/developers/contents.rst +++ b/docs/source/developers/contents.rst @@ -269,11 +269,22 @@ ContentsManager. .. _PostgreSQL: https://www.postgresql.org/ Asynchronous Support -~~~~~~~~~~~~~~~~~~~~ +-------------------- -To execute file operations asynchronously in a virtual filesystem, the following classes are available. +An asynchronous version of the Contents API is available to run slow IO processes concurrently. - :class:`~manager.AsyncContentsManager` - :class:`~filemanager.AsyncFileContentsManager` +- :class:`~largefilemanager.AsyncLargeFileManager` - :class:`~checkpoints.AsyncCheckpoints` +- :class:`~checkpoints.AsyncGenericCheckpointsMixin` + +.. note:: + + .. _contentfree: + + In most cases, the non-asynchronous Contents API is performant for local filesystems. + However, if the Jupyter Notebook web application is interacting with a high-latent virtual filesystem, you may see performance gains by using the asynchronous version. + For example, if you're experiencing terminal lag in the web application due to the slow and blocking file operations, the asynchronous version may be a good option. + Before opting in, comparing both non-async and async options' performances is recommended. diff --git a/docs/source/other/full-config.rst b/docs/source/other/full-config.rst index 769973538..8e0a3b1c4 100644 --- a/docs/source/other/full-config.rst +++ b/docs/source/other/full-config.rst @@ -41,11 +41,21 @@ Application.log_format : Unicode The Logging format template -Application.log_level : 0|10|20|30|40|50|'DEBUG'|'INFO'|'WARN'|'ERROR'|'CRITICAL' +Application.log_level : any of ``0``|``10``|``20``|``30``|``40``|``50``|``'DEBUG'``|``'INFO'``|``'WARN'``|``'ERROR'``|``'CRITICAL'`` Default: ``30`` Set the log level by value or name. +Application.show_config : Bool + Default: ``False`` + + Instead of starting the Application, dump configuration to stdout + +Application.show_config_json : Bool + Default: ``False`` + + Instead of starting the Application, dump configuration to stdout (as JSON) + JupyterApp.answer_yes : Bool Default: ``False`` @@ -66,6 +76,31 @@ JupyterApp.generate_config : Bool Generate default config file. +JupyterApp.log_datefmt : Unicode + Default: ``'%Y-%m-%d %H:%M:%S'`` + + The date format used by logging formatters for %(asctime)s + +JupyterApp.log_format : Unicode + Default: ``'[%(name)s]%(highlevel)s %(message)s'`` + + The Logging format template + +JupyterApp.log_level : any of ``0``|``10``|``20``|``30``|``40``|``50``|``'DEBUG'``|``'INFO'``|``'WARN'``|``'ERROR'``|``'CRITICAL'`` + Default: ``30`` + + Set the log level by value or name. + +JupyterApp.show_config : Bool + Default: ``False`` + + Instead of starting the Application, dump configuration to stdout + +JupyterApp.show_config_json : Bool + Default: ``False`` + + Instead of starting the Application, dump configuration to stdout (as JSON) + ServerApp.allow_credentials : Bool Default: ``False`` @@ -129,6 +164,11 @@ ServerApp.allow_root : Bool Whether to allow the user to run the server as root. +ServerApp.answer_yes : Bool + Default: ``False`` + + Answer yes to any prompts. + ServerApp.base_url : Unicode Default: ``'/'`` @@ -158,6 +198,16 @@ ServerApp.client_ca : Unicode The full path to a certificate authority certificate for SSL/TLS client authentication. +ServerApp.config_file : Unicode + Default: ``''`` + + Full path of a config file. + +ServerApp.config_file_name : Unicode + Default: ``''`` + + Specify a config file to load. + ServerApp.config_manager_class : Type Default: ``'jupyter_server.services.config.manager.ConfigManager'`` @@ -250,6 +300,11 @@ ServerApp.file_to_run : Unicode No description +ServerApp.generate_config : Bool + Default: ``False`` + + Generate default config file. + ServerApp.get_secure_cookie_kwargs : Dict Default: ``{}`` @@ -319,6 +374,21 @@ ServerApp.local_hostnames : List as local as well. +ServerApp.log_datefmt : Unicode + Default: ``'%Y-%m-%d %H:%M:%S'`` + + The date format used by logging formatters for %(asctime)s + +ServerApp.log_format : Unicode + Default: ``'[%(name)s]%(highlevel)s %(message)s'`` + + The Logging format template + +ServerApp.log_level : any of ``0``|``10``|``20``|``30``|``40``|``50``|``'DEBUG'``|``'INFO'``|``'WARN'``|``'ERROR'``|``'CRITICAL'`` + Default: ``30`` + + Set the log level by value or name. + ServerApp.login_handler_class : Type Default: ``'jupyter_server.auth.login.LoginHandler'`` @@ -431,6 +501,16 @@ ServerApp.session_manager_class : Type The session manager class to use. +ServerApp.show_config : Bool + Default: ``False`` + + Instead of starting the Application, dump configuration to stdout + +ServerApp.show_config_json : Bool + Default: ``False`` + + Instead of starting the Application, dump configuration to stdout (as JSON) + ServerApp.shutdown_no_activity_timeout : Int Default: ``0`` @@ -560,7 +640,7 @@ ConnectionFileMixin.stdin_port : Int set the stdin (ROUTER) port [default: random] -ConnectionFileMixin.transport : 'tcp'|'ipc' +ConnectionFileMixin.transport : any of ``'tcp'``|``'ipc'`` (case-insensitive) Default: ``'tcp'`` No description @@ -570,6 +650,39 @@ KernelManager.autorestart : Bool Should we autorestart the kernel if it dies. +KernelManager.connection_file : Unicode + Default: ``''`` + + JSON file in which to store connection info [default: kernel-.json] + + This file will contain the IP, ports, and authentication key needed to connect + clients to this kernel. By default, this file will be created in the security dir + of the current profile, but can be specified by absolute path. + + +KernelManager.control_port : Int + Default: ``0`` + + set the control (ROUTER) port [default: random] + +KernelManager.hb_port : Int + Default: ``0`` + + set the heartbeat port [default: random] + +KernelManager.iopub_port : Int + Default: ``0`` + + set the iopub (PUB) port [default: random] + +KernelManager.ip : Unicode + Default: ``''`` + + Set the kernel's IP address [default localhost]. + If the IP address is something other than localhost, then + Consoles on other machines will be able to connect + to the Kernel, so be careful! + KernelManager.kernel_cmd : List Default: ``[]`` @@ -585,11 +698,26 @@ KernelManager.kernel_cmd : List option --debug if it given on the Jupyter command line. +KernelManager.shell_port : Int + Default: ``0`` + + set the shell (ROUTER) port [default: random] + KernelManager.shutdown_wait_time : Float Default: ``5.0`` Time to wait for a kernel to terminate before killing it, in seconds. +KernelManager.stdin_port : Int + Default: ``0`` + + set the stdin (ROUTER) port [default: random] + +KernelManager.transport : any of ``'tcp'``|``'ipc'`` (case-insensitive) + Default: ``'tcp'`` + + No description + Session.buffer_threshold : Int Default: ``1024`` @@ -668,7 +796,7 @@ Session.unpacker : DottedObjectName Only used with custom functions for `packer`. Session.username : Unicode - Default: ``'echar4'`` + Default: ``'mwakabayashi'`` Username for the Session. Default is your system username. @@ -689,6 +817,11 @@ MultiKernelManager.shared_context : Bool Share a single zmq.Context to talk to all my kernels +MappingKernelManager.allow_tracebacks : Bool + Default: ``True`` + + Whether to send tracebacks to clients on exceptions. + MappingKernelManager.allowed_message_types : List Default: ``[]`` @@ -732,6 +865,11 @@ MappingKernelManager.cull_interval : Int The interval (in seconds) on which to check for idle kernels exceeding the cull timeout value. +MappingKernelManager.default_kernel_name : Unicode + Default: ``'python3'`` + + The name of the default kernel to start + MappingKernelManager.kernel_info_timeout : Float Default: ``60`` @@ -745,11 +883,28 @@ MappingKernelManager.kernel_info_timeout : Float and the ZMQChannelsHandler (which handles the startup). +MappingKernelManager.kernel_manager_class : DottedObjectName + Default: ``'jupyter_client.ioloop.IOLoopKernelManager'`` + + The kernel manager class. This is configurable to allow + subclassing of the KernelManager for customized behavior. + + MappingKernelManager.root_dir : Unicode Default: ``''`` No description +MappingKernelManager.shared_context : Bool + Default: ``True`` + + Share a single zmq.Context to talk to all my kernels + +MappingKernelManager.traceback_replacement_message : Unicode + Default: ``'An exception occurred at runtime, which is not shown due to ...`` + + Message to print when allow_tracebacks is False, and an exception occurs + KernelSpecManager.ensure_native_kernel : Bool Default: ``True`` @@ -870,6 +1025,26 @@ FileManagerMixin.use_atomic_writing : Bool This procedure, namely 'atomic_writing', causes some bugs on file system whitout operation order enforcement (like some networked fs). If set to False, the new notebook is written directly on the old one which could fail (eg: full filesystem or quota ) +FileContentsManager.allow_hidden : Bool + Default: ``False`` + + Allow access to hidden files + +FileContentsManager.checkpoints : Instance + Default: ``None`` + + No description + +FileContentsManager.checkpoints_class : Type + Default: ``'jupyter_server.services.contents.checkpoints.Checkpoints'`` + + No description + +FileContentsManager.checkpoints_kwargs : Dict + Default: ``{}`` + + No description + FileContentsManager.delete_to_trash : Bool Default: ``True`` @@ -877,6 +1052,36 @@ FileContentsManager.delete_to_trash : Bool platform's trash/recycle bin, where they can be recovered. If False, deleting files really deletes them. +FileContentsManager.files_handler_class : Type + Default: ``'jupyter_server.files.handlers.FilesHandler'`` + + handler class to use when serving raw file requests. + + Default is a fallback that talks to the ContentsManager API, + which may be inefficient, especially for large files. + + Local files-based ContentsManagers can use a StaticFileHandler subclass, + which will be much more efficient. + + Access to these files should be Authenticated. + + +FileContentsManager.files_handler_params : Dict + Default: ``{}`` + + Extra parameters to pass to files_handler_class. + + For example, StaticFileHandlers generally expect a `path` argument + specifying the root directory from which to serve files. + + +FileContentsManager.hide_globs : List + Default: ``['__pycache__', '*.pyc', '*.pyo', '.DS_Store', '*.so', '*.dyl...`` + + + Glob patterns to hide in file and directory listings. + + FileContentsManager.post_save_hook : Any Default: ``None`` @@ -896,12 +1101,55 @@ FileContentsManager.post_save_hook : Any - contents_manager: this ContentsManager instance +FileContentsManager.pre_save_hook : Any + Default: ``None`` + + Python callable or importstring thereof + + To be called on a contents model prior to save. + + This can be used to process the structure, + such as removing notebook outputs or other side effects that + should not be saved. + + It will be called as (all arguments passed by keyword):: + + hook(path=path, model=model, contents_manager=self) + + - model: the model to be saved. Includes file contents. + Modifying this dict will affect the file that is stored. + - path: the API path of the save destination + - contents_manager: this ContentsManager instance + + FileContentsManager.root_dir : Unicode Default: ``''`` No description -NotebookNotary.algorithm : 'md5'|'sha3_512'|'blake2b'|'sha3_384'|'sha3_256'|'sha224'|'sha3_224'|'sha384'|'sha512'|'blake2s'|'sha1'|'sha256' +FileContentsManager.untitled_directory : Unicode + Default: ``'Untitled Folder'`` + + The base name used when creating untitled directories. + +FileContentsManager.untitled_file : Unicode + Default: ``'untitled'`` + + The base name used when creating untitled files. + +FileContentsManager.untitled_notebook : Unicode + Default: ``'Untitled'`` + + The base name used when creating untitled notebooks. + +FileContentsManager.use_atomic_writing : Bool + Default: ``True`` + + By default notebooks are saved on disk on a temporary file and then if succefully written, it replaces the old ones. + This procedure, namely 'atomic_writing', causes some bugs on file system whitout operation order enforcement (like some networked fs). + If set to False, the new notebook is written directly on the old one which could fail (eg: full filesystem or quota ) + +NotebookNotary.algorithm : any of ``'blake2s'``|``'sha512'``|``'md5'``|``'sha3_512'``|``'sha3_224'``|``'blake2b'``|``'sha384'``|``'sha1'``|``'sha3_256'``|``'sha256'``|``'sha224'``|``'sha3_384'`` Default: ``'sha256'`` The hashing algorithm used to sign notebooks. @@ -930,6 +1178,114 @@ NotebookNotary.store_factory : Callable A callable returning the storage backend for notebook signatures. The default uses an SQLite database. +GatewayKernelManager.allow_tracebacks : Bool + Default: ``True`` + + Whether to send tracebacks to clients on exceptions. + +GatewayKernelManager.allowed_message_types : List + Default: ``[]`` + + White list of allowed kernel message types. + When the list is empty, all message types are allowed. + + +GatewayKernelManager.buffer_offline_messages : Bool + Default: ``True`` + + Whether messages from kernels whose frontends have disconnected should be buffered in-memory. + + When True (default), messages are buffered and replayed on reconnect, + avoiding lost messages due to interrupted connectivity. + + Disable if long-running kernels will produce too much output while + no frontends are connected. + + +GatewayKernelManager.cull_busy : Bool + Default: ``False`` + + Whether to consider culling kernels which are busy. + Only effective if cull_idle_timeout > 0. + +GatewayKernelManager.cull_connected : Bool + Default: ``False`` + + Whether to consider culling kernels which have one or more connections. + Only effective if cull_idle_timeout > 0. + +GatewayKernelManager.cull_idle_timeout : Int + Default: ``0`` + + Timeout (in seconds) after which a kernel is considered idle and ready to be culled. + Values of 0 or lower disable culling. Very short timeouts may result in kernels being culled + for users with poor network connections. + +GatewayKernelManager.cull_interval : Int + Default: ``300`` + + The interval (in seconds) on which to check for idle kernels exceeding the cull timeout value. + +GatewayKernelManager.default_kernel_name : Unicode + Default: ``'python3'`` + + The name of the default kernel to start + +GatewayKernelManager.kernel_info_timeout : Float + Default: ``60`` + + Timeout for giving up on a kernel (in seconds). + + On starting and restarting kernels, we check whether the + kernel is running and responsive by sending kernel_info_requests. + This sets the timeout in seconds for how long the kernel can take + before being presumed dead. + This affects the MappingKernelManager (which handles kernel restarts) + and the ZMQChannelsHandler (which handles the startup). + + +GatewayKernelManager.kernel_manager_class : DottedObjectName + Default: ``'jupyter_client.ioloop.IOLoopKernelManager'`` + + The kernel manager class. This is configurable to allow + subclassing of the KernelManager for customized behavior. + + +GatewayKernelManager.root_dir : Unicode + Default: ``''`` + + No description + +GatewayKernelManager.shared_context : Bool + Default: ``True`` + + Share a single zmq.Context to talk to all my kernels + +GatewayKernelManager.traceback_replacement_message : Unicode + Default: ``'An exception occurred at runtime, which is not shown due to ...`` + + Message to print when allow_tracebacks is False, and an exception occurs + +GatewayKernelSpecManager.ensure_native_kernel : Bool + Default: ``True`` + + If there is no Python kernelspec registered and the IPython + kernel is available, ensure it is added to the spec list. + + +GatewayKernelSpecManager.kernel_spec_class : Type + Default: ``'jupyter_client.kernelspec.KernelSpec'`` + + The kernel spec class. This is configurable to allow + subclassing of the KernelSpecManager for customized behavior. + + +GatewayKernelSpecManager.whitelist : Set + Default: ``set()`` + + Whitelist of allowed kernel names. + + By default, all installed kernels are allowed. From 01500bdafbab4452f5a3180a86c68387f0ac05e3 Mon Sep 17 00:00:00 2001 From: Mariko Wakabayashi Date: Fri, 4 Dec 2020 11:48:12 -0700 Subject: [PATCH 13/15] Checkpoints: Make all not implemented methods async --- .../services/contents/checkpoints.py | 57 +++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/jupyter_server/services/contents/checkpoints.py b/jupyter_server/services/contents/checkpoints.py index 16460d520..4e86b4c0e 100644 --- a/jupyter_server/services/contents/checkpoints.py +++ b/jupyter_server/services/contents/checkpoints.py @@ -146,6 +146,25 @@ class AsyncCheckpoints(Checkpoints): """ Base class for managing checkpoints for a ContentsManager asynchronously. """ + async def create_checkpoint(self, contents_mgr, path): + """Create a checkpoint.""" + raise NotImplementedError("must be implemented in a subclass") + + async def restore_checkpoint(self, contents_mgr, checkpoint_id, path): + """Restore a checkpoint""" + raise NotImplementedError("must be implemented in a subclass") + + async def rename_checkpoint(self, checkpoint_id, old_path, new_path): + """Rename a single checkpoint from old_path to new_path.""" + raise NotImplementedError("must be implemented in a subclass") + + async def delete_checkpoint(self, checkpoint_id, path): + """delete a checkpoint for a file""" + raise NotImplementedError("must be implemented in a subclass") + + async def list_checkpoints(self, path): + """Return a list of checkpoints for a given file""" + raise NotImplementedError("must be implemented in a subclass") async def rename_all_checkpoints(self, old_path, new_path): """Rename all checkpoints for old_path to new_path.""" @@ -191,3 +210,41 @@ async def restore_checkpoint(self, contents_mgr, checkpoint_id, path): else: raise HTTPError(500, u'Unexpected type %s' % type) await contents_mgr.save(model, path) + + # Required Methods + async def create_file_checkpoint(self, content, format, path): + """Create a checkpoint of the current state of a file + + Returns a checkpoint model for the new checkpoint. + """ + raise NotImplementedError("must be implemented in a subclass") + + async def create_notebook_checkpoint(self, nb, path): + """Create a checkpoint of the current state of a file + + Returns a checkpoint model for the new checkpoint. + """ + raise NotImplementedError("must be implemented in a subclass") + + async def get_file_checkpoint(self, checkpoint_id, path): + """Get the content of a checkpoint for a non-notebook file. + + Returns a dict of the form: + { + 'type': 'file', + 'content': , + 'format': {'text','base64'}, + } + """ + raise NotImplementedError("must be implemented in a subclass") + + async def get_notebook_checkpoint(self, checkpoint_id, path): + """Get the content of a checkpoint for a notebook. + + Returns a dict of the form: + { + 'type': 'notebook', + 'content': , + } + """ + raise NotImplementedError("must be implemented in a subclass") From bd057ef13dabd47f79826124b8020f14b374cd0f Mon Sep 17 00:00:00 2001 From: Mariko Wakabayashi Date: Fri, 4 Dec 2020 11:55:04 -0700 Subject: [PATCH 14/15] Update docs --- docs/source/developers/contents.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/developers/contents.rst b/docs/source/developers/contents.rst index 49429b447..f5ac3c83a 100644 --- a/docs/source/developers/contents.rst +++ b/docs/source/developers/contents.rst @@ -285,6 +285,6 @@ An asynchronous version of the Contents API is available to run slow IO processe In most cases, the non-asynchronous Contents API is performant for local filesystems. However, if the Jupyter Notebook web application is interacting with a high-latent virtual filesystem, you may see performance gains by using the asynchronous version. - For example, if you're experiencing terminal lag in the web application due to the slow and blocking file operations, the asynchronous version may be a good option. + For example, if you're experiencing terminal lag in the web application due to the slow and blocking file operations, the asynchronous version can reduce the lag. Before opting in, comparing both non-async and async options' performances is recommended. From 7d863f35c9c1e5dc8a58ec8af373bbb1b5158475 Mon Sep 17 00:00:00 2001 From: Zachary Sailer Date: Thu, 10 Dec 2020 10:58:22 -0800 Subject: [PATCH 15/15] Update jupyter_server/services/contents/fileio.py --- jupyter_server/pytest_plugin.py | 3 --- jupyter_server/services/contents/fileio.py | 2 +- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/jupyter_server/pytest_plugin.py b/jupyter_server/pytest_plugin.py index b8454a6c4..dee6d3f0d 100644 --- a/jupyter_server/pytest_plugin.py +++ b/jupyter_server/pytest_plugin.py @@ -19,7 +19,6 @@ from jupyter_server.extension import serverextension from jupyter_server.serverapp import ServerApp from jupyter_server.utils import url_path_join -<<<<<<< HEAD from jupyter_server.services.contents.filemanager import FileContentsManager from jupyter_server.services.contents.largefilemanager import LargeFileManager @@ -45,8 +44,6 @@ def mkdir(tmp_path, *parts): def jp_home_dir(tmp_path): """Provides a temporary HOME directory value.""" return mkdir(tmp_path, "home") -======= ->>>>>>> Use jp prefix for fixtures @pytest.fixture diff --git a/jupyter_server/services/contents/fileio.py b/jupyter_server/services/contents/fileio.py index 531715eb6..3311f455b 100644 --- a/jupyter_server/services/contents/fileio.py +++ b/jupyter_server/services/contents/fileio.py @@ -202,7 +202,7 @@ class FileManagerMixin(Configurable): @contextmanager def open(self, os_path, *args, **kwargs): - """wrapper around open that turns permission errors into 403""" + """wrapper around io.open that turns permission errors into 403""" with self.perm_to_403(os_path): with io.open(os_path, *args, **kwargs) as f: yield f