forked from ipython/ipython
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request ipython#2045 from ellisonbg/azurenb
Refactoring notebook managers and adding Azure backed storage.
- Loading branch information
Showing
7 changed files
with
438 additions
and
143 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,143 @@ | ||
"""A notebook manager that uses Azure blob storage. | ||
Authors: | ||
* Brian Granger | ||
""" | ||
|
||
#----------------------------------------------------------------------------- | ||
# Copyright (C) 2012 The IPython Development Team | ||
# | ||
# Distributed under the terms of the BSD License. The full license is in | ||
# the file COPYING, distributed as part of this software. | ||
#----------------------------------------------------------------------------- | ||
|
||
#----------------------------------------------------------------------------- | ||
# Imports | ||
#----------------------------------------------------------------------------- | ||
|
||
import datetime | ||
|
||
import azure | ||
from azure.storage import BlobService | ||
|
||
from tornado import web | ||
|
||
from .basenbmanager import BaseNotebookManager | ||
from IPython.nbformat import current | ||
from IPython.utils.traitlets import Unicode, Instance | ||
|
||
|
||
#----------------------------------------------------------------------------- | ||
# Classes | ||
#----------------------------------------------------------------------------- | ||
|
||
class AzureNotebookManager(BaseNotebookManager):
    """A notebook manager that keeps notebooks in an Azure blob container.

    Each notebook is stored as one block blob whose blob name is the
    notebook_id.  The notebook name is written as the ``nbname`` blob
    metadata entry and read back under the ``x-ms-meta-nbname`` key.
    ``self.mapping`` holds the in-memory notebook_id -> name table.
    """

    account_name = Unicode('', config=True, help='Azure storage account name.')
    account_key = Unicode('', config=True, help='Azure storage account key.')
    container = Unicode('', config=True, help='Container name for notebooks.')

    blob_service_host_base = Unicode('.blob.core.windows.net', config=True,
        help='The basename for the blob service URL. If running on the preview site this '
             'will be .blob.core.azure-preview.com.')

    def _blob_service_host_base_changed(self, name, old, new):
        """Push a changed host base into the azure module.

        traitlets dispatches static change handlers on their argument count:
        a handler taking a single argument receives the trait *name*, not
        the new value.  The full (name, old, new) form is required to get
        the value itself.
        """
        self._update_service_host_base(new)

    blob_service = Instance('azure.storage.BlobService')

    def _blob_service_default(self):
        """Build the default BlobService from the configured credentials."""
        return BlobService(account_name=self.account_name, account_key=self.account_key)

    def __init__(self, **kwargs):
        """Initialize the manager, set the service host base and create the container."""
        super(AzureNotebookManager, self).__init__(**kwargs)
        self._update_service_host_base(self.blob_service_host_base)
        self._create_container()

    def _update_service_host_base(self, shb):
        """Set the module-level ``azure.BLOB_SERVICE_HOST_BASE`` to `shb`."""
        azure.BLOB_SERVICE_HOST_BASE = shb

    def _create_container(self):
        """Ask the blob service to create the configured container."""
        # NOTE(review): presumably harmless when the container already
        # exists -- confirm against the Azure SDK behavior.
        self.blob_service.create_container(self.container)

    def load_notebook_names(self):
        """On startup load the notebook ids and names from Azure.

        The blob names are the notebook ids and the notebook names are stored
        as blob metadata.  Blobs that carry no ``x-ms-meta-nbname`` entry are
        skipped with a warning instead of aborting the whole load.
        """
        self.mapping = {}
        blobs = self.blob_service.list_blobs(self.container)
        notebook_ids = [blob.name for blob in blobs]

        for notebook_id in notebook_ids:
            md = self.blob_service.get_blob_metadata(self.container, notebook_id)
            try:
                name = md['x-ms-meta-nbname']
            except KeyError:
                self.log.warning("Skipping blob without notebook name metadata: %s",
                                 notebook_id)
                continue
            self.mapping[notebook_id] = name

    def list_notebooks(self):
        """List all notebooks in the container.

        This version uses `self.mapping` as the authoritative notebook list.
        Returns a list of ``dict(notebook_id=..., name=...)`` sorted by name.
        """
        data = [dict(notebook_id=notebook_id, name=name)
                for notebook_id, name in self.mapping.items()]
        data = sorted(data, key=lambda item: item['name'])
        return data

    def read_notebook_object(self, notebook_id):
        """Get the object representation of a notebook by notebook_id.

        Returns a ``(last_modified, nb)`` tuple.  Raises ``web.HTTPError``
        404 when the id is not in the mapping and 500 when the blob cannot
        be fetched or parsed.
        """
        if not self.notebook_exists(notebook_id):
            raise web.HTTPError(404, u'Notebook does not exist: %s' % notebook_id)
        # Catch Exception rather than everything so that KeyboardInterrupt
        # and SystemExit are not converted into HTTP errors.
        try:
            s = self.blob_service.get_blob(self.container, notebook_id)
        except Exception:
            raise web.HTTPError(500, u'Notebook cannot be read.')
        try:
            # v1 and v2 and json in the .ipynb files.
            nb = current.reads(s, u'json')
        except Exception:
            raise web.HTTPError(500, u'Unreadable JSON notebook.')
        # Todo: The last modified should actually be saved in the notebook document.
        # We are just using the current datetime until that is implemented.
        last_modified = datetime.datetime.utcnow()
        return last_modified, nb

    def write_notebook_object(self, nb, notebook_id=None):
        """Save a notebook object and return its notebook_id.

        If `notebook_id` is None a new id is generated; otherwise the id must
        already be present in the mapping.  Raises ``web.HTTPError`` 400 when
        the notebook has no name or cannot be serialized/uploaded, and 404
        when the given id is unknown.
        """
        try:
            new_name = nb.metadata.name
        except AttributeError:
            raise web.HTTPError(400, u'Missing notebook name')

        is_new = notebook_id is None
        if is_new:
            # new_notebook_id() also registers the id in self.mapping.
            notebook_id = self.new_notebook_id(new_name)

        if notebook_id not in self.mapping:
            raise web.HTTPError(404, u'Notebook does not exist: %s' % notebook_id)

        metadata = {'nbname': new_name}
        try:
            data = current.writes(nb, u'json')
            self.blob_service.put_blob(self.container, notebook_id, data, 'BlockBlob',
                                       x_ms_meta_name_values=metadata)
        except Exception as e:
            if is_new:
                # Nothing was stored, so drop the id registered above;
                # otherwise the listing would show a notebook with no blob.
                self.delete_notebook_id(notebook_id)
            raise web.HTTPError(400, u'Unexpected error while saving notebook: %s' % e)

        self.mapping[notebook_id] = new_name
        return notebook_id

    def delete_notebook(self, notebook_id):
        """Delete notebook by notebook_id.

        The mapping entry is removed only after the blob was deleted
        successfully.  Raises ``web.HTTPError`` 404 for an unknown id and 400
        when the blob service reports an error.
        """
        if not self.notebook_exists(notebook_id):
            raise web.HTTPError(404, u'Notebook does not exist: %s' % notebook_id)
        try:
            self.blob_service.delete_blob(self.container, notebook_id)
        except Exception as e:
            raise web.HTTPError(400, u'Unexpected error while deleting notebook: %s' % e)
        else:
            self.delete_notebook_id(notebook_id)

    def log_info(self):
        """Log which storage account and container notebooks are served from."""
        self.log.info("Serving notebooks from Azure storage: %s, %s", self.account_name, self.container)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,205 @@ | ||
"""A base class notebook manager. | ||
Authors: | ||
* Brian Granger | ||
""" | ||
|
||
#----------------------------------------------------------------------------- | ||
# Copyright (C) 2011 The IPython Development Team | ||
# | ||
# Distributed under the terms of the BSD License. The full license is in | ||
# the file COPYING, distributed as part of this software. | ||
#----------------------------------------------------------------------------- | ||
|
||
#----------------------------------------------------------------------------- | ||
# Imports | ||
#----------------------------------------------------------------------------- | ||
|
||
import os | ||
import uuid | ||
|
||
from tornado import web | ||
|
||
from IPython.config.configurable import LoggingConfigurable | ||
from IPython.nbformat import current | ||
from IPython.utils.traitlets import List, Dict, Unicode, TraitError | ||
|
||
#----------------------------------------------------------------------------- | ||
# Classes | ||
#----------------------------------------------------------------------------- | ||
|
||
class BaseNotebookManager(LoggingConfigurable):
    """Storage-independent base class for notebook managers.

    The class keeps the in-memory ``mapping`` of notebook_id -> name and
    implements the format handling for saving and retrieving notebooks.
    Subclasses must implement `list_notebooks`, `read_notebook_object`,
    `write_notebook_object` and `delete_notebook`.
    """

    # Todo:
    # The notebook_dir attribute is used to mean a couple of different things:
    # 1. Where the notebooks are stored if FileNotebookManager is used.
    # 2. The cwd of the kernel for a project.
    # Right now we use this attribute in a number of different places and
    # we are going to have to disentangle all of this.
    notebook_dir = Unicode(os.getcwdu(), config=True, help="""
        The directory to use for notebooks.
        """)

    def _notebook_dir_changed(self, name, old, new):
        """do a bit of validation of the notebook dir

        Raises TraitError if `new` exists but is not a directory, or if it
        does not exist and cannot be created.
        """
        if os.path.exists(new) and not os.path.isdir(new):
            raise TraitError("notebook dir %r is not a directory" % new)
        if not os.path.exists(new):
            self.log.info("Creating notebook dir %s", new)
            # Catch Exception (not a bare except) so KeyboardInterrupt and
            # SystemExit are not turned into a TraitError.
            try:
                os.mkdir(new)
            except Exception:
                raise TraitError("Couldn't create notebook dir %r" % new)

    allowed_formats = List([u'json', u'py'])

    # Map notebook_ids to notebook names
    mapping = Dict()

    def load_notebook_names(self):
        """Load the notebook names into memory.

        This should be called once immediately after the notebook manager
        is created to load the existing notebooks into the mapping in
        memory.
        """
        self.list_notebooks()

    def list_notebooks(self):
        """List all notebooks.

        This returns a list of dicts, each of the form::

            dict(notebook_id=notebook,name=name)

        This list of dicts should be sorted by name::

            data = sorted(data, key=lambda item: item['name'])
        """
        raise NotImplementedError('must be implemented in a subclass')

    def new_notebook_id(self, name):
        """Generate a new notebook_id for a name and store its mapping."""
        # TODO: the following will give stable urls for notebooks, but unless
        # the notebooks are immediately redirected to their new urls when their
        # filename changes, nasty inconsistencies result.  So for now it's
        # disabled and instead we use a random uuid4() call.  But we leave the
        # logic here so that we can later reactivate it, when the necessary
        # url redirection code is written.
        #notebook_id = unicode(uuid.uuid5(uuid.NAMESPACE_URL,
        #                 'file://'+self.get_path_by_name(name).encode('utf-8')))

        notebook_id = unicode(uuid.uuid4())
        self.mapping[notebook_id] = name
        return notebook_id

    def delete_notebook_id(self, notebook_id):
        """Delete a notebook's id in the mapping.

        This doesn't delete the actual notebook, only its entry in the mapping.
        Raises KeyError if the id is not in the mapping.
        """
        del self.mapping[notebook_id]

    def notebook_exists(self, notebook_id):
        """Does a notebook exist?"""
        return notebook_id in self.mapping

    def get_notebook(self, notebook_id, format=u'json'):
        """Get the representation of a notebook in format by notebook_id.

        Returns a ``(last_modified, name, data)`` tuple, where `data` is the
        notebook serialized in `format`.  Raises ``web.HTTPError`` 415 if the
        format is not in `allowed_formats`.
        """
        format = unicode(format)
        if format not in self.allowed_formats:
            raise web.HTTPError(415, u'Invalid notebook format: %s' % format)
        last_modified, nb = self.read_notebook_object(notebook_id)
        kwargs = {}
        if format == 'json':
            # don't split lines for sending over the wire, because it
            # should match the Python in-memory format.
            kwargs['split_lines'] = False
        data = current.writes(nb, format, **kwargs)
        # The name is kept in the notebook metadata (the save and copy
        # methods of this class read and write nb.metadata.name), not at the
        # top level of the notebook; fall back to 'notebook' when absent.
        name = nb.get('metadata', {}).get('name', 'notebook')
        return last_modified, name, data

    def read_notebook_object(self, notebook_id):
        """Get the object representation of a notebook by notebook_id."""
        raise NotImplementedError('must be implemented in a subclass')

    def save_new_notebook(self, data, name=None, format=u'json'):
        """Save a new notebook and return its notebook_id.

        If a name is passed in, it overrides any values in the notebook data
        and the value in the data is updated to use that value.

        Raises ``web.HTTPError`` 415 for a disallowed format and 400 when the
        data cannot be parsed or no name is available.
        """
        if format not in self.allowed_formats:
            raise web.HTTPError(415, u'Invalid notebook format: %s' % format)

        # Catch Exception (not a bare except) so KeyboardInterrupt and
        # SystemExit are not reported as bad client data.
        try:
            nb = current.reads(data.decode('utf-8'), format)
        except Exception:
            raise web.HTTPError(400, u'Invalid JSON data')

        if name is None:
            try:
                name = nb.metadata.name
            except AttributeError:
                raise web.HTTPError(400, u'Missing notebook name')
        nb.metadata.name = name

        notebook_id = self.write_notebook_object(nb)
        return notebook_id

    def save_notebook(self, notebook_id, data, name=None, format=u'json'):
        """Save an existing notebook by notebook_id.

        If `name` is given it replaces the name stored in the notebook
        metadata before writing.  Raises ``web.HTTPError`` 415 for a
        disallowed format and 400 when the data cannot be parsed.
        """
        if format not in self.allowed_formats:
            raise web.HTTPError(415, u'Invalid notebook format: %s' % format)

        try:
            nb = current.reads(data.decode('utf-8'), format)
        except Exception:
            raise web.HTTPError(400, u'Invalid JSON data')

        if name is not None:
            nb.metadata.name = name
        self.write_notebook_object(nb, notebook_id)

    def write_notebook_object(self, nb, notebook_id=None):
        """Write a notebook object and return its notebook_id.

        If notebook_id is None, this method should create a new notebook_id.
        If notebook_id is not None, this method should check to make sure it
        exists and is valid.
        """
        raise NotImplementedError('must be implemented in a subclass')

    def delete_notebook(self, notebook_id):
        """Delete notebook by notebook_id."""
        raise NotImplementedError('must be implemented in a subclass')

    def increment_filename(self, name):
        """Increment a filename to make it unique.

        This exists for notebook stores that must have unique names. When a notebook
        is created or copied this method constructs a unique filename, typically
        by appending an integer to the name.

        The base implementation returns `name` unchanged.
        """
        return name

    def new_notebook(self):
        """Create a new notebook and return its notebook_id."""
        name = self.increment_filename('Untitled')
        metadata = current.new_metadata(name=name)
        nb = current.new_notebook(metadata=metadata)
        notebook_id = self.write_notebook_object(nb)
        return notebook_id

    def copy_notebook(self, notebook_id):
        """Copy an existing notebook and return its notebook_id.

        The copy is named after the original with '-Copy' appended, passed
        through `increment_filename`.
        """
        last_mod, nb = self.read_notebook_object(notebook_id)
        name = nb.metadata.name + '-Copy'
        name = self.increment_filename(name)
        nb.metadata.name = name
        notebook_id = self.write_notebook_object(nb)
        return notebook_id

    def log_info(self):
        """Log a message saying that notebooks are being served."""
        self.log.info("Serving notebooks")
Oops, something went wrong.