Skip to content

Commit

Permalink
Merge pull request ipython#2045 from ellisonbg/azurenb
Browse files Browse the repository at this point in the history
Refactoring notebook managers and adding Azure backed storage.
  • Loading branch information
ellisonbg committed Aug 11, 2012
2 parents c9c58f7 + a2df21b commit 9c05615
Show file tree
Hide file tree
Showing 7 changed files with 438 additions and 143 deletions.
143 changes: 143 additions & 0 deletions IPython/frontend/html/notebook/azurenbmanager.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
"""A notebook manager that uses Azure blob storage.
Authors:
* Brian Granger
"""

#-----------------------------------------------------------------------------
# Copyright (C) 2012 The IPython Development Team
#
# Distributed under the terms of the BSD License. The full license is in
# the file COPYING, distributed as part of this software.
#-----------------------------------------------------------------------------

#-----------------------------------------------------------------------------
# Imports
#-----------------------------------------------------------------------------

import datetime

import azure
from azure.storage import BlobService

from tornado import web

from .basenbmanager import BaseNotebookManager
from IPython.nbformat import current
from IPython.utils.traitlets import Unicode, Instance


#-----------------------------------------------------------------------------
# Classes
#-----------------------------------------------------------------------------

class AzureNotebookManager(BaseNotebookManager):
    """Notebook manager that keeps notebooks as blobs in an Azure container.

    Every notebook is one blob inside ``container``.  The blob name is the
    notebook id and the notebook's display name is stored in the blob's
    ``nbname`` metadata entry (read back as ``x-ms-meta-nbname``).
    ``self.mapping`` (notebook id -> name) is the in-memory index used for
    listing and for existence checks.
    """

    account_name = Unicode('', config=True, help='Azure storage account name.')
    account_key = Unicode('', config=True, help='Azure storage account key.')
    container = Unicode('', config=True, help='Container name for notebooks.')

    blob_service_host_base = Unicode('.blob.core.windows.net', config=True,
        help='The basename for the blob service URL. If running on the preview site this '
             'will be .blob.core.azure-preview.com.')

    def _blob_service_host_base_changed(self, name, old, new):
        """Propagate a changed host base to the ``azure`` module.

        The three-argument handler form is used on purpose: a handler that
        takes a single argument is passed the trait *name*, not the new
        value, which would store the wrong string as the host base.
        """
        self._update_service_host_base(new)

    blob_service = Instance('azure.storage.BlobService')

    def _blob_service_default(self):
        """Build the default blob service client from the configured account."""
        return BlobService(account_name=self.account_name, account_key=self.account_key)

    def __init__(self, **kwargs):
        """Initialise the manager, apply the host base and create the container."""
        super(AzureNotebookManager, self).__init__(**kwargs)
        # The host base must be in place before the first service request,
        # which happens in _create_container().
        self._update_service_host_base(self.blob_service_host_base)
        self._create_container()

    def _update_service_host_base(self, shb):
        """Set ``azure.BLOB_SERVICE_HOST_BASE`` to ``shb``.

        Note that this is a module-level attribute of ``azure``, so the
        change is not limited to this manager instance.
        """
        azure.BLOB_SERVICE_HOST_BASE = shb

    def _create_container(self):
        """Ask the blob service to create the configured container."""
        self.blob_service.create_container(self.container)

    def load_notebook_names(self):
        """On startup load the notebook ids and names from Azure.

        The blob names are the notebook ids and the notebook names are stored
        as blob metadata.  ``self.mapping`` is rebuilt from scratch.  Blobs
        that carry no ``x-ms-meta-nbname`` metadata entry are skipped (with a
        warning) so that a single stray blob cannot prevent startup.
        """
        self.mapping = {}
        blobs = self.blob_service.list_blobs(self.container)
        notebook_ids = [blob.name for blob in blobs]

        for notebook_id in notebook_ids:
            md = self.blob_service.get_blob_metadata(self.container, notebook_id)
            try:
                name = md['x-ms-meta-nbname']
            except KeyError:
                self.log.warning("Skipping blob without notebook name metadata: %s",
                                 notebook_id)
                continue
            self.mapping[notebook_id] = name

    def list_notebooks(self):
        """List all notebooks in the container.

        This version uses `self.mapping` as the authoritative notebook list.

        Returns a list of ``dict(notebook_id=..., name=...)`` sorted by name.
        """
        data = [dict(notebook_id=notebook_id, name=name)
                for notebook_id, name in self.mapping.items()]
        data = sorted(data, key=lambda item: item['name'])
        return data

    def read_notebook_object(self, notebook_id):
        """Get the object representation of a notebook by notebook_id.

        Returns a ``(last_modified, nb)`` tuple.

        Raises ``web.HTTPError`` 404 if the id is not in ``self.mapping`` and
        500 if the blob cannot be fetched or parsed.
        """
        if not self.notebook_exists(notebook_id):
            raise web.HTTPError(404, u'Notebook does not exist: %s' % notebook_id)
        # ``except Exception`` rather than a bare ``except`` so that
        # KeyboardInterrupt/SystemExit are not turned into HTTP errors.
        try:
            s = self.blob_service.get_blob(self.container, notebook_id)
        except Exception:
            raise web.HTTPError(500, u'Notebook cannot be read.')
        try:
            # Every notebook format version stored here is read as JSON.
            nb = current.reads(s, u'json')
        except Exception:
            raise web.HTTPError(500, u'Unreadable JSON notebook.')
        # Todo: The last modified should actually be saved in the notebook document.
        # We are just using the current datetime until that is implemented.
        last_modified = datetime.datetime.utcnow()
        return last_modified, nb

    def write_notebook_object(self, nb, notebook_id=None):
        """Save a notebook object and return its notebook_id.

        If ``notebook_id`` is None a new id is generated; otherwise the id
        must already be present in ``self.mapping``.

        Raises ``web.HTTPError`` 400 if the notebook has no name or if
        serialising/uploading fails, and 404 for an unknown ``notebook_id``.
        """
        try:
            new_name = nb.metadata.name
        except AttributeError:
            raise web.HTTPError(400, u'Missing notebook name')

        is_new = notebook_id is None
        if is_new:
            notebook_id = self.new_notebook_id(new_name)

        if notebook_id not in self.mapping:
            raise web.HTTPError(404, u'Notebook does not exist: %s' % notebook_id)

        metadata = {'nbname': new_name}
        try:
            data = current.writes(nb, u'json')
            self.blob_service.put_blob(self.container, notebook_id, data, 'BlockBlob',
                                       x_ms_meta_name_values=metadata)
        except Exception as e:
            if is_new:
                # new_notebook_id() already registered the id; drop it again so
                # the listing does not show a notebook that was never stored.
                self.mapping.pop(notebook_id, None)
            raise web.HTTPError(400, u'Unexpected error while saving notebook: %s' % e)

        self.mapping[notebook_id] = new_name
        return notebook_id

    def delete_notebook(self, notebook_id):
        """Delete notebook by notebook_id.

        Raises ``web.HTTPError`` 404 for an unknown id and 400 if the blob
        service fails to delete the blob.  The mapping entry is removed only
        after the blob was deleted successfully.
        """
        if not self.notebook_exists(notebook_id):
            raise web.HTTPError(404, u'Notebook does not exist: %s' % notebook_id)
        try:
            self.blob_service.delete_blob(self.container, notebook_id)
        except Exception as e:
            raise web.HTTPError(400, u'Unexpected error while deleting notebook: %s' % e)
        else:
            self.delete_notebook_id(notebook_id)

    def log_info(self):
        """Log which storage account and container notebooks are served from."""
        self.log.info("Serving notebooks from Azure storage: %s, %s", self.account_name, self.container)
205 changes: 205 additions & 0 deletions IPython/frontend/html/notebook/basenbmanager.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,205 @@
"""A base class notebook manager.
Authors:
* Brian Granger
"""

#-----------------------------------------------------------------------------
# Copyright (C) 2011 The IPython Development Team
#
# Distributed under the terms of the BSD License. The full license is in
# the file COPYING, distributed as part of this software.
#-----------------------------------------------------------------------------

#-----------------------------------------------------------------------------
# Imports
#-----------------------------------------------------------------------------

import os
import uuid

from tornado import web

from IPython.config.configurable import LoggingConfigurable
from IPython.nbformat import current
from IPython.utils.traitlets import List, Dict, Unicode, TraitError

#-----------------------------------------------------------------------------
# Classes
#-----------------------------------------------------------------------------

class BaseNotebookManager(LoggingConfigurable):
    """Storage-independent base class for notebook managers.

    The class keeps an in-memory ``mapping`` of notebook ids to notebook
    names and implements the format handling shared by all backends.
    Subclasses must implement ``list_notebooks``, ``read_notebook_object``,
    ``write_notebook_object`` and ``delete_notebook``.
    """

    # Todo:
    # The notebook_dir attribute is used to mean a couple of different things:
    # 1. Where the notebooks are stored if FileNotebookManager is used.
    # 2. The cwd of the kernel for a project.
    # Right now we use this attribute in a number of different places and
    # we are going to have to disentangle all of this.
    notebook_dir = Unicode(os.getcwdu(), config=True, help="""
        The directory to use for notebooks.
    """)

    def _notebook_dir_changed(self, name, old, new):
        """do a bit of validation of the notebook dir

        Raises ``TraitError`` if ``new`` exists but is not a directory, or
        if it does not exist and cannot be created.
        """
        if os.path.exists(new) and not os.path.isdir(new):
            raise TraitError("notebook dir %r is not a directory" % new)
        if not os.path.exists(new):
            self.log.info("Creating notebook dir %s", new)
            # ``except Exception`` rather than a bare ``except`` so that
            # KeyboardInterrupt/SystemExit are not reported as a TraitError.
            try:
                os.mkdir(new)
            except Exception:
                raise TraitError("Couldn't create notebook dir %r" % new)

    allowed_formats = List([u'json', u'py'])

    # Map notebook_ids to notebook names
    mapping = Dict()

    def load_notebook_names(self):
        """Load the notebook names into memory.

        This should be called once immediately after the notebook manager
        is created to load the existing notebooks into the mapping in
        memory.  The base implementation simply calls ``list_notebooks()``.
        """
        self.list_notebooks()

    def list_notebooks(self):
        """List all notebooks.

        This returns a list of dicts, each of the form::

            dict(notebook_id=notebook,name=name)

        This list of dicts should be sorted by name::

            data = sorted(data, key=lambda item: item['name'])
        """
        raise NotImplementedError('must be implemented in a subclass')

    def new_notebook_id(self, name):
        """Generate a new notebook_id for a name and store its mapping."""
        # TODO: the following will give stable urls for notebooks, but unless
        # the notebooks are immediately redirected to their new urls when their
        # filename changes, nasty inconsistencies result.  So for now it's
        # disabled and instead we use a random uuid4() call.  But we leave the
        # logic here so that we can later reactivate it, when the necessary
        # url redirection code is written.
        #notebook_id = unicode(uuid.uuid5(uuid.NAMESPACE_URL,
        #                 'file://'+self.get_path_by_name(name).encode('utf-8')))

        notebook_id = unicode(uuid.uuid4())
        self.mapping[notebook_id] = name
        return notebook_id

    def delete_notebook_id(self, notebook_id):
        """Delete a notebook's id in the mapping.

        This doesn't delete the actual notebook, only its entry in the mapping.
        Raises ``KeyError`` if the id is not in the mapping.
        """
        del self.mapping[notebook_id]

    def notebook_exists(self, notebook_id):
        """Does a notebook exist?  (Checks ``self.mapping`` only.)"""
        return notebook_id in self.mapping

    def get_notebook(self, notebook_id, format=u'json'):
        """Get the representation of a notebook in format by notebook_id.

        Returns a ``(last_modified, name, data)`` tuple, where ``data`` is
        the notebook serialised in ``format``.

        Raises ``web.HTTPError`` 415 if ``format`` is not in
        ``allowed_formats``.
        """
        format = unicode(format)
        if format not in self.allowed_formats:
            raise web.HTTPError(415, u'Invalid notebook format: %s' % format)
        last_modified, nb = self.read_notebook_object(notebook_id)
        kwargs = {}
        if format == 'json':
            # don't split lines for sending over the wire, because it
            # should match the Python in-memory format.
            kwargs['split_lines'] = False
        data = current.writes(nb, format, **kwargs)
        # The rest of this class keeps the name in ``nb.metadata.name``; look
        # there first and fall back to the previous top-level lookup.
        try:
            name = nb.metadata.name
        except AttributeError:
            name = nb.get('name', 'notebook')
        return last_modified, name, data

    def read_notebook_object(self, notebook_id):
        """Get the object representation of a notebook by notebook_id."""
        raise NotImplementedError('must be implemented in a subclass')

    def save_new_notebook(self, data, name=None, format=u'json'):
        """Save a new notebook and return its notebook_id.

        If a name is passed in, it overrides any values in the notebook data
        and the value in the data is updated to use that value.

        ``data`` is decoded as UTF-8 before being parsed in ``format``.

        Raises ``web.HTTPError`` 415 for a disallowed format and 400 if the
        data cannot be parsed or no notebook name is available.
        """
        if format not in self.allowed_formats:
            raise web.HTTPError(415, u'Invalid notebook format: %s' % format)

        try:
            nb = current.reads(data.decode('utf-8'), format)
        except Exception:
            raise web.HTTPError(400, u'Invalid JSON data')

        if name is None:
            try:
                name = nb.metadata.name
            except AttributeError:
                raise web.HTTPError(400, u'Missing notebook name')
        nb.metadata.name = name

        notebook_id = self.write_notebook_object(nb)
        return notebook_id

    def save_notebook(self, notebook_id, data, name=None, format=u'json'):
        """Save an existing notebook by notebook_id.

        ``data`` is decoded as UTF-8 before being parsed in ``format``.  If
        ``name`` is given it replaces the name stored in the notebook.

        Raises ``web.HTTPError`` 415 for a disallowed format and 400 if the
        data cannot be parsed.
        """
        if format not in self.allowed_formats:
            raise web.HTTPError(415, u'Invalid notebook format: %s' % format)

        try:
            nb = current.reads(data.decode('utf-8'), format)
        except Exception:
            raise web.HTTPError(400, u'Invalid JSON data')

        if name is not None:
            nb.metadata.name = name
        self.write_notebook_object(nb, notebook_id)

    def write_notebook_object(self, nb, notebook_id=None):
        """Write a notebook object and return its notebook_id.

        If notebook_id is None, this method should create a new notebook_id.
        If notebook_id is not None, this method should check to make sure it
        exists and is valid.
        """
        raise NotImplementedError('must be implemented in a subclass')

    def delete_notebook(self, notebook_id):
        """Delete notebook by notebook_id."""
        raise NotImplementedError('must be implemented in a subclass')

    def increment_filename(self, name):
        """Increment a filename to make it unique.

        This exists for notebook stores that must have unique names. When a notebook
        is created or copied this method constructs a unique filename, typically
        by appending an integer to the name.

        The base implementation returns ``name`` unchanged.
        """
        return name

    def new_notebook(self):
        """Create a new notebook and return its notebook_id."""
        name = self.increment_filename('Untitled')
        metadata = current.new_metadata(name=name)
        nb = current.new_notebook(metadata=metadata)
        notebook_id = self.write_notebook_object(nb)
        return notebook_id

    def copy_notebook(self, notebook_id):
        """Copy an existing notebook and return its notebook_id.

        The copy is named after the original with ``-Copy`` appended, passed
        through ``increment_filename``.
        """
        last_mod, nb = self.read_notebook_object(notebook_id)
        name = nb.metadata.name + '-Copy'
        name = self.increment_filename(name)
        nb.metadata.name = name
        notebook_id = self.write_notebook_object(nb)
        return notebook_id

    def log_info(self):
        """Log a short message describing what this manager serves."""
        self.log.info("Serving notebooks")

0 comments on commit 9c05615

Please sign in to comment.