Merge pull request #42 from Carreau/refactor

Reorganize nbconvert.py into multiple files. Rewrite some tests.
juliantaylor · Nov 4, 2012 · 3ebf739 · 3ebf739
2 parents 3149095 + 8201133
commit 3ebf739
Show file tree

Hide file tree

Showing 24 changed files with 66,854 additions and 1,469 deletions.
diff --git a/.travis.yml b/.travis.yml
@@ -0,0 +1,6 @@
+language: python
+python:
+    - 2.7
+before_script:
+  - sudo apt-get install pandoc
+script: nosetests
diff --git a/converters/__init__.py b/converters/__init__.py
diff --git a/converters/base.py b/converters/base.py
@@ -0,0 +1,323 @@
+from __future__ import print_function, absolute_import
+from converters.utils import remove_fake_files_url
+
+# Stdlib
+import codecs
+import io
+import logging
+import os
+import pprint
+from types import FunctionType
+
+# From IPython
+from IPython.nbformat import current as nbformat
+
+# local
+
+#-----------------------------------------------------------------------------
+# Class declarations
+#-----------------------------------------------------------------------------
+
+class ConversionException(Exception):
+    """Converter-specific exception type; adds nothing beyond Exception."""
+
+class DocStringInheritor(type):
+    """
+    Metaclass that copies docstrings from base-class methods onto the
+    methods defined in the class being created.
+
+    For every plain function in the new class body, the direct bases are
+    scanned in order; the first base that has an attribute of the same name
+    with a non-empty docstring supplies the docstring, replacing whatever
+    the new function had. Bases without such an attribute, or whose
+    attribute has no docstring, are skipped.
+
+    Please use carefully, I just did the metaclass thing by following
+    Michael Foord's Metaclass tutorial
+    (http://www.voidspace.org.uk/python/articles/metaclasses.shtml), I may
+    have missed a step or two.
+
+    source:
+    http://groups.google.com/group/comp.lang.python/msg/26f7b4fcb4d66c95
+    by Paul McGuire
+    """
+    def __new__(meta, classname, bases, classDict):
+        # Work on a copy so the caller's namespace mapping is left alone.
+        namespace = dict(classDict)
+        for name, member in namespace.items():
+            if not isinstance(member, FunctionType):
+                continue
+            # Only the direct bases are inspected, in declaration order.
+            for base in bases:
+                if not hasattr(base, name):
+                    continue
+                inherited_doc = getattr(base, name).__doc__
+                if inherited_doc:
+                    member.__doc__ = inherited_doc
+                    break
+        return type.__new__(meta, classname, bases, namespace)
+
+class Converter(object):
+    """Base class for notebook converters.
+
+    ``convert`` builds the output by dispatching each cell to a
+    ``render_<cell_type>`` method and each display format to a
+    ``render_display_format_<format>`` method. Most of those hooks raise
+    NotImplementedError here and are meant to be provided by subclasses,
+    which also set ``extension`` (the suffix of the saved file).
+    """
+    __metaclass__ = DocStringInheritor
+    default_encoding = 'utf-8'
+    extension = str()
+    figures_counter = 0
+    infile = str()
+    infile_dir = str()
+    infile_root = str()
+    files_dir = str()
+    with_preamble = True
+    user_preamble = None
+    output = unicode()
+    raw_as_verbatim = False
+
+    def __init__(self, infile):
+        """Record the input path and derive the output locations from it.
+
+        Creates the ``<infile root>_files`` directory next to ``infile`` if
+        it does not exist yet; figures are written there by _new_figure.
+        """
+        self.infile = infile
+        self.infile_dir, infile_root = os.path.split(infile)
+        infile_root = os.path.splitext(infile_root)[0]
+        files_dir = os.path.join(self.infile_dir, infile_root + '_files')
+        if not os.path.isdir(files_dir):
+            os.mkdir(files_dir)
+        self.infile_root = infile_root
+        self.files_dir = files_dir
+        # Output path without extension; save() appends '.' + extension.
+        self.outbase = os.path.join(self.infile_dir, infile_root)
+
+    def __del__(self):
+        """Remove the figures directory if it exists and is still empty."""
+        if os.path.isdir(self.files_dir) and not os.listdir(self.files_dir):
+            os.rmdir(self.files_dir)
+
+    def dispatch(self, cell_type):
+        """return cell_type dependent render method,  for example render_code
+
+        Falls back to render_unknown when no render_<cell_type> exists.
+        """
+        return getattr(self, 'render_' + cell_type, self.render_unknown)
+
+    def dispatch_display_format(self, format):
+        """return format dependent render method, for example
+        render_display_format_text
+
+        Falls back to render_unknown_display when no
+        render_display_format_<format> exists.
+        """
+        return getattr(self, 'render_display_format_' + format, self.render_unknown_display)
+
+    def convert(self, cell_separator='\n'):
+        """
+        Generic method to convert a notebook to a string representation.
+
+        This is accomplished by dispatching on the cell_type, so subclasses of
+        Converter class do not need to re-implement this method, but just
+        need implementation for the methods that will be dispatched.
+
+        Parameters
+        ----------
+        cell_separator : string
+          Character or string to join cells with. Default is "\n"
+
+        Returns
+        -------
+        out : string
+        """
+        lines = []
+        lines.extend(self.optional_header())
+        lines.extend(self.main_body(cell_separator))
+        lines.extend(self.optional_footer())
+        return u'\n'.join(lines)
+
+    def main_body(self, cell_separator='\n'):
+        """Render every cell of every worksheet in self.nb.
+
+        Each cell's rendered lines are joined with newlines, the cells are
+        joined with ``cell_separator``, and the result is split back on
+        newlines.
+
+        Returns a list of lines.
+        """
+        converted_cells = []
+        for worksheet in self.nb.worksheets:
+            for cell in worksheet.cells:
+                #print(cell.cell_type)  # dbg
+                conv_fn = self.dispatch(cell.cell_type)
+                if cell.cell_type in ('markdown', 'raw'):
+                    remove_fake_files_url(cell)
+                converted_cells.append('\n'.join(conv_fn(cell)))
+        cell_lines = cell_separator.join(converted_cells).split('\n')
+        return cell_lines
+
+    def render(self):
+        "read, convert, and save self.infile"
+        # read() is skipped when a notebook was already attached as self.nb.
+        if not hasattr(self, 'nb'):
+            self.read()
+        self.output = self.convert()
+        assert(type(self.output) == unicode)
+        return self.save()
+
+    def read(self):
+        "read and parse notebook into NotebookNode called self.nb"
+        with open(self.infile) as f:
+            self.nb = nbformat.read(f, 'json')
+
+    def save(self, outfile=None, encoding=None):
+        """write self.output to a file and return its absolute path
+
+        Parameters
+        ----------
+        outfile : string, optional
+          Destination path. Defaults to self.outbase + '.' + self.extension.
+        encoding : string, optional
+          Encoding of the written file. Defaults to self.default_encoding.
+        """
+        if outfile is None:
+            outfile = self.outbase + '.' + self.extension
+        if encoding is None:
+            encoding = self.default_encoding
+        with io.open(outfile, 'w', encoding=encoding) as f:
+            f.write(self.output)
+        return os.path.abspath(outfile)
+
+    def optional_header(self):
+        """
+        Optional header to insert at the top of the converted notebook
+
+        Returns a list
+        """
+        return []
+
+    def optional_footer(self):
+        """
+        Optional footer to insert at the end of the converted notebook
+
+        Returns a list
+        """
+        return []
+
+    def _new_figure(self, data, fmt):
+        """Create a new figure file in the given format.
+
+        The file is named ``<infile_root>_fig_<NN>.<fmt>`` and written into
+        self.files_dir; self.figures_counter is incremented on every call.
+
+        Returns the path of the written file (self.files_dir joined with
+        the figure name).
+        """
+        figname = '%s_fig_%02i.%s' % (self.infile_root,
+                                      self.figures_counter, fmt)
+        self.figures_counter += 1
+        fullname = os.path.join(self.files_dir, figname)
+
+        # Binary files are base64-encoded, SVG is already XML
+        if fmt in ('png', 'jpg', 'pdf'):
+            # NOTE: the 'base64' str codec only exists on Python 2.
+            data = data.decode('base64')
+            fopen = lambda fname: open(fname, 'wb')
+        else:
+            fopen = lambda fname: codecs.open(fname, 'wb', self.default_encoding)
+
+        with fopen(fullname) as f:
+            f.write(data)
+
+        return fullname
+
+    def render_heading(self, cell):
+        """convert a heading cell
+
+        Returns list."""
+        raise NotImplementedError
+
+    def render_code(self, cell):
+        """Convert a code cell
+
+        Returns list."""
+        raise NotImplementedError
+
+    def render_markdown(self, cell):
+        """convert a markdown cell
+
+        Returns list."""
+        raise NotImplementedError
+
+    def _img_lines(self, img_file):
+        """Return list of lines to include an image file."""
+        # Note: subclasses may choose to implement format-specific _FMT_lines
+        # methods if they so choose (FMT in {png, svg, jpg, pdf}).
+        raise NotImplementedError
+
+    def render_display_data(self, output):
+        """convert display data from the output of a code cell
+
+        Image formats are written to disk with _new_figure and included via
+        _<fmt>_lines (or _img_lines); every other key except 'output_type'
+        is handed to the method chosen by dispatch_display_format.
+
+        Returns list.
+        """
+        lines = []
+
+        for fmt in output.keys():
+            if fmt in ['png', 'svg', 'jpg', 'pdf']:
+                img_file = self._new_figure(output[fmt], fmt)
+                # Subclasses can have format-specific render functions (e.g.,
+                # latex has to auto-convert all SVG to PDF first).
+                lines_fun = getattr(self, '_%s_lines' % fmt, None)
+                if not lines_fun:
+                    lines_fun = self._img_lines
+                lines.extend(lines_fun(img_file))
+            elif fmt != 'output_type':
+                conv_fn = self.dispatch_display_format(fmt)
+                lines.extend(conv_fn(output))
+        return lines
+
+    def render_raw(self, cell):
+        """convert a cell with raw text
+
+        Returns list."""
+        raise NotImplementedError
+
+    def render_unknown(self, cell):
+        """Render cells of unknown type
+
+        Logs a warning and passes the pretty-printed cell to _unknown_lines.
+
+        Returns list."""
+        data = pprint.pformat(cell)
+        logging.warning('Unknown cell: %s' % cell.cell_type)
+        return self._unknown_lines(data)
+
+    def render_unknown_display(self, output, type=None):
+        """Render outputs of unknown display format
+
+        ``type`` is unused and optional: render_display_data calls the
+        method returned by dispatch_display_format with the output only, so
+        a mandatory second argument made this fallback raise TypeError.
+
+        Returns list."""
+        data = pprint.pformat(output)
+        logging.warning('Unknown output: %s' % output.output_type)
+        return self._unknown_lines(data)
+
+    def render_stream(self, output):
+        """render the stream part of an output
+
+        Returns list.
+
+        Identical to render_display_format_text
+        """
+        return self.render_display_format_text(output)
+
+    def render_pyout(self, output):
+        """convert pyout part of a code cell
+
+        Returns list."""
+        raise NotImplementedError
+
+
+    def render_pyerr(self, output):
+        """convert pyerr part of a code cell
+
+        Returns list."""
+        raise NotImplementedError
+
+    def _unknown_lines(self, data):
+        """Return list of lines for an unknown cell.
+
+        Parameters
+        ----------
+        data : str
+          The content of the unknown data as a single string.
+        """
+        raise NotImplementedError
+
+    # These are the possible format types in an output node
+
+    def render_display_format_text(self, output):
+        """render the text part of an output
+
+        Returns list.
+        """
+        raise NotImplementedError
+
+    def render_display_format_html(self, output):
+        """render the html part of an output
+
+        Returns list.
+        """
+        raise NotImplementedError
+
+    def render_display_format_latex(self, output):
+        """render the latex part of an output
+
+        Returns list.
+        """
+        raise NotImplementedError
+
+    def render_display_format_json(self, output):
+        """render the json part of an output
+
+        Returns list.
+        """
+        raise NotImplementedError
+
+    def render_display_format_javascript(self, output):
+        """render the javascript part of an output
+
+        Returns list.
+        """
+        raise NotImplementedError
+
diff --git a/converters/bloggerhtml.py b/converters/bloggerhtml.py
@@ -0,0 +1,21 @@
+from converters.html import ConverterHTML 
+import io
+
+class ConverterBloggerHTML(ConverterHTML):
+    """Convert a notebook to html suitable for easy pasting into Blogger.
+
+    The main output contains *only* the pure HTML contents: both the header
+    and the footer hooks contribute no lines. The header contents are
+    written to a separate file named after the output base with `_header`
+    appended. Typically, the header file only needs to be used once when
+    setting up a blog, as the CSS for all posts is stored in a single
+    location in Blogger.
+    """
+
+    def optional_header(self):
+        # Side effect only: the header goes to its own file, not the output.
+        header_path = self.outbase + '_header.html'
+        with io.open(header_path, 'w', encoding=self.default_encoding) as f:
+            header_text = '\n'.join(self.header_body())
+            f.write(header_text)
+        return []
+
+    def optional_footer(self):
+        # No footer lines are added to the converted body.
+        return []
+