# pweave/formatters/base.py

import textwrap
import os
import base64
import copy
from nbconvert import filters

# Pweave output formatters
class PwebFormatter(object):
    """Base class for all not-notebook formatters"""

    def __init__(self, executed, *, kernel="python3", language="python",
                 mimetype=None, source=None, theme=None,
                 figdir="figures", wd="."):
        """Store the executed chunks and output settings, then set up formats.

        ``kernel`` and ``mimetype`` are accepted but are not used by this
        initialiser. After the attributes are stored, ``initformat`` and
        ``_fillformatdict`` are called, in that order.
        """
        # Values supplied by the caller
        self.executed = executed
        self.source = source
        self.language = language
        self.theme = theme
        self.wd = wd
        self.figdir = figdir

        # Other supported mimetypes than text/plain
        self.mimetypes = []

        # To be set in child classes
        self.file_ext = self.header = self.footer = None

        self.wrapper = textwrap.TextWrapper(break_long_words=False,
                                            subsequent_indent="")

        # File extension used when saving a figure of the given mimetype
        self.mime_extensions = dict([
            ("application/pdf", "pdf"),
            ("image/png", "png"),
            ("image/jpg", "jpg"),
        ])

        self.initformat()
        self._fillformatdict()


    def initformat(self):
        """Hook run by ``__init__`` before ``_fillformatdict``; does nothing here."""
        return None


    def format(self):
        self.formatted = []
        for chunk in self.executed:
            # Fill in options for code chunks
            if chunk['type'] == "code":
                for key in self.formatdict.keys():
                    if not key in chunk:
                        chunk[key] = self.formatdict[key]

            # Wrap text if option is set
            if chunk['type'] == "code":
                if chunk["wrap"] is True or chunk['wrap'] == "code":
                    chunk['content'] = self._wrap(chunk['content'])

            # Preformat chunk content before default formatters
            chunk = self.preformat_chunk(chunk)

            if chunk['type'] == "doc":
                self.formatted.append(self.format_docchunk(chunk))
            elif chunk['type'] == "code":
                self.formatted.append(self.format_codechunks(chunk))
            else:
                self.formatted.append(chunk["content"])

        self.formatted = "\n".join(self.formatted)
        self.convert()  # Convert to e.g. markdown
        self.add_header()
        self.add_footer()

    def convert(self):
        """Hook run by ``format`` after the chunks are joined; does nothing here."""
        return None

    def preformat_chunk(self, chunk):
        """Return the chunk unchanged.

        Subclasses can override this to preformat chunk content before the
        default formatters run.
        """
        preformatted = chunk
        return preformatted

    def figures_from_chunk(self, chunk):
        """Extract base64 encoded figures from chunk"""
        figs = []
        i = 1
        for out in chunk["result"]:
            if out["output_type"] != "display_data":
                continue
        #Loop trough mimetypes in order of preference
            for mimetype in self.fig_mimetypes:
                if mimetype in out["data"]:
                    fig_name, include_name = self.get_figname(chunk, i, mimetype)
                    figs.append(include_name)
                    bfig = base64.b64decode(out["data"][mimetype])
                    f = open(fig_name, "wb")
                    f.write(bfig)
                    f.close()
                    i += 1
                    break

        #print(figs)
        return figs


    def format_termchunk(self, chunk):
        """Format a terminal-style chunk between ``termstart`` and ``termend``.

        Returns an empty string when ``echo`` is falsy or ``results`` is
        ``'hidden'``. Otherwise ``chunk['result']`` is replaced by its
        ``_termindent`` version before the chunk is rendered.
        """
        if not chunk['echo'] or chunk['results'] == 'hidden':
            return ""

        chunk['result'] = self._termindent(chunk['result'])
        return '%(termstart)s%(result)s%(termend)s' % chunk

    def format_codeblock(self, chunk):
        """Placeholder; this implementation does nothing and returns None."""
        return None

    def format_results(self, chunk):
        """Placeholder; this implementation does nothing and returns None."""
        return None

    def render_jupyter_output(self, out, chunk):
        """Render a single Jupyter output dictionary to text.

        ``error`` outputs are rendered from their joined traceback and
        ``stream`` outputs from their text. For other outputs the first entry
        of ``self.mimetypes`` found in the output data is returned (javascript
        wrapped in a ``<script>`` tag). Data available as one of
        ``self.fig_mimetypes`` renders as an empty string, and ``text/plain``
        is the last fallback.
        """
        kind = out["output_type"]
        if kind == "error":
            return self.render_traceback("".join(out["traceback"]), chunk)
        if kind == "stream":
            return self.render_text(out["text"], chunk)

        data = out["data"]
        for mimetype in self.mimetypes:
            if mimetype not in data:
                continue
            payload = data[mimetype]
            if mimetype == "application/javascript":
                return "\n<script>" + payload + "</script>"
            return "\n" + payload

        # Return nothing if data is shown as figure
        if any(mimetype in data for mimetype in self.fig_mimetypes):
            return ""

        if "text/plain" not in data:
            return ""
        return self.render_text(data["text/plain"], chunk)

    def highlight_ansi_and_escape(self, text):
        """Run ``filters.strip_ansi`` on the text and pass the result to ``escape``."""
        without_ansi = filters.strip_ansi(text)
        return self.escape(without_ansi)

    def escape(self, text):
        """Return the text unchanged; no escaping is done by this implementation."""
        escaped = text
        return escaped

    def render_traceback(self, text, chunk):
        """Render traceback text as a text result of the chunk.

        The text goes through ``highlight_ansi_and_escape`` and is formatted
        with ``format_text_result`` on a deep copy of the chunk, so the
        caller's chunk is left unmodified.
        """
        cleaned = self.highlight_ansi_and_escape(text)
        return self.format_text_result(cleaned, copy.deepcopy(chunk))

    def render_text(self, text, chunk):
        """Render plain text output as a text result of the chunk.

        The text goes through ``highlight_ansi_and_escape`` and is formatted
        with ``format_text_result`` on a deep copy of the chunk, so the
        caller's chunk is left unmodified.
        """
        cleaned = self.highlight_ansi_and_escape(text)
        return self.format_text_result(cleaned, copy.deepcopy(chunk))

        #Set lexers for code and output

    def format_text_result(self, text, chunk):
        chunk["result"] = text
        result = ""
        if "%s" in chunk["outputstart"]:
            chunk["outputstart"] = chunk["outputstart"] % self.language
        if "%s" in chunk["termstart"]:
            chunk["termstart"] = chunk["termstart"] % self.language


        #Other things than term
        if chunk['results'] == 'verbatim':
            if len(chunk['result'].strip()) > 0:
                if chunk["wrap"] is True or chunk['wrap'] == 'results' or chunk['wrap'] == 'output':
                    chunk['result'] = self._wrap(chunk["result"])
                chunk['result'] = "\n%s\n" % chunk["result"].rstrip()
                chunk['result'] = self._indent(chunk['result'])
                #chunk["result"] = self.fix_linefeeds(chunk['result'])
                result += '%(outputstart)s%(result)s%(outputend)s' % chunk
        elif chunk['results'] != 'verbatim':
            result += self.fix_linefeeds(text)

        return(result)


    def fix_linefeeds(self, text):
        """Make sure the string starts and ends with a newline.

        A newline is prepended if the text does not start with one; the
        result then gets a newline appended if it does not end with one.
        """
        padded = text if text.startswith("\n") else "\n" + text
        if padded.endswith("\n"):
            return padded
        return padded + "\n"

    def format_codechunks(self, chunk):
        """Format a code chunk: its source, its results and its figures.

        The chunk is modified in place (``content``, ``codestart`` and, for
        evaluated chunks, ``figure``).

        Chunks with a falsy ``evaluate`` option give only the code block, or
        an empty string when ``echo`` is falsy. For evaluated chunks the
        echoed code comes first, then all stream output combined into one
        block, then the remaining outputs, and finally the result of
        ``self.formatfigure`` when both ``fig`` and ``include`` are truthy.
        ``formatfigure`` is not defined in this class -- presumably supplied
        by subclasses, confirm.
        """
        chunk['content'] = self._indent(chunk['content'])
        evaluated = chunk['evaluate']

        if not evaluated:
            chunk["content"] = self.fix_linefeeds(chunk["content"])

        if "%s" in chunk["codestart"]:
            chunk["codestart"] = chunk["codestart"] % self.language

        # Code is not executed: only the source can be shown
        if not evaluated:
            if not chunk['echo']:
                return ''
            return '%(codestart)s%(content)s%(codeend)s' % chunk

        # Code is executed
        result = ""
        if chunk['echo']:
            chunk["content"] = self.fix_linefeeds(chunk["content"])
            result += '%(codestart)s%(content)s%(codeend)s' % chunk

        if chunk['results'] != 'hidden':
            # Stream outputs are merged into one block, shown before the rest
            merged_stream = {"output_type": "stream", "text": ""}
            remaining = ""
            for out in chunk["result"]:
                if out["output_type"] == "stream":
                    merged_stream["text"] += out["text"]
                else:
                    remaining += self.render_jupyter_output(out, chunk)

            result += self.render_jupyter_output(merged_stream, chunk)
            result += remaining

        # Save embedded figures to file
        chunk['figure'] = self.figures_from_chunk(chunk)

        if chunk['fig'] and 'figure' in chunk and chunk['include']:
            result += self.formatfigure(chunk)
        return result

    def format_docchunk(self, chunk):
        """Return the content of a documentation chunk as it is."""
        content = chunk['content']
        return content

    def add_header(self):
        """Prepend ``self.header`` to ``self.formatted`` unless the header is None."""
        if self.header is None:
            return
        self.formatted = self.header + self.formatted

    def add_footer(self):
        """Append ``self.footer`` to ``self.formatted`` unless the footer is None."""
        if self.footer is None:
            return
        self.formatted += self.footer

    def getformatdict(self):
        """Return the format dictionary itself (not a copy)."""
        current = self.formatdict
        return current

    def getformatted(self):
        """Return the current value of ``self.formatted``."""
        document = self.formatted
        return document

    def updateformatdict(self, format_dict):
        """Update the format dictionary in place with the given entries."""
        target = self.formatdict
        target.update(format_dict)

    def _wrapper(self, string, width=80):
        """Wrap a string to specified width like Python terminal.

        The string is cut into pieces of at most ``width`` characters joined
        by newlines. When the piece being cut is a comment (its first
        non-blank character is ``#``), the continuation is prefixed with ``#``
        so it stays a comment.

        A string that fits in ``width`` is returned unchanged, including one
        of exactly ``width`` characters. Whitespace-only strings are wrapped
        like any other text.

        Args:
            string: the line to wrap.
            width: maximum length of each produced line.

        Returns:
            The wrapped string.

        Raises:
            ValueError: if ``width`` is smaller than 1.
        """
        if width < 1:
            raise ValueError("width must be at least 1")

        pieces = []
        # Iterative on purpose: very long lines must not hit the recursion limit
        while len(string) > width:
            head, rest = string[:width], string[width:]
            pieces.append(head)
            # Wrap also comment lines; the prefix needs width > 1, otherwise
            # the remainder would never get shorter.
            if width > 1 and string.lstrip().startswith("#"):
                rest = "#" + rest
            string = rest
        pieces.append(string)
        return "\n".join(pieces)

    def _wrap(self, content):
        splitted = content.split("\n")
        result = ""
        for line in splitted:
            result += self.wrapper.fill(line) + '\n'
        return result

    def _fillformatdict(self):
        """Fill in the blank options that are now only used for rst
        but also allow e.g. special latex style for terminal blocks etc.

        Missing ``termstart`` / ``termend`` default to ``codestart`` /
        ``codeend``, and a missing ``savedformats`` defaults to a list holding
        ``figfmt``.
        """
        fallbacks = (('termstart', 'codestart'), ('termend', 'codeend'))
        for option, fallback in fallbacks:
            self._fillkey(option, self.formatdict[fallback])
        self._fillkey('savedformats', [self.formatdict['figfmt']])

    def _fillkey(self, key, value):
        """Store the value under the key in the format dictionary if the key is missing."""
        self.formatdict.setdefault(key, value)

    def _indent(self, text):
        """Indent blocks for formats where indent is significant"""
        return text
        # return(text.replace('\n', '\n' + self.formatdict['indent']))

    def _termindent(self, text):
        """Indent blocks for formats where indent is significant"""
        return text
        # return(text.replace('\n', '\n' + self.formatdict['termindent']))

    def sanitize_filename(self, fname):
        """Return the name with characters unsuitable for file names removed.

        The removed characters are backslash, slash, colon, asterisk,
        question mark, angle brackets and the vertical bar.
        """
        # The backslash is escaped explicitly; a bare backslash before the
        # slash is an invalid escape sequence and triggers a warning.
        forbidden = "\\/:*?<>|"
        return "".join(char for char in fname if char not in forbidden)

    def get_figname(self, chunk, i, mimetype):
        save_dir = self.getFigDirectory()
        include_dir = self.figdir

        ext = "." + self.mime_extensions[mimetype]
        base = os.path.splitext(os.path.basename(self.source))[0]

        if chunk['name'] is None:
            prefix = base + '_figure' + str(chunk['number']) + "_" + str(i)
        else:
            prefix = base + '_' + self.sanitize_filename(chunk['name']) + "_" + str(i)

        self.ensureDirectoryExists(self.getFigDirectory())

        save_name = os.path.join(save_dir, prefix + ext)
        include_name = os.path.join(include_dir, prefix + ext).replace("\\", "/")

        return save_name, include_name


    def getFigDirectory(self):
        return os.path.join(self.wd, self.figdir)

    def ensureDirectoryExists(self, figdir):
        if not os.path.isdir(figdir):
            os.mkdir(figdir)