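# NOTE: the next four lines are GitHub page chrome captured with the file, not module code.
# Find file Copy path
# Fetching contributors…
# Cannot retrieve contributors at this time
# 503 lines (358 sloc) 12.9 KB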
import codecs
import tempfile
import warnings
from urllib.parse import urlsplit
import requests
class RenderData(object):
    """Empty attribute container; ``Operation.__init__`` stores one as ``render``."""
class Operation(object):
    """Base node of the operation tree.

    Class attributes that subclasses override:

    * ``requires`` / ``optional`` -- names of keyword arguments accepted by
      ``__init__``; every ``optional`` name is pre-set to ``None``.
    * ``allowed_children`` -- class *names* permitted as children; an empty
      set means any child is accepted.
    * ``requires_children`` -- flag that is not read anywhere in this class.

    NOTE(review): this file appears to have lost its decorator lines.
    ``@property`` is restored below only where the file itself reads the
    name as an attribute (``left_siblings``, ``right_siblings``,
    ``ancestors``, ``descendants``, ``has_children``).
    ``original_attributes``, ``previous_sibling`` and ``next_sibling`` are
    left as plain methods -- confirm against callers.
    """

    requires = set()
    optional = set()
    allowed_children = set()
    requires_children = False

    def __init__(self, *children, **kwargs):
        """Build a node from child operations and keyword arguments.

        :param children: child operations, given either as positional
            arguments or as one single list.
        :param kwargs: ``attributes`` (a dict) plus any name listed in
            ``requires`` or ``optional``.
        :raises RuntimeError: on an unknown keyword argument, or on a child
            whose class name is not in a non-empty ``allowed_children``.
        """
        self.parent = None

        # Handle a list of children being passed in
        if len(children) == 1 and isinstance(children[0], list):
            children = children[0]

        self.children = list(children)
        self.args = []
        self.format = None
        self.attributes = kwargs.pop("attributes", {})
        # "id" is moved out of the attribute dict; original_attributes()
        # puts it back.  NOTE(review): the target name ``self.id`` was lost
        # in extraction and is reconstructed here.
        self.id = self.attributes.pop("id", None)
        self.render = RenderData()
        self.source = None

        for k in self.optional:
            setattr(self, k, None)

        for kwarg, val in kwargs.items():
            if kwarg not in self.requires and kwarg not in self.optional:
                raise RuntimeError("Unexpected argument {0}".format(kwarg))
            setattr(self, kwarg, val)

        if self.allowed_children:
            for child in self.children:
                if child.__class__.__name__ not in self.allowed_children:
                    raise RuntimeError("Child {0} is not allowed!".format(child.__class__.__name__))

    def original_attributes(self):
        """Return a copy of ``attributes`` with the ``id`` entry restored."""
        attrs = self.attributes.copy()
        attrs['id'] = self.id
        return attrs

    def set_source(self, source):
        """Store ``source`` on the node."""
        self.source = source

    def is_child_allowed(self, child):
        """Return True unless ``allowed_children`` is non-empty and excludes the child's class name."""
        return not (self.allowed_children and child.__class__.__name__ not in self.allowed_children)

    def _check_child_allowed(self, child):
        """Return True for an allowed child and False for a disallowed ``Text``.

        :raises RuntimeError: for any other disallowed child.
        """
        if self.is_child_allowed(child):
            return True

        # Disallowed Text children are rejected quietly rather than raising.
        if isinstance(child, Text):
            return False

        raise RuntimeError("Child {0} is not allowed in parent {1}".format(child.__class__.__name__,
                                                                           self.__class__.__name__))

    def add_child(self, child):
        """Append ``child`` if it passes ``_check_child_allowed``."""
        if self._check_child_allowed(child):
            self.children.append(child)

    def add_children(self, children):
        """Call ``add_child`` for every item of ``children``."""
        for child in children:
            self.add_child(child)

    def insert_child(self, index, child):
        """Insert ``child`` at position ``index`` of the child list."""
        self.children.insert(index, child)

    def remove_child(self, child):
        """Remove ``child`` from the child list."""
        self.children.remove(child)

    def replace_child(self, child, new_child):
        """Put ``new_child`` in the slot currently held by ``child``."""
        self.children[self.child_index(child)] = new_child

    def has_child(self, child_class):
        """Return True if any direct child is an instance of ``child_class``."""
        return any(isinstance(c, child_class) for c in self.children)

    def child_index(self, child):
        """Return the position of ``child`` in the child list."""
        return self.children.index(child)

    def previous_sibling(self):
        """Return the nearest sibling to the left, or None if there is none."""
        left = self.left_siblings
        return None if left is None else left[0]

    def next_sibling(self):
        """Return the nearest sibling to the right, or None if there is none."""
        right = self.right_siblings
        return None if right is None else right[0]

    @property
    def right_siblings(self):
        """Siblings after this node in document order, or None if there are none."""
        if self.parent is None:
            return None
        idx = self.parent.child_index(self)
        if idx == (len(self.parent) - 1):
            return None
        return self.parent[idx + 1:]

    @property
    def left_siblings(self):
        """Siblings before this node, nearest first, or None if there are none.

        Returned as a list so callers (``previous_sibling``) can index it;
        a bare ``reversed`` iterator is not subscriptable.
        """
        if self.parent is None:
            return None
        idx = self.parent.child_index(self)
        if idx == 0:
            return None
        return self.parent[:idx][::-1]

    @property
    def has_children(self):
        """True when the node has at least one child."""
        return bool(self.children)

    def has_parent(self, parent_cls):
        """Return True if any ancestor is an instance of ``parent_cls``."""
        return any(isinstance(p, parent_cls) for p in self.ancestors)

    @property
    def ancestors(self):
        """Yield the parent, then its parent, and so on up to the root."""
        parent = self.parent
        # Compare with None explicitly: __len__ makes a childless node falsy.
        while parent is not None:
            yield parent
            parent = parent.parent

    @property
    def descendants(self):
        """Yield every node below this one, depth first."""
        for child in self.children:
            yield child
            yield from child.descendants

    def __repr__(self):
        if len(self.children) == 1:
            child_repr = repr(self.children[0])
        else:
            child_repr = repr(self.children)
        return "<{0}: {1}>".format(self.__class__.__name__, child_repr)

    def set_parent(self, parent):
        """Set this node's parent without touching its children."""
        self.parent = parent

    def set_parents(self, parent=None):
        """Set this node's parent, then recursively point every child at its own parent."""
        self.parent = parent
        for child in self.children:
            child.set_parents(self)

    def __len__(self):
        return len(self.children)

    def __getitem__(self, item):
        return self.children[item]
class ChildlessOperation(Operation):
    """Operation that is always built with an empty child list."""

    def __init__(self, **kwargs):
        # Only keyword arguments are accepted; the child list is fixed to [].
        no_children = []
        super().__init__(no_children, **kwargs)

    def __repr__(self):
        class_name = self.__class__.__name__
        return "<{0}>".format(class_name)

    def has_child(self, child_class):
        """Always False: this node type never reports a child."""
        return False

    def replace_child(self, child, new_child):
        """Always raise RuntimeError: there is no child to replace."""
        raise RuntimeError("ChildlessOperation: Cannot replace child")
class IgnoredOperation(Operation):
    """Operation subclass with no members of its own; used as a base by TableHead and TableBody."""
class Group(Operation):
    """Grouping node."""

    # NOTE(review): possibly a @property whose decorator was lost in
    # extraction -- confirm against callers.
    def is_root_group(self):
        """Return True when this group has no parent."""
        has_no_parent = self.parent is None
        return has_no_parent
class Bold(Operation):
    """Bold node type; adds nothing to Operation."""
class Italic(Operation):
    """Italic node type; adds nothing to Operation."""
class UnderLine(Operation):
    """UnderLine node type; adds nothing to Operation."""
class Text(ChildlessOperation):
    """Childless node that accepts a ``text`` keyword argument."""

    requires = {"text"}

    def __repr__(self):
        return "<Text '{0}' />".format(self.short_text)

    @property
    def short_text(self):
        """``repr`` of the text, cut to 10 characters plus "..." when longer.

        A property because ``__repr__`` reads it as an attribute.
        """
        if len(self.text) > 10:
            txt = self.text[:10] + "..."
        else:
            txt = self.text
        return repr(txt)
class Paragraph(Operation):
    """Paragraph node type; adds nothing to Operation."""
class BlockParagraph(Operation):
    """
    Same as Paragraph but doesn't add a newline at the end
    """
class CodeBlock(Operation):
    """Code node with optional ``highlight`` (lexer name) and ``text``."""

    optional = {"highlight", "text"}

    # NOTE(review): possibly a @property whose decorator was lost in
    # extraction -- confirm against callers.
    def highlighted_operations(self):
        """Highlight ``self.text`` with pygments and parse the HTML result.

        :returns: whatever ``wordinserter.parse`` returns for the highlighted
            HTML, or None (after a warning) when pygments raises
            ``ClassNotFound`` for the lexer or formatter.
        """
        # Imported lazily so pygments is only needed when highlighting is used.
        from pygments.lexers import get_lexer_by_name
        from pygments.util import ClassNotFound
        from pygments import highlight
        from pygments.formatters import get_formatter_by_name
        from wordinserter import parse

        try:
            formatter = get_formatter_by_name("html")
            lexer = get_lexer_by_name(self.highlight)
        except ClassNotFound:
            warnings.warn("Lexer {0} or formatter html not found, not highlighting".format(self.highlight))
            return None

        formatter.noclasses = True
        highlighted_code = highlight(self.text, lexer=lexer, formatter=formatter)
        return parse(highlighted_code, parser="html")
class InlineCode(Operation):
    """InlineCode node type; adds nothing to Operation."""
class LineBreak(ChildlessOperation):
    """LineBreak node type; adds nothing to ChildlessOperation."""
class Span(Operation):
    """Span node type; adds nothing to Operation."""
# Formatting node. The predicates below test whether any attribute named in
# a class-level collection is truthy on the instance.
# NOTE(review): this block is incomplete as captured: the `optional` literal
# is never closed, the key/value line below looks like it belongs to a
# separate mapping whose header is missing, and NEEDS_X_HACK (read by
# should_use_x_hack) is not defined anywhere in view. Restore from the
# upstream file rather than guessing.
class Format(Operation):
optional = {
"background_color": "background"
# Names listed in NESTED_STYLES are not read by any method in this block.
NESTED_STYLES = {"border", "margin", "padding"}
def has_format(self):
# True when at least one attribute named in `optional` is truthy.
return any(getattr(self, name) for name in self.optional)
def __repr__(self):
# Shows only the optional attributes that are not None.
return "<{0}: {1}>".format(self.__class__.__name__,
{n: getattr(self, n) for n in self.optional if getattr(self, n) is not None})
def has_style(self):
# Same test as has_format: any truthy attribute named in `optional`.
return any(getattr(self, s) for s in self.optional)
def should_use_x_hack(self):
# True when at least one attribute named in NEEDS_X_HACK is truthy.
return any(getattr(self, s) for s in self.NEEDS_X_HACK)
class Style(Operation):
    """Node that accepts a ``name`` keyword argument (declared in ``requires``)."""

    requires = {"name"}
class Font(Operation):
    """Node with optional ``size`` and ``color`` keyword arguments."""

    optional = {"size", "color"}
class Image(ChildlessOperation):
    """Image node: ``location`` plus optional ``height``, ``width``, ``caption``."""

    requires = {"location"}
    optional = {"height", "width", "caption"}

    @staticmethod
    def write_to_temp_file(data):
        """Write ``data`` (bytes) to a new temporary file and return its path.

        The file is created with ``delete=False``, so it outlives this call.
        A staticmethod because the signature has no ``self`` while callers
        invoke it as ``self.write_to_temp_file(data)``.
        """
        with tempfile.NamedTemporaryFile(delete=False) as temp:
            temp.write(data)
            return temp.name

    def get_404_image_and_dimensions(self):
        """Return ``(path, 300, 220)`` for the packaged "images/404.png" placeholder."""
        import pkg_resources
        not_found_image = pkg_resources.resource_string(__name__, "images/404.png")
        # Hard coded widths :'(
        return self.write_to_temp_file(not_found_image), 300, 220

    def get_image_path_and_dimensions(self):
        """Resolve ``location`` to a local file and return ``(path, height, width)``.

        http/https locations are downloaded and base64 ``data:`` URIs are
        decoded, each into a temporary file. Any other scheme, a failed
        download or an unparsable data URI produces a warning and the 404
        placeholder. Explicit ``height``/``width`` on the node take
        precedence over the placeholder dimensions.

        The full tuple is cached on the instance so that repeated calls
        return the same shape as the first one.
        """
        if hasattr(self, "_path_cache"):
            return self._path_cache

        result = self.location
        original_height, original_width = self.height, self.width
        height, width = original_height, original_width

        split = urlsplit(result)

        if split.scheme not in {"http", "https", "data"}:
            warnings.warn('Invalid image scheme {scheme}: {url}'.format(url=result, scheme=split.scheme))
            result, height, width = self.get_404_image_and_dimensions()
        elif split.scheme in {"http", "https"}:
            try:
                # NOTE(review): verify=False disables TLS certificate
                # verification for the fetch -- confirm this is intended.
                response = requests.get(result, verify=False, timeout=5)
            except requests.RequestException as e:
                warnings.warn('Unable to prefetch image {url}: {ex}'.format(url=result, ex=e))
                result, height, width = self.get_404_image_and_dimensions()
            else:
                result = self.write_to_temp_file(response.content)
        elif split.scheme == 'data':
            try:
                # Expected path shape: "<mimetype>;<encoding>,<payload>"
                mimetype, rest = split.path.split(";")
                encoding, data = rest.split(",")
            except Exception:
                warnings.warn("Could not parse data URI! First 25 chars: {data}".format(data=result[:25]))
                result, height, width = self.get_404_image_and_dimensions()
            else:
                if encoding != "base64":
                    warnings.warn("Encoding {0} is not valid".format(encoding))
                    result, height, width = self.get_404_image_and_dimensions()
                else:
                    image_content = codecs.decode(bytes(data, "utf8"), "base64")
                    result = self.write_to_temp_file(image_content)

        resolved = (result, original_height or height, original_width or width)
        self._path_cache = resolved
        return resolved
class HyperLink(Operation):
    """Link node: accepts ``location`` and an optional ``label``."""

    requires = {"location"}
    optional = {"label"}
class BaseList(Operation):
    """Common base for list nodes; children are restricted by class name."""

    allowed_children = {"ListElement", "BulletList", "NumberedList"}
    requires_children = True

    # NOTE(review): depth and sub_lists are possibly @property accessors whose
    # decorators were lost in extraction -- confirm against callers.
    def depth(self):
        """Return how many ancestors are ``BaseList`` instances."""
        enclosing_lists = [node for node in self.ancestors if isinstance(node, BaseList)]
        return len(enclosing_lists)

    def sub_lists(self):
        """Return how many direct children are ``BaseList`` instances."""
        nested_lists = [node for node in self.children if isinstance(node, BaseList)]
        return len(nested_lists)
class BulletList(BaseList):
    """Bullet list node."""

    # Does nothing on BulletList, just here to match NumberedList
    optional = {"type"}
class NumberedList(BaseList):
    """Numbered list node with an optional ``type`` keyword argument."""

    optional = {"type"}
class ListElement(Operation):
    """List item node; sets ``requires_children`` to True."""

    requires_children = True
class Table(Operation):
    """Table node whose children are rows (or head/body wrappers)."""

    allowed_children = {"TableRow", "TableHead", "TableBody"}
    optional = {"border"}

    @property
    def dimensions(self):
        """
        Returns row, column counts
        """
        if not self.has_children:
            return 0, 0

        rows = len(self.children)
        # Column count is the widest row, counting each cell's colspan (default 1).
        columns = max(sum(child.colspan or 1 for child in row.children) for row in self.children)
        return rows, columns

    @property
    def is_uniform(self):
        """True when every row has the same number of cells; False for an empty table."""
        if not self.has_children:
            return False

        if len(self.children) == 1:
            return True

        first_row = len(self.children[0].children)
        return all(len(child.children) == first_row for child in self.children[1:])

    # NOTE(review): possibly a @property like TableCell.width; the decorator
    # may have been lost in extraction -- confirm against callers.
    def width(self):
        """Return the percentage width as a float, or None if unset, not a percentage, or invalid."""
        if not self.format.width or not self.format.width.endswith('%'):
            return None

        try:
            return float(self.format.width[:-1])
        except (TypeError, ValueError):
            # float() raises ValueError for a non-numeric string.
            warnings.warn("Invalid table width {0}".format(self.format.width))
            return None

    def update_child_widths(self):
        """Give every cell in a column the same ``format.width``.

        Does nothing unless the table is uniform. For each column, the raw
        ``format.width`` of the last cell (top to bottom) that has a valid
        percentage width is applied to every cell in that column; columns
        without one get None.
        """
        if not self.is_uniform:
            return

        row_widths = [None] * self.dimensions[1]

        for idx, _ in enumerate(row_widths):
            for row in self.children:
                child = row.children[idx]
                child_width = child.width
                if child_width is not None:
                    row_widths[idx] = child.format.width

        for row in self.children:
            for idx, cell in enumerate(row.children):
                cell.format.width = row_widths[idx]
class TableHead(IgnoredOperation):
    """Table head wrapper; only ``TableRow`` children are allowed."""

    allowed_children = {"TableRow"}
class TableBody(IgnoredOperation):
    """Table body wrapper; only ``TableRow`` children are allowed."""

    allowed_children = {"TableRow"}
class TableRow(Operation):
    """Table row; only ``TableCell`` children are allowed."""

    allowed_children = {"TableCell"}
class TableCell(Operation):
    """Table cell with optional ``colspan``, ``rowspan`` and ``orientation``."""

    optional = {"colspan", "rowspan", "orientation"}

    @property
    def width(self):
        """Percentage width as a float, or None if unset, not a percentage, or invalid.

        A property because ``Table.update_child_widths`` reads ``child.width``
        as an attribute and compares it with None.
        """
        if self.format.width is None or not self.format.width.endswith('%'):
            return None

        try:
            return float(self.format.width[:-1])
        except (TypeError, ValueError):
            # float() raises ValueError for a non-numeric string.
            warnings.warn("Invalid row width {0}".format(self.format.width))
            return None
class Footnote(ChildlessOperation):
    """Footnote node type; adds nothing to ChildlessOperation."""
class Heading(Operation):
    """Heading node: accepts ``level``; only ``Text`` children are allowed."""

    requires = {'level'}
    allowed_children = {'Text'}