/
dom.py
102 lines (81 loc) · 2.93 KB
/
dom.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
from __future__ import absolute_import, unicode_literals
import re
from lxml import etree, html
# XLink namespace URI. It is interpolated into '{<namespace-uri>}href' when
# translating the 'xlink:href' prop into a namespaced attribute name for lxml.
# See http://stackoverflow.com/questions/7703018/how-to-write-namespaced-element-attributes-with-lxml
XLINK = 'http://www.w3.org/1999/xlink'
class DOM(object):
    """
    Wrapper around our HTML building library to facilitate changes.

    Every method is a static method operating on lxml elements, so the rest
    of the code base never has to call lxml directly.
    """

    @staticmethod
    def create_element(type=None, props=None, *children):
        """
        Build an element, with a signature inspired by React.createElement.

        createElement(
            string/ReactClass type,
            [object props],
            [children ...]
        )
        https://facebook.github.io/react/docs/top-level-api.html#react.createelement

        :param type: tag name of the element. When falsy, a document fragment
            is created instead and ``props`` is ignored. (The name shadows
            the builtin but is kept for keyword-argument compatibility.)
        :param props: mapping of attribute names to values, or None. Values
            that are None are dropped, other values are cast with ``str``.
            ``className`` is renamed to ``class`` and ``xlink:href`` to the
            namespaced ``{XLINK}href``. The mapping is never modified.
        :param children: elements (anything with a ``tag`` attribute) are
            appended as child nodes; any other non-None value is treated as
            text. None children are skipped.
        :return: the newly created element.
        """
        if not type:
            elt = DOM.create_document_fragment()
        else:
            # Work on a copy: the caller's dict (often a shared config
            # object) must not be rewritten as a side effect of rendering.
            props = dict(props) if props else {}

            # Map props from React/Draft.js to lxml lingo.
            if 'className' in props:
                props['class'] = props.pop('className')

            # TODO One-off fix ATM, even though the problem is everywhere.
            if 'xlink:href' in props:
                props['{%s}href' % XLINK] = props.pop('xlink:href')

            # Filter null values and cast to string for lxml.
            attributes = {
                key: str(value)
                for key, value in props.items()
                if value is not None
            }

            elt = etree.Element(type, attrib=attributes)

        for child in children:
            if child is None:
                continue

            if hasattr(child, 'tag'):
                DOM.append_child(elt, child)
            elif len(elt):
                # Text following an element child belongs in that child's
                # tail; putting it in elt.text would render it *before*
                # the child elements, i.e. out of order.
                previous = elt[-1]
                previous.tail = (previous.tail or '') + child
            else:
                elt.text = (elt.text or '') + child

        return elt

    @staticmethod
    def create_document_fragment():
        """
        Return a placeholder ``fragment`` element.

        The ``<fragment>`` tags themselves are stripped by ``render``.
        """
        return etree.Element('fragment')

    @staticmethod
    def create_text_node(text):
        """
        Return a ``textnode`` element whose text content is ``text``.

        The ``<textnode>`` tags themselves are stripped by ``render``.
        """
        elt = etree.Element('textnode')
        DOM.set_text_content(elt, text)
        return elt

    @staticmethod
    def parse_html(markup):
        """
        Parse ``markup`` with ``etree.fromstring`` and return the root element.

        NOTE(review): despite the method name this goes through lxml's XML
        parser, not ``lxml.html`` - presumably callers only pass well-formed,
        single-rooted markup; confirm before switching parsers.
        """
        return etree.fromstring(markup)

    @staticmethod
    def append_child(elt, child):
        """Append ``child`` as the last child node of ``elt``."""
        elt.append(child)

    @staticmethod
    def set_attribute(elt, attr, value):
        """Set attribute ``attr`` of ``elt`` to ``value``."""
        elt.set(attr, value)

    @staticmethod
    def set_text_content(elt, text):
        """Replace the leading text of ``elt`` (its ``.text``) with ``text``."""
        elt.text = text

    @staticmethod
    def get_children(elt):
        """
        Return the direct child elements of ``elt`` as a new list.

        Uses ``list(elt)`` rather than the deprecated ``elt.getchildren()``.
        """
        return list(elt)

    @staticmethod
    def render(elt):
        """
        Serialise ``elt`` to an HTML string.

        Removes the fragments that should not have HTML tags. Caveat of lxml.
        Dirty, but quite easy to understand: the opening and closing
        ``fragment`` / ``textnode`` tags are stripped from the serialised
        markup with a regular expression, keeping their content.
        """
        markup = etree.tostring(elt, method='html').decode('utf-8')
        return re.sub(r'</?(fragment|textnode)>', '', markup)

    @staticmethod
    def pretty_print(markup):
        """
        Convenience method.
        Pretty print the element, removing the top-level node that lxml needs.

        ``markup`` is wrapped in a temporary ``<doc>`` element so that it can
        be parsed by ``lxml.html``; the ``<doc>`` tags are removed again from
        the indented output.
        """
        wrapped = html.fromstring('<doc>%s</doc>' % markup)
        indented = etree.tostring(wrapped, encoding='unicode', pretty_print=True)
        return re.sub(r'</?doc>', '', indented)