This repository has been archived by the owner on Mar 25, 2022. It is now read-only.
/
parser.py
295 lines (241 loc) · 9.91 KB
/
parser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
"""Docutils CommonMark parser"""
import sys
from os.path import splitext
from docutils import parsers, nodes
from sphinx import addnodes
from commonmark import Parser
from warnings import warn
if sys.version_info < (3, 0):
from urlparse import urlparse, unquote
else:
from urllib.parse import urlparse, unquote
__all__ = ['CommonMarkParser']
class CommonMarkParser(parsers.Parser):
"""Docutils parser for CommonMark"""
supported = ('md', 'markdown')
translate_section_name = None
default_config = {
'known_url_schemes': None,
}
def __init__(self):
self._level_to_elem = {}
def parse(self, inputstring, document):
self.document = document
self.current_node = document
self.config = self.default_config.copy()
try:
new_cfg = self.document.settings.env.config.recommonmark_config
self.config.update(new_cfg)
except AttributeError:
pass
self.setup_parse(inputstring, document)
self.setup_sections()
parser = Parser()
ast = parser.parse(inputstring + '\n')
self.convert_ast(ast)
self.finish_parse()
def convert_ast(self, ast):
for (node, entering) in ast.walker():
fn_prefix = "visit" if entering else "depart"
fn_name = "{0}_{1}".format(fn_prefix, node.t.lower())
fn_default = "default_{0}".format(fn_prefix)
fn = getattr(self, fn_name, None)
if fn is None:
fn = getattr(self, fn_default)
fn(node)
# Node type enter/exit handlers
def default_visit(self, mdnode):
pass
def default_depart(self, mdnode):
"""Default node depart handler
If there is a matching ``visit_<type>`` method for a container node,
then we should make sure to back up to it's parent element when the node
is exited.
"""
if mdnode.is_container():
fn_name = 'visit_{0}'.format(mdnode.t)
if not hasattr(self, fn_name):
warn("Container node skipped: type={0}".format(mdnode.t))
else:
self.current_node = self.current_node.parent
def visit_heading(self, mdnode):
# Test if we're replacing a section level first
if isinstance(self.current_node, nodes.section):
if self.is_section_level(mdnode.level, self.current_node):
self.current_node = self.current_node.parent
title_node = nodes.title()
title_node.line = mdnode.sourcepos[0][0]
new_section = nodes.section()
new_section.line = mdnode.sourcepos[0][0]
new_section.append(title_node)
self.add_section(new_section, mdnode.level)
# Set the current node to the title node to accumulate text children/etc
# for heading.
self.current_node = title_node
def depart_heading(self, _):
"""Finish establishing section
Wrap up title node, but stick in the section node. Add the section names
based on all the text nodes added to the title.
"""
assert isinstance(self.current_node, nodes.title)
# The title node has a tree of text nodes, use the whole thing to
# determine the section id and names
text = self.current_node.astext()
if self.translate_section_name:
text = self.translate_section_name(text)
name = nodes.fully_normalize_name(text)
section = self.current_node.parent
section['names'].append(name)
self.document.note_implicit_target(section, section)
self.current_node = section
def visit_text(self, mdnode):
self.current_node.append(nodes.Text(mdnode.literal, mdnode.literal))
def visit_softbreak(self, _):
self.current_node.append(nodes.Text('\n'))
def visit_linebreak(self, _):
self.current_node.append(nodes.raw('', '<br />', format='html'))
def visit_paragraph(self, mdnode):
p = nodes.paragraph(mdnode.literal)
p.line = mdnode.sourcepos[0][0]
self.current_node.append(p)
self.current_node = p
def visit_emph(self, _):
n = nodes.emphasis()
self.current_node.append(n)
self.current_node = n
def visit_strong(self, _):
n = nodes.strong()
self.current_node.append(n)
self.current_node = n
def visit_code(self, mdnode):
n = nodes.literal(mdnode.literal, mdnode.literal)
self.current_node.append(n)
def visit_link(self, mdnode):
ref_node = nodes.reference()
# Check destination is supported for cross-linking and remove extension
destination = mdnode.destination
_, ext = splitext(destination)
# Check if the destination starts with a url scheme, since internal and
# external links need to be handled differently.
url_check = urlparse(destination)
known_url_schemes = self.config.get('known_url_schemes')
if known_url_schemes:
scheme_known = url_check.scheme in known_url_schemes
else:
scheme_known = bool(url_check.scheme)
# TODO check for other supported extensions, such as those specified in
# the Sphinx conf.py file but how to access this information?
if not scheme_known and ext.replace('.', '') in self.supported:
destination = destination.replace(ext, '')
ref_node['refuri'] = destination
# TODO okay, so this is acutally not always the right line number, but
# these mdnodes won't have sourcepos on them for whatever reason. This
# is better than 0 though.
ref_node.line = self._get_line(mdnode)
if mdnode.title:
ref_node['title'] = mdnode.title
next_node = ref_node
# If there's not a url scheme (e.g. 'https' for 'https:...' links),
# or there is a scheme but it's not in the list of known_url_schemes,
# then assume it's a cross-reference and pass it to Sphinx as an `:any:` ref.
if not url_check.fragment and not scheme_known:
wrap_node = addnodes.pending_xref(
reftarget=unquote(destination),
reftype='any',
refdomain=None, # Added to enable cross-linking
refexplicit=True,
refwarn=True
)
# TODO also not correct sourcepos
wrap_node.line = self._get_line(mdnode)
if mdnode.title:
wrap_node['title'] = mdnode.title
wrap_node.append(ref_node)
next_node = wrap_node
self.current_node.append(next_node)
self.current_node = ref_node
def depart_link(self, mdnode):
if isinstance(self.current_node.parent, addnodes.pending_xref):
self.current_node = self.current_node.parent.parent
else:
self.current_node = self.current_node.parent
def visit_image(self, mdnode):
img_node = nodes.image()
img_node['uri'] = mdnode.destination
if mdnode.first_child and mdnode.first_child.literal:
content = [mdnode.first_child.literal]
n = mdnode.first_child
mdnode.first_child.literal = ''
mdnode.first_child = mdnode.last_child = None
while getattr(n, 'nxt'):
n.nxt, n = None, n.nxt
content.append(n.literal)
img_node['alt'] = ''.join(content)
self.current_node.append(img_node)
self.current_node = img_node
def visit_list(self, mdnode):
list_node = None
if (mdnode.list_data['type'] == "bullet"):
list_node_cls = nodes.bullet_list
else:
list_node_cls = nodes.enumerated_list
list_node = list_node_cls()
list_node.line = mdnode.sourcepos[0][0]
self.current_node.append(list_node)
self.current_node = list_node
def visit_item(self, mdnode):
node = nodes.list_item()
node.line = mdnode.sourcepos[0][0]
self.current_node.append(node)
self.current_node = node
def visit_code_block(self, mdnode):
kwargs = {}
if mdnode.is_fenced and mdnode.info:
kwargs['language'] = mdnode.info
text = ''.join(mdnode.literal)
if text.endswith('\n'):
text = text[:-1]
node = nodes.literal_block(text, text, **kwargs)
self.current_node.append(node)
def visit_block_quote(self, mdnode):
q = nodes.block_quote()
q.line = mdnode.sourcepos[0][0]
self.current_node.append(q)
self.current_node = q
def visit_html(self, mdnode):
raw_node = nodes.raw(mdnode.literal,
mdnode.literal, format='html')
if mdnode.sourcepos is not None:
raw_node.line = mdnode.sourcepos[0][0]
self.current_node.append(raw_node)
def visit_html_inline(self, mdnode):
self.visit_html(mdnode)
def visit_html_block(self, mdnode):
self.visit_html(mdnode)
def visit_thematic_break(self, _):
self.current_node.append(nodes.transition())
# Section handling
def setup_sections(self):
self._level_to_elem = {0: self.document}
def add_section(self, section, level):
parent_level = max(
section_level for section_level in self._level_to_elem
if level > section_level
)
parent = self._level_to_elem[parent_level]
parent.append(section)
self._level_to_elem[level] = section
# Prune level to limit
self._level_to_elem = dict(
(section_level, section)
for section_level, section in self._level_to_elem.items()
if section_level <= level
)
def is_section_level(self, level, section):
return self._level_to_elem.get(level, None) == section
def _get_line(self, mdnode):
while mdnode:
if mdnode.sourcepos:
return mdnode.sourcepos[0][0]
mdnode = mdnode.parent
return 0