/
readonlytree.pxi
554 lines (493 loc) · 18.5 KB
/
readonlytree.pxi
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
# read-only tree implementation
@cython.internal
cdef class _ReadOnlyProxy:
    u"A read-only proxy class suitable for PIs/Comments (for internal use only!)."
    # Non-zero once free_after_use() was called; _freeReadOnlyProxies()
    # then hands the node to xmlFreeNode() when the proxy is released.
    cdef bint _free_after_use
    # The wrapped libxml2 node; NULL means the proxy has been invalidated.
    cdef xmlNode* _c_node
    # The proxy whose _dependent_proxies list tracks this proxy (the proxy
    # itself when it was created without a source proxy).
    cdef _ReadOnlyProxy _source_proxy
    # Proxies registered by _initReadOnlyProxy(); only set on source proxies.
    cdef list _dependent_proxies

    def __cinit__(self):
        self._c_node = NULL
        self._free_after_use = 0

    cdef int _assertNode(self) except -1:
        u"""This is our way of saying: this proxy is invalid!

        Raises ReferenceError if the wrapped node pointer is NULL,
        returns 0 otherwise.
        """
        if not self._c_node:
            raise ReferenceError("Proxy invalidated!")
        return 0

    cdef int _raise_unsupported_type(self) except -1:
        u"""Always raises TypeError, naming the numeric type of the node.
        """
        raise TypeError("Unsupported node type: %d" % self._c_node.type)

    cdef void free_after_use(self):
        u"""Should the xmlNode* be freed when releasing the proxy?
        """
        self._free_after_use = 1

    property tag:
        u"""Element tag

        The namespaced name for element nodes; for PIs, comments and
        entity references the respective factory object is returned.
        Raises TypeError for any other node type.
        """
        def __get__(self):
            self._assertNode()
            if self._c_node.type == tree.XML_ELEMENT_NODE:
                return _namespacedName(self._c_node)
            elif self._c_node.type == tree.XML_PI_NODE:
                return ProcessingInstruction
            elif self._c_node.type == tree.XML_COMMENT_NODE:
                return Comment
            elif self._c_node.type == tree.XML_ENTITY_REF_NODE:
                return Entity
            else:
                self._raise_unsupported_type()

    property text:
        u"""Text before the first subelement.  This is either a string or
        the value None, if there was no text.

        For PIs and comments this is the node content ('' if there is
        none), for entity references it is '&name;'.  Raises TypeError
        for any other node type.
        """
        def __get__(self):
            self._assertNode()
            if self._c_node.type == tree.XML_ELEMENT_NODE:
                return _collectText(self._c_node.children)
            elif self._c_node.type in (tree.XML_PI_NODE,
                                       tree.XML_COMMENT_NODE):
                if self._c_node.content is NULL:
                    return ''
                else:
                    return funicode(self._c_node.content)
            elif self._c_node.type == tree.XML_ENTITY_REF_NODE:
                return u'&%s;' % funicode(self._c_node.name)
            else:
                self._raise_unsupported_type()

    property tail:
        u"""Text after this element's end tag, but before the next sibling
        element's start tag.  This is either a string or the value None, if
        there was no text.
        """
        def __get__(self):
            self._assertNode()
            return _collectText(self._c_node.next)

    property sourceline:
        u"""Original line number as found by the parser or None if unknown.
        """
        def __get__(self):
            cdef long line
            self._assertNode()
            line = tree.xmlGetLineNo(self._c_node)
            # only positive values are reported as line numbers
            if line > 0:
                return line
            else:
                return None

    def __repr__(self):
        self._assertNode()
        if self._c_node.type == tree.XML_ELEMENT_NODE:
            return u"<Element %s at 0x%x>" % (self.tag, id(self))
        elif self._c_node.type == tree.XML_COMMENT_NODE:
            return u"<!--%s-->" % self.text
        elif self._c_node.type in (tree.XML_ENTITY_REF_NODE,
                                   tree.XML_ENTITY_NODE):
            # Proxies are created for entity *reference* nodes (see the
            # 'tag' and 'text' properties), so those must be handled here;
            # XML_ENTITY_NODE is kept for backwards compatibility.
            return u"&%s;" % funicode(self._c_node.name)
        elif self._c_node.type == tree.XML_PI_NODE:
            # 'target' is provided by the PI proxy subclasses
            text = self.text
            if text:
                return u"<?%s %s?>" % (self.target, text)
            else:
                return u"<?%s?>" % self.target
        else:
            self._raise_unsupported_type()

    def __getitem__(self, x):
        u"""Returns the subelement at the given position or the requested
        slice.

        All returned children are read-only proxies that share the source
        proxy of this proxy.  Raises IndexError for an index that matches
        no child.
        """
        cdef xmlNode* c_node = NULL
        cdef Py_ssize_t step = 0, slicelength = 0
        cdef Py_ssize_t c, i
        cdef _node_to_node_function next_element
        cdef list result
        self._assertNode()
        if isinstance(x, slice):
            # slicing
            if _isFullSlice(<slice>x):
                # use the proxy-aware child collection so that a full
                # slice yields the same read-only proxies as iteration
                return self.getchildren()
            _findChildSlice(<slice>x, self._c_node, &c_node, &step, &slicelength)
            if c_node is NULL:
                return []
            if step > 0:
                next_element = _nextElement
            else:
                # walk backwards with a positive step count
                step = -step
                next_element = _previousElement
            result = []
            c = 0
            while c_node is not NULL and c < slicelength:
                # exactly one read-only proxy per selected node
                result.append(_newReadOnlyProxy(self._source_proxy, c_node))
                c = c + 1
                for i from 0 <= i < step:
                    c_node = next_element(c_node)
            return result
        else:
            # indexing
            c_node = _findChild(self._c_node, x)
            if c_node is NULL:
                raise IndexError, u"list index out of range"
            return _newReadOnlyProxy(self._source_proxy, c_node)

    def __len__(self):
        u"""Returns the number of subelements.
        """
        cdef Py_ssize_t c
        cdef xmlNode* c_node
        self._assertNode()
        c = 0
        c_node = self._c_node.children
        while c_node is not NULL:
            # only children accepted by tree._isElement() are counted
            if tree._isElement(c_node):
                c = c + 1
            c_node = c_node.next
        return c

    def __nonzero__(self):
        u"""True if _findChildBackwards() finds a child at position 0.
        """
        cdef xmlNode* c_node
        self._assertNode()
        c_node = _findChildBackwards(self._c_node, 0)
        return c_node != NULL

    def __deepcopy__(self, memo):
        u"__deepcopy__(self, memo)"
        return self.__copy__()

    cpdef __copy__(self):
        u"""__copy__(self)

        Copies the node into a new document and returns the new root
        element, or for Comments/PIs the first top-level node of the same
        type.  An invalidated proxy is returned unchanged.
        """
        cdef xmlDoc* c_doc
        cdef xmlNode* c_node
        cdef _Document new_doc
        if self._c_node is NULL:
            return self
        c_doc = _copyDocRoot(self._c_node.doc, self._c_node) # recursive
        new_doc = _documentFactory(c_doc, None)
        root = new_doc.getroot()
        if root is not None:
            return root
        # Comment/PI
        c_node = c_doc.children
        while c_node is not NULL and c_node.type != self._c_node.type:
            c_node = c_node.next
        if c_node is NULL:
            return None
        return _elementFactory(new_doc, c_node)

    def __iter__(self):
        return iter(self.getchildren())

    def iterchildren(self, tag=None, *, reversed=False):
        u"""iterchildren(self, tag=None, reversed=False)

        Iterate over the children of this element.

        If 'tag' is given and is not '*', only children whose tag equals
        it are returned.  'reversed' reverses the iteration order.
        """
        children = self.getchildren()
        if tag is not None and tag != '*':
            children = [ el for el in children if el.tag == tag ]
        if reversed:
            children = children[::-1]
        return iter(children)

    cpdef getchildren(self):
        u"""Returns all subelements. The elements are returned in document
        order.
        """
        cdef xmlNode* c_node
        cdef list result
        self._assertNode()
        result = []
        c_node = self._c_node.children
        while c_node is not NULL:
            if tree._isElement(c_node):
                result.append(_newReadOnlyProxy(self._source_proxy, c_node))
            c_node = c_node.next
        return result

    def getparent(self):
        u"""Returns the parent of this element or None for the root element.
        """
        cdef xmlNode* c_parent
        self._assertNode()
        c_parent = self._c_node.parent
        if c_parent is NULL or not tree._isElement(c_parent):
            return None
        else:
            return _newReadOnlyProxy(self._source_proxy, c_parent)

    def getnext(self):
        u"""Returns the following sibling of this element or None.
        """
        cdef xmlNode* c_node
        self._assertNode()
        c_node = _nextElement(self._c_node)
        if c_node is not NULL:
            return _newReadOnlyProxy(self._source_proxy, c_node)
        return None

    def getprevious(self):
        u"""Returns the preceding sibling of this element or None.
        """
        cdef xmlNode* c_node
        self._assertNode()
        c_node = _previousElement(self._c_node)
        if c_node is not NULL:
            return _newReadOnlyProxy(self._source_proxy, c_node)
        return None
@cython.final
@cython.internal
cdef class _ReadOnlyPIProxy(_ReadOnlyProxy):
    u"A read-only proxy for processing instructions (for internal use only!)"
    property target:
        u"""The name of the wrapped node, decoded by funicode().
        """
        def __get__(self):
            # refuse to touch the node of an invalidated proxy
            self._assertNode()
            pi_target = funicode(self._c_node.name)
            return pi_target
@cython.final
@cython.internal
cdef class _ReadOnlyEntityProxy(_ReadOnlyProxy):
    u"A read-only proxy for entity references (for internal use only!)"
    property name:
        u"""The entity name stored on the wrapped node.

        Reading or assigning raises ReferenceError on an invalidated
        proxy.  Assigning a name that contains '&' or ';' raises
        ValueError.
        """
        def __get__(self):
            # guard against dereferencing the NULL node of a dead proxy
            self._assertNode()
            return funicode(self._c_node.name)

        def __set__(self, value):
            self._assertNode()
            value_utf = _utf8(value)
            # validate the encoded name so that text and bytes input are
            # checked the same way
            if b'&' in value_utf or b';' in value_utf:
                raise ValueError(u"Invalid entity name '%s'" % value)
            tree.xmlNodeSetName(self._c_node, _xcstr(value_utf))

    property text:
        u"""The entity reference in its '&name;' form.
        """
        def __get__(self):
            self._assertNode()
            return u'&%s;' % funicode(self._c_node.name)
@cython.internal
cdef class _ReadOnlyElementProxy(_ReadOnlyProxy):
    u"The main read-only Element proxy class (for internal use only!)."
    property attrib:
        u"""A new dict built from the collected attribute items on each
        access.
        """
        def __get__(self):
            self._assertNode()
            attribute_items = _collectAttributes(self._c_node, 3)
            return dict(attribute_items)

    property prefix:
        u"""Namespace prefix or None.
        """
        def __get__(self):
            self._assertNode()
            # no namespace, or a namespace without a prefix
            if self._c_node.ns is NULL or self._c_node.ns.prefix is NULL:
                return None
            return funicode(self._c_node.ns.prefix)

    def get(self, key, default=None):
        u"""Looks up the attribute 'key' on the node; 'default' is passed
        on to the lookup helper as the fallback value.
        """
        self._assertNode()
        attribute_value = _getNodeAttributeValue(self._c_node, key, default)
        return attribute_value

    def keys(self):
        u"""Gets a list of attribute names.  The names are returned in an
        arbitrary order (just like for an ordinary Python dictionary).
        """
        self._assertNode()
        attribute_names = _collectAttributes(self._c_node, 1)
        return attribute_names

    def values(self):
        u"""Gets the element attribute values, as a sequence.  The values
        are returned in an arbitrary order.
        """
        self._assertNode()
        attribute_values = _collectAttributes(self._c_node, 2)
        return attribute_values

    def items(self):
        u"""Gets element attributes, as a sequence.  The attributes are
        returned in an arbitrary order.
        """
        self._assertNode()
        attribute_items = _collectAttributes(self._c_node, 3)
        return attribute_items
cdef _ReadOnlyProxy _newReadOnlyProxy(
    _ReadOnlyProxy source_proxy, xmlNode* c_node):
    # Creates the read-only proxy class matching the type of 'c_node',
    # attaches the node and registers the proxy with 'source_proxy'.
    # Raises TypeError for node types that have no proxy class.
    cdef _ReadOnlyProxy proxy
    if c_node.type == tree.XML_ELEMENT_NODE:
        proxy = _ReadOnlyElementProxy.__new__(_ReadOnlyElementProxy)
    elif c_node.type == tree.XML_PI_NODE:
        proxy = _ReadOnlyPIProxy.__new__(_ReadOnlyPIProxy)
    elif (c_node.type == tree.XML_COMMENT_NODE or
            c_node.type == tree.XML_ENTITY_REF_NODE):
        # comments and entity references share the plain base proxy
        proxy = _ReadOnlyProxy.__new__(_ReadOnlyProxy)
    else:
        raise TypeError("Unsupported element type: %d" % c_node.type)
    proxy._c_node = c_node
    _initReadOnlyProxy(proxy, source_proxy)
    return proxy
cdef inline _initReadOnlyProxy(_ReadOnlyProxy el,
                               _ReadOnlyProxy source_proxy):
    # Links 'el' to its source proxy.  Without a source proxy, 'el'
    # becomes its own source and starts a new list of dependent proxies.
    if source_proxy is not None:
        el._source_proxy = source_proxy
        source_proxy._dependent_proxies.append(el)
        return
    el._source_proxy = el
    el._dependent_proxies = [el]
cdef _freeReadOnlyProxies(_ReadOnlyProxy sourceProxy):
    # Invalidates every proxy registered with 'sourceProxy' by resetting
    # its node pointer, frees the nodes flagged with free_after_use(),
    # then empties the list of dependent proxies.
    cdef xmlNode* c_released
    cdef _ReadOnlyProxy proxy
    if sourceProxy is None or sourceProxy._dependent_proxies is None:
        return
    for proxy in sourceProxy._dependent_proxies:
        # detach the node from the proxy first, then (maybe) free it
        c_released = proxy._c_node
        proxy._c_node = NULL
        if proxy._free_after_use:
            tree.xmlFreeNode(c_released)
    del sourceProxy._dependent_proxies[:]
# opaque wrapper around non-element nodes, e.g. the document node
#
# This class does not imply any restrictions on modifiability or
# read-only status of the node, so use with caution.
@cython.internal
cdef class _OpaqueNodeWrapper:
    # The wrapped libxml2 node; assigned by the factory function
    # _newOpaqueAppendOnlyNodeWrapper().
    cdef tree.xmlNode* _c_node

    def __init__(self):
        # instances are only created internally through __new__()
        raise TypeError(u"This type cannot be instantiated from Python")
@cython.final
@cython.internal
cdef class _OpaqueDocumentWrapper(_OpaqueNodeWrapper):
    cdef int _assertNode(self) except -1:
        u"""Asserts that the wrapped node pointer is set; returns 0.
        """
        assert self._c_node is not NULL, u"Proxy invalidated!"
        return 0

    cpdef append(self, other_element):
        u"""Append a copy of an Element to the list of children.

        Raises ValueError when an element is appended to a document that
        already has a root element, and TypeError for nodes that are
        neither elements, PIs nor comments.
        """
        cdef xmlNode* c_source
        cdef xmlNode* c_copy
        cdef xmlNode* c_tail
        self._assertNode()
        c_source = _roNodeOf(other_element)
        if c_source.type == tree.XML_ELEMENT_NODE:
            if tree.xmlDocGetRootElement(<tree.xmlDoc*>self._c_node) is not NULL:
                raise ValueError(u"cannot append, document already has a root element")
        elif (c_source.type != tree.XML_PI_NODE and
                c_source.type != tree.XML_COMMENT_NODE):
            raise TypeError(u"unsupported element type for top-level node: %d" % c_source.type)
        c_copy = _copyNodeToDoc(c_source, <tree.xmlDoc*>self._c_node)
        # remember the sibling of the copy before it gets linked in
        c_tail = c_copy.next
        tree.xmlAddChild(self._c_node, c_copy)
        _moveTail(c_tail, c_copy)

    def extend(self, elements):
        u"""Append a copy of all Elements from a sequence to the list of
        children.
        """
        self._assertNode()
        for child in elements:
            self.append(child)
cdef _OpaqueNodeWrapper _newOpaqueAppendOnlyNodeWrapper(xmlNode* c_node):
    # Wraps 'c_node' in an opaque wrapper; (HTML) document nodes get the
    # document wrapper, which supports append()/extend().
    cdef _OpaqueNodeWrapper wrapper
    if (c_node.type == tree.XML_DOCUMENT_NODE or
            c_node.type == tree.XML_HTML_DOCUMENT_NODE):
        wrapper = _OpaqueDocumentWrapper.__new__(_OpaqueDocumentWrapper)
    else:
        wrapper = _OpaqueNodeWrapper.__new__(_OpaqueNodeWrapper)
    wrapper._c_node = c_node
    return wrapper
# element proxies that allow restricted modification
@cython.internal
cdef class _ModifyContentOnlyProxy(_ReadOnlyProxy):
    u"""A read-only proxy that allows changing the text content.
    """
    property text:
        u"""The content of the wrapped node ('' if it has none).

        Assigning None passes a NULL content pointer to libxml2; any
        other value is encoded by _utf8() before it is set.
        """
        def __get__(self):
            self._assertNode()
            if self._c_node.content is NULL:
                return ''
            else:
                return funicode(self._c_node.content)

        def __set__(self, value):
            cdef const_xmlChar* c_text
            self._assertNode()
            if value is None:
                c_text = <const_xmlChar*>NULL
            else:
                # 'value' keeps the encoded bytes alive while libxml2
                # reads from 'c_text'
                value = _utf8(value)
                c_text = _xcstr(value)
            tree.xmlNodeSetContent(self._c_node, c_text)
@cython.final
@cython.internal
cdef class _ModifyContentOnlyPIProxy(_ModifyContentOnlyProxy):
    u"""A read-only proxy that allows changing the text/target content of a
    processing instruction.
    """
    property target:
        u"""The name of the wrapped node; assignment renames the node.
        """
        def __get__(self):
            self._assertNode()
            pi_target = funicode(self._c_node.name)
            return pi_target

        def __set__(self, value):
            self._assertNode()
            # 'target_utf' keeps the encoded bytes alive during the call
            target_utf = _utf8(value)
            tree.xmlNodeSetName(self._c_node, _xcstr(target_utf))
@cython.final
@cython.internal
cdef class _ModifyContentOnlyEntityProxy(_ModifyContentOnlyProxy):
    u"A read-only proxy for entity references (for internal use only!)"
    property name:
        u"""The entity name stored on the wrapped node.

        Reading or assigning raises ReferenceError on an invalidated
        proxy.  Assigning a name that contains '&' or ';' raises
        ValueError.
        """
        def __get__(self):
            # guard against dereferencing the NULL node of a dead proxy
            self._assertNode()
            return funicode(self._c_node.name)

        def __set__(self, value):
            self._assertNode()
            value_utf = _utf8(value)
            # Check the encoded bytes against byte literals: testing a
            # text literal against bytes fails on Python 3.  Raise instead
            # of asserting, since assertions can be compiled out.
            if b'&' in value_utf or b';' in value_utf:
                raise ValueError(u"Invalid entity name '%s'" % value)
            tree.xmlNodeSetName(self._c_node, _xcstr(value_utf))
@cython.final
@cython.internal
cdef class _AppendOnlyElementProxy(_ReadOnlyElementProxy):
    u"""A read-only element that allows adding children and changing the
    text content (i.e. everything that adds to the subtree).
    """
    cpdef append(self, other_element):
        u"""Append a copy of an Element to the list of children.
        """
        cdef xmlNode* c_source
        cdef xmlNode* c_copy
        cdef xmlNode* c_tail
        self._assertNode()
        c_source = _roNodeOf(other_element)
        c_copy = _copyNodeToDoc(c_source, self._c_node.doc)
        # remember the sibling of the copy before it gets linked in
        c_tail = c_copy.next
        tree.xmlAddChild(self._c_node, c_copy)
        _moveTail(c_tail, c_copy)

    def extend(self, elements):
        u"""Append a copy of all Elements from a sequence to the list of
        children.
        """
        self._assertNode()
        for child in elements:
            self.append(child)

    property text:
        u"""Text before the first subelement.  This is either a string or the
        value None, if there was no text.

        A QName value is resolved to its text form before it is assigned.
        """
        def __get__(self):
            self._assertNode()
            leading_text = _collectText(self._c_node.children)
            return leading_text

        def __set__(self, value):
            self._assertNode()
            new_text = value
            if isinstance(new_text, QName):
                new_text = _resolveQNameText(self, new_text).decode('utf8')
            _setNodeText(self._c_node, new_text)
cdef _ReadOnlyProxy _newAppendOnlyProxy(
    _ReadOnlyProxy source_proxy, xmlNode* c_node):
    # Creates the restricted-modification proxy class matching the type
    # of 'c_node', attaches the node and registers the proxy with
    # 'source_proxy'.  Raises TypeError for unsupported node types.
    cdef _ReadOnlyProxy proxy
    if c_node.type == tree.XML_ELEMENT_NODE:
        proxy = _AppendOnlyElementProxy.__new__(_AppendOnlyElementProxy)
    elif c_node.type == tree.XML_PI_NODE:
        proxy = _ModifyContentOnlyPIProxy.__new__(_ModifyContentOnlyPIProxy)
    elif c_node.type == tree.XML_COMMENT_NODE:
        proxy = _ModifyContentOnlyProxy.__new__(_ModifyContentOnlyProxy)
    else:
        raise TypeError("Unsupported element type: %d" % c_node.type)
    proxy._c_node = c_node
    _initReadOnlyProxy(proxy, source_proxy)
    return proxy
cdef xmlNode* _roNodeOf(element) except NULL:
    # Returns the xmlNode wrapped by an _Element, a read-only proxy or an
    # opaque node wrapper.  Raises TypeError for any other argument type
    # and for wrappers whose node pointer is NULL.
    cdef xmlNode* c_found
    if isinstance(element, _Element):
        c_found = (<_Element>element)._c_node
    elif isinstance(element, _ReadOnlyProxy):
        c_found = (<_ReadOnlyProxy>element)._c_node
    elif isinstance(element, _OpaqueNodeWrapper):
        c_found = (<_OpaqueNodeWrapper>element)._c_node
    else:
        raise TypeError(u"invalid argument type %s" % type(element))
    if c_found is NULL:
        raise TypeError(u"invalid element")
    return c_found
cdef xmlNode* _nonRoNodeOf(element) except NULL:
    # Returns the xmlNode wrapped by an _Element, an append-only element
    # proxy or an opaque node wrapper.  Raises TypeError for any other
    # argument type (including plain read-only proxies) and for wrappers
    # whose node pointer is NULL.
    cdef xmlNode* c_found
    if isinstance(element, _Element):
        c_found = (<_Element>element)._c_node
    elif isinstance(element, _AppendOnlyElementProxy):
        c_found = (<_AppendOnlyElementProxy>element)._c_node
    elif isinstance(element, _OpaqueNodeWrapper):
        c_found = (<_OpaqueNodeWrapper>element)._c_node
    else:
        raise TypeError(u"invalid argument type %s" % type(element))
    if c_found is NULL:
        raise TypeError(u"invalid element")
    return c_found