/
lxml.etree.pyx
3270 lines (2769 loc) · 115 KB
/
lxml.etree.pyx
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
u"""The ``lxml.etree`` module implements the extended ElementTree API
for XML.
"""
from __future__ import absolute_import
__docformat__ = u"restructuredtext en"
__all__ = [
'AttributeBasedElementClassLookup', 'C14NError', 'CDATA',
'Comment', 'CommentBase', 'CustomElementClassLookup', 'DEBUG',
'DTD', 'DTDError', 'DTDParseError', 'DTDValidateError',
'DocumentInvalid', 'ETCompatXMLParser', 'ETXPath', 'Element',
'ElementBase', 'ElementClassLookup', 'ElementDefaultClassLookup',
'ElementNamespaceClassLookup', 'ElementTree', 'Entity', 'EntityBase',
'Error', 'ErrorDomains', 'ErrorLevels', 'ErrorTypes', 'Extension',
'FallbackElementClassLookup', 'FunctionNamespace', 'HTML',
'HTMLParser', 'LIBXML_COMPILED_VERSION', 'LIBXML_VERSION',
'LIBXSLT_COMPILED_VERSION', 'LIBXSLT_VERSION', 'LXML_VERSION',
'LxmlError', 'LxmlRegistryError', 'LxmlSyntaxError',
'NamespaceRegistryError', 'PI', 'PIBase', 'ParseError',
'ParserBasedElementClassLookup', 'ParserError', 'ProcessingInstruction',
'PyErrorLog', 'PythonElementClassLookup', 'QName', 'RelaxNG',
'RelaxNGError', 'RelaxNGErrorTypes', 'RelaxNGParseError',
'RelaxNGValidateError', 'Resolver', 'Schematron', 'SchematronError',
'SchematronParseError', 'SchematronValidateError', 'SerialisationError',
'SubElement', 'TreeBuilder', 'XInclude', 'XIncludeError', 'XML',
'XMLDTDID', 'XMLID', 'XMLParser', 'XMLSchema', 'XMLSchemaError',
'XMLSchemaParseError', 'XMLSchemaValidateError', 'XMLSyntaxError',
'XMLTreeBuilder', 'XPath', 'XPathDocumentEvaluator', 'XPathError',
'XPathEvalError', 'XPathEvaluator', 'XPathFunctionError', 'XPathResultError',
'XPathSyntaxError', 'XSLT', 'XSLTAccessControl', 'XSLTApplyError',
'XSLTError', 'XSLTExtension', 'XSLTExtensionError', 'XSLTParseError',
'XSLTSaveError', 'cleanup_namespaces', 'clear_error_log', 'dump',
'fromstring', 'fromstringlist', 'get_default_parser', 'iselement',
'iterparse', 'iterwalk', 'parse', 'parseid', 'register_namespace',
'set_default_parser', 'set_element_class_lookup', 'strip_attributes',
'strip_elements', 'strip_tags', 'tostring', 'tostringlist', 'tounicode',
'use_global_python_log'
]
cimport cython
from lxml cimport python
from lxml.includes cimport tree, config
from lxml.includes.tree cimport xmlDoc, xmlNode, xmlAttr, xmlNs, _isElement, _getNs
from lxml.includes.tree cimport const_xmlChar, xmlChar, _xcstr
from lxml.python cimport _cstr, _isString
from lxml.includes cimport xpath
from lxml.includes cimport c14n
# Cython's standard declarations
cimport cpython.mem
cimport cpython.ref
from libc cimport limits, stdio, stdlib
from libc cimport string as cstring_h # not to be confused with stdlib 'string'
from libc.string cimport const_char
try:
import __builtin__
except ImportError:
# Python 3
import builtins as __builtin__
cdef object _unicode
try:
_unicode = __builtin__.unicode
except AttributeError:
# Python 3
_unicode = __builtin__.str
del __builtin__
cdef object os_path_abspath
from os.path import abspath as os_path_abspath
cdef object BytesIO, StringIO
try:
from io import BytesIO, StringIO
except (ImportError, AttributeError):
from StringIO import StringIO, StringIO as BytesIO
cdef object _elementpath
from lxml import _elementpath
cdef object sys
import sys
cdef object re
import re
cdef object gzip
import gzip
cdef object ITER_EMPTY = iter(())
cdef object EMPTY_READ_ONLY_DICT = python.PyDictProxy_New({})
# the rules
# ---------
# any libxml C argument/variable is prefixed with c_
# any non-public function/class is prefixed with an underscore
# instance creation is always through factories
# what to do with libxml2/libxslt error messages?
# 0 : drop
# 1 : use log
DEF __DEBUG = 1
# maximum number of lines in the libxml2/xslt log if __DEBUG == 1
DEF __MAX_LOG_SIZE = 100
# make the compiled-in debug state publicly available
DEBUG = __DEBUG
# A struct to store a cached qualified tag name+href pair.
# While we can borrow the c_name from the document dict,
# PyPy requires us to store a Python reference for the
# namespace in order to keep the byte buffer alive.
cdef struct qname:
# tag name as a C string (may be borrowed from the document dict)
const_xmlChar* c_name
# Python object reference for the namespace href; holding the reference
# keeps the underlying byte buffer alive
python.PyObject* href
# global per-thread setup
tree.xmlThrDefIndentTreeOutput(1)
tree.xmlThrDefLineNumbersDefaultValue(1)
_initThreadLogging()
# initialize parser (and threading)
xmlparser.xmlInitParser()
# filename encoding
cdef bytes _FILENAME_ENCODING = (sys.getfilesystemencoding() or sys.getdefaultencoding() or 'ascii').encode(u"UTF-8")
cdef char* _C_FILENAME_ENCODING = _cstr(_FILENAME_ENCODING)
# set up some default namespace prefixes
cdef dict _DEFAULT_NAMESPACE_PREFIXES = {
b"http://www.w3.org/XML/1998/namespace": b'xml',
b"http://www.w3.org/1999/xhtml": b"html",
b"http://www.w3.org/1999/XSL/Transform": b"xsl",
b"http://www.w3.org/1999/02/22-rdf-syntax-ns#": b"rdf",
b"http://schemas.xmlsoap.org/wsdl/": b"wsdl",
# xml schema
b"http://www.w3.org/2001/XMLSchema": b"xs",
b"http://www.w3.org/2001/XMLSchema-instance": b"xsi",
# dublin core
b"http://purl.org/dc/elements/1.1/": b"dc",
# objectify
b"http://codespeak.net/lxml/objectify/pytype" : b"py",
}
# Matches prefixes of the form "ns<digits>", i.e. the format of the
# automatically generated prefixes.  Raw bytes literal: "\d" is a regex
# class here, not a string escape.
cdef object _check_internal_prefix = re.compile(br"ns\d+$").match

def register_namespace(prefix, uri):
    u"""Registers a namespace prefix that newly created Elements in that
    namespace will use.  The registry is global, and any existing
    mapping for either the given prefix or the namespace URI will be
    removed.

    Raises ValueError if the prefix uses the ``ns<number>`` format that
    is reserved for internal use.  The prefix and URI are additionally
    checked by ``_tagValidOrRaise()`` and ``_uriValidOrRaise()``.
    """
    prefix_utf, uri_utf = _utf8(prefix), _utf8(uri)
    if _check_internal_prefix(prefix_utf):
        raise ValueError("Prefix format reserved for internal use")
    _tagValidOrRaise(prefix_utf)
    _uriValidOrRaise(uri_utf)
    # iterate over a snapshot because entries are deleted while scanning
    for k, v in list(_DEFAULT_NAMESPACE_PREFIXES.items()):
        if k == uri_utf or v == prefix_utf:
            del _DEFAULT_NAMESPACE_PREFIXES[k]
    _DEFAULT_NAMESPACE_PREFIXES[uri_utf] = prefix_utf
# Error superclass for ElementTree compatibility
class Error(Exception):
    u"""Plain exception superclass, provided for ElementTree compatibility.
    """
# module level superclass for all exceptions
class LxmlError(Error):
    u"""Main exception base class for lxml.  All other exceptions inherit from
    this one.

    Instances carry an ``error_log`` attribute: a copy of the log passed
    in, or a copy of the global error log when none is given.
    """
    def __init__(self, message, error_log=None):
        if python.PY_VERSION_HEX < 0x02050000:
            # old-style exception classes: call the stored unbound __init__
            error_super_init(self, message)
        else:
            # Python >= 2.5 uses new style class exceptions
            super(_Error, self).__init__(message)
        self.error_log = (
            __copyGlobalErrorLog() if error_log is None else error_log.copy())
cdef object _Error = Error if python.PY_VERSION_HEX >= 0x02050000 else None
cdef object error_super_init = Error.__init__ if python.PY_VERSION_HEX < 0x02050000 else None
# superclass for all syntax errors
class LxmlSyntaxError(LxmlError, SyntaxError):
    u"""Common base class of all syntax errors; it is both an LxmlError
    and a builtin SyntaxError.
    """
class C14NError(LxmlError):
    u"""Raised for errors during C14N serialisation.
    """
# version information
cdef __unpackDottedVersion(version):
    u"""Convert an ASCII encoded dotted version string into a 4-tuple.

    Dashes count as dots and missing fields are padded with 0.  Fields
    starting with 'dev', 'alpha' or 'beta' map to -300, -200 and -100
    respectively, plus the number that follows the marker (if any).
    Other non-numeric fields are passed through unchanged.
    """
    cdef list parts = []
    fields = version.decode("ascii").replace(u'-', u'.').split(u'.')
    for field in (fields + [0]*4)[:4]:
        try:
            field = int(field)
        except ValueError:
            suffix = 0
            for marker, base in ((u'dev', -300), (u'alpha', -200), (u'beta', -100)):
                if field.startswith(marker):
                    suffix = field[len(marker):]
                    field = base
                    break
            if suffix:
                field += int(suffix)
        parts.append(field)
    return tuple(parts)
cdef __unpackIntVersion(int c_version):
    u"""Split a packed version number into a (major, minor, patch) tuple.

    The number is read as ``major*10000 + minor*100 + patch``, so that
    e.g. 20901 unpacks to (2, 9, 1).
    """
    # explicit floor division keeps the components integral regardless of
    # the division semantics the module is compiled with
    return (
        ((c_version // (100*100)) % 100),
        ((c_version // 100) % 100),
        (c_version % 100)
    )
# packed integer parsed from tree.xmlParserVersion; 0 if it cannot be parsed
cdef int _LIBXML_VERSION_INT
try:
# use the leading run of digits of the version string
_LIBXML_VERSION_INT = int(
re.match(u'[0-9]+', (<unsigned char*>tree.xmlParserVersion).decode("ascii")).group(0))
except Exception:
# best effort: report the unparsable version string and fall back to 0
print u"Unknown libxml2 version: %s" % (<unsigned char*>tree.xmlParserVersion).decode("ascii")
_LIBXML_VERSION_INT = 0
# version tuples published as module attributes
LIBXML_VERSION = __unpackIntVersion(_LIBXML_VERSION_INT)
LIBXML_COMPILED_VERSION = __unpackIntVersion(tree.LIBXML_VERSION)
LXML_VERSION = __unpackDottedVersion(tree.LXML_VERSION_STRING)
__version__ = (tree.LXML_VERSION_STRING).decode("ascii")
# class for temporary storage of Python references,
# used e.g. for XPath results
@cython.final
@cython.internal
cdef class _TempStore:
    u"""Temporary container that keeps references to Python objects.
    """
    cdef list _storage

    def __init__(self):
        self._storage = []

    cdef int add(self, obj) except -1:
        # keep a reference to obj until clear() is called
        self._storage.append(obj)
        return 0

    cdef int clear(self) except -1:
        # empty the list in place, dropping all stored references
        del self._storage[:]
        return 0
# class for temporarily storing exceptions raised in extensions
@cython.internal
cdef class _ExceptionContext:
    u"""Stores exception information so that it can be re-raised later.
    """
    # None, or a (type, value, traceback) style 3-tuple
    cdef object _exc_info

    cdef void clear(self):
        self._exc_info = None

    cdef void _store_raised(self):
        # remember the exception that is currently being handled
        self._exc_info = sys.exc_info()

    cdef void _store_exception(self, exception):
        # remember an exception object without value/traceback
        self._exc_info = (exception, None, None)

    cdef bint _has_raised(self):
        return self._exc_info is not None

    cdef int _raise_if_stored(self) except -1:
        if self._exc_info is None:
            return 0
        exc_type, exc_value, exc_tb = self._exc_info
        self._exc_info = None
        if exc_value is not None or exc_tb is not None:
            raise exc_type, exc_value, exc_tb
        raise exc_type
# forward declarations
cdef public class _Document [ type LxmlDocumentType, object LxmlDocument ]
cdef public class _Element [ type LxmlElementType, object LxmlElement ]
cdef class _BaseParser
cdef class QName
ctypedef public xmlNode* (*_node_to_node_function)(xmlNode*)
################################################################################
# Include submodules
include "proxy.pxi" # Proxy handling (element backpointers/memory/etc.)
include "apihelpers.pxi" # Private helper functions
include "xmlerror.pxi" # Error and log handling
################################################################################
# Public Python API
cdef public class _Document [ type LxmlDocumentType, object LxmlDocument ]:
u"""Internal base class to reference a libxml document.
When instances of this class are garbage collected, the libxml
document is cleaned up.
"""
# counter for the numbered "ns<N>" prefixes handed out by buildNewPrefix()
cdef int _ns_counter
# suffix appended to generated prefixes; buildNewPrefix() starts using it
# once the counter has overflowed
cdef bytes _prefix_tail
# the libxml2 document referenced by this object, freed in __dealloc__()
cdef xmlDoc* _c_doc
# parser object stored alongside the document
cdef _BaseParser _parser
def __dealloc__(self):
# if there are no more references to the document, it is safe
# to clean the whole thing up, as all nodes have a reference to
# the document
tree.xmlFreeDoc(self._c_doc)
@cython.final
cdef getroot(self):
# return an element proxy for the document root
# (None if the document has no root element)
cdef xmlNode* c_node
c_node = tree.xmlDocGetRootElement(self._c_doc)
if c_node is NULL:
return None
return _elementFactory(self, c_node)
@cython.final
cdef bint hasdoctype(self):
# DOCTYPE gets parsed into internal subset (xmlDTD*)
return self._c_doc is not NULL and self._c_doc.intSubset is not NULL
@cython.final
cdef getdoctype(self):
# get doctype info: root tag, public/system ID (or None if not known)
# returns the 3-tuple (root_name, public_id, sys_url)
cdef tree.xmlDtd* c_dtd
cdef xmlNode* c_root_node
public_id = None
sys_url = None
# the internal subset is consulted first ...
c_dtd = self._c_doc.intSubset
if c_dtd is not NULL:
if c_dtd.ExternalID is not NULL:
public_id = funicode(c_dtd.ExternalID)
if c_dtd.SystemID is not NULL:
sys_url = funicode(c_dtd.SystemID)
# ... and the external subset only fills in values that are still missing
c_dtd = self._c_doc.extSubset
if c_dtd is not NULL:
if not public_id and c_dtd.ExternalID is not NULL:
public_id = funicode(c_dtd.ExternalID)
if not sys_url and c_dtd.SystemID is not NULL:
sys_url = funicode(c_dtd.SystemID)
c_root_node = tree.xmlDocGetRootElement(self._c_doc)
if c_root_node is NULL:
root_name = None
else:
root_name = funicode(c_root_node.name)
return (root_name, public_id, sys_url)
@cython.final
cdef getxmlinfo(self):
# return XML version and encoding (or None if not known)
# as the 2-tuple (version, encoding)
cdef xmlDoc* c_doc = self._c_doc
if c_doc.version is NULL:
version = None
else:
version = funicode(c_doc.version)
if c_doc.encoding is NULL:
encoding = None
else:
encoding = funicode(c_doc.encoding)
return (version, encoding)
@cython.final
cdef isstandalone(self):
# returns True for "standalone=true",
# False for "standalone=false", None if not provided
# (in terms of the C field: None for -1, otherwise whether it equals 1)
if self._c_doc.standalone == -1:
return None
else:
return <bint>(self._c_doc.standalone == 1)
@cython.final
cdef bytes buildNewPrefix(self):
# get a new unique prefix ("nsX") for this document
cdef bytes ns
# the first prefixes come from the precomputed module level cache
if self._ns_counter < len(_PREFIX_CACHE):
ns = _PREFIX_CACHE[self._ns_counter]
else:
ns = python.PyBytes_FromFormat("ns%d", self._ns_counter)
if self._prefix_tail is not None:
ns += self._prefix_tail
self._ns_counter += 1
if self._ns_counter < 0:
# overflow!
# restart counting and extend the tail so that prefixes stay distinct
self._ns_counter = 0
if self._prefix_tail is None:
self._prefix_tail = b"A"
else:
self._prefix_tail += b"A"
return ns
@cython.final
cdef xmlNs* _findOrBuildNodeNs(self, xmlNode* c_node,
const_xmlChar* c_href, const_xmlChar* c_prefix,
bint is_attribute) except NULL:
u"""Get or create namespace structure for a node. Reuses the prefix if
possible.
"""
cdef xmlNs* c_ns
cdef xmlNs* c_doc_ns
cdef python.PyObject* dict_result
# NOTE(review): the enclosing 'if' repeats the asserted condition, so the
# assert can only fail when reached; the reason for the double check is
# not evident from this block
if c_node.type != tree.XML_ELEMENT_NODE:
assert c_node.type == tree.XML_ELEMENT_NODE, \
u"invalid node type %d, expected %d" % (
c_node.type, tree.XML_ELEMENT_NODE)
# look for existing ns declaration
c_ns = _searchNsByHref(c_node, c_href, is_attribute)
if c_ns is not NULL:
if is_attribute and c_ns.prefix is NULL:
# do not put namespaced attributes into the default
# namespace as this would break serialisation
pass
else:
return c_ns
# none found => determine a suitable new prefix
if c_prefix is NULL:
# prefer a registered default prefix for this namespace URI
dict_result = python.PyDict_GetItem(
_DEFAULT_NAMESPACE_PREFIXES, <unsigned char*>c_href)
if dict_result is not NULL:
prefix = <object>dict_result
else:
prefix = self.buildNewPrefix()
c_prefix = _xcstr(prefix)
# make sure the prefix is not in use already
while tree.xmlSearchNs(self._c_doc, c_node, c_prefix) is not NULL:
prefix = self.buildNewPrefix()
c_prefix = _xcstr(prefix)
# declare the namespace and return it
c_ns = tree.xmlNewNs(c_node, c_href, c_prefix)
if c_ns is NULL:
raise MemoryError()
return c_ns
@cython.final
cdef int _setNodeNs(self, xmlNode* c_node, const_xmlChar* c_href) except -1:
u"Lookup namespace structure and set it for the node."
# no prefix is requested (NULL) and the node is not treated as an attribute (0)
c_ns = self._findOrBuildNodeNs(c_node, c_href, NULL, 0)
tree.xmlSetNs(c_node, c_ns)
cdef tuple __initPrefixCache():
    u"""Build the tuple of the prefixes b"ns0" .. b"ns29"."""
    cdef int i
    cdef list prefixes = []
    for i in range(30):
        prefixes.append(python.PyBytes_FromFormat("ns%d", i))
    return tuple(prefixes)

# precomputed prefixes, looked up by _Document.buildNewPrefix()
cdef tuple _PREFIX_CACHE = __initPrefixCache()
cdef _Document _documentFactory(xmlDoc* c_doc, _BaseParser parser):
    u"""Create a _Document wrapper for the libxml2 document c_doc.

    If parser is None, the default parser of the global parser context
    is stored with the document instead.
    """
    cdef _Document doc = _Document.__new__(_Document)
    # the C document is attached first, before anything that may raise
    doc._c_doc = c_doc
    doc._ns_counter = 0
    doc._prefix_tail = None
    doc._parser = (
        __GLOBAL_PARSER_CONTEXT.getDefaultParser() if parser is None else parser)
    return doc
cdef class DocInfo:
    u"Document information provided by parser and DTD."
    cdef _Document _doc

    def __cinit__(self, tree):
        u"Create a DocInfo object for an ElementTree object or root Element."
        self._doc = _documentOrRaise(tree)
        root_name, public_id, system_url = self._doc.getdoctype()
        # DOCTYPE IDs without a root node name cannot be represented
        if (public_id or system_url) and not root_name:
            raise ValueError, u"Could not find root node"

    property root_name:
        u"Returns the name of the root node as defined by the DOCTYPE."
        def __get__(self):
            return self._doc.getdoctype()[0]

    property public_id:
        u"Returns the public ID of the DOCTYPE."
        def __get__(self):
            return self._doc.getdoctype()[1]

    property system_url:
        u"Returns the system ID of the DOCTYPE."
        def __get__(self):
            return self._doc.getdoctype()[2]

    property xml_version:
        u"Returns the XML version as declared by the document."
        def __get__(self):
            return self._doc.getxmlinfo()[0]

    property encoding:
        u"Returns the encoding name as declared by the document."
        def __get__(self):
            return self._doc.getxmlinfo()[1]

    property standalone:
        u"""Returns the standalone flag as declared by the document.  The possible
        values are True (``standalone='yes'``), False
        (``standalone='no'`` or flag not provided in the declaration),
        and None (unknown or no declaration found).  Note that a
        normal truth test on this value will always tell if the
        ``standalone`` flag was set to ``'yes'`` or not.
        """
        def __get__(self):
            return self._doc.isstandalone()

    property URL:
        u"The source URL of the document (or None if unknown)."
        def __get__(self):
            if self._doc._c_doc.URL is not NULL:
                return _decodeFilename(self._doc._c_doc.URL)
            return None

        def __set__(self, url):
            url = _encodeFilename(url)
            c_previous_url = self._doc._c_doc.URL
            if url is not None:
                self._doc._c_doc.URL = tree.xmlStrdup(_xcstr(url))
            else:
                self._doc._c_doc.URL = NULL
            # the old string is released only after the new value is in place
            if c_previous_url is not NULL:
                tree.xmlFree(<void*>c_previous_url)

    property doctype:
        u"Returns a DOCTYPE declaration string for the document."
        def __get__(self):
            root_name, public_id, system_url = self._doc.getdoctype()
            if public_id and system_url:
                return u'<!DOCTYPE %s PUBLIC "%s" "%s">' % (
                    root_name, public_id, system_url)
            if public_id:
                return u'<!DOCTYPE %s PUBLIC "%s">' % (
                    root_name, public_id)
            if system_url:
                return u'<!DOCTYPE %s SYSTEM "%s">' % (
                    root_name, system_url)
            if self._doc.hasdoctype():
                return u'<!DOCTYPE %s>' % root_name
            return u""

    property internalDTD:
        u"Returns a DTD validator based on the internal subset of the document."
        def __get__(self):
            return _dtdFactory(self._doc._c_doc.intSubset)

    property externalDTD:
        u"Returns a DTD validator based on the external subset of the document."
        def __get__(self):
            return _dtdFactory(self._doc._c_doc.extSubset)
cdef public class _Element [ type LxmlElementType, object LxmlElement ]:
u"""Element class.
References a document object and a libxml node.
By pointing to a Document instance, a reference is kept to
_Document as long as there is some pointer to a node in it.
"""
cdef python.PyObject* _gc_doc
cdef _Document _doc
cdef xmlNode* _c_node
cdef object _tag
def _init(self):
u"""_init(self)
Called after object initialisation. Custom subclasses may override
this if they recursively call _init() in the superclasses.

This base implementation does nothing.
"""
def __dealloc__(self):
# Proxy teardown.  For a proxy that still references a C node, the proxy
# is unregistered and deallocation of the node is attempted.
# NOTE(review): the helpers are defined elsewhere; presumably the node is
# only freed when nothing else refers to it -- confirm in proxy.pxi
#print "trying to free node:", <int>self._c_node
#displayNode(self._c_node, 0)
if self._c_node is not NULL:
_unregisterProxy(self)
attemptDeallocation(self._c_node)
_releaseProxy(self)
# MANIPULATORS
def __setitem__(self, x, value):
u"""__setitem__(self, x, value)
Replaces the given subelement index or slice.

Raises ValueError when assigning None and IndexError when no child is
found for a non-slice index.
"""
cdef xmlNode* c_node = NULL
cdef xmlNode* c_next
cdef xmlDoc* c_source_doc
cdef _Element element
cdef bint left_to_right
cdef Py_ssize_t slicelength = 0, step = 0
_assertValidNode(self)
if value is None:
raise ValueError, u"cannot assign None"
if python.PySlice_Check(x):
# slice assignment
_findChildSlice(<slice>x, self._c_node, &c_node, &step, &slicelength)
# hand the step over as a positive magnitude plus a direction flag
if step > 0:
left_to_right = 1
else:
left_to_right = 0
step = -step
_replaceSlice(self, c_node, slicelength, step, left_to_right, value)
return
else:
# otherwise: normal item assignment
# (assigning to the typed variable rejects non-_Element values)
element = value
_assertValidNode(element)
c_node = _findChild(self._c_node, x)
if c_node is NULL:
raise IndexError, u"list index out of range"
# remember the new element's document and following sibling before
# the node gets relinked into this tree
c_source_doc = element._c_node.doc
c_next = element._c_node.next
_removeText(c_node.next)
tree.xmlReplaceNode(c_node, element._c_node)
_moveTail(c_next, element._c_node)
moveNodeToDocument(self._doc, c_source_doc, element._c_node)
# NOTE(review): presumably the replaced node only needs fixing up when it
# could not be freed (i.e. is still referenced) -- confirm
if not attemptDeallocation(c_node):
moveNodeToDocument(self._doc, c_node.doc, c_node)
def __delitem__(self, x):
u"""__delitem__(self, x)
Deletes the given subelement or a slice.

Raises IndexError when no child is found for a non-slice index.
"""
cdef xmlNode* c_node = NULL
cdef xmlNode* c_next
cdef Py_ssize_t step = 0, slicelength = 0
_assertValidNode(self)
if python.PySlice_Check(x):
# slice deletion
if _isFullSlice(<slice>x):
# full slice: remove every element child
c_node = self._c_node.children
if c_node is not NULL:
# skip ahead to the first node for which _isElement() holds
if not _isElement(c_node):
c_node = _nextElement(c_node)
while c_node is not NULL:
# fetch the successor before the current node is removed
c_next = _nextElement(c_node)
_removeNode(self._doc, c_node)
c_node = c_next
else:
_findChildSlice(<slice>x, self._c_node, &c_node, &step, &slicelength)
_deleteSlice(self._doc, c_node, slicelength, step)
else:
# item deletion
c_node = _findChild(self._c_node, x)
if c_node is NULL:
raise IndexError, u"index out of range: %d" % x
# the text nodes following the child are removed together with it
_removeText(c_node.next)
_removeNode(self._doc, c_node)
def __deepcopy__(self, memo):
    u"__deepcopy__(self, memo)"
    # delegates to __copy__(); the memo argument is not used
    return self.__copy__()
def __copy__(self):
    u"__copy__(self)"
    cdef xmlDoc* c_new_doc
    cdef xmlNode* c_candidate
    cdef _Document new_doc
    _assertValidNode(self)
    c_new_doc = _copyDocRoot(self._doc._c_doc, self._c_node) # recursive
    new_doc = _documentFactory(c_new_doc, self._doc._parser)
    copied_root = new_doc.getroot()
    if copied_root is not None:
        return copied_root
    # Comment/PI: no root element in the copy, so look for the first
    # top-level node that has the same node type as this one
    c_candidate = c_new_doc.children
    while c_candidate is not NULL:
        if c_candidate.type == self._c_node.type:
            return _elementFactory(new_doc, c_candidate)
        c_candidate = c_candidate.next
    return None
def set(self, key, value):
    u"""set(self, key, value)

    Sets the attribute ``key`` of this element to ``value``.
    """
    _assertValidNode(self)
    _setAttributeValue(self, key, value)
def append(self, _Element element not None):
    u"""append(self, element)

    Adds the given element as the last subelement of this element.
    """
    _assertValidNode(self)
    _assertValidNode(element)
    _appendChild(self, element)
def addnext(self, _Element element not None):
    u"""addnext(self, element)

    Adds the element as a following sibling directly after this
    element.

    This is normally used to set a processing instruction or comment after
    the root node of a document.  Note that tail text is automatically
    discarded when adding at the root level.
    """
    cdef xmlNode* c_parent
    _assertValidNode(self)
    _assertValidNode(element)
    c_parent = self._c_node.parent
    if c_parent is not NULL and not _isElement(c_parent):
        # this element sits at the root level
        if (element._c_node.type != tree.XML_PI_NODE and
                element._c_node.type != tree.XML_COMMENT_NODE):
            raise TypeError, u"Only processing instructions and comments can be siblings of the root element"
        element.tail = None
    _appendSibling(self, element)
def addprevious(self, _Element element not None):
    u"""addprevious(self, element)

    Adds the element as a preceding sibling directly before this
    element.

    This is normally used to set a processing instruction or comment
    before the root node of a document.  Note that tail text is
    automatically discarded when adding at the root level.
    """
    cdef xmlNode* c_parent
    _assertValidNode(self)
    _assertValidNode(element)
    c_parent = self._c_node.parent
    if c_parent is not NULL and not _isElement(c_parent):
        # this element sits at the root level
        if (element._c_node.type != tree.XML_PI_NODE and
                element._c_node.type != tree.XML_COMMENT_NODE):
            raise TypeError, u"Only processing instructions and comments can be siblings of the root element"
        element.tail = None
    _prependSibling(self, element)
def extend(self, elements):
    u"""extend(self, elements)

    Appends each element of the iterable as a new child, in order.
    Raises TypeError if the iterable contains None.
    """
    cdef _Element child
    _assertValidNode(self)
    for child in elements:
        if child is None:
            raise TypeError, u"Node must not be None"
        _assertValidNode(child)
        _appendChild(self, child)
def clear(self):
u"""clear(self)
Resets an element. This function removes all subelements, clears
all attributes and sets the text and tail properties to None.
"""
cdef xmlAttr* c_attr
cdef xmlAttr* c_attr_next
cdef xmlNode* c_node
cdef xmlNode* c_node_next
_assertValidNode(self)
c_node = self._c_node
# remove self.text and self.tail
_removeText(c_node.children)
_removeText(c_node.next)
# remove all attributes
c_attr = c_node.properties
while c_attr is not NULL:
# read the successor first, the current attribute is removed next
c_attr_next = c_attr.next
tree.xmlRemoveProp(c_attr)
c_attr = c_attr_next
# remove all subelements
c_node = c_node.children
if c_node is not NULL:
# start at the first node for which _isElement() holds
if not _isElement(c_node):
c_node = _nextElement(c_node)
while c_node is not NULL:
# fetch the successor before the current node is removed
c_node_next = _nextElement(c_node)
_removeNode(self._doc, c_node)
c_node = c_node_next
def insert(self, index, _Element element not None):
    u"""insert(self, index, element)

    Inserts a subelement at the given position in this element.

    If no child is found for ``index``, the element is appended instead.
    """
    cdef xmlNode* c_node
    cdef xmlNode* c_next
    cdef xmlDoc* c_source_doc
    _assertValidNode(self)
    _assertValidNode(element)
    c_node = _findChild(self._c_node, index)
    if c_node is NULL:
        _appendChild(self, element)
        return
    # Remember the document that the inserted element comes from *before*
    # it gets relinked.  c_node is a child of this element, so its document
    # is the target document, not the source.
    c_source_doc = element._c_node.doc
    c_next = element._c_node.next
    tree.xmlAddPrevSibling(c_node, element._c_node)
    _moveTail(c_next, element._c_node)
    moveNodeToDocument(self._doc, c_source_doc, element._c_node)
def remove(self, _Element element not None):
    u"""remove(self, element)

    Removes a matching subelement.  Unlike the find methods, this
    method compares elements based on identity, not on tag value
    or contents.

    Raises ValueError if the element is not a direct child.
    """
    cdef xmlNode* c_child
    cdef xmlNode* c_tail
    _assertValidNode(self)
    _assertValidNode(element)
    c_child = element._c_node
    if c_child.parent is not self._c_node:
        raise ValueError, u"Element is not a child of this node."
    # grab the following sibling before unlinking, then move the tail along
    c_tail = c_child.next
    tree.xmlUnlinkNode(c_child)
    _moveTail(c_tail, c_child)
    # fix namespace declarations
    moveNodeToDocument(self._doc, c_child.doc, c_child)
def replace(self, _Element old_element not None,
_Element new_element not None):
u"""replace(self, old_element, new_element)
Replaces a subelement with the element passed as second argument.

Raises ValueError if old_element is not a direct child of this element.
"""
cdef xmlNode* c_old_node
cdef xmlNode* c_old_next
cdef xmlNode* c_new_node
cdef xmlNode* c_new_next
cdef xmlDoc* c_source_doc
_assertValidNode(self)
_assertValidNode(old_element)
_assertValidNode(new_element)
c_old_node = old_element._c_node
if c_old_node.parent is not self._c_node:
raise ValueError, u"Element is not a child of this node."
# capture siblings and the source document of both nodes before relinking
c_old_next = c_old_node.next
c_new_node = new_element._c_node
c_new_next = c_new_node.next
c_source_doc = c_new_node.doc
tree.xmlReplaceNode(c_old_node, c_new_node)
# each node takes along the tail that followed it in its old position
_moveTail(c_new_next, c_new_node)
_moveTail(c_old_next, c_old_node)
moveNodeToDocument(self._doc, c_source_doc, c_new_node)
# fix namespace declarations
moveNodeToDocument(self._doc, c_old_node.doc, c_old_node)
# PROPERTIES
property tag:
    u"""Element tag
    """
    def __get__(self):
        # the tag object is computed lazily and cached on the proxy
        if self._tag is None:
            _assertValidNode(self)
            self._tag = _namespacedName(self._c_node)
        return self._tag

    def __set__(self, value):
        cdef _BaseParser parser
        _assertValidNode(self)
        ns, name = _getNsTag(value)
        parser = self._doc._parser
        # documents of HTML parsers use the HTML tag name validation
        if parser is None or not parser._for_html:
            _tagValidOrRaise(name)
        else:
            _htmlTagValidOrRaise(name)
        self._tag = value
        tree.xmlNodeSetName(self._c_node, _xcstr(name))
        if ns is not None:
            self._doc._setNodeNs(self._c_node, _xcstr(ns))
        else:
            self._c_node.ns = NULL
property attrib:
    u"""Element attribute dictionary.  Where possible, use get(), set(),
    keys(), values() and items() to access element attributes.
    """
    def __get__(self):
        _assertValidNode(self)
        # a new _Attrib object is created for each access
        return _Attrib(self)
property text:
    u"""Text before the first subelement.  This is either a string or
    the value None, if there was no text.
    """
    def __get__(self):
        _assertValidNode(self)
        return _collectText(self._c_node.children)

    def __set__(self, value):
        _assertValidNode(self)
        if isinstance(value, QName):
            # QName values are resolved to text first and decoded from UTF-8
            resolved = _resolveQNameText(self, value)
            value = python.PyUnicode_FromEncodedObject(
                resolved, 'UTF-8', 'strict')
        _setNodeText(self._c_node, value)

    # using 'del el.text' is the wrong thing to do
    #def __del__(self):
    #    _setNodeText(self._c_node, None)
property tail:
    u"""Text after this element's end tag, but before the next sibling
    element's start tag.  This is either a string or the value None, if
    there was no text.
    """
    def __get__(self):
        _assertValidNode(self)
        # the tail text is collected starting at the node's next sibling
        return _collectText(self._c_node.next)

    def __set__(self, value):
        _assertValidNode(self)
        _setTailText(self._c_node, value)

    # using 'del el.tail' is the wrong thing to do
    #def __del__(self):
    #    _setTailText(self._c_node, None)
# not in ElementTree, read-only
property prefix:
    u"""Namespace prefix or None.

    None is returned when the element has no namespace, or when its
    namespace has no prefix.
    """
    def __get__(self):
        # validate the proxy before dereferencing its C node, as the other
        # accessors of this class do
        _assertValidNode(self)
        if self._c_node.ns is not NULL:
            if self._c_node.ns.prefix is not NULL:
                return funicode(self._c_node.ns.prefix)
        return None
# not in ElementTree
property sourceline:
    u"""Original line number as found by the parser or None if unknown.
    """
    def __get__(self):
        cdef long line
        _assertValidNode(self)
        line = tree.xmlGetLineNo(self._c_node)
        # non-positive values are reported as "unknown"
        if line <= 0:
            return None
        return line

    def __set__(self, line):
        _assertValidNode(self)
        # negative line numbers are stored as 0
        if line < 0:
            self._c_node.line = 0
        else:
            self._c_node.line = line
# not in ElementTree, read-only
property nsmap:
    u"""Namespace prefix->URI mapping known in the context of this
    Element.  This includes all namespace declarations of the
    parents.

    Note that changing the returned dict has no effect on the Element.
    """
    def __get__(self):
        cdef xmlNode* c_node
        cdef xmlNs* c_ns
        cdef dict result = {}
        _assertValidNode(self)
        # walk from this element up through its element ancestors
        c_node = self._c_node
        while c_node is not NULL and c_node.type == tree.XML_ELEMENT_NODE:
            c_ns = c_node.nsDef
            while c_ns is not NULL:
                if c_ns.prefix is NULL:
                    prefix = None
                else:
                    prefix = funicode(c_ns.prefix)
                # a prefix that was already seen further down is not overwritten
                if prefix not in result:
                    if c_ns.href is NULL:
                        result[prefix] = None
                    else:
                        result[prefix] = funicode(c_ns.href)
                c_ns = c_ns.next
            c_node = c_node.parent
        return result
# not in ElementTree, read-only
property base: