/
structures.py
3905 lines (3232 loc) · 142 KB
/
structures.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#! /usr/bin/env python
import codecs
import io
import logging
import os
import os.path
import random
import warnings
from copy import copy
from types import MethodType
from .. import rfc2396 as uri
from ..http import client as http
from ..pep8 import (
MigratedClass,
old_function,
old_method)
from ..py2 import (
character,
dict_items,
dict_keys,
force_text,
is_text,
is_unicode,
join_characters,
range3,
to_text,
uempty,
ul,
uspace,
UnicodeMixin)
from ..unicode5 import CharClass
class XMLError(Exception):
    """Root of this module's exception hierarchy.

    All exceptions raised by this module derive from this class."""
class DuplicateXMLNAME(XMLError):
    """Two classes were declared with the same XML name.

    Raised by :py:func:`map_class_elements`."""
class XMLAttributeSetter(XMLError):
    """A badly formed attribute mapping was found."""
class XMLMissingResourceError(XMLError):
    """An entity could not be found (e.g., a missing file).

    Also raised when an external entity reference is encountered while
    the opening of external entities is turned off."""
class XMLMissingLocationError(XMLError):
    """Raised by create, read or update when base_uri is None."""
class XMLMixedContentError(XMLError):
    """Unexpected element children were found.

    Raised by :meth:`Element.get_value`."""
class XMLParentError(XMLError):
    """The element was not an orphan.

    Raised by :meth:`Element.attach_to_parent`."""
class XMLUnexpectedHTTPResponse(XMLError):
    """An HTTP request returned an unexpected response.

    Raised by :meth:`Document.open_uri`.  The message contains the
    response code and status message received from the server."""
class XMLUnsupportedSchemeError(XMLError):
    """:attr:`Document.base_uri` has an unsupported scheme.

    Open operations currently support only the file, http and https
    schemes.  Create and update operations support only file types."""
class XMLValidityError(XMLError):
    """Base class for all validation errors.

    Raised when a document or content model violates a validity
    constraint.  The parser can generate these errors (for example,
    when validating a document against a declared DTD), as can Elements
    themselves when they encounter content that does not fit the
    content model expected."""
class XMLIDClashError(XMLValidityError):
    """Validity error: two elements have the same ID."""
class XMLIDValueError(XMLValidityError):
    """Validity error: an element has an invalid ID.

    An ID attribute must satisfy the production for NAME."""
class XMLUnknownChild(XMLError):
    """The child being removed was not found in the element's content.

    Raised by :meth:`Element.remove_child`."""
#: character class holding the four white space characters: space, tab,
#: line feed and carriage return (the same set that is_s tests for)
s = CharClass("\x20\x09\x0A\x0D")
def is_s(c):
    """Tests production [3] S (XML white space)

    c
        A character, or None.

    Returns False for None, otherwise the result of a substring test
    against the four space characters.  A plain substring test is used
    for speed because the parser calls this function a lot."""
    if c is None:
        return False
    return c in "\x20\x09\x0A\x0D"
def collapse_space(data, smode=True, stest=is_s):
    """Returns data with each run of spaces collapsed to a single space.

    smode
        The initial "already in a run of spaces" state.  With the
        default of True any leading space is dropped (provided the
        string has some non-space characters); with False a leading
        run of spaces is kept as a single space.

    stest
        The test of what constitutes a space, by default
        :func:`is_s`.  Any replacement should behave similarly: it is
        called with one character and its result is used as a boolean.

    Degenerate case: this function never *returns* an empty string.  If
    nothing was collected (no data, or only spaces that were dropped)
    a single space is returned, regardless of smode."""
    collapsed = []
    in_space = smode
    for ch in data:
        if not stest(ch):
            in_space = False
            collapsed.append(ch)
            continue
        # only the first space of a run is emitted
        if not in_space:
            collapsed.append(uspace)
        in_space = True
    return ''.join(collapsed) if collapsed else uspace
#: character class of the characters allowed at the start of a Name:
#: ':', '_', the ASCII letters and the non-ASCII ranges listed below
#: (all of the ranges given here lie at or below U+FFFD)
name_start_char = CharClass(
    ':', ('A', 'Z'), '_', ('a', 'z'), (character(0xc0), character(0xd6)),
    (character(0xd8), character(0xf6)), (character(0xf8), character(0x02ff)),
    (character(0x0370), character(0x037d)),
    (character(0x037f), character(0x1fff)),
    (character(0x200c), character(0x200d)),
    (character(0x2070), character(0x218f)),
    (character(0x2c00), character(0x2fef)),
    (character(0x3001), character(0xd7ff)),
    (character(0xf900), character(0xfdcf)),
    (character(0xfdf0), character(0xfffd)))
# The def below exists so that old_function is applied to a real
# function under the legacy name 'IsNameStartChar'.
# NOTE(review): old_function comes from ..pep8 and is not visible here;
# presumably it publishes a backwards-compatible alias - confirm.
@old_function('IsNameStartChar')
def is_name_start_char(c):
    """Tests if c is in the :data:`name_start_char` class"""
    return name_start_char.test(c)


# The public name is then rebound directly to the CharClass.test method
# of name_start_char, replacing the def above.
is_name_start_char = name_start_char.test # noqa (def used by decorator)
#: character class of the characters allowed in a Name after the first:
#: everything in name_start_char plus '-', '.', the ASCII digits,
#: U+00B7 and the two extra ranges listed below
name_char = CharClass(name_start_char, '-', '.', ('0', '9'), character(0xb7),
                      (character(0x0300), character(0x036f)),
                      (character(0x203f), character(0x2040)))
# The def below exists so that old_function is applied to a real
# function under the legacy name 'IsNameChar'.
# NOTE(review): old_function comes from ..pep8 and is not visible here;
# presumably it publishes a backwards-compatible alias - confirm.
@old_function('IsNameChar')
def is_name_char(c):
    """Tests if c is in the :data:`name_char` class"""
    return name_char.test(c)


# The public name is then rebound directly to the CharClass.test method
# of name_char, replacing the def above.
is_name_char = name_char.test # noqa (def used by decorator)
@old_function('IsValidName')
def is_valid_name(name):
    """Tests if name is a string matching production [5] Name

    name
        The candidate string; an empty string or None is not a name.

    Returns True if the first character passes
    :func:`is_name_start_char` and every later character passes
    :func:`is_name_char`, otherwise False."""
    if not name:
        return False
    if not is_name_start_char(name[0]):
        return False
    return all(is_name_char(c) for c in name[1:])
def is_reserved_name(name):
    """Tests if name is reserved

    Names beginning with 'xml' (matched case-insensitively) are
    reserved for future standardization.  An empty name or None is
    never reserved."""
    if not name:
        return False
    prefix = name[:3]
    return prefix.lower() == 'xml'
#: character string constant for "<![CDATA[", the opening delimiter of
#: a CDATA section
CDATA_START = ul('<![CDATA[')
#: character string constant for "]]>", the closing delimiter of a
#: CDATA section
CDATA_END = ul(']]>')
@old_function('EscapeCDSect')
def escape_cdsect(src):
    """Wraps a string in a CDATA section

    src
        A character string of data

    Returns a character string enclosed in <![CDATA[ ]]>.

    The sequence ]]> cannot appear inside a CDATA section, so each
    occurrence in src closes the current section, is written as the
    escaped character data ]]&gt; and is followed by a new section.  In
    other words ]]> is replaced by the clumsy sequence:
    ]]>]]&gt;<![CDATA[

    Degenerate case: an empty string is returned as an empty string
    """
    if not src:
        # split() never returns an empty list, so the degenerate case
        # has to be detected on the input itself
        return uempty
    pieces = src.split(CDATA_END)
    result = [CDATA_START, pieces[0]]
    for piece in pieces[1:]:
        # the '>' written between the two sections must be escaped: a
        # literal ]]> is not allowed in character data
        result.append(ul(']]>]]&gt;<![CDATA['))
        result.append(piece)
    result.append(CDATA_END)
    return join_characters(result)
@old_function('EscapeCharData')
def escape_char_data(src, quote=False):
    """Returns a unicode string with XML reserved characters escaped.

    src
        A character string.

    quote (defaults to False)
        When True the result is returned as a quoted attribute value.
        Double quotes are used as the delimiter unless src contains
        more double quotes than single quotes, in which case single
        quotes are used.  Occurrences of the chosen delimiter inside
        the value are replaced with &quot; or &apos; as appropriate.

    The characters &, < and > are replaced with the predefined entity
    references &amp;, &lt; and &gt;.  Carriage returns are replaced
    with the character reference &#xD; to prevent them being ignored.
    """
    data = []
    apos = 0
    quot = 0
    for c in src:
        if c == '&':
            data.append('&amp;')
        elif c == '<':
            data.append('&lt;')
        elif c == '>':
            data.append('&gt;')
        elif c == '\r':
            data.append('&#xD;')
        else:
            # quote characters are counted now and only escaped later,
            # once the delimiter is known
            if c == '"':
                quot += 1
            elif c == "'":
                apos += 1
            data.append(c)
    if quote:
        if quot > apos:
            delimiter, escaped = "'", '&apos;'
        else:
            delimiter, escaped = '"', '&quot;'
        # items in data are single characters or complete references,
        # so comparing each item with the delimiter is safe
        data = [escaped if item == delimiter else item for item in data]
        data = [delimiter] + data + [delimiter]
    return ''.join(data)
@old_function('EscapeCharData7')
def escape_char_data7(src, quote=False):
    """Escapes reserved and non-ASCII characters.

    src
        A character string

    quote (defaults to False)
        When True, will surround the output in either single
        or double quotes (preferred) depending on the contents
        of src.  Single quotes are only used when src contains a
        double quote and no single quote; occurrences of the chosen
        delimiter inside the value are replaced with &quot; or &apos;.

    The characters <, & and > are replaced with &lt;, &amp; and &gt;
    and carriage returns with &#xD;.  Characters outside the ASCII
    range are replaced with hexadecimal character references padded to
    2, 4, 6 or 8 digits depending on the size of the code point."""
    dst = []
    if quote:
        if '"' in src and "'" not in src:
            q, qstr = "'", '&apos;'
        else:
            q, qstr = '"', '&quot;'
        dst.append(q)
    else:
        # None never compares equal to a character of src
        q = None
        qstr = ''
    for c in src:
        code = ord(c)
        if code > 0x7F:
            if code > 0xFFFFFF:
                dst.append("&#x%08X;" % code)
            elif code > 0xFFFF:
                dst.append("&#x%06X;" % code)
            elif code > 0xFF:
                dst.append("&#x%04X;" % code)
            else:
                dst.append("&#x%02X;" % code)
        elif c == '<':
            dst.append("&lt;")
        elif c == '&':
            dst.append("&amp;")
        elif c == '>':
            dst.append("&gt;")
        elif c == '\r':
            dst.append("&#xD;")
        elif c == q:
            dst.append(qstr)
        else:
            dst.append(c)
    if quote:
        dst.append(q)
    return ''.join(dst)
# Qualified names of the xml:base, xml:lang and xml:space attributes.
# NOTE(review): not referenced in the part of the file visible here;
# presumably used by the element classes defined later - confirm.
_xml_base = 'xml:base'
_xml_lang = 'xml:lang'
_xml_space = 'xml:space'
class Node(UnicodeMixin, MigratedClass):
    """Base class for Element and Document shared attributes.

    XML documents are defined hierarchically, each element has a parent
    which is either another element or an XML document.

    Most methods here are abstract: they raise NotImplementedError and
    are expected to be overridden by derived classes."""

    def __init__(self, parent=None):
        self.parent = parent
        super(Node, self).__init__()
        # the string below documents the parent attribute set above
        """The parent of this element, for XML documents this attribute
        is used as a sentinel to simplify traversal of the hierarchy and
        is set to None."""

    def __unicode__(self):
        # abstract: derived classes provide the character string form
        raise NotImplementedError

    @old_method('GetChildren')
    def get_children(self):
        """Returns an iterator over this object's children.

        Abstract method."""
        raise NotImplementedError

    @classmethod
    @old_method('GetElementClass')
    def get_element_class(cls, name):
        """Returns a class object for representing an element

        name
            a unicode string representing the element name.

        The default implementation returns None - for elements this has
        the effect of deferring the call to the parent document (where
        this method is overridden to return :py:class:`Element`).

        This method is called immediately prior to :py:meth:`add_child`
        and (when applicable) :py:meth:`get_child_class`.

        The real purpose of this method is to allow an element class to
        directly control the way the name of a child element maps to the
        class used to represent it.  You would normally override this
        method in the :py:class:`Document` to map element names to
        classes but in some cases you may want to tweak the mapping at
        the individual element level.  For example, if the same element
        name is used for two different purposes in the same XML
        document.  Although confusing, this is allowed in XML schema."""
        return None

    @old_method('GetChildClass')
    def get_child_class(self, stag_class):
        """Supports custom content model handling

        stag_class
            The class of an element that is about to be created in the
            current context with :meth:`add_child` or the builtin *str*
            if data has been received in a context where only element
            content was expected.

        This method is only called when the
        :attr:`XMLParser.sgml_omittag` option is in effect.  It is called
        prior to :py:meth:`add_child` and gives the context (the parent
        element or document) a chance to modify the child element that
        will be created or indicate the end of the current element through
        use of the OMITTAG feature of SGML.

        It returns the class of an element whose start tag has been
        omitted from the document and should be added at this point
        or None if stag_class implies the end of the current element
        *and* the end tag may be omitted.

        Otherwise this method should return stag_class unchanged (the
        default implementation does this) indicating that the parser
        should proceed as normal.  In the case of unexpected data this
        is treated as a validity error and handled according to the
        parser's validity checking options.

        Validation errors are dealt with by the parser or, where the
        model is encoded into the classes themselves, by
        :meth:`add_child` and *not* by this method which should never
        raise validation errors.

        Although not necessary for true XML parsing this method allows
        us to support the parsing of XML-like documents that omit tags,
        such as HTML.  For example, suppose we have the following
        document::

            <title>My Blank HTML Page</title>

        The parser would recognise the start tag for <title> and then
        call this method (on the HTML document) passing the
        :class:`pyslet.html.Title` class.  For HTML documents, this
        method always returns the :class:`pyslet.html401.HTML` class
        (ignoring stag_class completely).  The result is that an HTML
        element is opened instead and the parser tries again, calling
        this method for the new HTML element.  That does not accept
        Title either and returns the :class:`pyslet.html.Head` class.
        Finally, a Head element is opened and that will accept Title as
        a child so it returns stag_class unchanged and the parser
        continues having inferred the omitted tags: <html> and <head>."""
        return stag_class

    @old_method('ChildElement')
    def add_child(self, child_class, name=None):
        """Returns a new child of the given class attached to this object.

        child_class
            A class (or callable) used to create a new instance of
            :py:class:`Element`.

        name
            The name given to the element (by the caller).  If no name
            is given then the default name for the child is used.  When
            the child returned is an existing instance, name is
            ignored.

        Abstract method."""
        raise NotImplementedError

    def processing_instruction(self, target, instruction=''):
        """Abstract method for handling processing instructions

        By default, processing instructions are ignored."""
        pass

    @old_method('GetBase')
    def get_base(self):
        """Returns the base URI for a node

        Abstract method, when used on a :class:`Document` it returns the
        URI used to load the document, if known. """
        raise NotImplementedError

    @old_method('SetBase')
    def set_base(self, base):
        """Sets the base URI of a node.

        base
            A string suitable for setting xml:base or a
            :class:`pyslet.rfc2396.URI` instance.

        Abstract method.  Changing the base affects the interpretation
        of all relative URIs in this node and its children."""
        raise NotImplementedError

    @old_method('GetLang')
    def get_lang(self):
        """Get the language of a node

        Abstract method, when used on a :class:`Document` it gets the
        default language to use in the absence of an explicit xml:lang
        value."""
        raise NotImplementedError

    @old_method('SetLang')
    def set_lang(self, lang):
        """Set the language of a node

        lang
            A string suitable for setting the xml:lang attribute of an
            element.

        Abstract method, when used on a :class:`Document` it sets a
        default language to use in the absence of an explicit xml:lang
        value."""
        raise NotImplementedError

    @old_method('GetSpace')
    def get_space(self):
        """Gets the space policy of a node

        Abstract method, when used on a :class:`Document` it gets the
        default space policy to use in the absence of an explicit
        xml:space value."""
        raise NotImplementedError
class Document(Node):
"""Base class for all XML documents.
With no arguments, a new Document is created with no base URI or
root element.
root
If root is a class object (descended from :class:`Element`) it
is used to create the root element of the document.
If root is an orphan instance of :class:`Element` (i.e., it has
no parent) it is used as the root element of the document and its
:py:meth:`Element.attach_to_doc` method is called.
base_uri (aka baseURI for backwards compatibility)
See :meth:`set_base` for more information
req_manager (aka reqManager for backwards compatibility)
Sets the request manager object to use for future HTTP calls.
Must be an instance of :class:`pyslet.http.client.Client`."""
def __init__(self, root=None, base_uri=None, req_manager=None, **kws):
    """Initialises the document, optionally creating or adopting a root

    root
        None, a class derived from :class:`Element` (instantiated with
        this document as its parent) or an orphan :class:`Element`
        instance (adopted as the root).

    base_uri, req_manager
        The legacy keyword names baseURI and reqManager are also
        accepted and take precedence when supplied.

    Raises ValueError if root is an instance that already has a parent
    or a class that is not derived from :class:`Element`."""
    base_uri = kws.get('baseURI', base_uri)
    req_manager = kws.get('reqManager', req_manager)
    super(Document, self).__init__()
    self.req_manager = req_manager
    self.base_uri = None
    """The base uri of the document (as an
    :class:`~pyslet.rfc2396.URI` instance)"""
    self.lang = None
    """The default language of the document (see :meth:`set_lang`)."""
    self.declaration = None
    """The XML declaration (or None if no XMLDeclaration is used)"""
    self.dtd = None
    """The dtd associated with the document or None."""
    self.root = None
    """The root element or None if no root element has been created
    yet."""
    # The ID table must exist before a root element is created or
    # attached: register_element reads it, and creating it afterwards
    # would discard any IDs registered while the root was being set up.
    self.idTable = {}
    if root:
        if isinstance(root, Element):
            # created from an instance
            if root.parent:
                raise ValueError(
                    "Element must be an orphan in Document constructor")
            self.root = root
            root.parent = self
            self.root.attach_to_doc(self)
        elif not issubclass(root, Element):
            raise ValueError
        else:
            # created from a class: the new element gets us as parent
            self.root = root(self)
    self.set_base(base_uri)
def get_children(self):
    """Yields the root element, if there is one

    A document has at most one child so the generator yields either a
    single item or nothing at all."""
    root = self.root
    if not root:
        return
    yield root
def __bytes__(self):
    """Returns the XML document as a binary string

    The document is serialised with :meth:`write_xml` into an in-memory
    buffer, using :func:`escape_char_data7` as the escape function."""
    buffer = io.BytesIO()
    self.write_xml(buffer, escape_char_data7)
    serialised = buffer.getvalue()
    return serialised
def __unicode__(self):
    """Returns the XML document as a unicode string

    Collects the strings yielded by :meth:`generate_xml` (called with
    :func:`escape_char_data` as the escape function) in an in-memory
    text buffer."""
    buffer = io.StringIO()
    chunks = self.generate_xml(escape_char_data)
    for chunk in chunks:
        buffer.write(chunk)
    return buffer.getvalue()
def XMLParser(self, entity):    # noqa
    """Creates a parser for this document

    entity
        The entity to parse the document from

    The default implementation creates an instance of
    :class:`XMLParser`.

    Document classes can override this method to change the parser
    used to parse them.  It is only used when parsing existing document
    instances (see :py:meth:`read` for more information).

    Classes that override this method may still register themselves
    with :py:func:`register_doc_class` but if they do then the default
    :py:class:`XMLParser` object will be used, as automatic detection
    of the document class is done by the parser itself based on the
    information in the prolog (and/or first element)."""
    # imported at call time rather than at module level
    # NOTE(review): presumably to avoid a circular import - confirm
    from pyslet.xml import parser as parser_module
    return parser_module.XMLParser(entity)
@classmethod
def get_element_class(cls, name):
    """Defaults to returning :class:`Element`.

    name
        The element name; ignored by this default implementation.

    Derived classes override this method so that the XML parser can
    create instances of custom classes based on the document context
    and element name."""
    default_class = Element
    return default_class
def add_child(self, child_class, name=None):
    """Creates the root element of the document.

    child_class
        A class (or callable) called with this document as its single
        argument to create the new root.

    name
        If given (and non-empty) it is passed to the new element's
        set_xmlname method.

    If there is already a root element it is detached from the document
    first using :py:meth:`Element.detach_from_doc` and its parent is
    reset to None.

    Unlike :meth:`Element.add_child` there are no model customization
    options.  The root element is always found at :attr:`root`."""
    if self.root:
        previous = self.root
        previous.detach_from_doc()
        previous.parent = None
        self.root = None
    replacement = child_class(self)
    if name:
        replacement.set_xmlname(name)
    self.root = replacement
    return self.root
def set_base(self, base_uri):
    """Sets the base_uri of the document to the given URI.

    base_uri
        None (clears the base), an instance of
        :py:class:`pyslet.rfc2396.URI` or an object that can be passed
        to URI.from_octets to create one.

    A URI that is not absolute is resolved immediately against the
    current working directory and the resulting URI is recorded as the
    document's *base_uri*."""
    if base_uri is None:
        self.base_uri = None
        return
    if not isinstance(base_uri, uri.URI):
        base_uri = uri.URI.from_octets(base_uri)
    self.base_uri = base_uri
    if not self.base_uri.is_absolute():
        # joining with os.curdir makes the base a path *inside* the
        # working directory for the purposes of resolution
        here = os.path.join(os.getcwd(), os.curdir)
        self.base_uri = self.base_uri.resolve(uri.URI.from_path(here))
def get_base(self):
    """Returns a string representation of the document's base_uri.

    Returns None if the document has no base_uri."""
    base = self.base_uri
    return None if base is None else str(base)
def get_lang(self):
    """Returns the default language for the document.

    This is the value of the :attr:`lang` attribute, which may be
    None."""
    return self.lang
def set_lang(self, lang):
    """Sets the default language for the document.

    lang
        The new value, stored as-is in the :attr:`lang` attribute."""
    self.lang = lang
def get_space(self):
    """Returns the default space policy for the document.

    By default we return None, indicating that no policy is in force.
    Derived documents can override this behaviour to return either
    "preserve" or "default" to affect space handling."""
    # A document is a concrete Node: raising NotImplementedError here
    # (as the abstract Node.get_space does) would contradict the
    # documented default of "no policy".
    return None
@old_method('ValidationError')
def validation_error(self, msg, element, data=None, aname=None):
    """Called when a validation error is triggered.

    msg
        contains a brief message suitable for describing the error
        in a log file.

    element
        the element in which the validation error occurred

    data, aname
        See :meth:`Element.validation_error`.  When aname is given the
        logged location is element-name.aname; when data is given its
        repr is appended to the logged message.

    Prior to raising :class:`XMLValidityError` this method logs a
    suitable message at WARN level."""
    detail = "" if data is None else repr(data)
    if aname:
        # the format has four fields: message, element name, attribute
        # name and detail
        logging.warning("%s (in %s.%s) %s", msg, element.xmlname, aname,
                        detail)
    else:
        logging.warning("%s (in %s) %s", msg, element.xmlname, detail)
    raise XMLValidityError("%s (in %s)" % (msg, element.xmlname))
@old_method('RegisterElement')
def register_element(self, element):
    """Registers an element's ID

    element
        The element to register; its ID is read from its id attribute.

    If the element has an ID it is added to the internal ID table.  An
    element with no ID (None or empty) is ignored, mirroring
    :meth:`unregister_element`.  If the ID already exists
    :class:`XMLIDClashError` is raised."""
    element_id = element.id
    if not element_id:
        # nothing to register; storing the element under None would
        # make the next ID-less element look like a clash
        return
    if element_id in self.idTable:
        raise XMLIDClashError(element_id)
    self.idTable[element_id] = element
@old_method('UnregisterElement')
def unregister_element(self, element):
    """Removes an element's ID

    element
        The element being removed; its ID is read from its id
        attribute.

    If the element has an ID it is deleted from the internal ID table;
    an element with no ID (None or empty) is ignored.  Called prior to
    detaching the element from the document."""
    element_id = element.id
    if not element_id:
        return
    del self.idTable[element_id]
@old_method('GetElementByID')
def get_element_by_id(self, id):
    """Returns the element with a given ID

    id
        The ID to look up in the internal ID table.

    Returns None if the ID is not the ID of any element."""
    table = self.idTable
    return table.get(id)
@old_method('GetUniqueID')
def get_unique_id(self, base_str=None):
    """Generates a random element ID that is not yet defined

    base_str
        A suggested prefix (defaults to None).  If it is not already in
        use it is returned unchanged.

    When no prefix is given one is generated from the letters "id"
    followed by a random hexadecimal number.  The letters are needed
    because an ID must be a valid name and a bare hexadecimal number
    may start with a digit.

    If the candidate is already in the ID table a suffix of the form
    -HEX is appended, starting from a random number and incrementing
    until an unused ID is found."""
    if not base_str:
        base_str = 'id%X' % random.randint(0, 0xFFFF)
    id_str = base_str
    id_extra = 0
    while id_str in self.idTable:
        if not id_extra:
            # first clash (or counter at zero): pick a random start
            id_extra = random.randint(0, 0xFFFF)
        id_str = '%s-%X' % (base_str, id_extra)
        id_extra = id_extra + 1
    return id_str
@old_method('Read')
def read(self, src=None, **kws):
    """Reads this document, parsing it from a source stream.

    With no arguments the document is read from the
    :py:attr:`base_uri` which must have been specified on construction
    or with a call to the :py:meth:`set_base` method; otherwise
    :class:`XMLMissingLocationError` is raised.

    src (defaults to None)
        You can override the document's base URI by passing a value
        for *src* which may be an instance of :py:class:`XMLEntity`
        or a file-like object suitable for passing to
        :meth:`read_from_stream`."""
    if src:
        # read from the source given, ignoring base_uri
        reader = (self.read_from_entity if isinstance(src, XMLEntity)
                  else self.read_from_stream)
        reader(src)
        return
    if self.base_uri is None:
        raise XMLMissingLocationError
    with XMLEntity(self.base_uri, req_manager=self.req_manager) as e:
        self.read_from_entity(e)
@old_method('ReadFromStream')
def read_from_stream(self, src):
    """Reads this document from a stream

    src
        Any object that can be passed to :class:`XMLEntity`'s
        constructor.

    The stream is wrapped in a new entity (created with this
    document's request manager) and handed to
    :meth:`read_from_entity`.  If you need more control, for example
    over encodings, create the entity yourself and call
    :meth:`read_from_entity` directly."""
    self.data = []
    entity = XMLEntity(src, req_manager=self.req_manager)
    return_value = self.read_from_entity(entity)
    del return_value
@old_method('ReadFromEntity')
def read_from_entity(self, e):
    """Reads this document from an entity

    e
        An :class:`XMLEntity` instance.

    The document is read from the current position in the entity using
    the parser returned by :meth:`XMLParser`.  Afterwards, if the
    entity has a location, it becomes the document's base URI.
    """
    self.data = []
    self.XMLParser(e).parse_document(self)
    location = e.location
    if location is None:
        return
    # update our base_uri from the entity
    self.set_base(location)
@old_method('Create')
def create(self, dst=None, **kws):
    """Creates the Document.

    Outputs the document as an XML stream using :meth:`write_xml`.

    dst (defaults to None)
        The stream is written to the base_uri by default but if the
        'dst' argument is provided then it is written directly to
        there instead.  dst can be any object that supports the
        writing of binary strings.

    Currently only documents with file type baseURIs are supported;
    :class:`XMLUnsupportedSchemeError` is raised for any other type
    and :class:`XMLMissingLocationError` if there is no base_uri at
    all.  The file's parent directories are created if required."""
    if dst:
        self.write_xml(dst)
        return
    location = self.base_uri
    if location is None:
        raise XMLMissingLocationError
    if not isinstance(location, uri.FileURL):
        raise XMLUnsupportedSchemeError(location.scheme)
    fpath = location.get_pathname()
    parent_dir = os.path.split(fpath)[0]
    if not os.path.isdir(parent_dir):
        os.makedirs(parent_dir)
    with open(fpath, 'wb') as f:
        self.write_xml(f)
@old_method('GenerateXML')
def generate_xml(self, escape_function=escape_char_data, tab='\t',
                 encoding="UTF-8"):
    """A generator that yields serialised XML

    escape_function
        The function that will be used to escape character data.  The
        default is :func:`escape_char_data`.

    tab (defaults to '\\t')
        Whether or not indentation will be used is determined by the
        tab parameter.  If it is empty then no pretty-printing is
        performed, otherwise elements are indented (where allowed
        by their defining classes) for ease of reading.

    encoding (defaults to "UTF-8")
        The name of the character encoding to put in the XML
        declaration.

    Yields character strings.  The first is always the XML declaration
    naming *encoding*; it ends with a line feed only when tab is empty.
    The rest come from the root element's generate_xml method, if the
    document has a root."""
    declaration = (ul('<?xml version="1.0" encoding="%s"?>') if tab
                   else ul('<?xml version="1.0" encoding="%s"?>\n'))
    yield declaration % encoding
    if self.root:
        chunks = self.root.generate_xml(escape_function, '', tab, root=True)
        for chunk in chunks:
            yield chunk
@old_method('WriteXML')
def write_xml(self, writer, escape_function=escape_char_data, tab='\t'):
    """Writes serialized XML to an output stream

    writer
        A file or file-like object operating in binary mode.

    escape_function, tab
        Passed on to :meth:`generate_xml`, which creates the output.

    Each string generated is encoded with UTF-8 before being written,
    so the output is always UTF-8 encoded."""
    chunks = self.generate_xml(escape_function, tab)
    for chunk in chunks:
        encoded = chunk.encode('utf-8')
        writer.write(encoded)
@old_method('Update')
def update(self, **kws):
    """Updates the Document.

    Update outputs the document as an XML stream using
    :meth:`write_xml`.  The stream is written to the base_uri, which
    must refer to a file that already exists!

    Raises :class:`XMLMissingLocationError` if there is no base_uri,
    :class:`XMLMissingResourceError` if the file does not exist and
    :class:`XMLUnsupportedSchemeError` if the base_uri is not a file
    type URI (currently the only type supported)."""
    location = self.base_uri
    if location is None:
        raise XMLMissingLocationError
    if not isinstance(location, uri.FileURL):
        raise XMLUnsupportedSchemeError(location.scheme)
    fpath = location.get_pathname()
    if not os.path.isfile(fpath):
        raise XMLMissingResourceError(fpath)
    with open(fpath, 'wb') as f:
        self.write_xml(f)
@old_method('DiffString')
def diff_string(self, other_doc, before=10, after=5):
"""Compares XML documents
other_doc
Another :class:`Document` instance to compare with.
before (default 10)
Number of lines before the first difference to output
after (default 5)
Number of lines after the first difference to output
The two documents are converted to character strings and then
compared line by line until a difference is found. The result
is suitable for logging or error reporting. Used mainly to make
the output of unittests easier to understand."""
lines = str(self).split('\n')
other_lines = str(other_doc).split('\n')
output = []
i = 0
idiff = None
while i < len(lines) and i < len(other_lines):
if i >= len(lines):
line = ''
else:
line = lines[i]
if i >= len(other_lines):
other_line = ''
else:
other_line = other_lines[i]
if line == other_line:
i = i + 1
continue
else:
# The strings differ from here.
idiff = i
break
if idiff is None:
return None
for i in range3(idiff - before, idiff):
if i < 0:
continue
if i >= len(lines):
line = '[%3i] **EOF**' % i
else:
line = '[%3i] ' % i + lines[i]
output.append(line)
output.append('>>>>> Showing %i lines of difference' % after)
for i in range3(idiff, idiff + after):
if i >= len(lines):
line = '[%3i] **EOF**' % i
else:
line = '[%3i] ' % i + repr(lines[i])
output.append(line)
output.append('>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>')
for i in range3(idiff, idiff + after):