Skip to content
This repository has been archived by the owner on Dec 7, 2022. It is now read-only.
/ pulp Public archive

Commit

Permalink
Add SAX writer to generate XML without etree
Browse files Browse the repository at this point in the history
  • Loading branch information
goosemania committed Jun 6, 2016
1 parent 23e3f43 commit a43eb31
Show file tree
Hide file tree
Showing 2 changed files with 212 additions and 0 deletions.
150 changes: 150 additions & 0 deletions server/pulp/plugins/util/saxwriter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
from xml.sax.handler import ContentHandler
from xml.sax.saxutils import escape, quoteattr


class XMLWriter(ContentHandler):
    """
    XML writer similar to xml.sax.saxutils.XMLGenerator.

    The result is indented XML, which is written sequentially to the stream.
    xml.sax.saxutils.XMLGenerator is not used as is because of the lack of the
    following functionality:
     - short_empty_elements flag which is backported from Python 3.5.1
       xml.sax.saxutils.XMLGenerator
     - ability to generate an indented XML

    :ivar _pending_start_element: indicates that element was started but it is not clear yet if it
                                  should be closed right away or there will be some content. Needed
                                  for generation of the empty elements in a short form. Backported
                                  from Python 3.5.1 xml.sax.saxutils.XMLGenerator.
    :type _pending_start_element: bool
    :ivar _indent_lvl: current level of indentation. Needed to generate proper indentation.
    :type _indent_lvl: int
    :ivar _indent_sep: indentation separator. Needed to generate proper indentation.
    :type _indent_sep: str
    :ivar _start_element: indicates that element was started and not ended yet. Needed to generate
                          proper indentation.
    :type _start_element: bool
    """

    def __init__(self, stream, encoding='utf-8', short_empty_elements=False):
        """
        :param stream: a stream to write XML to; its ``write`` and ``flush`` methods are used
        :type  stream: file-like object
        :param encoding: encoding of the generated XML
        :type  encoding: str
        :param short_empty_elements: indicates that the empty elements should be generated in
                                     a short form. Backported from Python 3.5.1
                                     xml.sax.saxutils.XMLGenerator.
        :type  short_empty_elements: bool
        """
        ContentHandler.__init__(self)
        self._write = stream.write
        self._flush = stream.flush
        self._encoding = encoding
        self._short_empty_elements = short_empty_elements
        self._pending_start_element = False
        self._indent_lvl = 0
        self._indent_sep = ' '
        self._start_element = False

    def _encoded(self, text):
        """
        Convert caller-supplied text into the form that is handed to the stream.

        Where the ``unicode`` builtin exists (Python 2), unicode objects are encoded with the
        configured encoding and any other value is returned as is. Where it does not exist
        (Python 3), the text is returned unchanged.

        :param text: text to prepare for writing
        :type  text: basestring
        :return: text ready to be passed to the stream's ``write``
        :rtype:  str
        """
        try:
            text_type = unicode  # noqa: F821 - only defined on Python 2
        except NameError:
            return text
        if isinstance(text, text_type):
            return text.encode(self._encoding)
        return text

    def _finish_pending_start_element(self):
        """
        Finish start of the element. Backported from Python 3.5.1 xml.sax.saxutils.XMLGenerator.
        """
        if self._pending_start_element:
            self._write('>')
            self._pending_start_element = False

    def writeDoctype(self, doctype_str):
        """
        Write a doctype string, followed by a newline, to the stream.

        :param doctype_str: doctype string to write to the stream
        :type  doctype_str: str
        """
        self._write(self._encoded(doctype_str) + '\n')

    def completeElement(self, name, attrs, text):
        """
        Write a complete element (start tag, content, end tag) to the stream.

        :param name: name of the tag
        :type  name: str
        :param attrs: element attributes
        :type  attrs: dict
        :param text: content of the element; an empty or None value produces an empty element
        :type  text: str
        """
        self.startElement(name, attrs)
        self.characters(text)
        self.endElement(name)

    # ContentHandler methods

    def startDocument(self):
        """
        Write the prolog to define XML version and encoding.
        """
        self._write('<?xml version="1.0" encoding="%s"?>\n' % self._encoding)

    def endDocument(self):
        """
        Flush the buffer after generating the XML document.
        """
        self._flush()

    def startElement(self, name, attrs=None):
        """
        Start the element.

        Attributes whose value is None are skipped. Unicode tag names, attribute names and
        attribute values go through the same encoding step as character content.

        :param name: name of the element
        :type  name: str
        :param attrs: element attributes; None is treated as "no attributes"
        :type  attrs: dict or None
        """
        if attrs is None:
            attrs = {}
        self._finish_pending_start_element()
        if self._start_element:
            # The parent element is still open, so this is its first child: go one level deeper.
            self._write('\n')
            self._indent_lvl += 1
        self._write(self._indent_sep * self._indent_lvl)
        self._write('<' + self._encoded(name))
        for attr_name, attr_value in attrs.items():
            if attr_value is not None:
                # Encode each piece separately so byte and unicode strings are never mixed.
                self._write(' ' + self._encoded(attr_name) + '=' +
                            self._encoded(quoteattr(attr_value)))
        if self._short_empty_elements:
            # Postpone '>' until it is known whether the element has any content.
            self._pending_start_element = True
        else:
            self._write(">")
        self._start_element = True

    def endElement(self, name):
        """
        End the element.

        If the start tag is still pending (short form requested and no content was written),
        the element is closed as ' />'. Otherwise a regular end tag is written.

        :param name: name of the element
        :type  name: str
        """
        if self._pending_start_element:
            self._write(' />\n')
            self._pending_start_element = False
        else:
            if not self._start_element:
                # The previous call ended a child element, so this end tag goes on its own
                # line, one level up.
                self._indent_lvl -= 1
                self._write(self._indent_sep * self._indent_lvl)
            self._write('</' + self._encoded(name) + '>\n')
        self._start_element = False

    def characters(self, content):
        """
        Write the escaped content of the element. Empty or None content writes nothing.

        :param content: content of the element
        :type  content: str
        """
        if content:
            self._finish_pending_start_element()
            self._write(self._encoded(escape(content)))
62 changes: 62 additions & 0 deletions server/test/unit/plugins/util/test_saxwriter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
from cStringIO import StringIO

from pulp.common.compat import unittest
from pulp.plugins.util.saxwriter import XMLWriter


class TestXMLWriter(unittest.TestCase):
    """
    Check the XML produced by the XMLWriter class for both empty-element forms.
    """

    def _calls_to_test_xml_generator(self):
        """
        Drive self.xml_generator through a sequence that touches every XMLWriter method.
        """
        writer = self.xml_generator
        writer.startDocument()
        writer.writeDoctype('<!DOCTYPE string here>')
        writer.startElement('outer_tag1')
        writer.completeElement('inner_tag1', {}, 'content in utf-8')
        writer.completeElement('inner_tag2', {'attr1': 'value1'}, u'content in unicode')
        writer.completeElement('inner_tag3', {'attr1': None, 'attr2': 'value2'}, None)
        writer.endElement('outer_tag1')
        writer.startElement('outer_tag2')
        writer.endElement('outer_tag2')
        writer.endDocument()

    def _generate_xml(self, **writer_options):
        """
        Build an XMLWriter over an in-memory stream, run the call sequence and return the output.

        :param writer_options: keyword arguments passed through to XMLWriter
        :type  writer_options: dict
        :return: everything written to the stream
        :rtype:  str
        """
        stream = StringIO()
        self.xml_generator = XMLWriter(stream, **writer_options)
        self._calls_to_test_xml_generator()
        output = stream.getvalue()
        stream.close()
        return output

    def test_short_empty_elements_true(self):
        """
        Empty elements must be written in the short form when the flag is set.
        """
        generated_xml = self._generate_xml(short_empty_elements=True)
        expected_xml = (
            '<?xml version="1.0" encoding="utf-8"?>\n'
            '<!DOCTYPE string here>\n'
            '<outer_tag1>\n'
            ' <inner_tag1>content in utf-8</inner_tag1>\n'
            ' <inner_tag2 attr1="value1">content in unicode</inner_tag2>\n'
            ' <inner_tag3 attr2="value2" />\n'
            '</outer_tag1>\n'
            '<outer_tag2 />\n'
        )
        self.assertEqual(generated_xml, expected_xml)

    def test_short_empty_elements_false(self):
        """
        Empty elements must be written with explicit end tags when the flag is left at default.
        """
        generated_xml = self._generate_xml()
        expected_xml = (
            '<?xml version="1.0" encoding="utf-8"?>\n'
            '<!DOCTYPE string here>\n'
            '<outer_tag1>\n'
            ' <inner_tag1>content in utf-8</inner_tag1>\n'
            ' <inner_tag2 attr1="value1">content in unicode</inner_tag2>\n'
            ' <inner_tag3 attr2="value2"></inner_tag3>\n'
            '</outer_tag1>\n'
            '<outer_tag2></outer_tag2>\n'
        )
        self.assertEqual(generated_xml, expected_xml)

0 comments on commit a43eb31

Please sign in to comment.