pyslet/xml/structures.py

#! /usr/bin/env python

import codecs
import io
import logging
import os
import os.path
import random
import warnings

from copy import copy
from types import MethodType

from .. import rfc2396 as uri
from ..http import client as http
from ..pep8 import (
    MigratedClass,
    old_function,
    old_method)
from ..py2 import (
    character,
    dict_items,
    dict_keys,
    force_text,
    is_text,
    is_unicode,
    join_characters,
    range3,
    to_text,
    uempty,
    ul,
    uspace,
    UnicodeMixin)
from ..unicode5 import CharClass


class XMLError(Exception):

    """Root of the exception hierarchy used by this module.

    Every exception defined in this module derives from this class."""


class DuplicateXMLNAME(XMLError):

    """Raised by :py:func:`map_class_elements`

    Signals an attempt to declare two classes that share the same XML
    name."""


class XMLAttributeSetter(XMLError):

    """Raised when an attribute mapping is found to be badly formed."""


class XMLMissingResourceError(XMLError):

    """Raised when an entity cannot be found (e.g., a missing file).

    Also raised when a reference to an external entity is encountered
    while the opening of external entities is turned off."""


class XMLMissingLocationError(XMLError):

    """Raised on create, read or update when base_uri is None"""


class XMLMixedContentError(XMLError):

    """Raised by :meth:`Element.get_value`

    Signals that element children were found where none were
    expected."""


class XMLParentError(XMLError):

    """Raised by :meth:`Element.attach_to_parent`

    Signals that the element already had a parent (it was not an
    orphan)."""


class XMLUnexpectedHTTPResponse(XMLError):

    """Raised by :meth:`Document.open_uri`

    The exception message carries the response code and the status
    message received from the server."""


class XMLUnsupportedSchemeError(XMLError):

    """:attr:`Document.base_uri` has an unsupported scheme

    Open operations currently support the file, http and https schemes
    only.  Create and update operations support file types only."""


class XMLValidityError(XMLError):

    """Base class for all validation errors

    Raised when a document or content model violates a validity
    constraint.  These errors can be generated by the parser (for
    example, when validating a document against a declared DTD) or by
    Elements themselves when they encounter content that does not fit
    the expected content model."""


class XMLIDClashError(XMLValidityError):

    """A validity error: two elements share the same ID"""


class XMLIDValueError(XMLValidityError):

    """A validity error: an element has an invalid ID

    An ID attribute must satisfy the production for NAME."""


class XMLUnknownChild(XMLError):

    """Raised by :meth:`Element.remove_child`

    Signals that the child being removed could not be found in the
    element's content."""


# Character class holding the four characters tested by is_s: space,
# tab, line feed and carriage return.
s = CharClass("\x20\x09\x0A\x0D")


def is_s(c):
    """Tests production [3] S

    c
        A single character or None.

    Returns True if c is a space, tab, line feed or carriage return.
    None and the empty string both return False.

    Optimized for speed as this function is called a lot by the parser."""
    # The truth test rejects None and, in addition, the empty string:
    # '' is a substring of every string so the membership test alone
    # would report it as white space.
    return c in "\x20\x09\x0A\x0D" if c else False


def collapse_space(data, smode=True, stest=is_s):
    """Returns data with each run of spaces collapsed to a single space.

    smode
        Controls what happens to leading space.  The default is True,
        which drops leading spaces provided the string contains at
        least one non-space character.

    stest
        The test used to decide what constitutes a space.  Defaults to
        :func:`is_s`; any replacement should behave similarly.

    Note on degenerate case: this function is intended to be called with
    non-empty strings and will never *return* an empty string.  If there
    is no data then a single space is returned (regardless of smode)."""
    collapsed = []
    # True while the most recent character handled was a space (or, at
    # the start, when leading space is to be dropped)
    in_space = smode
    for c in data:
        if not stest(c):
            collapsed.append(c)
            in_space = False
        elif not in_space:
            # first space of a run: keep exactly one
            collapsed.append(uspace)
            in_space = True
    return ''.join(collapsed) if collapsed else uspace


# Character class of the characters allowed at the start of a Name; it
# backs is_name_start_char and is also the basis of name_char.  The
# ranges listed here stop at U+FFFD.
name_start_char = CharClass(
    ':', ('A', 'Z'), '_', ('a', 'z'), (character(0xc0), character(0xd6)),
    (character(0xd8), character(0xf6)), (character(0xf8), character(0x02ff)),
    (character(0x0370), character(0x037d)),
    (character(0x037f), character(0x1fff)),
    (character(0x200c), character(0x200d)),
    (character(0x2070), character(0x218f)),
    (character(0x2c00), character(0x2fef)),
    (character(0x3001), character(0xd7ff)),
    (character(0xf900), character(0xfdcf)),
    (character(0xfdf0), character(0xfffd)))


@old_function('IsNameStartChar')
def is_name_start_char(c):
    """Tests if the character c is in :data:`name_start_char`"""
    return name_start_char.test(c)

# The public name is rebound straight to the class's test method; the
# def above is kept for the benefit of the decorator (presumably the
# rebinding saves a level of function call - not confirmed here).
is_name_start_char = name_start_char.test   # noqa (def used by decorator)


# Character class of the characters allowed after the first character
# of a Name: everything in name_start_char plus '-', '.', the digits,
# U+00B7 and the two additional ranges given below.
name_char = CharClass(name_start_char, '-', '.', ('0', '9'), character(0xb7),
                      (character(0x0300), character(0x036f)),
                      (character(0x203f), character(0x2040)))


@old_function('IsNameChar')
def is_name_char(c):
    """Tests if the character c is in :data:`name_char`"""
    return name_char.test(c)

# The public name is rebound straight to the class's test method; the
# def above is kept for the benefit of the decorator (presumably the
# rebinding saves a level of function call - not confirmed here).
is_name_char = name_char.test   # noqa (def used by decorator)


@old_function('IsValidName')
def is_valid_name(name):
    """Tests if name is a string matching production [5] Name

    The first character must pass :func:`is_name_start_char` and every
    following character must pass :func:`is_name_char`.  Empty values
    (including None) are not valid names."""
    if not name:
        return False
    if not is_name_start_char(name[0]):
        return False
    return all(is_name_char(c) for c in name[1:])


def is_reserved_name(name):
    """Tests if name is reserved

    Names beginning with 'xml' (compared without regard to case) are
    reserved for future standardization.  Empty values, including
    None, are never reserved."""
    if not name:
        return False
    return name[:3].lower() == 'xml'


#: character string constant for "<![CDATA[", the opening delimiter
#: written by :func:`escape_cdsect`
CDATA_START = ul('<![CDATA[')

#: character string constant for "]]>", the closing delimiter written
#: (and searched for) by :func:`escape_cdsect`
CDATA_END = ul(']]>')


@old_function('EscapeCDSect')
def escape_cdsect(src):
    """Wraps a string in a CDATA section

    src
        A character string of data

    Returns a character string enclosed in <![CDATA[ ]]> with ]]>
    replaced by the clumsy sequence: ]]>]]&gt;<![CDATA[

    Degenerate case: an empty string is returned as an empty string
    """
    if not src:
        # The degenerate case must be tested explicitly: splitting an
        # empty string on a separator yields [''] rather than an empty
        # list so the result of split can never be used to detect it.
        return uempty
    data = src.split(CDATA_END)
    result = [CDATA_START, data[0]]
    for d in data[1:]:
        # close the section, emit the escaped terminator as ordinary
        # character data and then open a new section for the rest
        result.append(ul(']]>]]&gt;<![CDATA['))
        result.append(d)
    result.append(CDATA_END)
    return join_characters(result)


@old_function('EscapeCharData')
def escape_char_data(src, quote=False):
    """Returns a unicode string with XML reserved characters escaped.

    src
        The character data to escape.

    quote (defaults to False)
        When True the result is returned as a quoted attribute value.
        Double quotes are used as the delimiter unless the data
        contains more double quotes than single quotes, in which case
        single quotes are used.  Occurrences of the chosen delimiter
        within the data are replaced with &quot; or &apos;.

    The characters &, < and > are always escaped.  Carriage return
    characters are also escaped, to prevent them being ignored."""
    entities = {'&': '&amp;', '<': '&lt;', '>': '&gt;', '\r': '&#xD;'}
    pieces = [entities.get(c, c) for c in src]
    if not quote:
        return ''.join(pieces)
    # quote characters are passed through unchanged above so they can
    # simply be counted in the list of pieces
    if pieces.count('"') > pieces.count("'"):
        delimiter, entity = "'", '&apos;'
    else:
        delimiter, entity = '"', '&quot;'
    quoted = [delimiter]
    quoted.extend(entity if p == delimiter else p for p in pieces)
    quoted.append(delimiter)
    return ''.join(quoted)


@old_function('EscapeCharData7')
def escape_char_data7(src, quote=False):
    """Escapes reserved and non-ASCII characters.

    src
        A character string

    quote (defaults to False)
        When True, will surround the output in either single
        or double quotes (preferred) depending on the contents
        of src.  Single quotes are used only when src contains a
        double quote and no single quote.  Occurrences of the chosen
        quote character in src are replaced with a character reference.

    Characters outside the ASCII range are replaced with hexadecimal
    character references.  The characters <, & and > and the carriage
    return character are also escaped."""
    replacements = {'<': "&lt;", '&': "&amp;", '>': "&gt;", '\r': "&#xD;"}
    delimiter = None
    if quote:
        if "'" not in src and '"' in src:
            delimiter = "'"
            replacements[delimiter] = '&#x27;'
        else:
            delimiter = '"'
            replacements[delimiter] = '&#x22;'
    pieces = []
    if quote:
        pieces.append(delimiter)
    for c in src:
        code = ord(c)
        if code <= 0x7F:
            pieces.append(replacements.get(c, c))
        elif code <= 0xFF:
            pieces.append("&#x%02X;" % code)
        elif code <= 0xFFFF:
            pieces.append("&#x%04X;" % code)
        elif code <= 0xFFFFFF:
            pieces.append("&#x%06X;" % code)
        else:
            pieces.append("&#x%08X;" % code)
    if quote:
        pieces.append(delimiter)
    return ''.join(pieces)


# Names of the special xml:base, xml:lang and xml:space attributes,
# defined once here as private module constants.
_xml_base = 'xml:base'

_xml_lang = 'xml:lang'

_xml_space = 'xml:space'


class Node(UnicodeMixin, MigratedClass):

    """Base class for Element and Document shared attributes.

    XML documents are defined hierarchically: each element has a parent
    which is either another element or an XML document."""

    def __init__(self, parent=None):
        self.parent = parent
        """The parent of this element, for XML documents this attribute
        is used as a sentinel to simplify traversal of the hierarchy and
        is set to None."""
        super(Node, self).__init__()

    def __unicode__(self):
        # abstract: derived classes provide the character string form
        raise NotImplementedError

    @old_method('GetChildren')
    def get_children(self):
        """Returns an iterator over this object's children.

        Abstract method."""
        raise NotImplementedError

    @classmethod
    @old_method('GetElementClass')
    def get_element_class(cls, name):
        """Returns a class object for representing an element

        name
            a unicode string representing the element name.

        The default implementation returns None - for elements this has
        the effect of deferring the call to the parent document (where
        this method is overridden to return :py:class:`Element`).

        This method is called immediately prior to :py:meth:`add_child`
        and (when applicable) :py:meth:`get_child_class`.

        The real purpose of this method is to allow an element class to
        directly control the way the name of a child element maps to the
        class used to represent it.  You would normally override this
        method in the :py:class:`Document` to map element names to
        classes but in some cases you may want to tweak the mapping at
        the individual element level.  For example, if the same element
        name is used for two different purposes in the same XML
        document.  Although confusing, this is allowed in XML schema."""
        return None

    @old_method('GetChildClass')
    def get_child_class(self, stag_class):
        """Supports custom content model handling

        stag_class
            The class of an element that is about to be created in the
            current context with :meth:`add_child` or the builtin *str*
            if data has been received in a context where only element
            content was expected.

        This method is only called when the
        :attr:`XMLParser.sgml_omittag` option is in effect.  It is called
        prior to :py:meth:`add_child` and gives the context (the parent
        element or document) a chance to modify the child element that
        will be created or indicate the end of the current element through
        use of the OMITTAG feature of SGML.

        It returns the class of an element whose start tag has been
        omitted from the document and should be added at this point
        or None if stag_class implies the end of the current element
        *and* the end tag may be omitted.

        Otherwise this method should return stag_class unchanged (the
        default implementation does this) indicating that the parser
        should proceed as normal.  In the case of unexpected data this
        is treated as a validity error and handled according to the
        parser's validity checking options.

        Validation errors are dealt with by the parser or, where the
        model is encoded into the classes themselves, by
        :meth:`add_child` and *not* by this method which should never
        raise validation errors.

        Although not necessary for true XML parsing this method allows
        us to support the parsing of XML-like documents that omit tags,
        such as HTML.  For example, suppose we have the following
        document::

            <title>My Blank HTML Page</title>

        The parser would recognise the start tag for <title> and then
        call this method (on the HTML document) passing the
        :class:`pyslet.html.Title` class.  For HTML documents, this
        method always returns the :class:`pyslet.html401.HTML` class
        (ignoring stag_class completely).  The result is that an HTML
        element is opened instead and the parser tries again, calling
        this method for the new HTML element.  That does not accept
        Title either and returns the :class:`pyslet.html.Head` class.
        Finally, a Head element is opened and that will accept Title as
        a child so it returns stag_class unchanged and the parser
        continues having inferred the omitted tags: <html> and <head>."""
        return stag_class

    @old_method('ChildElement')
    def add_child(self, child_class, name=None):
        """Returns a new child of the given class attached to this object.

        child_class
            A class (or callable) used to create a new instance of
            :py:class:`Element`.

        name
            The name given to the element (by the caller).  If no name
            is given then the default name for the child is used. When
            the child returned is an existing instance, name is
            ignored.

        Abstract method."""
        raise NotImplementedError

    def processing_instruction(self, target, instruction=''):
        """Abstract method for handling processing instructions

        By default, processing instructions are ignored."""

    @old_method('GetBase')
    def get_base(self):
        """Returns the base URI for a node

        Abstract method, when used on a :class:`Document` it returns the
        URI used to load the document, if known. """
        raise NotImplementedError

    @old_method('SetBase')
    def set_base(self, base):
        """Sets the base URI of a node.

        base
            A string suitable for setting xml:base or a
            :class:`pyslet.rfc2396.URI` instance.

        Abstract method.  Changing the base affects the interpretation
        of all relative URIs in this node and its children."""
        raise NotImplementedError

    @old_method('GetLang')
    def get_lang(self):
        """Get the language of a node

        Abstract method, when used on a :class:`Document` it gets the
        default language to use in the absence of an explicit xml:lang
        value."""
        raise NotImplementedError

    @old_method('SetLang')
    def set_lang(self, lang):
        """Set the language of a node

        lang
            A string suitable for setting the xml:lang attribute of an
            element.

        Abstract method, when used on a :class:`Document` it sets a
        default language to use in the absence of an explicit xml:lang
        value."""
        raise NotImplementedError

    @old_method('GetSpace')
    def get_space(self):
        """Gets the space policy of a node

        Abstract method, when used on a :class:`Document` it gets the
        default space policy to use in the absence of an explicit
        xml:space value."""
        raise NotImplementedError


class Document(Node):

    """Base class for all XML documents.

    With no arguments, a new Document is created with no base URI or
    root element.

    root
        If root is a class object (descended from :class:`Element`) it
        is used to create the root element of the document.

        If root is an orphan instance of :class:`Element` (i.e., it has
        no parent) it is used as the root element of the document and its
        :py:meth:`Element.attach_to_doc` method is called.

    base_uri (aka baseURI for backwards compatibility)
        See :meth:`set_base` for more information

    req_manager (aka reqManager for backwards compatibility)
        Sets the request manager object to use for future HTTP calls.
        Must be an instance of :class:`pyslet.http.client.Client`."""

    def __init__(self, root=None, base_uri=None, req_manager=None, **kws):
        # the old camelCase keyword names take precedence when given
        base_uri = kws.get('baseURI', base_uri)
        req_manager = kws.get('reqManager', req_manager)
        super(Document, self).__init__()
        self.req_manager = req_manager
        self.base_uri = None
        """The base uri of the document (as an
        :class:`~pyslet.rfc2396.URI` instance)"""
        self.lang = None
        """The default language of the document (see :meth:`set_lang`)."""
        self.declaration = None
        """The XML declaration (or None if no XMLDeclaration is used)"""
        self.dtd = None
        """The dtd associated with the document or None."""
        self.root = None
        """The root element or None if no root element has been created
        yet."""
        # The ID table is created before any root element is attached:
        # register_element reads it, so it must already exist (and must
        # not be reset afterwards) if attaching the root registers IDs.
        self.idTable = {}
        if root:
            if isinstance(root, Element):
                # created from an instance
                if root.parent:
                    raise ValueError(
                        "Element must be an orphan in Document constructor")
                self.root = root
                root.parent = self
                self.root.attach_to_doc(self)
            elif not issubclass(root, Element):
                raise ValueError
            else:
                # created from a class
                self.root = root(self)
        self.set_base(base_uri)

    def get_children(self):
        """Yields the root element (if there is one)"""
        if self.root:
            yield self.root

    def __bytes__(self):
        """Returns the XML document as a binary string

        The output is created with :meth:`write_xml` using
        :func:`escape_char_data7` to escape character data."""
        output = io.BytesIO()
        self.write_xml(output, escape_char_data7)
        return output.getvalue()

    def __unicode__(self):
        """Returns the XML document as a unicode string"""
        output = io.StringIO()
        for data in self.generate_xml(escape_char_data):
            output.write(data)
        return output.getvalue()

    def XMLParser(self, entity):    # noqa
        """Creates a parser for this document

        entity
            The entity to parse the document from

        The default implementation creates an instance of
        :class:`XMLParser`.

        This method allows some document classes to override the parser
        used to parse them.  This method is only used when parsing
        existing document instances (see :py:meth:`read` for more
        information).

        Classes that override this method may still register themselves
        with :py:func:`register_doc_class` but if they do then the
        default :py:class:`XMLParser` object will be used as automatic
        detection of document class is done by the parser itself based
        on the information in the prolog (and/or first element)."""
        # imported here rather than at module level
        from pyslet.xml.parser import XMLParser
        return XMLParser(entity)

    @classmethod
    def get_element_class(cls, name):
        """Defaults to returning :class:`Element`.

        Derived classes override this method to enable the XML parser
        to create instances of custom classes based on the document
        context and element name."""
        return Element

    def add_child(self, child_class, name=None):
        """Creates the root element of the document.

        child_class
            A class (or callable) called with the document as its only
            argument to create the new root element.

        name
            An optional name, passed to the new element's set_xmlname
            method.

        If there is already a root element it is detached from the document
        first using :py:meth:`Element.detach_from_doc`.

        Unlike :meth:`Element.add_child` there are no model
        customization options.  The root element is always found at
        :attr:`root`."""
        if self.root:
            self.root.detach_from_doc()
            self.root.parent = None
            self.root = None
        child = child_class(self)
        if name:
            child.set_xmlname(name)
        self.root = child
        return self.root

    def set_base(self, base_uri):
        """Sets the base_uri of the document to the given URI.

        base_uri
            An instance of :py:class:`pyslet.rfc2396.URI` or an object
            that can be passed to its constructor.

        Relative file paths are resolved relative to the current working
        directory immediately and the absolute URI is recorded as the
        document's *base_uri*."""
        if base_uri is None:
            self.base_uri = None
        else:
            if isinstance(base_uri, uri.URI):
                self.base_uri = base_uri
            else:
                self.base_uri = uri.URI.from_octets(base_uri)
            if not self.base_uri.is_absolute():
                # joining with curdir gives a path *inside* the working
                # directory to resolve against
                cwd = uri.URI.from_path(
                    os.path.join(os.getcwd(), os.curdir))
                self.base_uri = self.base_uri.resolve(cwd)

    def get_base(self):
        """Returns a string representation of the document's base_uri.

        Returns None if no base_uri has been set."""
        if self.base_uri is None:
            return None
        else:
            return str(self.base_uri)

    def get_lang(self):
        """Returns the default language for the document."""
        return self.lang

    def set_lang(self, lang):
        """Sets the default language for the document."""
        self.lang = lang

    def get_space(self):
        """Returns the default space policy for the document.

        By default we return None, indicating that no policy is in
        force.  Derived documents can override this behaviour to return
        either "preserve" or "default" to affect space handling."""
        return None

    @old_method('ValidationError')
    def validation_error(self, msg, element, data=None, aname=None):
        """Called when a validation error is triggered.

        msg
            contains a brief message suitable for describing the error
            in a log file.

        element
            the element in which the validation error occurred

        data, aname
            See :meth:`Element.validation_error`.

        Prior to raising :class:`XMLValidityError` this method logs a
        suitable message at WARN level."""
        detail = "" if data is None else repr(data)
        if aname:
            # the format has four fields: message, element name,
            # attribute name and the offending data
            logging.warning("%s (in %s.%s) %s", msg, element.xmlname, aname,
                            detail)
        else:
            logging.warning("%s (in %s) %s", msg, element.xmlname, detail)
        raise XMLValidityError("%s (in %s)" % (msg, element.xmlname))

    @old_method('RegisterElement')
    def register_element(self, element):
        """Registers an element's ID

        The element is added to the internal ID table, keyed on its
        *id* attribute.  If the ID already exists
        :class:`XMLIDClashError` is raised."""
        if element.id in self.idTable:
            raise XMLIDClashError
        else:
            self.idTable[element.id] = element

    @old_method('UnregisterElement')
    def unregister_element(self, element):
        """Removes an element's ID

        If the element has an ID it is removed from the internal ID
        table.  Called prior to detaching the element from the
        document."""
        if element.id:
            del self.idTable[element.id]

    @old_method('GetElementByID')
    def get_element_by_id(self, id):
        """Returns the element with a given ID

        Returns None if the ID is not the ID of any element."""
        return self.idTable.get(id, None)

    @old_method('GetUniqueID')
    def get_unique_id(self, base_str=None):
        """Generates a random element ID that is not yet defined

        base_str
            A suggested prefix (defaults to None).  If it is not
            already in use it is returned unchanged, otherwise a
            hexadecimal suffix is added to make it unique."""
        if not base_str:
            base_str = '%X' % random.randint(0, 0xFFFF)
        id_str = base_str
        id_extra = 0
        while id_str in self.idTable:
            if not id_extra:
                # pick a random starting point for the suffix, after
                # that we just count upwards until we find a free ID
                id_extra = random.randint(0, 0xFFFF)
            id_str = '%s-%X' % (base_str, id_extra)
            id_extra = id_extra + 1
        return id_str

    @old_method('Read')
    def read(self, src=None, **kws):
        """Reads this document, parsing it from a source stream.

        With no arguments the document is read from the
        :py:attr:`base_uri` which must have been specified on
        construction or with a call to the :py:meth:`set_base` method.

        src (defaults to None)
            You can override the document's base URI by passing a value
            for *src* which may be an instance of :py:class:`XMLEntity`
            or a file-like object suitable for passing to
            :meth:`read_from_stream`.

        Raises :class:`XMLMissingLocationError` if there is no *src*
        and no base_uri."""
        if src:
            # Read from this stream, ignore base_uri
            if isinstance(src, XMLEntity):
                self.read_from_entity(src)
            else:
                self.read_from_stream(src)
        elif self.base_uri is None:
            raise XMLMissingLocationError
        else:
            with XMLEntity(self.base_uri, req_manager=self.req_manager) as e:
                self.read_from_entity(e)

    @old_method('ReadFromStream')
    def read_from_stream(self, src):
        """Reads this document from a stream

        src
            Any object that can be passed to :class:`XMLEntity`'s
            constructor.

        If you need more control, for example over encodings, you can
        create the entity yourself and use :meth:`read_from_entity`
        instead."""
        self.data = []
        e = XMLEntity(src, req_manager=self.req_manager)
        self.read_from_entity(e)

    @old_method('ReadFromEntity')
    def read_from_entity(self, e):
        """Reads this document from an entity

        e
            An :class:`XMLEntity` instance.

        The document is read from the current position in the entity.
        If the entity has a location it becomes the document's new
        base_uri."""
        self.data = []
        parser = self.XMLParser(e)
        parser.parse_document(self)
        if e.location is not None:
            # update our base_uri from the entity
            self.set_base(e.location)

    @old_method('Create')
    def create(self, dst=None, **kws):
        """Creates the Document.

        Outputs the document as an XML stream.

        dst (defaults to None)
            The stream is written to the base_uri by default but if the
            'dst' argument is provided then it is written directly to
            there instead.  dst can be any object that supports the
            writing of binary strings.

        Currently only documents with file type baseURIs are supported.
        The file's parent directories are created if required.  The file
        is always written using the UTF-8 as per the XML standard.

        Raises :class:`XMLMissingLocationError` if there is no *dst*
        and no base_uri, and :class:`XMLUnsupportedSchemeError` if the
        base_uri is not a file URL."""
        if dst:
            self.write_xml(dst)
        elif self.base_uri is None:
            raise XMLMissingLocationError
        elif isinstance(self.base_uri, uri.FileURL):
            fpath = self.base_uri.get_pathname()
            fdir = os.path.dirname(fpath)
            if fdir and not os.path.isdir(fdir):
                os.makedirs(fdir)
            with open(fpath, 'wb') as f:
                self.write_xml(f)
        else:
            raise XMLUnsupportedSchemeError(self.base_uri.scheme)

    @old_method('GenerateXML')
    def generate_xml(self, escape_function=escape_char_data, tab='\t',
                     encoding="UTF-8"):
        """A generator that yields serialised XML

        escape_function
            The function that will be used to escape character data. The
            default is :func:`escape_char_data`.  The alternate name
            *escapeFunction* is supported for backwards compatibility.

        tab (defaults to '\\t')
            Whether or not indentation will be used is determined by the
            tab parameter.  If it is empty then no pretty-printing is
            performed, otherwise elements are indented (where allowed
            by their defining classes) for ease of reading.

        encoding (defaults to "UTF-8")
            The name of the character encoding to put in the XML
            declaration.

        Yields character strings, the first string being the XML
        declaration, which names *encoding*.  This method does not
        encode the strings it yields."""
        if tab:
            yield ul('<?xml version="1.0" encoding="%s"?>') % encoding
        else:
            # without pretty-printing the declaration is terminated
            # with an explicit line feed
            yield ul('<?xml version="1.0" encoding="%s"?>\n') % encoding
        if self.root:
            for data in self.root.generate_xml(escape_function, '', tab,
                                               root=True):
                yield data

    @old_method('WriteXML')
    def write_xml(self, writer, escape_function=escape_char_data, tab='\t'):
        """Writes serialized XML to an output stream

        writer
            A file or file-like object operating in binary mode.

        The other arguments follow the same pattern as
        :meth:`generate_xml` which this method uses to create the output
        which is always UTF-8 encoded."""
        for data in self.generate_xml(escape_function, tab):
            writer.write(data.encode('utf-8'))

    @old_method('Update')
    def update(self, **kws):
        """Updates the Document.

        Update outputs the document as an XML stream.  The stream is
        written to the base_uri which must already exist!  Currently only
        documents with file type baseURIs are supported.

        Raises :class:`XMLMissingLocationError` if there is no
        base_uri, :class:`XMLMissingResourceError` if the file does not
        exist and :class:`XMLUnsupportedSchemeError` if the base_uri is
        not a file URL."""
        if self.base_uri is None:
            raise XMLMissingLocationError
        elif isinstance(self.base_uri, uri.FileURL):
            fpath = self.base_uri.get_pathname()
            if not os.path.isfile(fpath):
                raise XMLMissingResourceError(fpath)
            with open(fpath, 'wb') as f:
                self.write_xml(f)
        else:
            raise XMLUnsupportedSchemeError(self.base_uri.scheme)

    @old_method('DiffString')
    def diff_string(self, other_doc, before=10, after=5):
        """Compares XML documents

        other_doc
            Another :class:`Document` instance to compare with.

        before (default 10)
            Number of lines before the first difference to output

        after (default 5)
            Number of lines after the first difference to output

        The two documents are converted to character strings and then
        compared line by line until a difference is found.  The result
        is suitable for logging or error reporting.  Used mainly to make
        the output of unittests easier to understand.

        Returns None if no difference is found.  When one document has
        fewer lines than the other the missing lines are compared as if
        they were empty."""
        lines = str(self).split('\n')
        other_lines = str(other_doc).split('\n')
        output = []
        i = 0
        idiff = None
        # continue until *both* documents are exhausted so that a
        # document that is merely a prefix of the other is reported
        while i < len(lines) or i < len(other_lines):
            if i >= len(lines):
                line = ''
            else:
                line = lines[i]
            if i >= len(other_lines):
                other_line = ''
            else:
                other_line = other_lines[i]
            if line == other_line:
                i = i + 1
                continue
            else:
                # The strings differ from here.
                idiff = i
                break
        if idiff is None:
            return None
        # context: the lines leading up to the difference
        for i in range3(idiff - before, idiff):
            if i < 0:
                continue
            if i >= len(lines):
                line = '[%3i] **EOF**' % i
            else:
                line = '[%3i] ' % i + lines[i]
            output.append(line)
        output.append('>>>>> Showing %i lines of difference' % after)
        for i in range3(idiff, idiff + after):
            if i >= len(lines):
                line = '[%3i] **EOF**' % i
            else:
                line = '[%3i] ' % i + repr(lines[i])
            output.append(line)
        output.append('>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>')
        for i in range3(idiff, idiff + after):
            if i >= len(other_lines):
                line = '[%3i] **EOF**' % i
            else:
                line = '[%3i] ' % i + repr(other_lines[i])
            output.append(line)
        return '\n'.join(output)


class XMLDTD(MigratedClass):

    """Models a document type declaration.

    An instance is the container for the entity, notation, element type
    and attribute declarations that are used in a document."""

    def __init__(self):
        #: The declared Name of the root element
        self.name = None
        #: An :py:class:`XMLExternalID` instance (may be None)
        self.external_id = None
        #: A dictionary of XMLParameterEntity instances keyed on entity
        #: name.
        self.parameter_entities = {}
        #: A dictionary of XMLGeneralEntity instances keyed on entity
        #: name.
        self.general_entities = {}
        #: A dictionary of XMLNotation instances keyed on notation name.
        self.notations = {}
        #: A dictionary of :py:class:`ElementType` definitions keyed on
        #: the name of element.
        self.element_list = {}
        #: A dictionary of dictionaries, keyed on element name.  Each of
        #: the resulting dictionaries is a dictionary of
        #: :py:class:`XMLAttributeDefinition` keyed on attribute name.
        self.attribute_lists = {}

    def declare_entity(self, entity):
        """Records an entity declaration, keyed on the entity's name.

        entity
            Either an :py:class:`XMLGeneralEntity` or an
            :py:class:`XMLParameterEntity` instance; the type selects
            which of the two entity dictionaries is updated.

        Any existing declaration with the same name in that dictionary
        is replaced.  ValueError is raised for any other type of
        object."""
        if isinstance(entity, XMLGeneralEntity):
            registry = self.general_entities
        elif isinstance(entity, XMLParameterEntity):
            registry = self.parameter_entities
        else:
            raise ValueError
        registry[entity.name] = entity

    def get_parameter_entity(self, name):
        """Looks up a parameter entity by *name*.

        Returns the matching :py:class:`XMLParameterEntity` instance or
        None if no parameter entity has been declared with *name*."""
        return self.parameter_entities.get(name)

    def get_entity(self, name):
        """Looks up a general entity by *name*.

        Returns the matching :py:class:`XMLGeneralEntity` instance or
        None if no general entity has been declared with *name*."""
        return self.general_entities.get(name)

    def declare_notation(self, notation):
        """Records a notation declaration, keyed on the notation's name.

        notation
            A :py:class:`XMLNotation` instance.  Any existing
            declaration with the same name is replaced."""
        self.notations[notation.name] = notation

    def get_notation(self, name):
        """Looks up a notation declaration by *name*.

        name
            The name of the notation to search for.

        Returns the matching :py:class:`XMLNotation` instance or None if
        no notation has been declared with *name*."""
        return self.notations.get(name)

    def declare_element_type(self, etype):
        """Records an element type declaration.

        etype
            An :py:class:`ElementType` instance containing the element
            definition.

        The first declaration for a given name wins: if a definition is
        already stored under etype.name the call has no effect."""
        type_name = etype.name
        if self.element_list.get(type_name) is None:
            self.element_list[type_name] = etype

    def get_element_type(self, element_name):
        """Looks up an element type definition by name.

        element_name
            the name of the element type to look up

        Returns an instance of :py:class:`ElementType` or None if no
        element with that name has been declared."""
        return self.element_list.get(element_name)

    def declare_attribute(self, element_name, attr_def):
        """Records an attribute declaration for an element type.

        element_name
            the name of the element type which should have this
            attribute applied

        attr_def
            An :py:class:`XMLAttributeDefinition` instance describing
            the attribute being declared.

        The first declaration of a given attribute name for an element
        wins, later declarations of the same attribute are ignored."""
        definitions = self.attribute_lists.get(element_name)
        if definitions is None:
            # first attribute seen for this element type
            definitions = {}
            self.attribute_lists[element_name] = definitions
        if attr_def.name not in definitions:
            definitions[attr_def.name] = attr_def

    @old_method('GetAttributeList')
    def get_attribute_list(self, name):
        """Returns the attribute definitions of an element type.

        name
            The name of the element type to look up.

        The result is the dictionary of definitions keyed on attribute
        name, or None if no attributes have been declared for this
        element type."""
        return self.attribute_lists.get(name)

    def get_attribute_definition(self, element_name, attr_name):
        """Looks up a single attribute definition.

        element_name
            the name of the element type in which to search

        attr_name
            the name of the attribute to search for.

        Returns an instance of :py:class:`XMLAttributeDefinition` or
        None if no attribute matching this description has been
        declared."""
        definitions = self.attribute_lists.get(element_name)
        return definitions.get(attr_name) if definitions else None


class XMLTextDeclaration(object):

    def __init__(self, version="1.0", encoding="UTF-8"):
        """Holds the text components of an XML declaration.

        version
            The version string, defaults to "1.0".

        encoding
            The name of the encoding, defaults to "UTF-8".

        Both values are optional here, though one or other of them is
        required depending on the context in which the declaration is
        used."""
        self.version, self.encoding = version, encoding


class XMLDeclaration(XMLTextDeclaration):

    """A complete XML declaration.

    In contrast to the parent class, :py:class:`XMLTextDeclaration`,
    *version* must always be given.  *standalone* defaults to False,
    the value assumed when a document has no standalone declaration."""

    def __init__(self, version, encoding="UTF-8", standalone=False):
        XMLTextDeclaration.__init__(self, version, encoding=encoding)
        #: Whether an XML document is standalone.
        self.standalone = standalone


class ElementType(object):

    """Holds the definition of a single element type.

    Each content type constant is defined twice: the upper case name
    and an alias in the older mixed-case style."""

    #: Content type constant for EMPTY
    EMPTY = 0
    Empty = EMPTY

    #: Content type constant for ANY
    ANY = 1
    Any = ANY

    #: Content type constant for mixed content
    MIXED = 2
    Mixed = MIXED

    #: Content type constant for element content
    ELEMENT_CONTENT = 3
    ElementContent = ELEMENT_CONTENT

    #: Additional content type constant for SGML CDATA
    SGMLCDATA = 4

    def __init__(self):
        #: The entity in which this element was declared
        self.entity = None
        #: The name of this element
        self.name = None
        #: The content type of this element, one of the constants
        #: defined above.
        self.content_type = ElementType.EMPTY
        #: A :py:class:`XMLContentParticle` instance which contains the
        #: element's content model or None in the case of EMPTY or ANY
        #: declarations.
        self.content_model = None
        #: A mapping used to validate the content model during parsing.
        #: It maps the name of the first child element found to a list
        #: of :py:class:`XMLNameParticle` instances that can represent
        #: it in the content model.  For more information see
        #: :py:attr:`XMLNameParticle.particle_map`.
        self.particle_map = None

    def build_model(self):
        """Builds the internal structures that support model validation.

        Only element content and mixed content models are processed;
        for any other content type the method does nothing and
        :attr:`particle_map` is left unchanged."""
        ctype = self.content_type
        if ctype == self.ELEMENT_CONTENT:
            pmap = {}
            self.particle_map = pmap
            all_optional = not self.content_model.seek_particles(pmap)
            if all_optional:
                # the entire content model is optional so add ETag mapping
                pmap[''] = None
            self.content_model.build_particle_maps({'': None})
        elif ctype == self.MIXED:
            pmap = {}
            self.particle_map = pmap
            self.content_model.seek_particles(pmap)
            # mixed content is always optional and repeatable, so the
            # model's exit particles are its own starting particles
            pmap[''] = None
            self.content_model.build_particle_maps(pmap)

    def is_deterministic(self):
        """Tests whether the content model is deterministic.

        The test is delegated to the content model for element content
        and mixed content.  The degenerate cases (for example, elements
        declared with ANY or EMPTY) always return True."""
        if self.content_type in (self.ELEMENT_CONTENT, self.MIXED):
            return self.content_model.is_deterministic(self.particle_map)
        return True

# Constants for backwards compatibility: module-level aliases for four
# of the content type constants defined on ElementType.
XMLEmpty = ElementType.EMPTY
XMLMixedContent = ElementType.MIXED
ElementContent = ElementType.ELEMENT_CONTENT
SGMLCDATA = ElementType.SGMLCDATA


class Element(Node):

    """Base class that represents all XML elements.

    This class is usually used only as a default to represent elements
    with unknown content models or that require no special processing.
    The power of Pyslet's XML package comes when different classes are
    derived from this one to represent the different (classes of)
    elements defined by an application.  These derived classes will
    normally provide some form of custom serialisation behaviour (see
    below).

    Although derived classes are free to implement a wide range of
    python protocols they *must* always return True in truth tests. An
    implementation of __bool__ (Python 2, __nonzero__) is provided that
    does this.  This ensures that derived classes are free to implement
    __len__ but bear in mind that an instance of a derived class for
    which __len__ returns 0 *must* still evaluate to True.

    Elements compare equal if their names, attribute lists and canonical
    children all compare equal.  No rich comparison methods are provided.

    In addition to truth testing, custom attribute serialisation
    requires a custom implementation of __getattr__, see below for more
    details.

    Elements are usually constructed by calling the parent element's (or
    document's) :meth:`Node.add_child` method.  When constructed
    directly, the constructor requires that the parent :class:`Node` be
    passed as an argument.  If you pass None then an orphan element is
    created (see :meth:`attach_to_parent`).

    Some aspects of the element's XML serialisation behaviour are
    controlled by special class attributes that can be set on derived
    classes.

    XMLNAME
        The default name of the element the class represents.

    XMLCONTENT
        The default content model of the element; one of the
        :py:class:`ElementType` constants.

    You can customise attribute mappings using the following special
    class attributes.

    ID
        The name of the ID attribute if the element has a unique ID.
        With this class attribute set, ID handling is automatic (see
        :py:meth:`set_id` and :py:attr:`id` below).

    By default, attributes are simply stored as name/value character
    strings in an internal dictionary.  It is often more useful to map
    XML attributes directly onto similarly named attributes of the
    instances that represent each element.

    This mapping can be provided using class attributes of the form
    XMLATTR_aname where /aname/ is the name of the attribute as it would
    appear in the element's tag.  There are a number of forms of attribute
    mapping.

    XMLATTR_aname=<string>

        This form creates a simple mapping from the XML attribute
        'aname' to a python attribute with a defined name.  For example,
        you might want to create a mapping like this to avoid a python
        reserved word::

                XMLATTR_class="style_class"

        This allows XML elements like this::

                <element class="x"/>

        To be parsed into python objects that behave like this::

                element.style_class=="x"     # True

        If an instance is missing a python attribute corresponding to a
        defined XML attribute, or its value has been set to None, then
        the XML attribute is omitted from the element's tag when
        generating XML output.


    XMLATTR_aname=(<string>, decode_function, encode_function)

        More complex attributes can be handled by setting XMLATTR_aname
        to a tuple.  The first item is the python attribute name (as
        above); the *decode_function* is a simple callable that takes a
        string argument and returns the decoded value of the attribute
        and the *encode_function* performs the reverse transformation.

        The encode/decode functions can be None to indicate a
        no-operation.

        For example, you might want to create an integer attribute using
        something like::

                <!-- source XML -->
                <element apples="5"/>

                # class attribute definition
                XMLATTR_apples = ('n_apples', int, str)

                # the resulting object behaves like this...
                element.n_apples == 5    # True

    XMLATTR_aname=(<string>, decode_function, encode_function, type)

        When XML attribute values are parsed from tags the optional
        *type* component of the tuple descriptor can be used to indicate
        a multi-valued attribute.  For example, you might want to use a
        multi-valued mapping for XML attributes defined using one of the
        plural forms, IDREFS, ENTITIES and NMTOKENS.

        If the *type* value is not None then the XML attribute value is
        first split by white-space, as per the XML specification, and
        then the decode function is applied to each resulting component.
        The instance attribute is then set depending on the value of
        *type*:

        list
            The instance attribute becomes a list, for example::

                <!-- source XML -->
                <element primes="2 3 5 7"/>

                # class attribute definition
                XMLATTR_primes = ('primes', int, str, list)

                # resulting object behaves like this...
                element.primes == [2, 3, 5, 7]      # True

        dict
            The instance attribute becomes a dictionary mapping parsed
            values on to their frequency, for example::

                <!-- source XML -->
                <element fruit="apple pear orange pear"/>

                # class attribute definition
                XMLATTR_fruit = ('fruit', None, None, dict)

                # resulting object behaves like this...
                element.fruit == {'apple': 1, 'orange': 1, 'pear': 2}

            In this case, the decode function (if given) must return a
            hashable object!

        When serialising to XML the reverse transformations are
        performed using the encode functions and the type (plain, list
        or dict) of the attribute's *current* value.  The declared
        multi-valued type is ignored.  For dictionary values the order
        of the output values may not be the same as the order originally
        read from the XML input.

        Warning:  Empty lists and dictionaries result in XML attribute
        values that are present but with empty strings.  If you wish to
        omit these attributes in the output XML you must set the
        attribute value to None.

    Some element specifications define large numbers of optional
    attributes and it is inconvenient to write constructors to
    initialise these members in each instance and possibly wasteful
    of memory if a document contains large numbers of such elements.

    To obviate the need for optional attributes to be present in every
    instance an implementation of __getattr__ is provided that will
    ensure that element.aname returns None if 'aname' is the target of
    an attribute mapping rule, regardless of whether or not the
    attribute has actually been set for the instance.

        Implementation note: internally, the XMLATTR_* descriptors are
        parsed into two mappings the first time they are needed.  The
        forward map maps XML attribute names onto tuples of:

            (<python attribute name>, decode_function, type)

        The reverse map maps python attribute names onto a tuple of:

            (<xml attribute name>, encode_function)

    XML attribute names may contain many characters that are not legal
    in Python syntax but automated attribute processing is still
    supported for these attributes even though the declaration cannot be
    written into the class definition.  Use the builtin function setattr
    immediately after the class is defined, for example::

        class MyElement(Element):
            pass

        setattr(MyElement, 'XMLATTR_hyphen-attr', 'hyphen_attr')"""

    #: The default content model, one of the :py:class:`ElementType`
    #: constants.  We default to a mixed content model.
    XMLCONTENT = ElementType.MIXED

    def __init__(self, parent, name=None):
        """Creates a new element

        parent
            The parent :class:`Node`, passed on to the base class
            constructor.

        name
            Deprecated.  When given, a warning is issued, the current
            stack is printed and the name is assigned directly.
            Otherwise the name is taken from the class attribute
            XMLNAME, or set to None if the class does not define it."""
        super(Element, self).__init__(parent)
        if name is not None:
            message = (
                "Element: passing name to constructor is deprecated (%s); "
                "use set_xmlname instead" % name)
            warnings.warn(message)
            # show the caller where the deprecated usage came from
            import traceback
            traceback.print_stack()
            self.xmlname = name
        elif hasattr(self.__class__, 'XMLNAME'):
            self.set_xmlname(self.XMLNAME)
        else:
            self.set_xmlname(None)
        self.id = None
        # attributes with no mapping, stored as name/value strings
        self._attrs = {}
        self._children = []

    def __bool__(self):
        """Elements are always True in truth tests

        Defining this explicitly means that truth tests never fall back
        on __len__ and it reduces spurious calls to __getattr__."""
        return True

    # Python 2 spelling of the truth test protocol
    __nonzero__ = __bool__

    @old_method('SetXMLName')
    def set_xmlname(self, name):
        """Assigns the element's name

        name
            A character string.

        This is called automatically when children are created so you
        will not normally need to call it yourself."""
        self.xmlname = name

    @old_method('GetXMLName')
    def get_xmlname(self):
        """Returns the element's name

        The default implementation returns the value stored by
        :meth:`set_xmlname`, a simple character string."""
        return self.xmlname

    @old_method('GetDocument')
    def get_document(self):
        """Returns the document that contains this element.

        The search is delegated up the chain of parents.  None is
        returned if the element is an orphan or is the descendent of an
        orphan."""
        parent = self.parent
        if not parent:
            return None
        if isinstance(parent, Document):
            return parent
        # let the parent continue the search
        return parent.get_document()

    @old_method('SetID')
    def set_id(self, id):
        """Sets the id of the element

        id
            The new id, it must pass :meth:`is_valid_name` or
            :class:`XMLIDValueError` is raised.

        If the element is contained in a document then it is
        unregistered under its old id and registered again under the
        new one.  If the id is already taken then
        :class:`XMLIDClashError` is raised."""
        if not self.is_valid_name(id):
            raise XMLIDValueError(id)
        doc = self.get_document()
        if not doc:
            # no enclosing document, nothing to register
            self.id = id
            return
        doc.unregister_element(self)
        self.id = id
        doc.register_element(self)

    @classmethod
    @old_method('MangleAttributeName')
    def mangle_aname(cls, name):
        r"""Returns the mangled form of an attribute name

        name
            The simple attribute name.

        The mangled name is the simple name prefixed with "XMLATTR\_".
        """
        prefix = "XMLATTR_"
        return prefix + name

    @classmethod
    @old_method('UnmangleAttributeName')
    def unmangle_aname(cls, mname):
        r"""Returns the simple form of a mangled attribute name.

        mname
            The candidate mangled name.

        A mangled attribute name starts with "XMLATTR\_", the result is
        the remainder of the string after this prefix.  If mname is not
        a mangled name, None is returned.
        """
        prefix = 'XMLATTR_'
        if not mname.startswith(prefix):
            return None
        return mname[len(prefix):]

    @classmethod
    def _remap(cls):
        """Parses the XMLATTR_* descriptors into two lookup tables

        Every class attribute with a mangled name is inspected and must
        be a string or a tuple of 3 or 4 items, anything else raises
        :class:`XMLAttributeSetter`.  The results are stored on *cls*:

        _xml_amap
            maps XML attribute names onto tuples of (python attribute
            name, decode function, type)

        _xml_armap
            maps python attribute names onto tuples of (XML attribute
            name, encode function)

        Missing encode/decode functions are replaced by a function that
        returns its argument unchanged."""
        def identity(arg):
            return arg

        forward = {}
        reverse = {}
        for mname in dir(cls):
            name = cls.unmangle_aname(mname)
            if not name:
                # not an attribute mapping declaration
                continue
            setter = getattr(cls, mname)
            if is_text(setter):
                # use simple attribute assignment
                attr_name = setter
                decoder = encoder = vtype = None
            elif isinstance(setter, tuple):
                size = len(setter)
                if size == 3:
                    attr_name, decoder, encoder = setter
                    vtype = None
                elif size == 4:
                    attr_name, decoder, encoder, vtype = setter
                else:
                    raise XMLAttributeSetter(
                        "bad XMLATTR_ definition: %s attribute of %s" %
                        (name, cls.__name__))
            else:
                raise XMLAttributeSetter(
                    "setting %s attribute of %s" %
                    (name, cls.__name__))
            encoder = identity if encoder is None else encoder
            decoder = identity if decoder is None else decoder
            if vtype not in (list, dict, None):
                raise XMLAttributeSetter(
                    "Legacy XMLATTR_ definition: %s attribute of %s" %
                    (name, cls.__name__))
            forward[name] = (attr_name, decoder, vtype)
            reverse[attr_name] = (name, encoder)
        cls._xml_amap = forward
        cls._xml_armap = reverse

    @classmethod
    def _armap(cls):
        """Returns the reverse attribute map, building it if required

        The test is made against the class's own dictionary so each
        derived class builds its own maps the first time they are
        needed rather than inheriting those of a base class."""
        if "_xml_armap" not in vars(cls):
            cls._remap()
        return cls._xml_armap

    @classmethod
    def _amap(cls):
        """Returns the forward attribute map, building it if required

        The test is made against the class's own dictionary so each
        derived class builds its own maps the first time they are
        needed rather than inheriting those of a base class."""
        if "_xml_amap" not in vars(cls):
            cls._remap()
        return cls._xml_amap

    def __getattr__(self, name):
        """Provides None as the value of unset mapped attributes

        name
            The name of the python attribute being looked up.

        If *name* is the target of an attribute mapping the result is
        None, any other name raises AttributeError."""
        if name not in self._armap():
            raise AttributeError(name)
        return None

    @old_method('GetAttributes')
    def get_attributes(self):
        """Returns a dict mapping attribute names onto values.

        Each attribute value is represented as a character string.
        Derived classes MUST override this method if they define any
        custom attribute mappings.

        The result combines the unmapped attributes, the id (if set)
        and the encoded values of all mapped python attributes that are
        not None.  List values are encoded item by item and joined with
        spaces, dictionary values are expanded according to the
        frequency of each key, sorted and then joined with spaces.

        The dictionary returned represents a copy of the information in
        the element and so may be modified by the caller."""
        attrs = copy(self._attrs)
        if self.id:
            attrs[self.__class__.ID] = self.id
        for attr_name, (name, encoder) in dict_items(self._armap()):
            value = getattr(self, attr_name, None)
            if value is None:
                # unset attributes are omitted
                continue
            if isinstance(value, list):
                value = uspace.join(encoder(item) for item in value)
            elif isinstance(value, dict):
                tokens = []
                for key, freq in dict_items(value):
                    tokens.extend([encoder(key)] * freq)
                value = uspace.join(sorted(tokens))
            else:
                value = encoder(value)
            if value is not None:
                attrs[name] = value
        return attrs

    @old_method('SetAttribute')
    def set_attribute(self, name, value):
        """Sets the value of an attribute.

        name
            The name of the attribute to set

        value
            The value of the attribute (as a character string) or None
            to remove the attribute.

        Mapped attributes are decoded and assigned to the corresponding
        python attribute: multi-valued types are split on white space
        first and None results in an empty list or dictionary.  The ID
        attribute is passed to :meth:`set_id`, all other attributes are
        stored as strings in the internal dictionary."""
        amap = self._amap()
        if name in amap:
            attr_name, decoder, vtype = amap[name]
            if vtype is list:
                tokens = [] if value is None else value.split()
                setattr(self, attr_name, [decoder(tok) for tok in tokens])
            elif vtype is dict:
                tokens = [] if value is None else value.split()
                counts = {}
                for tok in tokens:
                    item = decoder(tok)
                    counts[item] = counts.get(item, 0) + 1
                setattr(self, attr_name, counts)
            else:
                current = getattr(self, attr_name, None)
                if type(current) in (list, dict):
                    logging.error(
                        "Problem setting %s in %s: single value will overwrite"
                        " List or Dict", name, self.__class__.__name__)
                setattr(self, attr_name,
                        None if value is None else decoder(value))
            return
        cls = self.__class__
        if hasattr(cls, 'ID') and name == cls.ID:
            self.set_id(value)
        elif value is None:
            # removing an attribute that is not present is not an error
            self._attrs.pop(name, None)
        else:
            self._attrs[name] = value

    @old_method('GetAttribute')
    def get_attribute(self, name):
        """Gets the value of a single attribute as a string.

        name
            The name of the XML attribute.

        The unmapped attributes are searched first, then the ID
        attribute (in which case the current id is returned as is) and
        finally the attribute mappings.  A mapped value is encoded
        according to its mapping: list values are encoded item by item
        and joined with spaces, dictionary values are expanded
        according to the frequency of each key, sorted and then joined
        with spaces.

        KeyError is raised if *name* has no mapping or the mapped
        python attribute is missing or None."""
        if name in self._attrs:
            return self._attrs[name]
        cls = self.__class__
        if hasattr(cls, 'ID') and name == cls.ID:
            return self.id
        amap = self._amap()
        value = None
        if name in amap:
            attr_name = amap[name][0]
            value = getattr(self, attr_name, None)
        if value is None:
            raise KeyError("Attribute value undefined: %s" % repr(name))
        encoder = self._armap()[attr_name][1]
        if isinstance(value, list):
            return uspace.join(encoder(item) for item in value)
        if isinstance(value, dict):
            tokens = []
            for key, freq in dict_items(value):
                tokens.extend([encoder(key)] * freq)
            return uspace.join(sorted(tokens))
        return encoder(value)

    @old_method('IsValidName')
    def is_valid_name(self, value):
        """Tests whether a character string is a valid NAME

        value
            The character string to test.

        The default implementation simply calls the module function of
        the same name, which can also be used standalone.  Making the
        test a method allows derived classes to override it in the
        context of an element.

        Currently the test is only used when checking IDs (see
        :meth:`set_id`)"""
        # resolves to the module-level function, not this method
        return is_valid_name(value)

    @old_method('IsEmpty')
    def is_empty(self):
        """Whether this element *must* be empty.

        The content model is taken from the class attribute
        :attr:`XMLCONTENT` and the result is True only if it is
        :attr:`ElementType.EMPTY`.

        If the class does not define XMLCONTENT the result is False."""
        cls = self.__class__
        return hasattr(cls, 'XMLCONTENT') and \
            cls.XMLCONTENT == ElementType.EMPTY

    @old_method('IsMixed')
    def is_mixed(self):
        """Whether or not the element *may* contain mixed content.

        The content model is taken from the class attribute
        :attr:`XMLCONTENT` and the result is True only if it is
        :attr:`ElementType.MIXED`.

        If the class does not define XMLCONTENT the result is True."""
        cls = self.__class__
        return not hasattr(cls, 'XMLCONTENT') or \
            cls.XMLCONTENT == ElementType.MIXED

    def get_children(self):
        """Returns an iterable of the element's children.

        The default implementation iterates through the internal list
        of children only.  Derived classes with custom models (i.e.,
        those that define attributes to customise child element
        creation) MUST override this method.

        Each child is either a character string or an instance of
        Element (or a derived class thereof).  Comments, processing
        instructions and other meta-markup are not represented."""
        children = self._children
        return iter(children)

    @old_method('GetCanonicalChildren')
    def get_canonical_children(self):
        """Returns children with canonical white space

        A wrapper for :py:meth:`get_children` that returns an iterable
        of the element's children canonicalized for white space as
        follows.  We check the current setting of xml:space, returning
        the same list of children as :py:meth:`get_children` if
        'preserve' is in force.  Otherwise we remove any leading space
        and collapse all others to a single space character.

        The method is implemented as a generator.  Runs of adjacent
        character strings are joined into a single string before the
        white space is collapsed."""
        children = self.get_children()
        # If there are no children there is nothing to do: end the
        # generator with a plain return rather than letting the
        # StopIteration escape.
        try:
            first_child = next(children)
        except StopIteration:
            return
        # Search this element and then its ancestors (for as long as
        # they are Elements) for something that decides how white space
        # is to be treated.
        e = self
        while isinstance(e, Element):
            spc = e.get_space()
            if spc is not None:
                if spc == 'preserve':
                    # pass all the children through unchanged
                    yield first_child
                    try:
                        while True:
                            yield next(children)
                        # next raises StopIteration when the children
                        # are exhausted, which ends the method
                    except StopIteration:
                        return
                else:
                    # any other explicit setting ends the search, the
                    # children are collapsed below
                    break
            if hasattr(e.__class__, 'SGMLCDATA'):
                # classes that define SGMLCDATA also have their children
                # passed through unchanged
                yield first_child
                try:
                    while True:
                        yield next(children)
                except StopIteration:
                    return
            e = e.parent
        try:
            ichild = next(children)
        except StopIteration:
            # There was only one child
            if is_text(first_child):
                first_child = collapse_space(first_child)
            yield first_child
            return
        # Collapse strings to a single string entry and collapse spaces
        # data accumulates the current run of adjacent strings; smode is
        # passed to collapse_space and is True only until the first
        # element child has been yielded
        data = []
        if is_text(first_child):
            data.append(first_child)
            smode = True
        else:
            smode = False
            yield first_child
        while True:
            if is_text(ichild):
                data.append(ichild)
            else:
                # an element child ends the current run of strings
                if data:
                    data_child = collapse_space(''.join(data), smode)
                    if not smode or data_child != uspace:
                        # ignore a leading space completely
                        yield data_child
                    data = []
                yield ichild
                smode = False
            try:
                ichild = next(children)
                continue
            except StopIteration:
                # no more children, flush any trailing run of strings
                if data:
                    data_child = collapse_space(''.join(data), smode)
                    if data_child == uspace:
                        # just white space, return empty string if we're the
                        # only child for consistency
                        if smode:
                            # NOTE(review): control falls through to the
                            # final yield below so data_child is yielded
                            # as well - confirm that this is intended
                            yield uempty
                        else:
                            # strip the whole last child
                            return
                    # NOTE(review): if collapse_space can return an empty
                    # string here data_child[-1] raises IndexError -
                    # confirm against collapse_space
                    elif data_child[-1] == uspace:
                        # strip the trailing space form the last child
                        data_child = data_child[:-1]
                    yield data_child
                return

    def _find_factory(self, child_class):
        """Finds the name of the attribute used to create a child

        child_class
            The class of the child being created.

        Returns the name of *child_class* if this element has an
        attribute of that name.  Otherwise the bases of *child_class*
        are searched recursively, in order, and the first match is
        returned.  If there is no match the result is None."""
        class_name = child_class.__name__
        if hasattr(self, class_name):
            return class_name
        for base in child_class.__bases__:
            found = self._find_factory(base)
            if found:
                return found
        return None

    def get_or_add_child(self, child_class):
        """Returns the first child of type child_class

        child_class
            The class of the child to search for and, if necessary, to
            create.

        Only the immediate children are searched (the search uses
        :meth:`find_children_depth_first` with max_depth=1).  If there
        is no child of that class then a new child is added using
        :meth:`add_child` and returned instead."""
        children = self.find_children_depth_first(child_class, max_depth=1)
        try:
            # use the builtin: the iterator's next *method* exists only
            # in Python 2 (it was renamed __next__ in Python 3)
            return next(children)
        except StopIteration:
            return self.add_child(child_class)

    def add_child(self, child_class, name=None):
        """Adds a new child of the given class attached to this element.

        child_class
            A class object (or callable) used to create a new instance.
            It is called with this element as its only argument.

        name
            The name given to the element (by the caller).  If a name
            is given it is applied to the returned child using its
            set_xmlname method, otherwise the child keeps whatever name
            it already has.

        If :meth:`is_empty` returns True then :meth:`validation_error`
        is called first, processing continues if that call returns.

        Content models are customised through attributes of the parent
        (self) named after *child_class* or one of its bases, the lookup
        is done by :meth:`_find_factory`.  If no attribute matches then
        a new instance of child_class is created and appended to the
        internal list of children.

        Otherwise the value of the matching attribute determines what
        happens:

        1   None: a new instance of child_class is created and assigned
            to the attribute.

        2   A list: a new instance of child_class is created and
            appended to the list.

        3   An instance of child_class: the existing instance is reset
            (with reset_attrs=True) and returned, no new element is
            created.

        4   Deprecated: a bound method is called to create the child.
            It is called without arguments if the attribute name matches
            the name of child_class exactly, otherwise child_class is
            passed as the single argument.  A DeprecationWarning is
            issued.  If the method returns None the default handling
            (append to the internal list of children) is used instead.

        Any other attribute value is an error.  TypeError raised during
        processing (including for this case) is logged, the traceback
        is printed and a new TypeError is raised naming child_class and
        the class of this element."""
        if self.is_empty():
            self.validation_error("Unexpected child element", name)
        new_child = None
        attr_name = self._find_factory(child_class)
        try:
            if attr_name:
                current = getattr(self, attr_name)
                if isinstance(current, MethodType):
                    warnings.warn(
                        "%s.%s method-based content model is deprecated" %
                        (type(self).__name__, attr_name),
                        DeprecationWarning, stacklevel=3)
                    if attr_name == child_class.__name__:
                        new_child = current()
                    else:
                        new_child = current(child_class)
                elif current is None:
                    # single optional child, not yet present
                    new_child = child_class(self)
                    setattr(self, attr_name, new_child)
                elif isinstance(current, list):
                    # repeatable child
                    new_child = child_class(self)
                    current.append(new_child)
                elif isinstance(current, child_class):
                    # single child that already exists: recycle it
                    new_child = current
                    new_child.reset(True)
                else:
                    raise TypeError(
                        attr_name, repr(current), repr(child_class))
            if new_child is None:
                # no customised model (or the factory method returned
                # nothing): use the generic list of children
                new_child = child_class(self)
                self._children.append(new_child)
            if name:
                new_child.set_xmlname(name)
            return new_child
        except TypeError as err:
            import traceback
            logging.error("Error creating XML element: %s", err)
            traceback.print_exc()
            raise TypeError("Can't create %s in %s" %
                            (child_class.__name__, self.__class__.__name__))

    @old_method('DeleteChild')
    def remove_child(self, child):
        """Removes a child from this element's children.

        child
            An :class:`Element` instance that must be a direct child.
            That is, one that would be yielded by :meth:`get_children`.

        If :meth:`is_empty` returns True :class:`XMLUnknownChild` is
        raised immediately.

        For content model customisation the same name matching
        conventions are used as for child creation (see
        :meth:`add_child`).  If there is no matching attribute then the
        internal list of child elements is searched.  If a matching
        attribute is found its value is processed as follows:

        1   If the attribute is None then :class:`XMLUnknownChild` is
            raised.

        2   If the attribute is a list then *child* is removed from the
            list.  If *child* is not in the list
            :class:`XMLUnknownChild` is raised.

        3   If the attribute's value is *child* then the attribute is
            set to None.

        4   Any other value results in TypeError.

        Children are matched by identity, not equality.  Before a child
        is removed its detach_from_doc method is called and its parent
        is set to None."""

        def drop_from(siblings):
            # remove child (matched by identity) from the given list
            for pos, item in enumerate(siblings):
                if item is child:
                    child.detach_from_doc()
                    child.parent = None
                    del siblings[pos]
                    return
            raise XMLUnknownChild(child.xmlname)

        if self.is_empty():
            raise XMLUnknownChild(child.xmlname)
        attr_name = self._find_factory(child.__class__)
        if not attr_name:
            # no customised content model, use the generic list
            drop_from(self._children)
            return
        slot = getattr(self, attr_name)
        if slot is None:
            raise XMLUnknownChild(child.xmlname)
        if isinstance(slot, list):
            drop_from(slot)
        elif slot is child:
            # Single allowable child is replaced with None
            child.detach_from_doc()
            child.parent = None
            setattr(self, attr_name, None)
        else:
            raise TypeError("%s.%s in Element.remove_child" %
                            (type(self).__name__, attr_name))

    @old_method('FindChildren')
    def find_children(self, child_class, child_list, max=None):
        """Finds children of a given class

        **Deprecated in favour of**::

            list(e.find_children_depth_first(child_class, False))

        child_class
            A class object derived from :class:`Element`.  May also be a
            tuple as per the definition of the builtin isinstance
            function in python.

        child_list
            A list.  Matching children are appended to this, nothing is
            returned.

        max (defaults to None)
            Maximum number of children to match (None means no limit).
            The limit is checked against the length of child_list so
            any elements already present count towards the total.

        Matching children are not searched for further matches, only
        non-matching :class:`Element` children are scanned recursively.
        (Use :meth:`find_children_depth_first` for a way to return
        recursive lists of matching children.)

        The search is done depth first.  A DeprecationWarning is issued
        on every call, including the recursive ones."""
        warnings.warn(
            "Element.find_children is deprecated, use "
            "find_children_depth_first instead", DeprecationWarning,
            stacklevel=3)

        def full():
            # True once the caller's limit has been reached
            return max is not None and len(child_list) >= max

        if full():
            return
        for child in self.get_children():
            if isinstance(child, child_class):
                child_list.append(child)
            elif isinstance(child, Element):
                child.find_children(child_class, child_list, max)
            if full():
                break

    @old_method('FindChildrenBreadthFirst')
    def find_children_breadth_first(self, child_class, sub_match=True,
                                    max_depth=1000, **kws):
        """Generates all children of a given class

        child_class
            A class object derived from :class:`Element`.  May also be a
            tuple as per the definition of the builtin isinstance
            function in python.

        sub_match (defaults to True)
            Matching elements are also scanned for nested matches.  If
            False, only the outer-most matching element is returned.

        max_depth
            Controls the maximum depth of the scan with level 1
            indicating direct children only.  It must be a positive
            integer and defaults to 1000.

        The legacy keyword names *subMatch* and *maxDepth* are accepted
        and, if present, override the values of sub_match and max_depth
        respectively.

        All matching direct children are yielded first, then each
        child element is scanned in turn (recursively) for deeper
        matches.

        Warning: to reduce memory requirements when searching large
        documents this method performs a two-pass scan of the element's
        children, i.e., :py:meth:`get_children` will be called twice.

        Given that XML documents tend to be broader than they are deep
        :py:meth:`find_children_depth_first` is a better method to use
        for general purposes."""
        sub_match = kws.get('subMatch', sub_match)
        max_depth = kws.get('maxDepth', max_depth)
        # depth remaining for the scan of our children's children
        max_depth = max_depth - 1
        for child in self.get_children():
            if isinstance(child, child_class):
                yield child
        if max_depth > 0:
            for child in self.get_children():
                if isinstance(child, Element) and (
                        sub_match or not isinstance(child, child_class)):
                    # both options must be passed on, in signature
                    # order, so that the nested scan honours the
                    # caller's sub_match setting and remaining depth
                    for c in child.find_children_breadth_first(
                            child_class, sub_match, max_depth):
                        yield c

    @old_method('FindChildrenDepthFirst')
    def find_children_depth_first(self, child_class, sub_match=True,
                                  max_depth=1000, **kws):
        """Generates all children of a given class

        child_class
            A class object derived from :class:`Element`.  May also be a
            tuple as per the definition of the builtin isinstance
            function in python.

        sub_match (defaults to True)
            Matching elements are also scanned for nested matches.  If
            False, only the outer-most matching element is returned.

        max_depth
            Controls the maximum depth of the scan with level 1
            indicating direct children only.  It must be a positive
            integer and defaults to 1000.

        The legacy keyword names *subMatch* and *maxDepth* are accepted
        and, if present, override the values of sub_match and max_depth
        respectively.

        Uses a depth-first scan of the element hierarchy rooted at the
        current element: each child is tested and then, if it is an
        :class:`Element`, scanned before moving on to the next child."""
        sub_match = kws.get('subMatch', sub_match)
        max_depth = kws.get('maxDepth', max_depth)
        # depth remaining for the scan of our children's children
        max_depth = max_depth - 1
        for child in self.get_children():
            if isinstance(child, child_class):
                yield child
                if not sub_match:
                    # don't look inside a match for further matches
                    continue
            if isinstance(child, Element) and max_depth > 0:
                # both options must be passed on, in signature order,
                # so that the nested scan honours the caller's
                # sub_match setting and remaining depth
                for c in child.find_children_depth_first(
                        child_class, sub_match, max_depth):
                    yield c

    @old_method('FindParent')
    def find_parent(self, parent_class):
        """Finds the first parent of the given class.

        parent_class
            A class object descended from :class:`Element`.

        Starting from this element's :attr:`parent` the hierarchy is
        traversed upwards until an instance of parent_class is found.
        The traversal stops when there is no parent or when a parent
        that is not an :class:`Element` (and does not match) is
        reached, in which case None is returned."""
        node = self.parent
        while node:
            if isinstance(node, parent_class):
                break
            # only Element instances are followed further up the tree
            node = node.parent if isinstance(node, Element) else None
        return node

    @old_method('AttachToParent')
    def attach_to_parent(self, parent):
        """Called to attach an orphan element to a parent.

        parent
            The new parent node, assigned to :attr:`parent`.

        This method is not normally needed, when creating XML elements
        you would normally call :meth:`add_child` on the parent which
        ensures that elements are created in the context of a parent
        node.  The purpose of this method is to allow orphaned elements
        to be associated with a (new) parent.  For example, after being
        detached from one element hierarchy and attached to another.

        Raises :class:`XMLParentError` if this element already has a
        parent.

        This method does not do any special handling of child elements,
        the caller takes responsibility for ensuring that this element
        will be returned by future calls to parent.get_children().
        However, :py:meth:`attach_to_doc` is called after the parent
        has been set."""
        if not self.parent:
            self.parent = parent
            self.attach_to_doc()
        else:
            raise XMLParentError("Expected orphan")

    @old_method('AttachToDocument')
    def attach_to_doc(self, doc=None):
        """Called when the element is first attached to a document.

        doc
            The document the element is being attached to, if None
            then it is obtained from :meth:`get_document`.

        This method is not normally needed, when creating XML elements
        you would normally call :meth:`add_child` on the parent which
        ensures that elements are created in the context of a containing
        document.  The purpose of this method is to allow orphaned
        elements to be associated with a parent (document) after
        creation.  For example, after being detached from one element
        hierarchy and attached to another (possibly in a different
        document).

        If there is a document, the default implementation registers
        this element with it when :attr:`id` is set and then repeats
        the process for every :class:`Element` child, passing the same
        document down.  Without a document nothing is done."""
        if doc is None:
            doc = self.get_document()
        if not doc:
            return
        if self.id:
            doc.register_element(self)
        for child in self.get_children():
            if not isinstance(child, Element):
                # data children have nothing to register
                continue
            child.attach_to_doc(doc)

    @old_method('DetachFromParent')
    def detach_from_parent(self):
        """Called to detach an element from its parent

        The result is that this element becomes an orphan.

        This method does not do any special handling of child elements,
        the caller takes responsibility for ensuring that this element
        will no longer be returned by future calls to the (former)
        parent's :meth:`get_children` method.

        :py:meth:`detach_from_doc` is called first and then
        :attr:`parent` is set to None."""
        # NOTE(review): presumably the document is located through the
        # parent link, hence detaching from the document before the
        # link is cleared -- confirm against get_document
        self.detach_from_doc()
        self.parent = None

    @old_method('DetachFromDocument')
    def detach_from_doc(self, doc=None):
        """Called when an element is being detached from a document.

        doc
            The document the element is being detached from, if None
            then it is obtained from :meth:`get_document`.  Provided as
            an optimisation for speed when detaching large parts of the
            element hierarchy.

        If there is a document, the default implementation unregisters
        this element from it when :attr:`id` is set and then repeats
        the process for every :class:`Element` child, passing the same
        document down.  Without a document nothing is done."""
        if doc is None:
            doc = self.get_document()
        if not doc:
            return
        if self.id:
            doc.unregister_element(self)
        for child in self.get_children():
            if not isinstance(child, Element):
                # data children have nothing to unregister
                continue
            child.detach_from_doc(doc)

    @old_method('AddData')
    def add_data(self, data):
        """Adds a character string to this element's children.

        data
            The data to add, it is converted using force_text before
            use.

        If the element allows mixed content (:meth:`is_mixed`) the data
        is added to the internal list of children, it is merged with
        the last child if that child is also a string.

        Otherwise the element cannot take data children: data
        consisting entirely of white space (including empty data) is
        silently ignored, anything else is reported with
        :meth:`validation_error`."""
        data = force_text(data)
        if not self.is_mixed():
            if not all(is_s(c) for c in data):
                self.validation_error("Unexpected data", data)
            return
        kids = self._children
        if kids and is_text(kids[-1]):
            # To ease the comparison function we collapse string children
            kids[-1] = kids[-1] + data
        else:
            kids.append(data)

    @old_method('ContentChanged')
    def content_changed(self):
        """Notifies an element that its content has changed.

        Called by the parser once the element's attribute values and
        content have been parsed from the source.  Can be used to
        trigger any internal validation required following manual
        changes to the element.

        The default implementation rebuilds the internal list of
        children, replacing each run of consecutive string children
        with a single string (the concatenation of the run).  Other
        children are kept unchanged and in their original order."""
        merged = []
        run = []

        def flush():
            # move the pending run of strings (if any) into merged
            if len(run) == 1:
                merged.append(run[0])
            elif run:
                merged.append(''.join(run))
            del run[:]

        for child in self._children:
            if is_text(child):
                run.append(child)
            else:
                flush()
                merged.append(child)
        flush()
        self._children = merged

    def generate_value(self, ignore_elements=False):
        """Generates strings representing the element's content

        ignore_elements
            If True then children that are not strings are skipped.  If
            False (the default) they cause
            :class:`XMLMixedContentError` to be raised.

        A companion method to :meth:`get_value` which is useful when
        handling elements that contain a large amount of data.  For
        more information see :py:meth:`get_value`.

        :class:`XMLMixedContentError` is also raised if the element
        does not allow mixed content (:meth:`is_mixed`).  As this is a
        generator, errors are raised during iteration and not when the
        method is called."""
        if not self.is_mixed():
            raise XMLMixedContentError(self.__class__.__name__)
        for child in self.get_children():
            if not is_text(child):
                if ignore_elements:
                    continue
                raise XMLMixedContentError(str(self))
            yield force_text(child)

    @old_method('GetValue')
    def get_value(self, ignore_elements=False):
        """Returns a single object representing the element's content.

        ignore_elements
            If True then any elements found in mixed content are
            ignored.  If False then any child elements cause
            :class:`XMLMixedContentError` to be raised.

        The default implementation joins the strings generated by
        :py:meth:`generate_value` (using join_characters) and so shares
        its restrictions: it is only supported for elements where mixed
        content is permitted (:py:meth:`is_mixed`).

        Derived classes may return more complex objects, such as values
        of basic python types or class instances that better represent
        the content of the element.

        You can pass *ignore_elements* as True in the unlikely event
        that you want::

                <!-- elements like this... -->
                <data>This is <em>the</em> value</data>

                # to behave like this:
                data.get_value(True) == "This is  value" """
        pieces = self.generate_value(ignore_elements)
        return join_characters(pieces)

    @old_method('SetValue')
    def set_value(self, value):
        """Replaces the content of the element.

        value
            The new content of the element.  Anything other than None
            is converted with to_text and becomes the element's only
            child.  Derived classes may support a wider range of value
            types.

        The default implementation is only supported for elements where
        mixed content is permitted (see :py:meth:`is_mixed`), otherwise
        :class:`XMLMixedContentError` is raised.  It calls
        :meth:`reset` (leaving attributes alone) and only affects the
        internally maintained list of children.  Elements with more
        complex mixed models MUST override this method.

        If *value* is None then the element becomes empty."""
        if not self.is_mixed():
            raise XMLMixedContentError
        self.reset(False)
        self._children = [] if value is None else [to_text(value)]

    def reset(self, reset_attrs=False):
        """Resets all children (and optionally attribute values).

        reset_attrs
            Whether or not to reset attribute values too.  If True, the
            internal dictionary of attribute values is replaced with an
            empty one.

        Every :class:`Element` in the internally maintained list of
        children is detached from its document and orphaned (its parent
        is set to None), the list is then replaced with an empty one.

        Called by the default implementation of :meth:`set_value` with
        reset_attrs=False and by the default implementation of
        :meth:`add_child` with reset_attrs=True when an existing
        element instance is being recycled (obviating the constructor).

        Derived classes should call this method if they override the
        implementation of :meth:`set_value`.

        Derived classes with custom content models, i.e., those that
        provide a custom implementation for :meth:`get_children`, must
        override this method and treat it as an event associated with
        parsing the start tag of the element.  (This method is also a
        useful signal for resetting any state used for validating
        custom content models.)

        Required children should be reset and optional children should
        be orphaned using :meth:`detach_from_parent` and any references
        to them in instance attributes removed.  Failure to override
        this method can result in the child elements accumulating from
        one read to the next."""
        if reset_attrs:
            self._attrs = {}
        for orphan in (c for c in self._children if isinstance(c, Element)):
            orphan.detach_from_doc()
            orphan.parent = None
        self._children = []

    @old_method('ValidationError')
    def validation_error(self, msg, data=None, aname=None):
        """Called when a validation error occurred in this element.

        msg
            Message suitable for logging and reporting the nature of the
            error.

        data
            The data that caused the error (optional).

        aname
            An attribute name (optional), indicating that the offending
            data was in an attribute of the element and not the element
            itself.

        The default implementation passes the error to the
        validation_error method of the document returned by
        :meth:`get_document`, adding this element to the arguments.  If
        there is no document then :class:`XMLValidityError` is raised
        directly with *msg*."""
        doc = self.get_document()
        if not doc:
            raise XMLValidityError(msg)
        doc.validation_error(msg, self, data, aname)

    @staticmethod
    @old_method('SortNames')
    def sort_names(name_list):
        """Sorts names in a predictable order

        name_list
            A list of element or attribute names, it is sorted in place
            and nothing is returned.

        The default implementation assumes that the names are strings or
        unicode strings so uses the default sort method."""
        name_list.sort()

    def __eq__(self, other):
        """Compares another element with this one.

        Elements can only be compared with other :class:`Element`
        instances, NotImplemented is returned for anything else.

        Two elements are equal if they have the same xmlname, the same
        set of attribute names (as reported by :meth:`get_attributes`
        and ordered by :meth:`sort_names`) with equal values and equal
        lists of canonical children."""
        if not isinstance(other, Element):
            return NotImplemented
        if self.xmlname != other.xmlname:
            return False
        # sort and compare all attribute names, each element sorts its
        # own list of names
        my_attrs = self.get_attributes()
        my_names = list(dict_keys(my_attrs))
        self.sort_names(my_names)
        their_attrs = other.get_attributes()
        their_names = list(dict_keys(their_attrs))
        other.sort_names(their_names)
        if my_names != their_names:
            return False
        # same names on both sides, now compare the values
        for aname in my_names:
            if my_attrs[aname] != their_attrs[aname]:
                return False
        return (list(self.get_canonical_children()) ==
                list(other.get_canonical_children()))

    def __ne__(self, other):
        """Returns the negation of :meth:`__eq__`

        NotImplemented is returned if *other* is not an
        :class:`Element`."""
        if isinstance(other, Element):
            return not self.__eq__(other)
        return NotImplemented

    def __bytes__(self):
        """Returns the XML element as a binary string.

        The strings generated by :meth:`generate_xml` (called with
        escape_char_data and root=True) are each encoded with UTF-8 and
        concatenated."""
        return b''.join(
            chunk.encode('utf-8')
            for chunk in self.generate_xml(escape_char_data, root=True))

    def __unicode__(self):
        """Returns the XML element as a unicode string

        The result is the concatenation of the strings generated by
        :meth:`generate_xml` (called with escape_char_data and
        root=True)."""
        out = io.StringIO()
        out.writelines(self.generate_xml(escape_char_data, root=True))
        return out.getvalue()

    @old_method('Copy')
    def deepcopy(self, parent=None):
        """Creates a deep copy of this element.

        parent
            The parent node to attach the new element to.  If it is None
            then a new orphan element is created.

        With a parent, the copy is obtained by calling the parent's
        add_child method with this element's class and xmlname.
        Without one, the class is instantiated directly with None as
        its parent.

        Attribute values (from :meth:`get_attributes`) are set on the
        copy with set_attribute, string children are added with
        add_data and all other children are copied recursively with the
        new element as their parent.  Finally, content_changed is
        called on the copy before it is returned.

        This method mimics the process of serialisation and
        deserialisation (without the need to generate markup)."""
        twin = (parent.add_child(self.__class__, self.get_xmlname())
                if parent else self.__class__(None))
        attrs = self.get_attributes()
        for aname in dict_keys(attrs):
            twin.set_attribute(aname, attrs[aname])
        for child in self.get_children():
            if not is_text(child):
                child.deepcopy(twin)
            else:
                twin.add_data(child)
        twin.content_changed()
        return twin

    def get_base(self):
        """Returns the value of the xml:base attribute as a string.

        Returns None if the attribute has not been set on this
        element."""
        return self._attrs.get(_xml_base)

    def set_base(self, base):
        """Sets the value of the xml:base attribute from a string.

        base
            The new value, it is converted with str before being
            stored.  Use None to remove the attribute from this element.

        Changing the base of an element affects the interpretation of
        all relative URIs in this element and its children."""
        if base is not None:
            self._attrs[_xml_base] = str(base)
        else:
            self._attrs.pop(_xml_base, None)

    @old_method('ResolveBase')
    def resolve_base(self):
        """Returns the base of the current element.

        The URI is calculated by walking up the hierarchy through the
        parent links, starting with this element, and calling get_base
        on each node.  The first value found is parsed as a URI, each
        value found further up is then used as the base against which
        the result so far is resolved.

        If no base information is available None is returned.  The
        result may be a relative path if none of the values found is
        fully specified.

        The return result is always None or a character string, such as
        would be obtained from the xml:base attribute."""
        node = self
        resolved = None
        while node:
            rebase = node.get_base()
            if rebase:
                if resolved:
                    # TODO: add the current document
                    resolved = resolved.resolve(rebase)
                else:
                    resolved = uri.URI.from_octets(rebase)
            node = node.parent
        if resolved is None:
            return None
        return str(resolved)

    @old_method('ResolveURI')
    def resolve_uri(self, uriref):
        """Resolves a URI reference in the current context.

        uriref
            A :class:`pyslet.rfc2396.URI` instance or a string
            that one can be parsed from.

        The argument is resolved against the value returned by
        :meth:`resolve_base`.  The result may still be a relative URI:
        if there is no base the reference is returned unresolved (as a
        URI instance) and the base itself may only be known in relative
        terms.

        For example, if the Document was loaded from the URL::

            http://www.example.com/images/catalog.xml

        and *e* is an element in that document then::

            e.resolve_uri('smiley.gif')

        would return a URI instance representing the fully-specified
        URI::

            http://www.example.com/images/smiley.gif
        """
        if isinstance(uriref, uri.URI):
            target = uriref
        else:
            target = uri.URI.from_octets(uriref)
        base = self.resolve_base()
        return target.resolve(base) if base else target

    @old_method('RelativeURI')
    def relative_uri(self, href):
        """Returns href expressed relative to the element's base.

        href
            A :class:`pyslet.rfc2396.URI` instance or a string
            that one can be parsed from.

        If href is a relative URI then it is first converted to a
        fully specified URL by interpreting it as being the URI of a
        *file* expressed relative to the current working directory.

        For example, if the Document was loaded from the URL::

            http://www.example.com/images/catalog.xml

        and *e* is an element in that document then::

            e.relative_uri('http://www.example.com/images/smiley.gif')

        would return a URI instance representing relative URI::

            'smiley.gif'

        If :meth:`resolve_base` returns None then the fully-specified
        form of href is returned instead."""
        if isinstance(href, uri.URI):
            target = href
        else:
            target = uri.URI.from_octets(href)
        if not target.is_absolute():
            target = target.resolve(uri.URI.from_path(os.getcwd()))
        base = self.resolve_base()
        if base is None:
            return target
        return target.relative(base)

    def get_lang(self):
        """Returns the value of the xml:lang attribute as a string.

        Returns None if the attribute has not been set on this
        element."""
        return self._attrs.get(_xml_lang)

    def set_lang(self, lang):
        """Sets the value of the xml:lang attribute from a string.

        lang
            The new value or None to remove the attribute from this
            element.

        See :py:meth:`resolve_lang` for how to obtain the effective
        language of an element."""
        if lang is not None:
            self._attrs[_xml_lang] = lang
        else:
            self._attrs.pop(_xml_lang, None)

    @old_method('ResolveLang')
    def resolve_lang(self):
        """Returns the effective language for the current element.

        The hierarchy is walked upwards through the parent links,
        starting with this element, and the first non-empty value
        returned by get_lang is used.  If no xml:lang is in effect then
        None is returned."""
        scope = self
        while scope:
            lang = scope.get_lang()
            if not lang:
                scope = scope.parent
                continue
            return lang
        return None

    def get_space(self):
        """Gets the value of the xml:space attribute

        Returns None if the attribute has not been set on this
        element."""
        return self._attrs.get(_xml_space)

    @old_method('SetSpace')
    def set_space(self, space):
        """Sets the xml:space attribute

        space
            A character string containing the new value or None to
            remove the attribute definition from this element."""
        if space is not None:
            self._attrs[_xml_space] = space
        else:
            self._attrs.pop(_xml_space, None)

    def resolve_space(self, space=None):
        """Returns the effective space policy for the current element.

        space
            Ignored.  The argument has no effect on the result; it is
            now optional and is accepted only so that existing callers
            that pass a value continue to work.

        The policy is resolved using the value returned by
        :meth:`get_space` on this element or its ancestors: the first
        non-empty value found while walking up the parent chain is
        returned.  If no space policy is in effect then None is
        returned."""
        baser = self
        while baser:
            spc = baser.get_space()
            if spc:
                return spc
            # nothing declared here, try the parent
            baser = baser.parent
        return None

    @old_method('PrettyPrint')
    def can_pretty_print(self):
        """True if this element's content may be pretty-printed.

        This method is used when formatting XML files to text streams.
        The output is also affected by the xml:space attribute.  Derived
        classes can override the default behaviour.

        This method answers a different question from the xml:space
        attribute: it indicates whether white space can be safely
        *added* to the output (line feeds to break it over multiple
        lines, spaces or tabs to indent tags).  In contrast,
        xml:space='preserve' indicates that white space in the original
        document must not be taken away.  It therefore makes sense to
        return False when :meth:`get_space` returns 'preserve'.  Derived
        classes may consider providing an implementation of get_space
        that always returns 'preserve' and using the default
        implementation of this method.

        False is returned if any of the following is true:

        *   the special attribute SGMLCDATA is present

        *   the special content model attribute :attr:`XMLCONTENT`
            indicates that the element may contain mixed content (this
            is the default for generic instances of :class:`Element`)

        *   :meth:`get_space` returns 'preserve' (xml:space)

        *   the parent is an :class:`Element` and its
            can_pretty_print() returns False

        Otherwise we return True."""
        if hasattr(self, 'SGMLCDATA'):
            return False
        if self.XMLCONTENT == ElementType.MIXED:
            return False
        if self.get_space() == 'preserve':
            return False
        # only Element parents are asked; any other parent imposes no
        # restriction
        if isinstance(self.parent, Element) and \
                not self.parent.can_pretty_print():
            return False
        return True

    @old_method('WriteXMLAttributes')
    def write_xml_attributes(self, attributes,
                             escape_function=escape_char_data, root=False,
                             **kws):
        """Creates strings serialising the element's attributes

        attributes
            A list of character strings; the serialised attributes are
            appended to it in place.

        escape_function
            The function that will be used to escape character data. The
            default is :func:`escape_char_data`.  The alternate name
            *escapeFunction* is supported for backwards compatibility.

        root
            Indicates if this element should be treated as the root
            element. This implementation takes no special action but
            derived classes may need to generate additional attributes,
            such as those that relate to the namespaces or schema used
            by the element.

        One string is generated per attribute by joining the name and
        the result of calling *escape_function* on the value (with its
        second argument set to True) with '='.  The names are put in
        order with :meth:`sort_names` so that identical documents
        produce identical output."""
        escape_function = kws.get('escapeFunction', escape_function)
        attrs = self.get_attributes()
        names = list(dict_keys(attrs))
        self.sort_names(names)
        attributes.extend(
            ul('%s=%s') % (name, escape_function(attrs[name], True))
            for name in names)

    @old_method('GenerateXML')
    def generate_xml(self, escape_function=escape_char_data, indent='',
                     tab='\t', root=False, **kws):
        """A generator that yields serialised XML

        escape_function
            The function that will be used to escape character data. The
            default is :func:`escape_char_data`.  The alternate name
            *escapeFunction* is supported for backwards compatibility.

        indent (defaults to an empty string)
            The string to use for passing any inherited indent, used in
            combination with the tab parameter for pretty printing. See
            below.

        tab (defaults to '\\t')
            Whether or not indentation will be used is determined by the
            tab parameter.  If it is empty then no pretty-printing is
            performed for the element, otherwise the element will start
            with a line-feed followed by any inherited *indent* and
            finally followed by the content of *tab*.  For example, if
            you prefer to have your XML serialised with a 4-space indent
            then pass tab='    '.

            If the element is in a context where pretty printing is not
            allowed (see :meth:`can_pretty_print`) then tab is ignored.

        root (defaults to False)
            Indicates if this is the root element of the document.  See
            :meth:`write_xml_attributes`.

        Yields character strings.  An element with no children is
        yielded as a single empty-element tag; otherwise the start tag,
        the serialised content and the end tag are yielded in turn."""
        escape_function = kws.get('escapeFunction', escape_function)
        if tab:
            ws = '\n' + indent
            indent = indent + tab
        else:
            ws = ''
        if not self.can_pretty_print():
            # inline all children
            indent = ''
            tab = ''
        attributes = []
        self.write_xml_attributes(attributes, escape_function, root=root)
        if attributes:
            # the leading empty string produces the space that separates
            # the element name from the first attribute
            attributes[0:0] = ['']
            attributes = uspace.join(attributes)
        else:
            attributes = ''
        children = self.get_canonical_children()
        # Only the look-ahead for the first child is guarded: a
        # StopIteration raised while serialising the content must not be
        # mistaken for "no children", which would emit an empty-element
        # tag after the start tag had already been yielded.
        try:
            child = next(children)
        except StopIteration:
            yield ul('%s<%s%s/>') % (ws, self.xmlname, attributes)
            return
        if is_text(child) and len(child) > 0 and is_s(child[0]):
            # First character is WS, so assume pre-formatted
            indent = tab = ''
        yield ul('%s<%s%s>') % (ws, self.xmlname, attributes)
        if hasattr(self.__class__, 'SGMLCDATA'):
            # When expressed in SGML this element would have type CDATA so
            # put it in a CDSect
            yield escape_cdsect(self.get_value())
        else:
            while True:
                if is_text(child):
                    # We force encoding of carriage return as these are
                    # subject to removal
                    yield escape_function(child)
                    # if we have character data content skip closing ws
                    ws = ''
                else:
                    for s in child.generate_xml(escape_function, indent,
                                                tab):
                        yield s
                try:
                    child = next(children)
                except StopIteration:
                    break
        if not tab:
            # if we weren't tabbing children we need to skip closing white
            # space
            ws = ''
        yield ul('%s</%s>') % (ws, self.xmlname)

    @old_method('WriteXML')
    def write_xml(self, writer, escape_function=escape_char_data, indent='',
                  tab='\t', root=False, **kws):
        """Writes serialized XML to an output stream

        writer
            A file or file-like object operating in binary mode.

        The remaining arguments are handed on to :meth:`generate_xml`
        (the alternate name *escapeFunction* is supported for backwards
        compatibility).  Each string it yields is encoded as UTF-8 and
        passed to the *writer*'s write method."""
        escape_function = kws.get('escapeFunction', escape_function)
        fragments = self.generate_xml(escape_function, indent, tab, root)
        for fragment in fragments:
            writer.write(fragment.encode('utf-8'))


class XMLContentParticle(object):

    """An object for representing content particles.

    :py:meth:`build_particle_maps` and :py:meth:`seek_particles` are
    abstract here; the remaining methods are utilities that operate on
    particle maps."""

    #: Occurrence constant for particles that must appear exactly once
    ExactlyOnce = 0
    #: Occurrence constant for '?'
    ZeroOrOne = 1
    #: Occurrence constant for '*'
    ZeroOrMore = 2
    #: Occurrence constant for '+'
    OneOrMore = 3

    def __init__(self):
        #: One of the occurrence constants defined above.
        self.occurrence = XMLContentParticle.ExactlyOnce

    def build_particle_maps(self, exit_particles):
        """Abstract method that builds the particle maps for this node
        or its children.

        For more information see :py:attr:`XMLNameParticle.particle_map`.

        Only name particles have particle maps but every particle type
        receives this call so that the model can be built hierarchically
        from the root out to the terminal (name) nodes.
        *exit_particles* provides a mapping to all the following
        particles outside the part of the hierarchy rooted at the
        current node that are directly reachable from the particles
        inside."""
        raise NotImplementedError

    def seek_particles(self, pmap):
        """Adds all possible entry particles to pmap.

        Abstract method, *pmap* is a mapping from element name to a list
        of :py:class:`XMLNameParticles XMLNameParticle`.

        Returns True if a required particle was added, False if all
        particles added are optional.

        Like :py:meth:`build_particle_maps`, every particle type
        receives this call.  The mappings requested represent all
        particles inside the part of the hierarchy rooted at the current
        node that are directly reachable from the preceding particles
        outside."""
        raise NotImplementedError

    def add_particles(self, src_map, pmap):
        """A utility method that adds particles from src_map to pmap.

        Both maps are mappings from element name to a list of
        :py:class:`XMLNameParticles XMLNameParticle`. All entries in
        *src_map* not currently in *pmap* are added.  An empty name
        with no existing entry in *pmap* is recorded as None (the end
        tag sentinel) rather than as a list."""
        for name in dict_keys(src_map):
            if name in pmap:
                # merge into the entry that is already there
                known = pmap[name]
            elif name:
                # start a new list for this element name
                known = pmap[name] = []
            else:
                # add end tag sentinel
                known = pmap[name] = None
            if known is None:
                continue
            # The nested scan looks dangerous but the lists will usually
            # be 1 or 2 particles long at most - pathological cases are
            # possible but are best dealt with elsewhere (by restricting
            # the number of particles overall, say).
            for particle in src_map[name]:
                if not any(particle is other for other in known):
                    known.append(particle)

    def is_deterministic(self, pmap):
        """A utility method for identifying deterministic particle maps.

        A deterministic particle map is one in which each name maps
        uniquely to a single content particle.  A non-deterministic
        particle map contains an ambiguity, for example ((b,d)|(b,e)).
        The particle map created by :py:meth:`seek_particles` for the
        enclosing choice list would have two entries for 'b', one to map
        the first particle of the first sequence and one to the first
        particle of the second sequence.

        Although non-deterministic content models are not allowed in
        SGML they are tolerated in XML and are only flagged as
        compatibility errors.

        Returns False if any name maps to more than one particle, True
        otherwise (including for an empty or None *pmap*)."""
        if not pmap:
            return True
        for name in dict_keys(pmap):
            particles = pmap[name]
            if particles is not None and len(particles) > 1:
                return False
        return True


class XMLNameParticle(XMLContentParticle):

    """Represents a content particle for a named element"""

    def __init__(self):
        XMLContentParticle.__init__(self)
        #: the name of the element type that matches this particle
        self.name = None
        #: Each :py:class:`XMLNameParticle` has a particle map that maps
        #: the name of the 'next' element found in the content model to
        #: the list of possible :py:class:`XMLNameParticles
        #: XMLNameParticle` that represent it in the content model.
        #:
        #: The content model can be traversed using
        #: :py:class:`ContentParticleCursor`.
        self.particle_map = {}

    def build_particle_maps(self, exit_particles):
        """Rebuilds :py:attr:`particle_map` from *exit_particles*.

        A repeatable particle ('*' or '+') can be followed by itself so
        it is entered in its own map before the exit particles are
        merged in."""
        repeatable = self.occurrence in (
            XMLContentParticle.ZeroOrMore, XMLContentParticle.OneOrMore)
        self.particle_map = {}
        if repeatable:
            self.particle_map[self.name] = [self]
        self.add_particles(exit_particles, self.particle_map)

    def seek_particles(self, pmap):
        """Adds this particle to *pmap* under its own name.

        The particle is not added a second time if it is already
        listed.  Returns True if the particle is required (occurs
        exactly once or one-or-more times), False otherwise."""
        if self.name in pmap:
            listed = pmap[self.name]
            if not any(p is self for p in listed):
                listed.append(self)
        else:
            pmap[self.name] = [self]
        return self.occurrence in (
            XMLContentParticle.OneOrMore, XMLContentParticle.ExactlyOnce)

    def is_deterministic(self):
        """Tests this particle's own :py:attr:`particle_map`."""
        return XMLContentParticle.is_deterministic(self, self.particle_map)


class XMLChoiceList(XMLContentParticle):

    """Represents a choice list of content particles in the grammar"""

    def __init__(self):
        XMLContentParticle.__init__(self)
        self.children = []

    def build_particle_maps(self, exit_particles):
        """Builds the particle maps of all the children.

        Every child is given the same set of exit particles: those
        passed in *exit_particles* plus, when the list is repeatable
        ('*' or '+'), the entry points of the list itself."""
        child_exits = {}
        if self.occurrence in (XMLContentParticle.ZeroOrMore,
                               XMLContentParticle.OneOrMore):
            # repeatable element means all our entry points are also exit
            # points for our children
            self.seek_particles(child_exits)
        # Now add the exit points already provided
        self.add_particles(exit_particles, child_exits)
        # Finally, build the maps of our children
        for child in self.children:
            child.build_particle_maps(child_exits)

    def seek_particles(self, pmap):
        """Adds the entry particles of every child to *pmap*.

        Returns True only if the list itself must occur (exactly once
        or one-or-more times) and every child reported a required
        particle; a single optional child makes the whole choice
        effectively optional."""
        must_occur = self.occurrence in (
            XMLContentParticle.ExactlyOnce, XMLContentParticle.OneOrMore)
        # choice means all children are included, so every child is
        # visited even after an optional one has been found
        outcomes = [child.seek_particles(pmap) for child in self.children]
        return must_occur and all(outcomes)

    def is_deterministic(self, pmap=None):
        """Tests *pmap* (if given and non-empty) and then each child.

        Returns False as soon as an ambiguity is found, True
        otherwise."""
        if pmap and not XMLContentParticle.is_deterministic(self, pmap):
            return False
        return all(child.is_deterministic() for child in self.children)


class XMLSequenceList(XMLContentParticle):

    """Represents a sequence list of content particles in the grammar"""

    def __init__(self):
        XMLContentParticle.__init__(self)
        self.children = []

    def build_particle_maps(self, exit_particles):
        """Builds the particle maps of all the children.

        The exit particles of each child are the entry particles of the
        children that follow it, up to and including the first one that
        is required.  If every following child is optional (or there is
        none) the exit particles of the list itself are added too."""
        list_exits = {}
        if self.occurrence in (XMLContentParticle.ZeroOrMore,
                               XMLContentParticle.OneOrMore):
            # repeatable element means all our entry points are also
            # exit points
            self.seek_particles(list_exits)
        # Now add the exit points already provided
        self.add_particles(exit_particles, list_exits)
        for index, child in enumerate(self.children):
            # The exit points of child are the entry points of child+1,
            # but if child+1 is optional then we have to include
            # child+2, and so on...
            child_exits = {}
            for follower in self.children[index + 1:]:
                if follower.seek_particles(child_exits):
                    break
            else:
                # all children following in the sequence are optional
                self.add_particles(list_exits, child_exits)
            child.build_particle_maps(child_exits)

    def seek_particles(self, pmap):
        """Adds the entry particles of the sequence to *pmap*.

        Children are included in order up to and including the first
        one that reports a required particle.  Returns True if such a
        child was found and the list itself is not optional ('?' or
        '*'), False otherwise."""
        # any() stops at the first required child, later children are
        # not entry points
        if not any(child.seek_particles(pmap) for child in self.children):
            return False
        return self.occurrence not in (
            XMLContentParticle.ZeroOrOne, XMLContentParticle.ZeroOrMore)

    def is_deterministic(self, pmap=None):
        """Tests *pmap* (if given and non-empty) and then each child.

        Returns False as soon as an ambiguity is found, True
        otherwise."""
        if pmap and not XMLContentParticle.is_deterministic(self, pmap):
            return False
        return all(child.is_deterministic() for child in self.children)


class XMLAttributeDefinition(object):

    """Represents an Attribute declaration

    There is no special functionality provided by this class, instances
    hold the data members identified and the class defines a number of
    constants suitable for setting and testing them.

    Constants are defined using CAPS, mixed case versions are provided
    only for backwards compatibility."""

    #: Type constant representing CDATA
    CDATA = 0
    #: Type constant representing ID
    ID = 1
    #: Type constant representing IDREF
    IDREF = 2
    #: Type constant representing IDREFS
    IDREFS = 3
    #: Type constant representing ENTITY
    ENTITY = 4
    #: Type constant representing ENTITIES
    ENTITIES = 5
    #: Type constant representing NMTOKEN
    NMTOKEN = 6
    #: Type constant representing NMTOKENS
    NMTOKENS = 7
    #: Type constant representing NOTATION
    NOTATION = 8
    #: Type constant representing an enumeration, not defined as a
    #: keyword in the specification but representing declarations that
    #: match production [59], Enumeration.
    ENUMERATION = 9

    # mixed case aliases of the type constants (backwards compatibility)
    CData = CDATA
    IDRef = IDREF
    IDRefs = IDREFS
    Entity = ENTITY
    Entities = ENTITIES
    NmToken = NMTOKEN
    NmTokens = NMTOKENS
    Notation = NOTATION
    Enumeration = ENUMERATION

    @staticmethod
    def type_to_str(t):
        """Returns a string describing the type constant *t*

        Raises KeyError if *t* is not one of the type constants."""
        cls = XMLAttributeDefinition
        labels = {
            cls.CDATA: 'CDATA',
            cls.ID: 'ID',
            cls.IDREF: 'IDREF',
            cls.IDREFS: 'IDREFS',
            cls.ENTITY: 'ENTITY',
            cls.ENTITIES: 'ENTITIES',
            cls.NMTOKEN: 'NMTOKEN',
            cls.NMTOKENS: 'NMTOKENS',
            cls.NOTATION: 'NOTATION',
            cls.ENUMERATION: 'Enumeration',
        }
        return labels[t]

    #: Presence constant representing #IMPLIED
    IMPLIED = 0
    #: Presence constant representing #REQUIRED
    REQUIRED = 1
    #: Presence constant representing #FIXED
    FIXED = 2
    #: Presence constant representing a declared default value.  Not
    #: defined as a keyword but represents a declaration with a default
    #: value defined in production [60].
    DEFAULT = 3

    # mixed case aliases of the presence constants (backwards
    # compatibility)
    Implied = IMPLIED
    Required = REQUIRED
    Fixed = FIXED
    Default = DEFAULT

    @staticmethod
    def presence_to_str(p):
        """Returns a string describing the presence constant *p*

        Raises KeyError if *p* is not one of the presence constants."""
        cls = XMLAttributeDefinition
        labels = {
            cls.IMPLIED: '#IMPLIED',
            cls.REQUIRED: '#REQUIRED',
            cls.FIXED: '#FIXED',
            cls.DEFAULT: 'Default',
        }
        return labels[p]

    def __init__(self):
        #: the entity in which this attribute was declared
        self.entity = None
        #: the name of the attribute
        self.name = None
        #: One of the above type constants
        self.type = XMLAttributeDefinition.CDATA
        #: An optional dictionary of values
        self.values = None
        #: One of the above presence constants
        self.presence = XMLAttributeDefinition.IMPLIED
        #: An optional default value
        self.defaultValue = None


class XMLEntity(MigratedClass):

    """Represents an XML entity.

    This object serves two purposes, it acts as both the object used to
    store information about declared entities and also as a parser for
    feeding unicode characters to the main :py:class:`XMLParser`.

    src
        May be a character string, a binary string, an instance of
        :py:class:`pyslet.rfc2396.URI`, an instance of
        :py:class:`pyslet.http.client.ClientResponse` or any object
        that supports file-like behaviour (seek and read).

        If provided, the corresponding open method is called
        immediately, see :meth:`open_unicode`, :meth:`open_string`,
        :meth:`open_uri`, :meth:`open_http_response` and
        :meth:`open_file`.

    encoding
        If src is not None then this value will be passed when opening
        the entity reader.

    req_manager
        If src is a URI, passed to :meth:`open_uri`

    XMLEntity objects act as context managers, hence it is possible
    to use::

        with XMLEntity(src=URI.from_octets('mydata.xml')) as e:
            # process the entity here, will automatically close
    """

    def __init__(self, src=None, encoding=None, req_manager=None, **kws):
        # reqManager is accepted as an alternate keyword for req_manager
        req_manager = kws.get('reqManager', req_manager)
        #: the location of this entity (used as the base URI to resolve
        #: relative links).  A :class:`pyslet.rfc2396.URI` instance.
        self.location = None
        #: The mime type of the entity, if known, or None otherwise.  A
        #: :class:`pyslet.http.params.MediaType` instance.
        self.mimetype = None
        #: the encoding of the entity (text entities), e.g., 'utf-8'
        self.encoding = None
        self.data_source = None
        self.char_source = None
        self.close_source = False
        #: Flag to indicate whether or not the byte order mark was
        #: detected.  If detected the flag is set to True.  An initial
        #: byte order mark is not reported in :py:attr:`the_char` or by
        #: the :py:meth:`next_char` method.
        self.bom = False
        #: The character at the current position in the entity
        self.the_char = None
        #: The current line number within the entity (first line is line
        #: 1)
        self.line_num = None
        #: the current character position within the entity (first char
        #: is 1)
        self.line_pos = None
        #: used by :py:meth:`XMLParser.push_entity`
        self.buff_text = ul('')
        self.base_pos = None
        self.char_seek = None
        self.chunk = None
        self.chars = ''
        self.char_pos = None
        self.ignore_lf = None
        self.flags = {}
        # open the entity straight away if we were given a source, using
        # the open method that matches its type
        if src is None:
            pass
        elif is_unicode(src):
            self.open_unicode(src)
        elif isinstance(src, uri.URI):
            self.open_uri(src, encoding, req_manager)
        elif isinstance(src, http.ClientResponse):
            self.open_http_response(src, encoding)
        elif isinstance(src, bytes):
            self.open_string(src, encoding)
        else:
            # anything else is assumed to be file-like
            self.open_file(src, encoding)

    # Class-level default: open_unicode copies this value to self.chunk
    # whereas open_string and open_file start with a chunk of 1.
    chunk_size = io.DEFAULT_BUFFER_SIZE
    """Characters are read from the data_source in chunks.

    The default chunk size is set from io.DEFAULT_BUFFER_SIZE, typically
    8KB.

    In fact, in some circumstances the entity reader starts more
    cautiously.  If the entity reader expects to read an XML or Text
    declaration, which may have an encoding declaration then it reads
    one character at a time until the declaration is complete.  This
    allows the reader to change to the encoding in the declaration
    without causing errors caused by reading too many characters using
    the wrong codec.  See :py:meth:`change_encoding` and
    :py:meth:`keep_encoding` for more information."""

    def get_name(self):
        """Returns a name to represent this entity

        The name is intended for logs and error messages.  It defaults
        to the location if set."""
        if self.location is None:
            return repr(self)
        else:
            return str(self.location)

    def is_external(self):
        """Returns True if this is an external entity.

        The default implementation returns True if *location* is not
        None, False otherwise."""
        return self.location is not None

    @old_method('Open')
    def open(self):
        """Opens the entity for reading.

        The default implementation passes :py:attr:`location` to
        :py:meth:`open_uri`.  If there is no location to open then
        NotImplementedError is raised."""
        if not self.location:
            raise NotImplementedError
        self.open_uri(self.location)

    def is_open(self):
        """Returns True if the entity is open for reading."""
        return not (self.char_source is None)

    def open_unicode(self, src):
        """Opens the entity from a unicode string.

        src
            A character (unicode) string, it is wrapped in an
            io.StringIO object that becomes the character source.

        The entity is reset ready for reading, see :meth:`reset`."""
        # a white lie to ensure that all entities have an encoding
        self.encoding = 'utf-8'
        # characters are read directly, there is no byte stream
        self.data_source = None
        self.chunk = XMLEntity.chunk_size
        reader = io.StringIO(src)
        self.char_source = reader
        self.close_source = True
        self.base_pos = reader.tell()
        self.reset()

    def open_string(self, src, encoding=None):
        """Opens the entity from a binary string.

        src
            A binary string.

        encoding
            The optional *encoding* is used to convert the string to
            unicode and defaults to None - meaning that the
            auto-detection method will be applied.

        Compared with converting the string to unicode and calling
        :py:meth:`open_unicode`, this method has the advantage that it
        creates a unicode reader object to parse the string instead of
        making a copy of it in memory."""
        self.encoding = encoding
        stream = io.BytesIO(src)
        self.data_source = stream
        self.close_source = True
        if self.encoding is None:
            self.auto_detect_encoding(stream)
        # read cautiously, one character at a time, to begin with
        self.chunk = 1
        reader_factory = codecs.getreader(self.encoding)
        self.char_source = reader_factory(stream)
        self.base_pos = self.char_source.tell()
        self.reset()

    def open_file(self, src, encoding='utf-8'):
        """Opens the entity from a file

        src
            An existing (open) binary file.

        encoding
            The optional *encoding* provides a hint as to the intended
            encoding of the data and defaults to UTF-8.  You may set
            encoding to None for a seekable file thus invoking
            auto-detection of the encoding.

        Unlike other Open* methods we do not assume that the file is
        seekable.  If the current position cannot be determined (the
        call to tell fails with io.UnsupportedOperation or any other
        IOError/OSError) the position is recorded as unknown and
        :meth:`reset` will not attempt to seek on the source."""
        self.encoding = encoding
        self.data_source = src
        if self.encoding is None:
            self.auto_detect_encoding(self.data_source)
        if self.encoding is None:
            # still no encoding: use src directly as the character
            # source, without a decoder, and label it utf-8
            self.encoding = 'utf-8'
            self.char_source = self.data_source
            self.data_source = None
        else:
            self.char_source = codecs.getreader(
                self.encoding)(self.data_source)
        # read cautiously, one character at a time, to begin with
        self.chunk = 1
        try:
            self.base_pos = self.char_source.tell()
        except (io.UnsupportedOperation, IOError, OSError):
            # io.UnsupportedOperation covers streams that declare
            # themselves unseekable; other streams may instead fail
            # with a plain IOError/OSError from tell
            self.base_pos = None
        self.reset()

    def open_uri(self, src, encoding=None, req_manager=None, **kws):
        """Opens the entity from a URI.

        src
            A :class:`pyslet.rfc2396.URI` instance of either file,
            http or https schemes.

        encoding
            The optional *encoding* provides a hint as to the intended
            encoding of the data and defaults to None, meaning that
            auto-detection is used.  For http(s) resources this
            parameter is only used if the charset cannot be read
            successfully from the HTTP headers.

        req_manager
            The optional *req_manager* allows you to pass an existing
            instance of :py:class:`pyslet.http.client.Client` for
            handling URI with http or https schemes.  (reqManager is
            supported for backwards compatibility.)

        Raises :class:`XMLMissingResourceError` if an http(s) request
        returns status 404, :class:`XMLUnexpectedHTTPResponse` for any
        other status except 200 and :class:`XMLUnsupportedSchemeError`
        if *src* is neither a file URL nor an http(s) URI.  A
        successful response is passed to :meth:`open_http_response`
        which raises NotImplementedError if it has no content type."""
        if 'reqManager' in kws:
            req_manager = kws['reqManager']
        self.location = src
        if isinstance(src, uri.FileURL):
            self.data_source = open(src.get_pathname(), 'rb')
            self.close_source = True
            self.encoding = encoding
            if self.encoding is None:
                # Given that we know we have a file we can use some
                # auto-detection logic to discover the correct encoding
                self.auto_detect_encoding(self.data_source)
            self.open_file(self.data_source, self.encoding)
        elif src.scheme.lower() in ['http', 'https']:
            if req_manager is None:
                req_manager = http.Client()
            req = http.ClientRequest(str(src))
            req.set_header('Accept', "application/xml, text/*, */*")
            req_manager.process_request(req)
            if req.status == 200:
                # open_http_response reads the mime type and charset
                # from the response, wraps the body in a byte stream
                # and opens it; there is no need to repeat those steps
                # (and open the entity a second time) here
                self.open_http_response(req.response, encoding)
            elif req.status == 404:
                raise XMLMissingResourceError(
                    str(req.status) + " " + str(req.response.reason))
            else:
                raise XMLUnexpectedHTTPResponse(
                    str(req.status) + " " + str(req.response.reason))
        else:
            raise XMLUnsupportedSchemeError

    def open_http_response(self, src, encoding='utf-8'):
        """Opens the entity from an HTTP response passed in *src*.

        src
            An :class:`pyslet.http.client.ClientResponse` instance.

        encoding
            The optional *encoding* provides a hint as to the intended
            encoding of the data and defaults to UTF-8.  This parameter
            is only used if the charset cannot be read successfully from
            the HTTP response headers.

        Raises NotImplementedError if the response has no content
        type."""
        self.encoding = encoding
        request = src.request
        # update the entity's location with the last URL used to
        # retrieve it
        self.location = request.url
        mtype = src.get_content_type()
        if mtype is None:
            raise NotImplementedError
        major_type = mtype.type.lower()
        self.mimetype = major_type + '/' + mtype.subtype.lower()
        try:
            self.encoding = mtype['charset'].decode('latin-1').lower()
        except KeyError:
            # no charset parameter, the encoding hint stands unless we
            # have a text type
            if major_type == 'text':
                # Text types default to iso-8859-1
                self.encoding = "iso-8859-1"
        self.data_source = io.BytesIO(request.res_body)
        self.close_source = True
        self.open_file(self.data_source, self.encoding)

    def reset(self):
        """Rewinds an open entity to its first character.

        The line and buffer bookkeeping is cleared and the first
        character is read again.  If there is no character source the
        entity is simply left with no current character."""
        if self.char_source is None:
            self.the_char = None
            self.base_pos = None
        else:
            if self.base_pos is not None:
                self.char_source.seek(self.base_pos)
            # any value other than None allows next_char to proceed
            self.the_char = ''
        self.chars = ''
        self.char_pos = -1
        self.char_seek = self.base_pos
        self.line_num = 1
        self.line_pos = 0
        self.ignore_lf = False
        self.next_char()
        # python handles the utf-16 BOM automatically but we have to
        # skip it for utf-8
        if self.the_char == character(0xFEFF):
            if self.encoding is not None and \
                    self.encoding.lower() == 'utf-8':
                self.next_char()

    def get_position_str(self):
        """Returns a short description of the current position.

        The result has the form 'Line <line_num>.<line_pos>', so
        character 6 of line 4 is reported as 'Line 4.6'."""
        position = (self.line_num, self.line_pos)
        return "Line %i.%i" % position

    def next_char(self):
        """Advances to the next character in an open entity.

        This method takes care of the End-of-Line handling rules for XML
        which force us to remove any CR characters and replace them with
        LF if they appear on their own or to silently drop them if they
        appear as part of a CR-LF combination.

        On return :attr:`the_char` is the new current character, or
        None if the end of the data has been reached.  Once the_char is
        None further calls do nothing.  The line number and line
        position are kept up to date; the LF that is dropped from a
        CR-LF pair does not count towards the line position."""
        if self.the_char is None:
            return
        self.char_pos = self.char_pos + 1
        self.line_pos = self.line_pos + 1
        if self.char_pos >= len(self.chars):
            # buffer exhausted: remember where the next chunk starts
            # (when positions are being tracked) and refill
            if self.char_seek is not None:
                self.char_seek = self.char_source.tell()
            self.chars = self.char_source.read(self.chunk)
            self.char_pos = 0
        if self.char_pos >= len(self.chars):
            # the refill returned nothing: end of data
            self.the_char = None
        else:
            self.the_char = self.chars[self.char_pos]
            if self.the_char == '\x0D':
                # change to a line feed and ignore the next line feed
                self.the_char = '\x0A'
                self.ignore_lf = True
                self.next_line()
            elif self.the_char == '\x0A':
                if self.ignore_lf:
                    # second half of a CR-LF pair, the line end was
                    # already reported for the CR.  Undo the column
                    # increment made above so that the character after
                    # the pair is at position 1, just as it would be
                    # after a lone LF.
                    self.ignore_lf = False
                    self.line_pos = self.line_pos - 1
                    self.next_char()
                else:
                    self.next_line()
            else:
                self.ignore_lf = False

    #: Maps the leading bytes of a byte stream onto a tuple of (python
    #: codec name, number of leading bytes to skip, byte order mark
    #: flag).  auto_detect_encoding looks up the first 2, 3 or 4 bytes
    #: of the stream here so every key must be at most 4 bytes long.
    magic_table = {
        # UCS-4, big-endian machine (1234 order)
        b'\x00\x00\xfe\xff': ('utf_32_be', 4, True),
        # UCS-4, little-endian machine (4321 order)
        b'\xff\xfe\x00\x00': ('utf_32_le', 4, True),
        # UCS-4, unusual octet order (2143)
        b'\x00\x00\xff\xfe': ('utf_32', 4, True),
        # UCS-4, unusual octet order (3412)
        b'\xfe\xff\x00\x00': ('utf_32', 4, True),
        # UTF-16, big-endian
        b'\xfe\xff': ('utf_16_be', 2, True),
        # UTF-16, little-endian
        b'\xff\xfe': ('utf_16_le', 2, True),
        # UTF-8 with byte order mark
        b'\xef\xbb\xbf': ('utf-8', 3, True),
        # UCS-4 or other encoding with a big-endian 32-bit code unit
        b'\x00\x00\x00\x3c': ('utf_32_be', 0, False),
        # UCS-4 or other encoding with a little-endian 32-bit code unit
        b'\x3c\x00\x00\x00': ('utf_32_le', 0, False),
        # UCS-4 or other encoding with an unusual 32-bit code unit
        b'\x00\x00\x3c\x00': ('utf_32_le', 0, False),
        # UCS-4 or other encoding with an unusual 32-bit code unit
        b'\x00\x3c\x00\x00': ('utf_32_le', 0, False),
        # UTF-16BE or big-endian ISO-10646-UCS-2 or other encoding with a
        # 16-bit code unit
        b'\x00\x3c\x00\x3f': ('utf_16_be', 0, False),
        # UTF-16LE or little-endian ISO-10646-UCS-2 or other encoding with
        # a 16-bit code unit
        b'\x3c\x00\x3f\x00': ('utf_16_le', 0, False),
        # UTF-8, ISO 646, ASCII or similar: the bytes of '<?xm'.  The
        # key must be written with \x escapes (a bare \3 is an octal
        # escape and yields a key that can never match).  The codec is
        # spelled 'utf-8', like the BOM entry above and the fallback
        # used by auto_detect_encoding, so the detected name does not
        # depend on which path was taken.
        b'\x3c\x3f\x78\x6d': ('utf-8', 0, False),
        # EBCDIC (in some flavor): '<?xm' in EBCDIC
        b'\x4c\x6f\xa7\x94': ('cp500', 0, False)
    }

    def auto_detect_encoding(self, src_file):
        """Guesses the character encoding from the start of *src_file*

        src_file
            A file object.  The object must support seek and blocking
            read operations.  If src_file has been opened in text mode
            then no action is taken.

        Up to four bytes are read from the start of the file and looked
        up in :attr:`magic_table`.  The result is stored in
        :attr:`encoding` and :attr:`bom` and the file is left positioned
        just after any byte order mark.  Unrecognised data is assumed to
        be utf-8."""
        src_file.seek(0)
        magic = src_file.read(4)
        if is_unicode(magic):
            # text mode file: nothing to detect, just rewind
            src_file.seek(0)
            return
        # pad short files so that the slices below are always valid
        magic = magic.ljust(4, b'Q')
        prefix = magic[:2]
        if prefix in (b'\xff\xfe', b'\xfe\xff'):
            # a 16-bit byte order mark unless two zero bytes follow, in
            # which case all four bytes are looked up
            if magic[2:] != b'\x00\x00':
                magic = prefix
        elif magic[:3] == b'\xef\xbb\xbf':
            magic = magic[:3]
        detected = self.magic_table.get(magic, ('utf-8', 0, False))
        self.encoding, seek_pos, self.bom = detected
        src_file.seek(seek_pos)

    #: Maps lower-cased encoding names onto a tuple of (python codec
    #: name, keep existing flag).  change_encoding looks declared names
    #: up here; when the flag is True it leaves the character source
    #: that is already in use untouched.
    encoding_aliases = {
        # not strictly true as UTF-16 includes surrogate processing
        'iso-10646-ucs-2': ('utf_16', True),
        'iso-10646-ucs-4': ('utf_32', True),
        'utf-16': ('utf_16', True),
        'utf-32': ('utf_32', True),
        # for compatibility with some older XML documents
        'cn-big5': ('big5', False),
        'cn-gb2312': ('gb2312', False)
    }

    def change_encoding(self, encoding):
        """Changes the character encoding used for this entity.

        encoding
            The name of the declared encoding, matched
            case-insensitively against :attr:`encoding_aliases`.

        In many cases we can only guess at the encoding used in a file
        or other byte stream.  However, XML has a mechanism for
        declaring the encoding as part of the XML or Text declaration.
        This declaration can typically be parsed even if the encoding
        has been guessed incorrectly initially. This method allows the
        XML parser to notify the entity that a new encoding has been
        declared and that future characters should be interpreted with
        this new encoding.  (There are some situations where the request
        is ignored, such as when the encoding has already been detected
        to be UCS-2 or UCS-4 or when the source stream is not seekable.)

        If re-reading the current buffer with the new encoding does not
        yield a character at the current position (for example, because
        the data ends immediately after the declaration) then
        :attr:`the_char` is set to None.

        You can only change the encoding once.  This method calls
        :py:meth:`keep_encoding` once the encoding has been changed."""
        if self.data_source:
            lencoding = encoding.lower()
            if lencoding in self.encoding_aliases:
                encoding, keep_existing = self.encoding_aliases[lencoding]
            else:
                keep_existing = False
            # Sometimes we'll change encoding but want to stick with
            # what we have. For the ucs-2 and ucs-4 encodings it is
            # impossible for us to have got to the point of parsing a
            # declaration without knowing if we're using LE or BE in the
            # stream.  Given that these encodings map to the python
            # UTF-16 and UTF-32 we don't want to reset the stream
            # because that will force BOM detection and we may not have
            # a BOM to detect.
            if not keep_existing and self.char_seek is not None:
                self.encoding = encoding
                # Need to rewind and re-read the current buffer
                self.char_source.seek(self.char_seek)
                self.char_source = codecs.getreader(
                    self.encoding)(self.data_source)
                self.chars = self.char_source.read(len(self.chars))
                # We assume that char_pos will still point to the correct
                # next character.  If the buffer is empty or too short
                # there is no such character; treat that as the end of
                # the data rather than failing with IndexError.
                try:
                    self.the_char = self.chars[self.char_pos]
                except IndexError:
                    self.the_char = None
        self.keep_encoding()

    @old_method('KeepEncoding')
    def keep_encoding(self):
        """Fixes the character encoding used in the entity.

        This entity parser starts in a cautious mode, parsing the entity
        one character at a time to avoid errors caused by buffering with
        the wrong encoding.  This method should be called once the
        encoding is determined so that the entity parser can use its
        internal character buffer.

        The read size used by next_char (:attr:`chunk`) is set to
        XMLEntity.chunk_size."""
        self.chunk = XMLEntity.chunk_size

    def next_line(self):
        """Records the start of a new line.

        The line count goes up by one and the position within the line
        goes back to zero.  :py:meth:`next_char` calls this whenever it
        reads a line end so there is not normally any need to call it
        directly."""
        self.line_pos = 0
        self.line_num += 1

    def close(self):
        """Closes the entity.

        The character source and the data source are closed and
        discarded, in that order, but only if :attr:`close_source` is
        set.  In all cases the entity is left with no current character
        and no line information."""
        if self.close_source:
            for attr_name in ('char_source', 'data_source'):
                stream = getattr(self, attr_name)
                if stream is not None:
                    stream.close()
                    setattr(self, attr_name, None)
            self.close_source = False
        self.the_char = self.line_num = self.line_pos = None

    def __enter__(self):
        """Context manager entry: returns the entity itself."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Context manager exit: closes the entity.

        Nothing is returned so an exception raised inside the with
        block is not suppressed."""
        self.close()


class XMLDeclaredEntity(XMLEntity):

    """Abstract class representing a declared entity.

    name
        An optional string used as the name of the entity

    definition
        Either a string, for an internal entity, or an instance of
        :class:`XMLExternalID`, for an external entity."""

    def __init__(self, name=None, definition=None):
        XMLEntity.__init__(self)
        #: the name passed to the constructor
        self.name = name
        #: the definition passed to the constructor
        self.definition = definition
        #: the entity in which this entity was declared
        self.entity = None

    def get_name(self):
        """Returns a name suitable for logging and error reporting.

        This implementation returns :attr:`name` unchanged."""
        return self.name

    def is_external(self):
        """Returns True if the definition is an external ID."""
        return isinstance(self.definition, XMLExternalID)

    def open(self):
        """Opens the entity for reading.

        Internal entities are opened directly from their definition
        string.

        External entities must be parsed for text declarations before
        the replacement text is encountered.  This requires a small
        amount of look-ahead which may result in some characters needing
        to be re-parsed. We pass this to future parsers using
        :py:attr:`buff_text`.

        :class:`XMLMissingResourceError` is raised for an external
        entity that has no location and :class:`XMLError` is raised if
        the definition is neither a string nor an external ID."""
        definition = self.definition
        if isinstance(definition, bytes):
            self.open_string(definition)
            return
        if is_unicode(definition):
            self.open_unicode(definition)
            return
        if not isinstance(definition, XMLExternalID):
            raise XMLError("Bad Entity Definition")
        # an external entity can only be opened from its location
        if self.location is None:
            raise XMLMissingResourceError("Unresolved External ID")
        XMLEntity.open(self)
        # Now to handle the text declaration.  The parser is imported
        # here rather than at module level (presumably to avoid a
        # circular import).
        from pyslet.xml.parser import XMLParser
        parser = XMLParser(self)
        if parser.parse_literal('<?xml'):
            parser.parse_text_decl(True)
        self.keep_encoding()
        # the parser may have looked ahead; those characters can't be
        # pushed back into the stream so they are kept in buff_text
        self.buff_text = parser._get_buff()


class XMLGeneralEntity(XMLDeclaredEntity):

    """A general entity.

    name
        An optional name

    definition
        An optional definition

    notation
        An optional notation.

    The name and definition are handled by the base class."""

    def __init__(self, name=None, definition=None, notation=None):
        XMLDeclaredEntity.__init__(self, name, definition)
        #: the notation name for external unparsed entities
        self.notation = notation

    def get_name(self):
        """Returns the name in the form of a general entity reference.

        An entity called 'copy' is reported as '&copy;'."""
        reference = "&%s;" % self.name
        return reference


class XMLParameterEntity(XMLDeclaredEntity):

    """A parameter entity.

    name
        An optional name

    definition
        An optional definition.

    Both parameters are handled by the base class, see there for more
    information."""

    def __init__(self, name=None, definition=None):
        XMLDeclaredEntity.__init__(self, name, definition)
        # a character still to be delivered once the entity's own
        # characters have run out, see open_as_pe
        self.pe_end = None

    def next_char(self):
        """Advances to the next character.

        Behaves like the base class method except that, when the end of
        the entity is reached and a trailing character is pending, that
        character is delivered (once) before the end is reported."""
        XMLEntity.next_char(self)
        if self.the_char is not None or not self.pe_end:
            return
        self.the_char, self.pe_end = self.pe_end, None

    def open_as_pe(self):
        """Opens the parameter entity in the context of a DTD.

        This special method implements the rule that the replacement
        text of a parameter entity, when included as a PE, must be
        enlarged by the attachment of a leading and trailing space.

        The leading space is added to the front of :attr:`buff_text`,
        the trailing space is delivered by :meth:`next_char` at the end
        of the entity."""
        self.open()
        self.buff_text = uspace + self.buff_text
        self.pe_end = uspace

    def get_name(self):
        """Returns the name in the form of a parameter entity reference.

        An entity called 'draft' is reported as '%draft;'."""
        reference = "%%%s;" % self.name
        return reference


class XMLExternalID(object):

    """An external identifier that refers to an entity.

    public
        An optional public identifier

    system
        An optional system identifier

    At least one of the two identifiers should be supplied."""

    def __init__(self, public=None, system=None):
        #: the public identifier, may be None
        self.public = public
        #: the system identifier, may be None
        self.system = system

    def get_location(self, base=None):
        """Returns an absolute URI for the external entity.

        The system identifier is turned into a
        :py:class:`pyslet.rfc2396.URI` which is resolved against *base*
        when one is given.  A result that is still relative is resolved
        against the current working directory.

        None is returned if there is no system identifier or if no
        absolute URI could be obtained."""
        if not self.system:
            return None
        location = uri.URI.from_octets(self.system)
        if base:
            location = location.resolve(base)
        if not location.is_absolute():
            # last resort: treat it as relative to the working directory
            working_dir = uri.URI.from_path(
                os.path.join(os.getcwd(), os.curdir))
            location = location.resolve(working_dir)
        if not location.is_absolute():
            return None
        return location


class XMLNotation(object):

    """An XML notation, as defined in Section 4.7

    name
        The name of the notation

    external_id
        The :py:class:`XMLExternalID` instance that identifies the
        notation; one of its *public* or *system* identifiers must be
        provided."""

    def __init__(self, name, external_id):
        #: the external ID of the notation (an XMLExternalID instance)
        self.external_id = external_id
        #: the notation name
        self.name = name


@old_function('MapClassElements')
def map_class_elements(class_map, scope):
    """Adds element name -> class mappings to class_map

    class_map
        A dictionary that maps XML element names onto the class objects
        used to represent them.  It is updated in place.

    scope
        A dictionary, or an object with a __dict__ attribute (such as a
        module), that is scanned for class objects to add to the
        mapping.  The search is not recursive, to add classes from
        imported modules you must call map_class_elements once for each
        module.

    A mapping is added for every class found in scope that is derived
    from :class:`Element` and has an XMLNAME attribute.
    :class:`DuplicateXMLNAME` is raised if a class is found with an
    XMLNAME that is already in class_map."""
    namespace = scope if isinstance(scope, dict) else scope.__dict__
    for _, obj in dict_items(namespace):
        # only classes derived from Element are of interest...
        if not (issubclass(type(obj), type) and issubclass(obj, Element)):
            continue
        # ...and only those that declare the name they represent
        if not hasattr(obj, 'XMLNAME'):
            continue
        if obj.XMLNAME in class_map:
            existing = class_map[obj.XMLNAME]
            raise DuplicateXMLNAME(
                "%s and %s have matching XMLNAMEs" %
                (obj.__name__, existing.__name__))
        class_map[obj.XMLNAME] = obj