Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added toxml() as convenience wrapper over totext() #529

Merged
merged 5 commits into from
Dec 24, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions docs/changes.rst
Original file line number Diff line number Diff line change
@@ -1,6 +1,13 @@
Changes
=======

Version 1.7.0
-------------

* Added `toxml()` as a convenience wrapper over `totext()`.
By :user:`juarezr`, :issue:`529`.


Version 1.6.8
-------------

Expand Down
3 changes: 1 addition & 2 deletions docs/io.rst
Original file line number Diff line number Diff line change
Expand Up @@ -107,8 +107,7 @@ XML files
---------

.. autofunction:: petl.io.xml.fromxml

For writing to an XML file, see :func:`petl.io.text.totext`.
.. autofunction:: petl.io.xml.toxml


.. module:: petl.io.html
Expand Down
2 changes: 1 addition & 1 deletion petl/io/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

from petl.io.text import fromtext, totext, appendtext, teetext

from petl.io.xml import fromxml
from petl.io.xml import fromxml, toxml

from petl.io.html import tohtml, teehtml

Expand Down
197 changes: 196 additions & 1 deletion petl/io/xml.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,9 @@


# internal dependencies
from petl.util.base import Table
from petl.util.base import Table, fieldnames, iterpeek
from petl.io.sources import read_source_from_arg
from petl.io.text import totext


def fromxml(source, *args, **kwargs):
Expand Down Expand Up @@ -260,3 +261,197 @@ def _get(v):
else:
return missing
return _get


def toxml(table, target=None,
          root=None, head=None, rows=None, prologue=None, epilogue=None,
          style='tag', encoding='utf-8'):
    """
    Write the table into a new xml file according to elements defined in the
    function arguments.

    The `root`, `head` and `rows` (string, optional) arguments define the tags
    and the nesting of the xml file. Each one defines xml elements with tags
    separated by slashes (`/`) like in `root/level/tag`. They can have an
    arbitrary number of tags that will reflect in more nesting levels for the
    header or record/row written in the xml file.

    For details on tag naming and nesting rules check xml `specification`_ or
    xml `references`_.

    The `rows` argument defines the elements for each row of data to be
    written in the xml file. When specified, it must have at least 2 tags for
    defining the tags for `row/column`. Additional tags will add nesting
    enclosing all records/rows/lines.

    The `head` argument is similar to the rows, but applies only to one
    line/row of header with fieldnames. When specified, it must have at least
    2 tags for `fields/name` and the remaining will increase nesting.

    The `root` argument defines the elements enclosing `head` and `rows` and is
    required when using `head` for specifying valid xml documents.

    When none of these arguments are specified, they will default to tags that
    generate output similar to a html table:
    `root='table', head='thead/tr/th', rows='tbody/tr/td'`.

    The `prologue` argument (string, optional) could be a snippet of valid xml
    that will be inserted before other elements in the xml. It can optionally
    specify the `XML Prolog` of the file.

    The `epilogue` argument (string, optional) could be a snippet of valid xml
    that will be inserted after all other xml elements except the root closing
    tag. It must specify a closing tag if the `root` argument is not specified.

    The `style` argument selects the format of the elements in the xml file.
    It can be `tag` (default), `name`, `attribute` or a custom string to format
    each row via
    `str.format <http://docs.python.org/library/stdtypes.html#str.format>`_.

    The `target` and `encoding` arguments are handed over to
    :func:`petl.io.text.totext`; `encoding` is also written, in upper case,
    into the generated xml declaration.

    A `ValueError` is raised when `rows` is missing while one of the built-in
    styles (`tag`, `name`, `attribute`) is selected together with `root` or
    `head`, because those styles derive the row element from `rows`.

    Example usage for writing files::

        >>> import petl as etl
        >>> table1 = [['foo', 'bar'],
        ...           ['a', 1],
        ...           ['b', 2]]
        >>> etl.toxml(table1, 'example4.xml')
        >>> # see what we did is similar to a html table:
        >>> print(open('example4.xml').read())
        <?xml version="1.0" encoding="UTF-8"?>
        <table><thead>
         <tr><th>foo</th><th>bar</th></tr>
        </thead><tbody>
         <tr><td>a</td><td>1</td></tr>
         <tr><td>b</td><td>2</td></tr>
        </tbody></table>
        >>> # define the nesting in xml file:
        >>> etl.toxml(table1, 'example5.xml', rows='plan/line/cell')
        >>> print(open('example5.xml').read())
        <?xml version="1.0" encoding="UTF-8"?>
        <plan>
         <line><cell>a</cell><cell>1</cell></line>
         <line><cell>b</cell><cell>2</cell></line>
        </plan>
        >>> # choose other style:
        >>> etl.toxml(table1, 'example6.xml', rows='row/col', style='attribute')
        >>> print(open('example6.xml').read())
        <?xml version="1.0" encoding="UTF-8"?>
        <row>
         <col foo="a" bar="1" />
         <col foo="b" bar="2" />
        </row>
        >>> etl.toxml(table1, 'example6.xml', rows='row/col', style='name')
        >>> print(open('example6.xml').read())
        <?xml version="1.0" encoding="UTF-8"?>
        <row>
         <col><foo>a</foo><bar>1</bar></col>
         <col><foo>b</foo><bar>2</bar></col>
        </row>

    The `toxml()` function is just a wrapper over :func:`petl.io.text.totext`.
    For advanced cases use a template with `totext()` for generating xml files.

    .. versionadded:: 1.7.0

    .. _specification: https://www.w3.org/TR/xml/
    .. _references: https://www.w3schools.com/xml/xml_syntax.asp

    """
    if not root and not head and not rows:
        root = 'table'
        head = 'thead/tr/th'
        rows = 'tbody/tr/td'

    # The built-in styles split `rows` to find the row element; fail early
    # with a clear message instead of an AttributeError raised deep inside
    # the helpers after the table has already been peeked.
    if not rows and style in ('tag', 'name', 'attribute'):
        raise ValueError(
            "The 'rows' argument is required for style '%s' when 'root' or "
            "'head' is given" % style)

    # Peek at the start of the table to learn the field names without
    # consuming it; `table2` still yields every row.
    sample, table2 = iterpeek(table, 2)
    props = fieldnames(sample)

    top = _build_xml_header(style, props, root, head, rows, prologue, encoding)
    template = _build_cols(style, props, rows, True)
    bottom = _build_xml_footer(style, epilogue, rows, root)

    totext(table2, source=target, encoding=encoding, errors='strict',
           template=template, prologue=top, epilogue=bottom)


def _build_xml_header(style, props, root, head, rows, prologue, encoding):
    """
    Build the text written before the first data row.

    The result is made of the xml declaration, the optional `prologue`, the
    opening tags of `root`, the header row built from `head` (if given) and
    the opening tags of `rows` that enclose the data rows.

    When `prologue` itself starts with ``<?xml`` it is used in place of the
    generated declaration. Otherwise it is placed right after the declaration
    when there is no `root`, or after all opening tags when there is one.
    """
    # 'attribute' and 'name' rows use only the last tag of the path, the
    # other styles use the last two (row/col); the rest is enclosing nesting.
    depth = -1 if style in ('attribute', 'name') else -2

    root_open = _build_nesting(root, False, None) if root else ''

    head_block = ''
    if head:
        head_open = _build_nesting(head, False, depth)
        head_cells = _build_cols(style, props, head, False)
        head_close = _build_nesting(head, True, depth)
        head_block = head_open + '\n' + head_cells + head_close

    rows_open = _build_nesting(rows, False, depth)
    opening = root_open + head_block + rows_open

    if prologue and prologue.startswith('<?xml'):
        return prologue + opening + '\n'

    charset = encoding.upper() if encoding else 'UTF-8'
    declaration = '<?xml version="1.0" encoding="%s"?>' % charset

    if not prologue:
        before = after = ''
    elif root:
        before, after = '', '\n' + prologue
    else:
        before, after = prologue + '\n', ''

    return declaration + '\n' + before + opening + after + '\n'


def _build_xml_footer(style, epilogue, rows, root):
    """
    Build the text written after the last data row.

    It closes the tags opened for `rows` and `root`. The `epilogue`, when
    given, goes before the closing tags if there is a `root` and after them
    otherwise.
    """
    depth = -2
    if style in ('attribute', 'name'):
        # these styles consume only the last tag of the path per row
        depth = -1

    rows_close = _build_nesting(rows, True, depth)
    root_close = _build_nesting(root, True, 0)
    closing = rows_close + root_close

    if not epilogue:
        return closing
    if root:
        return epilogue + '\n' + closing
    return closing + '\n' + epilogue


def _build_nesting(path, closing, index):
if not path:
return ''
fmt = '</%s>' if closing else '<%s>'
if '/' not in path:
return fmt % path
parts = path.split('/')
elements = parts[0:index] if index else parts
if closing:
elements.reverse()
tags = [fmt % e for e in elements]
return ''.join(tags)


def _build_cols(style, props, path, is_value):
is_header = not is_value
if style == 'tag' or is_header:
return _build_cols_inline(props, path, is_value, True)
if style == 'name':
return _build_cols_inline(props, path, is_value, False)
if style == 'attribute':
return _build_cols_attribs(props, path)
return style # custom


def _build_cols_inline(props, path, is_value, use_tag):
parts = path.split('/')
if use_tag:
if len(parts) < 2:
raise ValueError("Tag not in format 'row/col': %s" % path)
col = parts[-1]
row = parts[-2:-1][0]
else:
col = '{0}'
row = parts[-1]
fld = '{{{0}}}' if is_value else '{0}'
fmt = '<{0}>{1}</{0}>'.format(col, fld)
cols = [fmt.format(e) for e in props]
tags = ''.join(cols)
res = ' <{0}>{1}</{0}>\n'.format(row, tags)
return res


def _build_cols_attribs(props, path):
parts = path.split('/')
row = parts[-1]
fmt = '{0}="{{{0}}}"'
cols = [fmt.format(e) for e in props]
atts = ' '.join(cols)
res = ' <{0} {1} />\n'.format(row, atts)
return res
Loading