Switch branches/tags
Nothing to show
Find file
Fetching contributors…
Cannot retrieve contributors at this time
287 lines (244 sloc) 7.42 KB
;;; rudel-xml.el --- XML processing functions used by Rudel
;; Copyright (C) 2009, 2010 Jan Moringen
;; Author: Jan Moringen <>
;; Keywords: rudel, xml
;; X-RCS: $Id:$
;; This file is part of Rudel.
;; Rudel is free software: you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published by
;; the Free Software Foundation, either version 3 of the License, or
;; (at your option) any later version.
;; Rudel is distributed in the hope that it will be useful, but
;; WITHOUT ANY WARRANTY; without even the implied warranty of
;; General Public License for more details.
;; You should have received a copy of the GNU General Public License
;; along with Rudel. If not, see <>.
;;; Commentary:
;; Conversion functions:
;; + `xml->string'
;; + `string->xml'
;; XML Macros:
;; + `with-tag-attrs'
;; + `do-tag-children'
;; Stream parsing functions:
;; + `rudel-xml-toplevel-tag-positions'
;; + `rudel-xml-toplevel-tags'
;;; History:
;; 0.1 - Initial version
;;; Code:
(require 'xml)
;;; Miscellaneous functions
(defun xml->string (xml &optional pretty-print)
"Convert infoset XML to string representation.
PRETTY-PRINT is currently ignored."
(if pretty-print
(xml-print (list xml))
(rudel-xml-print-node xml)))
(defun string->xml (string)
"Convert STRING to XML infoset."
(insert string)
(car (xml-parse-region (point-min) (point-max)))))
;;; Additional XML macros
(defun rudel-xml--node-component (node-var name &optional type)
"Generate code for accessing the NAME component of NODE-VAR.
The optional argument TYPE is used when name is :child
or :children to specify the element name of the child."
(case name
;; Retrieve child text node of NODE-VAR.
`(car (xml-node-children ,node-var))
;; Retrieve a single child named TYPE of NODE-VAR.
(unless type
(signal 'wrong-number-of-arguments
(list 'rudel-xml--node-component name 2)))
`(car (xml-get-children ,node-var (quote,type)))
;; Retrieve a list of children, optionally filtering by NAME.
(if type
`(xml-get-children ,node-var (quote ,type))
`(xml-node-children ,node-var)
;; Retrieve an attribute value.
`(xml-get-attribute ,node-var (quote ,name))
(defun rudel-xml--parse-value (value-var type)
"Generate code to parse the value of VALUE-VAR as TYPE.
Currently, TYPE can be one of 'string and 'number."
(case type
;; String; no conversion
;; Convert to number
`(when ,value-var
(string-to-number ,value-var)))
;; For other types, signal an error.
(signal 'wrong-type-argument (list 'type type))))
(defmacro with-tag-attrs (attrs tag &rest body)
"Execute BODY with bindings of attribute values of TAG according to forms in ATTRS.
ATTRS is structured as follows:
VAR is a symbol. ATTR is a symbol whose symbol-name is used as
tag name. TYPE can be 'number."
(declare (indent 2)
(debug (listp form &rest form)))
(let* ((node-var (make-symbol "node-var"))
(lambda (attr)
;; Simple form
((symbolp attr)
`(,attr ,(car (rudel-xml--node-component
node-var attr))))
;; Variable name and attribute name
((= (length attr) 2)
(destructuring-bind (attr-var name) attr
(let ((value (car (rudel-xml--node-component
node-var name))))
`(,attr-var ,value))))
;; Variable name, attribute name and type
((= (length attr) 3)
(destructuring-bind (attr-var name type) attr
(destructuring-bind (value type-consumed)
node-var name type)
(if type-consumed
`(,attr-var ,value)
(let ((string (make-symbol "value-string")))
`(,attr-var (let ((,string ,value))
string type))))))))
;; Invalid form
;; TODO define a proper condition or use signal?
(error "Invalid tag clause: %s" attr))))
;; Construct binding forms
`(let ((,node-var ,tag))
(let (,@bindings)
(defmacro do-tag-children (var-and-tag &rest body)
"Bind a var to children of a tag, eval BODY for each binding.
VAR-AND-TAG has to be a list of the form (VAR TAG)."
(declare (indent 1)
(debug ((symbolp form) &rest form)))
(let ((var (nth 0 var-and-tag))
(tag (nth 1 var-and-tag))
(children (make-symbol "children")))
`(let ((,children (xml-node-children ,tag)))
(dolist (,var ,children)
;;; Stream-based parsing
(defun rudel-xml-toplevel-tag-positions (string)
"Return positions of top-level XML tags in STRING.
The return value is a list of cons cells. Each cell contains a
start position and an end position."
(let ((depth 0)
(tag-opening nil)
(tags nil))
(dolist (index (number-sequence 0 (- (length string) 1)))
;; Opening element
((= (aref string index) ?<)
(setq tag-opening (/= (aref string (+ index 1)) ?/))
(when (and (= depth 0)
(setq start index)))
;; Closing element
((= (aref string index) ?>)
(unless (or (= (aref string (- index 1)) ?/)
(= (aref string (- index 1)) ??))
(if tag-opening
(incf depth)
(decf depth)))
(when (= depth 0)
(push (cons start (+ index 1)) tags)))))
;; Return list of tag positions.
(nreverse tags)))
(defun rudel-xml-toplevel-tags (string)
"Parse STRING as partial XML document, return complete and partial tags."
(let ((tags (rudel-xml-toplevel-tag-positions string)))
;; Map top-level tag ranges into substrings.
(lambda (tag-range)
(substring string (car tag-range) (cdr tag-range)))
;; Add rest of the string
(if tags
(substring string (apply #'max (mapcar #'cdr tags)))
(defun rudel-xml-assemble-tags (data storage)
"Assemble complete XML tags in DATA, return list of tags and a rest.
The returned value is a list of the following form
where complete COMPLETE is a list of complete tags and INCOMPLETE
is a string containing not yet complete tags."
(destructuring-bind (tags buffer)
(rudel-xml-toplevel-tags (concat storage data))
(list tags buffer)))
;;; Utility functions
(defun rudel-xml-print-node (node)
"Serialize XML infoset NODE."
((stringp node)
(let ((name (symbol-name (xml-node-name node)))
(attributes (xml-node-attributes node))
(children (xml-node-children node)))
"<" name
(when attributes " ")
(mapconcat #'rudel-xml-print-attr attributes " ")
(if children ">" "/>")
(mapconcat #'rudel-xml-print-node children "")
(when children
(concat "</" name ">"))))))
(defun rudel-xml-print-attr (attr)
"Print XML attribute ATTR which is a cons cell."
(concat (symbol-name (car attr))
"\"" (xml-escape-string (cdr attr)) "\""))
(provide 'rudel-xml)
;;; rudel-xml.el ends here