Skip to content

Commit

Permalink
Merge pull request #89 from jimhester/feature/attached-namespace
Browse files Browse the repository at this point in the history
Attach the namespace to the xml_document when it is created
  • Loading branch information
jimhester committed May 26, 2016
2 parents 248e9fb + 44c89cf commit c93169a
Show file tree
Hide file tree
Showing 18 changed files with 381 additions and 149 deletions.
5 changes: 5 additions & 0 deletions NAMESPACE
Expand Up @@ -16,6 +16,7 @@ S3method(as.character,xml_nodeset)
S3method(as_list,xml_missing)
S3method(as_list,xml_node)
S3method(as_list,xml_nodeset)
S3method(format,xml_node)
S3method(nodeset_apply,xml_missing)
S3method(nodeset_apply,xml_node)
S3method(nodeset_apply,xml_nodeset)
Expand Down Expand Up @@ -65,6 +66,9 @@ S3method(xml_length,xml_nodeset)
S3method(xml_name,xml_missing)
S3method(xml_name,xml_node)
S3method(xml_name,xml_nodeset)
S3method(xml_ns,xml_document)
S3method(xml_ns,xml_node)
S3method(xml_ns,xml_nodeset)
S3method(xml_parent,xml_missing)
S3method(xml_parent,xml_node)
S3method(xml_parent,xml_nodeset)
Expand Down Expand Up @@ -113,6 +117,7 @@ export(xml_name)
export(xml_new_document)
export(xml_ns)
export(xml_ns_rename)
export(xml_ns_strip)
export(xml_parent)
export(xml_parents)
export(xml_path)
Expand Down
5 changes: 5 additions & 0 deletions NEWS.md
@@ -1,5 +1,10 @@
# xml2 0.1.2.9000

* `xml_read()` functions now default to passing the document's namespace
object. Namespace definitions can now be removed as well as added and
`xml_ns_strip()` added to remove all default namespaces from a document.
(@jimhester, #28, #89)

* `xml_read()` gains a `options` argument to control all available parsing
options, including `HUGE` to turn off limits for parsing very large
documents and now drops blank text nodes by default, mimicking default
Expand Down
8 changes: 0 additions & 8 deletions R/RcppExports.R
Expand Up @@ -61,10 +61,6 @@ ns_lookup <- function(doc, node, prefix) {
.Call('xml2_ns_lookup', PACKAGE = 'xml2', doc, node, prefix)
}

ns_dump <- function(node) {
.Call('xml2_ns_dump', PACKAGE = 'xml2', node)
}

node_name <- function(node, nsMap) {
.Call('xml2_node_name', PACKAGE = 'xml2', node, nsMap)
}
Expand All @@ -89,10 +85,6 @@ node_set_attr <- function(node, name, value, nsMap) {
invisible(.Call('xml2_node_set_attr', PACKAGE = 'xml2', node, name, value, nsMap))
}

node_remove_attr <- function(node, name, nsMap) {
invisible(.Call('xml2_node_remove_attr', PACKAGE = 'xml2', node, name, nsMap))
}

node_format <- function(doc, node, format = TRUE, indent = 0L) {
.Call('xml2_node_format', PACKAGE = 'xml2', doc, node, format, indent)
}
Expand Down
3 changes: 1 addition & 2 deletions R/classes.R
Expand Up @@ -150,8 +150,7 @@ format.xml_node <- function(x, ...) {
paste("<",
paste(
c(xml_name(x),
c(format_attributes(attrs)),
format_attributes(ns_dump(x$node))),
format_attributes(attrs)),
collapse = " "),
">", sep = "")
}
Expand Down
2 changes: 1 addition & 1 deletion R/xml_attr.R
Expand Up @@ -110,7 +110,7 @@ xml_attrs.xml_nodeset <- function(x, ns = character()) {
#' @export
`xml_attr<-.xml_node` <- function(x, attr, ns = character(), value) {
if (is.null(value)) {
node_remove_attr(x$node, name = attr, nsMap = ns)
node_set_attr(x$node, name = attr, nsMap = ns, "")
} else {
node_set_attr(x$node, name = attr, nsMap = ns, value)
}
Expand Down
9 changes: 8 additions & 1 deletion R/xml_children.R
Expand Up @@ -99,7 +99,14 @@ xml_length.xml_nodeset <- function(x, only_elements = TRUE) {
#' @export
#' @rdname xml_children
xml_root <- function(x) {
stopifnot(inherits(x, "xml_node") || inherits(x, "xml_document"))
stopifnot(inherits(x, c("xml_node", "xml_document", "xml_nodeset")))

if (inherits(x, "xml_nodeset")) {
if (length(x) == 0) {
return(NULL)
} else {
return(xml_root(x[[1]]))
}
}
xml_document(x$doc)
}
41 changes: 21 additions & 20 deletions R/xml_find.R
Expand Up @@ -20,6 +20,7 @@
#' \code{xml_find_num}, \code{xml_find_chr}, \code{xml_find_lgl} return
#' numeric, character and logical results respectively.
#' @export
#' @seealso \code{\link{xml_ns_strip}} to remove the default namespaces
#' @examples
#' x <- read_xml("<foo><bar><baz/></bar><baz/></foo>")
#' xml_find_all(x, ".//baz")
Expand Down Expand Up @@ -62,23 +63,23 @@
#' ')
#' xml_find_all(x, ".//f:doc")
#' xml_find_all(x, ".//f:doc", xml_ns(x))
xml_find_all <- function(x, xpath, ns = character()) {
xml_find_all <- function(x, xpath, ns = xml_ns(x)) {
UseMethod("xml_find_all")
}

#' @export
xml_find_all.xml_missing <- function(x, xpath, ns = character()) {
xml_find_all.xml_missing <- function(x, xpath, ns = xml_ns(x)) {
xml_nodeset()
}

#' @export
xml_find_all.xml_node <- function(x, xpath, ns = character()) {
xml_find_all.xml_node <- function(x, xpath, ns = xml_ns(x)) {
nodes <- xpath_search(x$node, x$doc, xpath = xpath, nsMap = ns, num_results = Inf)
xml_nodeset(nodes)
}

#' @export
xml_find_all.xml_nodeset <- function(x, xpath, ns = character()) {
xml_find_all.xml_nodeset <- function(x, xpath, ns = xml_ns(x)) {
if (length(x) == 0)
return(xml_nodeset())

Expand All @@ -91,16 +92,16 @@ xml_find_all.xml_nodeset <- function(x, xpath, ns = character()) {

#' @export
#' @rdname xml_find_all
xml_find_one <- function(x, xpath, ns = character()) {
xml_find_one <- function(x, xpath, ns = xml_ns(x)) {
UseMethod("xml_find_one")
}

xml_find_one.xml_missing <- function(x, xpath, ns = character()) {
xml_find_one.xml_missing <- function(x, xpath, ns = xml_ns(x)) {
structure(list(), class = "xml_missing")
}

#' @export
xml_find_one.xml_node <- function(x, xpath, ns = character()) {
xml_find_one.xml_node <- function(x, xpath, ns = xml_ns(x)) {
res <- xpath_search(x$node, x$doc, xpath = xpath, nsMap = ns, num_results = 1)
if (length(res) == 1) {
res[[1]]
Expand All @@ -110,7 +111,7 @@ xml_find_one.xml_node <- function(x, xpath, ns = character()) {
}

#' @export
xml_find_one.xml_nodeset <- function(x, xpath, ns = character()) {
xml_find_one.xml_nodeset <- function(x, xpath, ns = xml_ns(x)) {
if (length(x) == 0)
return(xml_nodeset())

Expand All @@ -121,12 +122,12 @@ xml_find_one.xml_nodeset <- function(x, xpath, ns = character()) {

#' @export
#' @rdname xml_find_all
xml_find_num <- function(x, xpath, ns = character()) {
xml_find_num <- function(x, xpath, ns = xml_ns(x)) {
UseMethod("xml_find_num")
}

#' @export
xml_find_num.xml_node <- function(x, xpath, ns = character()) {
xml_find_num.xml_node <- function(x, xpath, ns = xml_ns(x)) {
res <- xpath_search(x$node, x$doc, xpath = xpath, nsMap = ns, num_results = Inf)
if (!is.numeric(res)) {
stop("result of type: ", sQuote(class(res)), ", not numeric", call. = FALSE)
Expand All @@ -135,26 +136,26 @@ xml_find_num.xml_node <- function(x, xpath, ns = character()) {
}

#' @export
xml_find_num.xml_nodeset <- function(x, xpath, ns = character()) {
xml_find_num.xml_nodeset <- function(x, xpath, ns = xml_ns(x)) {
if (length(x) == 0)
return(numeric())

vapply(x, function(x) xml_find_num(x, xpath = xpath, ns = ns), numeric(1))
}

#' @export
xml_find_num.xml_missing <- function(x, xpath, ns = character()) {
xml_find_num.xml_missing <- function(x, xpath, ns = xml_ns(x)) {
numeric(0)
}

#' @export
#' @rdname xml_find_all
xml_find_chr <- function(x, xpath, ns = character()) {
xml_find_chr <- function(x, xpath, ns = xml_ns(x)) {
UseMethod("xml_find_chr")
}

#' @export
xml_find_chr.xml_node <- function(x, xpath, ns = character()) {
xml_find_chr.xml_node <- function(x, xpath, ns = xml_ns(x)) {
res <- xpath_search(x$node, x$doc, xpath = xpath, nsMap = ns, num_results = Inf)
if (!is.character(res)) {
stop("result of type: ", sQuote(class(res)), ", not character", call. = FALSE)
Expand All @@ -163,26 +164,26 @@ xml_find_chr.xml_node <- function(x, xpath, ns = character()) {
}

#' @export
xml_find_chr.xml_nodeset <- function(x, xpath, ns = character()) {
xml_find_chr.xml_nodeset <- function(x, xpath, ns = xml_ns(x)) {
if (length(x) == 0)
return(character())

vapply(x, function(x) xml_find_chr(x, xpath = xpath, ns = ns), character(1))
}

#' @export
xml_find_chr.xml_missing <- function(x, xpath, ns = character()) {
xml_find_chr.xml_missing <- function(x, xpath, ns = xml_ns(x)) {
character(0)
}

#' @export
#' @rdname xml_find_all
xml_find_lgl <- function(x, xpath, ns = character()) {
xml_find_lgl <- function(x, xpath, ns = xml_ns(x)) {
UseMethod("xml_find_lgl")
}

#' @export
xml_find_lgl.xml_node <- function(x, xpath, ns = character()) {
xml_find_lgl.xml_node <- function(x, xpath, ns = xml_ns(x)) {
res <- xpath_search(x$node, x$doc, xpath = xpath, nsMap = ns, num_results = Inf)
if (!is.logical(res)) {
stop("result of type: ", sQuote(class(res)), ", not logical", call. = FALSE)
Expand All @@ -191,14 +192,14 @@ xml_find_lgl.xml_node <- function(x, xpath, ns = character()) {
}

#' @export
xml_find_lgl.xml_nodeset <- function(x, xpath, ns = character()) {
xml_find_lgl.xml_nodeset <- function(x, xpath, ns = xml_ns(x)) {
if (length(x) == 0)
return(logical())

vapply(x, function(x) xml_find_lgl(x, xpath = xpath, ns = ns), logical(1))
}

#' @export
xml_find_lgl.xml_missing <- function(x, xpath, ns = character()) {
xml_find_lgl.xml_missing <- function(x, xpath, ns = xml_ns(x)) {
logical(0)
}
30 changes: 30 additions & 0 deletions R/xml_modify.R
Expand Up @@ -185,3 +185,33 @@ xml_new_document <- function(version = "1.0") {
doc <- doc_new(version)
structure(list(doc = doc), class = "xml_document")
}

#' Strip the default namespaces from a document
#'
#' @inheritParams xml_name
#' @examples
#' x <- read_xml(
#' "<foo xmlns = 'http://foo.com'>
#' <baz/>
#' <bar xmlns = 'http://bar.com'>
#' <baz/>
#' </bar>
#' </foo>")
#' # Need to specify the default namespaces to find the baz nodes
#' xml_find_all(x, "//d1:baz")
#' xml_find_all(x, "//d2:baz")
#'
#' # After stripping the default namespaces you can find both baz nodes directly
#' xml_ns_strip(x)
#' xml_find_all(x, "//baz")
#' @export
xml_ns_strip <- function(x) {

# //namespace::*[name()=''] finds all the namespace definition nodes with no
# prefix (default namespaces).
# What we actually want is the element node the definitions are contained in
# so return the parent (/parent::*)
namespace_element_nodes <- xml_find_all(x, "//namespace::*[name()='']/parent::*")
xml_attr(namespace_element_nodes, "xmlns") <- NULL
invisible(x)
}
17 changes: 17 additions & 0 deletions R/xml_namespaces.R
Expand Up @@ -37,6 +37,15 @@
#' str(as_list(x))
#' str(as_list(x, ns))
xml_ns <- function(x) {
UseMethod("xml_ns")
}

#' @export
xml_ns.xml_document <- function(x) {
if (length(x) == 0) {
return(character())
}

stopifnot(inherits(x, "xml_document"))
doc <- x$doc
x <- doc_namespaces(doc)
Expand All @@ -53,6 +62,14 @@ xml_ns <- function(x) {
x
}

#' @export
xml_ns.xml_node <- function(x) {
xml_ns(xml_root(x))
}

#' @export
xml_ns.xml_nodeset <- xml_ns.xml_node

#' @export
print.xml_namespace <- function(x, ...) {
prefix <- format(names(x))
Expand Down
6 changes: 1 addition & 5 deletions R/xml_text.R
Expand Up @@ -41,10 +41,6 @@ xml_text.xml_nodeset <- function(x, trim = FALSE) {
UseMethod("xml_text<-")
}

# TODO: Should this only be called on TEXT nodes? If it is used on non-text nodes it will remove child nodes from the tree.
#
# https://github.com/GNOME/libxml2/blob/e28939036281969477d3913a51c001bb7635fe54/doc/examples/xpath2.c#L163-L179

#' @export
`xml_text<-.xml_nodeset` <- function(x, value) {
# We need to do the modification in reverse order as the modification can
Expand All @@ -59,7 +55,7 @@ xml_text.xml_nodeset <- function(x, trim = FALSE) {
#' @export
`xml_text<-.xml_node` <- function(x, value) {
if (xml_type(x) != "text") {
text_child <- xml_find_one(x, ".//text()[1]")
text_child <- xml_find_one(x, ".//text()[1]", ns = character())
if (inherits(text_child, "xml_missing")) {
node_append_content(x$node, value)
} else {
Expand Down
13 changes: 8 additions & 5 deletions man/xml_find_all.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

31 changes: 31 additions & 0 deletions man/xml_ns_strip.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit c93169a

Please sign in to comment.