diff --git a/NAMESPACE b/NAMESPACE index ac692aa..bf0525b 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -71,15 +71,6 @@ S3method(xml_add_parent,xml_nodeset) S3method(xml_add_sibling,xml_missing) S3method(xml_add_sibling,xml_node) S3method(xml_add_sibling,xml_nodeset) -S3method(xml_attr,xml_missing) -S3method(xml_attr,xml_node) -S3method(xml_attr,xml_nodeset) -S3method(xml_attrs,xml_missing) -S3method(xml_attrs,xml_node) -S3method(xml_attrs,xml_nodeset) -S3method(xml_double,xml_missing) -S3method(xml_double,xml_node) -S3method(xml_double,xml_nodeset) S3method(xml_find_all,xml_missing) S3method(xml_find_all,xml_node) S3method(xml_find_all,xml_nodeset) @@ -95,15 +86,6 @@ S3method(xml_find_lgl,xml_nodeset) S3method(xml_find_num,xml_missing) S3method(xml_find_num,xml_node) S3method(xml_find_num,xml_nodeset) -S3method(xml_integer,xml_missing) -S3method(xml_integer,xml_node) -S3method(xml_integer,xml_nodeset) -S3method(xml_length,xml_missing) -S3method(xml_length,xml_node) -S3method(xml_length,xml_nodeset) -S3method(xml_name,xml_missing) -S3method(xml_name,xml_node) -S3method(xml_name,xml_nodeset) S3method(xml_ns,xml_document) S3method(xml_ns,xml_missing) S3method(xml_ns,xml_node) @@ -111,9 +93,6 @@ S3method(xml_ns,xml_nodeset) S3method(xml_parent,xml_missing) S3method(xml_parent,xml_node) S3method(xml_parent,xml_nodeset) -S3method(xml_path,xml_missing) -S3method(xml_path,xml_node) -S3method(xml_path,xml_nodeset) S3method(xml_remove,xml_missing) S3method(xml_remove,xml_node) S3method(xml_remove,xml_nodeset) @@ -132,12 +111,6 @@ S3method(xml_set_attrs,xml_nodeset) S3method(xml_set_name,xml_missing) S3method(xml_set_name,xml_node) S3method(xml_set_name,xml_nodeset) -S3method(xml_text,xml_missing) -S3method(xml_text,xml_node) -S3method(xml_text,xml_nodeset) -S3method(xml_type,xml_missing) -S3method(xml_type,xml_node) -S3method(xml_type,xml_nodeset) S3method(xml_url,xml_missing) S3method(xml_url,xml_node) S3method(xml_url,xml_nodeset) diff --git a/R/xml_attr.R b/R/xml_attr.R index 1f2a2fc..804a378 100644 --- a/R/xml_attr.R +++ b/R/xml_attr.R @@ -60,31 +60,7 @@ #' xml_attrs(doc) <- c("b:id" = "one", "f:id" = "two", "id" = "three") #' xml_set_attrs(doc, c("b:id" = "one", "f:id" = "two", "id" = "three")) xml_attr <- function(x, attr, ns = character(), default = NA_character_) { - UseMethod("xml_attr") -} - -#' @export -xml_attr.xml_missing <- function(x, attr, ns = character(), default = NA_character_) { - default -} - -#' @export -xml_attr.xml_node <- function(x, attr, ns = character(), - default = NA_character_) { - .Call(node_attr, x$node, attr, as.character(default), ns) -} - -#' @export -xml_attr.xml_nodeset <- function(x, attr, ns = character(), - default = NA_character_) { - vapply( - x, - xml_attr, - attr = attr, - default = default, - ns = ns, - FUN.VALUE = character(1) - ) + .Call(node_attr, x, attr, as.character(default), ns) } #' @export @@ -96,22 +72,7 @@ xml_has_attr <- function(x, attr, ns = character()) { #' @export #' @rdname xml_attr xml_attrs <- function(x, ns = character()) { - UseMethod("xml_attrs") -} - -#' @export -xml_attrs.xml_missing <- function(x, ns = character()) { - NA_character_ -} - -#' @export -xml_attrs.xml_node <- function(x, ns = character()) { - .Call(node_attrs, x$node, nsMap = ns) -} - -#' @export -xml_attrs.xml_nodeset <- function(x, ns = character()) { - lapply(x, xml_attrs, ns = ns) + .Call(node_attrs, x, nsMap = ns) } #' @param value character vector of new value. diff --git a/R/xml_children.R b/R/xml_children.R index 56f424a..dc34ecf 100644 --- a/R/xml_children.R +++ b/R/xml_children.R @@ -100,26 +100,7 @@ xml_parent.xml_nodeset <- function(x) { #' @export #' @rdname xml_children xml_length <- function(x, only_elements = TRUE) { - UseMethod("xml_length") -} - -#' @export -xml_length.xml_missing <- function(x, only_elements = TRUE) { - 0L -} - -#' @export -xml_length.xml_node <- function(x, only_elements = TRUE) { - .Call(node_length, x$node, only_elements) -} - -#' @export -xml_length.xml_nodeset <- function(x, only_elements = TRUE) { - if (length(x) == 0) { - return(0L) - } - - vapply(x, xml_length, only_elements = only_elements, FUN.VALUE = integer(1)) + .Call(node_length, x, only_elements) } #' @export diff --git a/R/xml_name.R b/R/xml_name.R index a849be9..812a690 100644 --- a/R/xml_name.R +++ b/R/xml_name.R @@ -18,22 +18,7 @@ #' z <- xml_children(y) #' xml_name(xml_children(y)) xml_name <- function(x, ns = character()) { - UseMethod("xml_name") -} - -#' @export -xml_name.xml_missing <- function(x, ns = character()) { - NA_character_ -} - -#' @export -xml_name.xml_nodeset <- function(x, ns = character()) { - vapply(x, xml_name, ns = ns, FUN.VALUE = character(1)) -} - -#' @export -xml_name.xml_node <- function(x, ns = character()) { - .Call(node_name, x$node, ns) + .Call(node_name, x, ns) } #' Modify the (tag) name of an element diff --git a/R/xml_path.R b/R/xml_path.R index 79f34d9..7e233a5 100644 --- a/R/xml_path.R +++ b/R/xml_path.R @@ -10,20 +10,5 @@ #' x <- read_xml("") #' xml_path(xml_find_all(x, ".//baz")) xml_path <- function(x) { - UseMethod("xml_path") -} - -#' @export -xml_path.xml_missing <- function(x) { - NA_character_ -} - -#' @export -xml_path.xml_node <- function(x) { - .Call(node_path, x$node) -} - -#' @export -xml_path.xml_nodeset <- function(x) { - vapply(x, xml_path, FUN.VALUE = character(1)) + .Call(node_path, x) } diff --git a/R/xml_text.R b/R/xml_text.R index 0aad297..be2b173 100644 --- a/R/xml_text.R +++ b/R/xml_text.R @@ -22,27 +22,16 @@ #' xml_integer(xml_find_all(x, "//@x")) #' @export xml_text <- function(x, trim = FALSE) { - UseMethod("xml_text") -} - -#' @export -xml_text.xml_missing <- function(x, trim = FALSE) { - NA_character_ -} - -#' @export -xml_text.xml_node <- function(x, trim = FALSE) { - res <- .Call(node_text, x$node) + res <- .Call(node_text, x) if (isTRUE(trim)) { - res <- sub("^[[:space:]\u00a0]+", "", res) - res <- sub("[[:space:]\u00a0]+$", "", res) + res <- trim_text(res) } res } -#' @export -xml_text.xml_nodeset <- function(x, trim = FALSE) { - vapply(x, xml_text, trim = trim, FUN.VALUE = character(1)) +trim_text <- function(x) { + x <- sub("^[[:space:]\u00a0]+", "", x) + sub("[[:space:]\u00a0]+$", "", x) } #' @rdname xml_text @@ -94,46 +83,11 @@ xml_text.xml_nodeset <- function(x, trim = FALSE) { #' @rdname xml_text #' @export xml_double <- function(x) { - UseMethod("xml_double") -} - -#' @export -xml_double.xml_missing <- function(x) { - NA_real_ -} - -#' @export -xml_double.xml_node <- function(x) { as.numeric(xml_text(x)) } -#' @export -xml_double.xml_nodeset <- function(x) { - vapply(x, xml_double, numeric(1)) -} - -#' @export -xml_integer <- function(x) { - UseMethod("xml_integer") -} - -#' @export -xml_integer.xml_missing <- function(x) { - NA_integer_ -} - #' @rdname xml_text #' @export xml_integer <- function(x) { - UseMethod("xml_integer") -} - -#' @export -xml_integer.xml_node <- function(x) { as.integer(xml_text(x)) } - -#' @export -xml_integer.xml_nodeset <- function(x) { - vapply(x, xml_integer, integer(1)) -} diff --git a/R/xml_type.R b/R/xml_type.R index f957d75..d5aac52 100644 --- a/R/xml_type.R +++ b/R/xml_type.R @@ -7,22 +7,7 @@ #' xml_type(x) #' xml_type(xml_contents(x)) xml_type <- function(x) { - UseMethod("xml_type") -} - -#' @export -xml_type.xml_missing <- function(x) { - NA_character_ -} - -#' @export -xml_type.xml_node <- function(x) { - xmlElementType[.Call(node_type, x$node)] -} - -#' @export -xml_type.xml_nodeset <- function(x) { - types <- vapply(x, function(x) .Call(node_type, x$node), integer(1)) + types <- .Call(node_type, x) xmlElementType[types] } diff --git a/inst/include/xml2_types.h b/inst/include/xml2_types.h index c83f18d..8835602 100644 --- a/inst/include/xml2_types.h +++ b/inst/include/xml2_types.h @@ -21,12 +21,12 @@ template class XPtr { data_ = R_MakeExternalPtr((void *) p, R_NilValue, R_NilValue); R_PreserveObject(data_); } - + XPtr(const XPtr &old) { data_ = old.data_; R_PreserveObject(data_); } - + XPtr& operator=(const XPtr &other) { R_PreserveObject(other.data_); if (data_ != NULL) { diff --git a/src/xml2_node.cpp b/src/xml2_node.cpp index 2f9d50a..35b35cc 100644 --- a/src/xml2_node.cpp +++ b/src/xml2_node.cpp @@ -12,6 +12,11 @@ #include "xml2_types.h" #include "xml2_utils.h" +__attribute__ ((noreturn)) +void stop_unexpected_node_type() { + Rf_error("Unexpected node type"); +} + template // for xmlAttr and xmlNode std::string nodeName(T* node, SEXP nsMap) { std::string name = Xml2String(node->name).asStdString(); @@ -27,13 +32,57 @@ std::string nodeName(T* node, SEXP nsMap) { return prefix + ":" + name; } +SEXP node_name_impl(SEXP x, SEXP nsMap) { + NodeType type = getNodeType(x); + + SEXP out; + + switch(type) { + case NodeType::missing: + out = NA_STRING; + break; + case NodeType::node: { + SEXP node_sxp = VECTOR_ELT(x, 0); + XPtrNode node(node_sxp); + + std::string name = nodeName(node.checked_get(), nsMap); + out = Rf_mkCharLenCE(name.c_str(), name.size(), CE_UTF8); + break; + } + default: stop_unexpected_node_type(); + } + + return out; +} + // [[export]] -extern "C" SEXP node_name(SEXP node_sxp, SEXP nsMap) { +extern "C" SEXP node_name(SEXP x, SEXP nsMap) { BEGIN_CPP - XPtrNode node(node_sxp); + NodeType type = getNodeType(x); + + switch(type) + { + case NodeType::missing: + case NodeType::node: + return Rf_ScalarString(node_name_impl(x, nsMap)); + break; + case NodeType::nodeset: { + int n = Rf_xlength(x); + + SEXP out = PROTECT(Rf_allocVector(STRSXP, n)); + + for (int i = 0; i < n; ++i) { + SEXP x_i = VECTOR_ELT(x, i); + SEXP name_i = node_name_impl(x_i, nsMap); + SET_STRING_ELT(out, i, name_i); + } + + UNPROTECT(1); + return out; + }; + default: stop_unexpected_node_type(); + } - std::string name = nodeName(node.checked_get(), nsMap); - return Rf_ScalarString(Rf_mkCharLenCE(name.c_str(), name.size(), CE_UTF8)); END_CPP } @@ -48,12 +97,56 @@ extern "C" SEXP node_set_name(SEXP node_sxp, SEXP value) { END_CPP } +SEXP node_text_impl(SEXP x) { + NodeType type = getNodeType(x); + + SEXP out; + + switch(type) { + case NodeType::missing: + out = NA_STRING; + break; + case NodeType::node: { + SEXP node_sxp = VECTOR_ELT(x, 0); + XPtrNode node(node_sxp); + + out = Xml2String(xmlNodeGetContent(node.checked_get())).asRString(); + break; + } + default: stop_unexpected_node_type(); + } + + return out; +} + // [[export]] -extern "C" SEXP node_text(SEXP node_sxp) { +extern "C" SEXP node_text(SEXP x) { BEGIN_CPP - XPtrNode node(node_sxp); + NodeType type = getNodeType(x); + + switch(type) + { + case NodeType::missing: + case NodeType::node: + return Rf_ScalarString(node_text_impl(x)); + break; + case NodeType::nodeset: { + int n = Rf_xlength(x); + + SEXP out = PROTECT(Rf_allocVector(STRSXP, n)); + + for (int i = 0; i < n; ++i) { + SEXP x_i = VECTOR_ELT(x, i); + SEXP name_i = node_text_impl(x_i); + SET_STRING_ELT(out, i, name_i); + } + + UNPROTECT(1); + return out; + }; + default: stop_unexpected_node_type(); + } - return Rf_ScalarString(Xml2String(xmlNodeGetContent(node.checked_get())).asRString()); END_CPP } @@ -82,15 +175,66 @@ const xmlChar* xmlNsDefinition(xmlNodePtr node, const xmlChar* lookup) { return NULL; } + +SEXP node_attr_impl(SEXP x, + const std::string& name, + SEXP missingVal, + SEXP nsMap_sxp) { + NodeType type = getNodeType(x); + + switch(type) { + case NodeType::missing: + return NA_STRING; + break; + case NodeType::node: { + SEXP node_sxp = VECTOR_ELT(x, 0); + XPtrNode node(node_sxp); + if (name == "xmlns") { + return Xml2String(xmlNsDefinition(node, NULL)).asRString(missingVal); + } + + if (hasPrefix("xmlns:", name)) { + std::string prefix = name.substr(6); + return Xml2String(xmlNsDefinition(node, asXmlChar(prefix))).asRString(missingVal); + } + + xmlChar* string; + if (Rf_xlength(nsMap_sxp) == 0) { + string = xmlGetProp(node.checked_get(), asXmlChar(name)); + } else { + size_t colon = name.find(':'); + if (colon == std::string::npos) { + // Has namespace spec, but attribute not qualified, so look for attribute + // without namespace + string = xmlGetNoNsProp(node.checked_get(), asXmlChar(name)); + } else { + // Split name into prefix & attr, then look up full url + std::string + prefix = name.substr(0, colon), + attr = name.substr(colon + 1, name.size() - 1); + + std::string url = NsMap(nsMap_sxp).findUrl(prefix); + + string = xmlGetNsProp(node.checked_get(), asXmlChar(attr), asXmlChar(url)); + } + } + + return Xml2String(string).asRString(missingVal); + break; + } + default: stop_unexpected_node_type(); + } +} + // [[export]] extern "C" SEXP node_attr( - SEXP node_sxp, + SEXP x, SEXP name_sxp, SEXP missing_sxp, SEXP nsMap_sxp) { BEGIN_CPP + NodeType type = getNodeType(x); - XPtrNode node(node_sxp); std::string name(CHAR(STRING_ELT(name_sxp, 0))); if (Rf_xlength(missing_sxp) != 1) { @@ -99,98 +243,129 @@ extern "C" SEXP node_attr( SEXP missingVal = STRING_ELT(missing_sxp, 0); - if (name == "xmlns") { - return Rf_ScalarString(Xml2String(xmlNsDefinition(node, NULL)).asRString(missingVal)); - } - - if (hasPrefix("xmlns:", name)) { - std::string prefix = name.substr(6); - return Rf_ScalarString(Xml2String(xmlNsDefinition(node, asXmlChar(prefix))).asRString(missingVal)); - } - - xmlChar* string; - if (Rf_xlength(nsMap_sxp) == 0) { - string = xmlGetProp(node.checked_get(), asXmlChar(name)); - } else { - size_t colon = name.find(':'); - if (colon == std::string::npos) { - // Has namespace spec, but attribute not qualified, so look for attribute - // without namespace - string = xmlGetNoNsProp(node.checked_get(), asXmlChar(name)); - } else { - // Split name into prefix & attr, then look up full url - std::string - prefix = name.substr(0, colon), - attr = name.substr(colon + 1, name.size() - 1); - - std::string url = NsMap(nsMap_sxp).findUrl(prefix); - - string = xmlGetNsProp(node.checked_get(), asXmlChar(attr), asXmlChar(url)); + switch(type) + { + case NodeType::missing: + case NodeType::node: + return Rf_ScalarString(node_attr_impl(x, name, missingVal, nsMap_sxp)); + break; + case NodeType::nodeset: { + int n = Rf_xlength(x); + + SEXP out = PROTECT(Rf_allocVector(STRSXP, n)); + + for (int i = 0; i < n; ++i) { + SEXP x_i = VECTOR_ELT(x, i); + SEXP attr_i = node_attr_impl(x_i, name, missingVal, nsMap_sxp); + SET_STRING_ELT(out, i, attr_i); } + + UNPROTECT(1); + return out; + }; + default: stop_unexpected_node_type(); } - return Rf_ScalarString(Xml2String(string).asRString(missingVal)); END_CPP } -// [[export]] -extern "C" SEXP node_attrs(SEXP node_sxp, SEXP nsMap_sxp) { - BEGIN_CPP - XPtrNode node_(node_sxp); - - int n = 0; - xmlNodePtr node = node_.checked_get(); - - if (node->type == XML_ELEMENT_NODE) { - // attributes - for(xmlAttr* cur = node->properties; cur != NULL; cur = cur->next) - n++; - - // namespace definitions - for(xmlNsPtr cur = node->nsDef; cur != NULL; cur = cur->next) - n++; - - SEXP names = PROTECT(Rf_allocVector(STRSXP, n)); - SEXP values = PROTECT(Rf_allocVector(STRSXP, n)); - - int i = 0; - for(xmlAttr* cur = node->properties; cur != NULL; cur = cur->next, ++i) { - std::string name = nodeName(cur, nsMap_sxp); - SET_STRING_ELT(names, i, Rf_mkCharLenCE(name.c_str(), name.size(), CE_UTF8)); - - xmlNs* ns = cur->ns; - if (ns == NULL) { - if (Rf_xlength(nsMap_sxp) > 0) { - SET_STRING_ELT(values, i, Xml2String(xmlGetNoNsProp(node, cur->name)).asRString()); +SEXP node_attrs_impl(SEXP x, SEXP nsMap_sxp) { + NodeType type = getNodeType(x); + + switch(type) { + case NodeType::missing: + return Rf_ScalarString(NA_STRING); + break; + case NodeType::node: { + SEXP node_sxp = VECTOR_ELT(x, 0); + XPtrNode node_(node_sxp); + + int n = 0; + xmlNodePtr node = node_.checked_get(); + + if (node->type == XML_ELEMENT_NODE) { + // attributes + for(xmlAttr* cur = node->properties; cur != NULL; cur = cur->next) + n++; + + // namespace definitions + for(xmlNsPtr cur = node->nsDef; cur != NULL; cur = cur->next) + n++; + + SEXP names = PROTECT(Rf_allocVector(STRSXP, n)); + SEXP values = PROTECT(Rf_allocVector(STRSXP, n)); + + int i = 0; + for(xmlAttr* cur = node->properties; cur != NULL; cur = cur->next, ++i) { + std::string name = nodeName(cur, nsMap_sxp); + SET_STRING_ELT(names, i, Rf_mkCharLenCE(name.c_str(), name.size(), CE_UTF8)); + + xmlNs* ns = cur->ns; + if (ns == NULL) { + if (Rf_xlength(nsMap_sxp) > 0) { + SET_STRING_ELT(values, i, Xml2String(xmlGetNoNsProp(node, cur->name)).asRString()); + } else { + SET_STRING_ELT(values, i, Xml2String(xmlGetProp(node, cur->name)).asRString()); + } } else { - SET_STRING_ELT(values, i, Xml2String(xmlGetProp(node, cur->name)).asRString()); + SET_STRING_ELT(values, i, Xml2String(xmlGetNsProp(node, cur->name, ns->href)).asRString()); } - } else { - SET_STRING_ELT(values, i, Xml2String(xmlGetNsProp(node, cur->name, ns->href)).asRString()); } - } - for(xmlNsPtr cur = node->nsDef; cur != NULL; cur = cur->next, ++i) { - if (cur->prefix == NULL) { - SET_STRING_ELT(names, i, Rf_mkChar("xmlns")); - } else { - std::string name = std::string("xmlns:") + Xml2String(cur->prefix).asStdString(); - SET_STRING_ELT(names,i, Rf_mkCharLenCE(name.c_str(), name.size(), CE_UTF8)); + for(xmlNsPtr cur = node->nsDef; cur != NULL; cur = cur->next, ++i) { + if (cur->prefix == NULL) { + SET_STRING_ELT(names, i, Rf_mkChar("xmlns")); + } else { + std::string name = std::string("xmlns:") + Xml2String(cur->prefix).asStdString(); + SET_STRING_ELT(names,i, Rf_mkCharLenCE(name.c_str(), name.size(), CE_UTF8)); + } + SET_STRING_ELT(values, i, Xml2String(cur->href).asRString()); } - SET_STRING_ELT(values, i, Xml2String(cur->href).asRString()); + + Rf_setAttrib(values, R_NamesSymbol, names); + + UNPROTECT(2); + return values; } - Rf_setAttrib(values, R_NamesSymbol, names); + return Rf_allocVector(STRSXP, 0); + break; + } + default: stop_unexpected_node_type(); + } +} - UNPROTECT(2); - return values; +// [[export]] +extern "C" SEXP node_attrs(SEXP x, SEXP nsMap_sxp) { + BEGIN_CPP + NodeType type = getNodeType(x); + + switch(type) + { + case NodeType::missing: + case NodeType::node: + return node_attrs_impl(x, nsMap_sxp); + break; + case NodeType::nodeset: { + int n = Rf_xlength(x); + + SEXP out = PROTECT(Rf_allocVector(VECSXP, n)); + + for (int i = 0; i < n; ++i) { + SEXP x_i = VECTOR_ELT(x, i); + SEXP name_i = node_attrs_impl(x_i, nsMap_sxp); + SET_VECTOR_ELT(out, i, name_i); + } + + UNPROTECT(1); + return out; + }; + default: stop_unexpected_node_type(); } - return Rf_allocVector(STRSXP, 0); END_CPP } - // Fix the tree by removing the namespace pointers to the given tree void xmlRemoveNamespace(xmlNodePtr tree, xmlNsPtr ns) { @@ -443,22 +618,69 @@ extern "C" SEXP node_children(SEXP node_sxp, SEXP only_node_sxp) { END_CPP } +int node_length_impl(SEXP x, bool only_node) { + NodeType type = getNodeType(x); + + int out; + + switch(type) { + case NodeType::missing: + out = 0; + break; + case NodeType::node: { + SEXP node_sxp = VECTOR_ELT(x, 0); + XPtrNode node(node_sxp); + + out = 0; + for(xmlNode* cur = node->xmlChildrenNode; cur != NULL; cur = cur->next) { + if (only_node && cur->type != XML_ELEMENT_NODE) { + continue; + } + ++out; + } + break; + } + default: stop_unexpected_node_type(); + } + + return out; +} + // [[export]] -extern "C" SEXP node_length(SEXP node_sxp, SEXP only_node_sxp) { +extern "C" SEXP node_length(SEXP x, SEXP only_node_sxp) { BEGIN_CPP + NodeType type = getNodeType(x); - XPtrNode node(node_sxp); bool only_node = LOGICAL(only_node_sxp)[0]; - int i = 0; - for(xmlNode* cur = node->xmlChildrenNode; cur != NULL; cur = cur->next) { - if (only_node && cur->type != XML_ELEMENT_NODE) { - continue; + switch(type) + { + case NodeType::missing: + case NodeType::node: + return Rf_ScalarInteger(node_length_impl(x, only_node)); + break; + case NodeType::nodeset: { + int n = Rf_xlength(x); + + if (n == 0) { + return Rf_ScalarInteger(0); } - ++i; + + SEXP out = PROTECT(Rf_allocVector(INTSXP, n)); + int* p_out = INTEGER(out); + + for (int i = 0; i < n; ++i) { + SEXP x_i = VECTOR_ELT(x, i); + int length_i = node_length_impl(x_i, only_node); + p_out[i] = length_i; + } + + UNPROTECT(1); + return out; + }; + default: stop_unexpected_node_type(); } - return Rf_ScalarInteger(i); END_CPP } @@ -536,12 +758,56 @@ extern "C" SEXP node_parent(SEXP node_sxp) { END_CPP } +SEXP node_path_impl(SEXP x) { + NodeType type = getNodeType(x); + + SEXP out; + + switch(type) { + case NodeType::missing: + out = NA_STRING; + break; + case NodeType::node: { + SEXP node_sxp = VECTOR_ELT(x, 0); + XPtrNode node(node_sxp); + + out = Xml2String(xmlGetNodePath(node.checked_get())).asRString(); + break; + } + default: stop_unexpected_node_type(); + } + + return out; +} + // [[export]] -extern "C" SEXP node_path(SEXP node_sxp) { +extern "C" SEXP node_path(SEXP x) { BEGIN_CPP - XPtrNode node(node_sxp); + NodeType type = getNodeType(x); + + switch(type) + { + case NodeType::missing: + case NodeType::node: + return Rf_ScalarString(node_path_impl(x)); + break; + case NodeType::nodeset: { + int n = Rf_xlength(x); + + SEXP out = PROTECT(Rf_allocVector(STRSXP, n)); + + for (int i = 0; i < n; ++i) { + SEXP x_i = VECTOR_ELT(x, i); + SEXP name_i = node_path_impl(x_i); + SET_STRING_ELT(out, i, name_i); + } + + UNPROTECT(1); + return out; + }; + default: stop_unexpected_node_type(); + } - return Rf_ScalarString(Xml2String(xmlGetNodePath(node.checked_get())).asRString()); END_CPP } @@ -575,12 +841,57 @@ extern "C" SEXP nodes_duplicated(SEXP nodes) { END_CPP } +int node_type_impl(SEXP x) { + NodeType type = getNodeType(x); + + int out; + + switch(type) { + case NodeType::missing: + out = NA_INTEGER; + break; + case NodeType::node: { + SEXP node_sxp = VECTOR_ELT(x, 0); + XPtrNode node(node_sxp); + + out = node->type; + break; + } + default: stop_unexpected_node_type(); + } + + return out; +} + // [[export]] -extern "C" SEXP node_type(SEXP node_sxp) { +extern "C" SEXP node_type(SEXP x) { BEGIN_CPP - XPtrNode node(node_sxp); + NodeType type = getNodeType(x); + + switch(type) + { + case NodeType::missing: + case NodeType::node: + return Rf_ScalarInteger(node_type_impl(x)); + break; + case NodeType::nodeset: { + int n = Rf_xlength(x); + + SEXP out = PROTECT(Rf_allocVector(INTSXP, n)); + int* p_out = INTEGER(out); + + for (int i = 0; i < n; ++i) { + SEXP x_i = VECTOR_ELT(x, i); + int type_i = node_type_impl(x_i); + p_out[i] = type_i; + } + + UNPROTECT(1); + return out; + }; + default: stop_unexpected_node_type(); + } - return Rf_ScalarInteger(node->type); END_CPP } diff --git a/src/xml2_utils.h b/src/xml2_utils.h index 231b7e3..d77b3cf 100644 --- a/src/xml2_utils.h +++ b/src/xml2_utils.h @@ -9,6 +9,24 @@ #include #include +enum NodeType { + missing = 1, + node = 2, + nodeset = 3, +}; + +inline const NodeType getNodeType(SEXP x) { + if (Rf_inherits(x, "xml_node")) { + return(NodeType::node); + } else if (Rf_inherits(x, "xml_nodeset")) { + return(NodeType::nodeset); + } else if (Rf_inherits(x, "xml_missing")) { + return(NodeType::missing); + } else { + Rf_error("Unexpected node type"); + } +} + inline const xmlChar* asXmlChar(std::string const& x) { return (const xmlChar*) x.c_str(); } diff --git a/tests/testthat/_snaps/xml_name.md b/tests/testthat/_snaps/xml_name.md new file mode 100644 index 0000000..38ee479 --- /dev/null +++ b/tests/testthat/_snaps/xml_name.md @@ -0,0 +1,4 @@ +# error if missing ns spec + + Couldn't find prefix for url http://bar.com + diff --git a/tests/testthat/test-xml_missing.R b/tests/testthat/test-xml_missing.R index 66124d7..78a3a14 100644 --- a/tests/testthat/test-xml_missing.R +++ b/tests/testthat/test-xml_missing.R @@ -21,7 +21,7 @@ test_that("xml_missing methods return properly for all S3 methods", { expect_equal(tree_structure(mss), NA_character_) expect_error(write_xml(mss), "Missing data cannot be written") expect_error(write_html(mss), "Missing data cannot be written") - expect_equal(xml_attr(mss), NA_character_) + expect_equal(xml_attr(mss, "dummy_attr"), NA_character_) expect_equal(xml_attrs(mss), NA_character_) expect_equal(xml_find_all(mss), xml_nodeset()) expect_equal(xml_find_chr(mss), character()) @@ -33,7 +33,6 @@ test_that("xml_missing methods return properly for all S3 methods", { expect_equal(xml_parent(mss), xml_missing()) expect_equal(xml_path(mss), NA_character_) expect_equal(xml_text(mss), NA_character_) - expect_equal(xml_type(mss), NA_character_) expect_equal(xml_url(mss), NA_character_) }) diff --git a/tests/testthat/test-xml_name.R b/tests/testthat/test-xml_name.R index 94b3d28..869db26 100644 --- a/tests/testthat/test-xml_name.R +++ b/tests/testthat/test-xml_name.R @@ -1,3 +1,20 @@ +test_that("xml_name() returns the name", { + x <- read_xml(" +

Some text.

+

Some other.

+

No bold text

+ ") + + children <- xml_children(x) + x <- xml_find_first(children, ".//b|.//i") + + expect_equal(xml_name(x[[1]]), "b") + expect_equal(xml_name(x[[2]]), "i") + expect_equal(xml_name(x[[3]]), NA_character_) + + expect_equal(xml_name(x), c("b", "i", NA_character_)) +}) + test_that("qualified names returned when ns given", { x <- read_xml(test_path("ns-multiple-default.xml")) ns <- xml_ns(x) @@ -12,7 +29,7 @@ test_that("error if missing ns spec", { ns <- xml_ns(x)[1] bars <- xml_children(xml_children(x)) - expect_error(xml_name(bars, ns), "Couldn't find prefix") + expect_snapshot_error(xml_name(bars, ns)) }) test_that("xml_name<- modifies the name", { diff --git a/tests/testthat/test-xml_nodeset.R b/tests/testthat/test-xml_nodeset.R index 81382fb..3a92c0b 100644 --- a/tests/testthat/test-xml_nodeset.R +++ b/tests/testthat/test-xml_nodeset.R @@ -67,7 +67,6 @@ test_that("methods work on empty nodesets", { expect_output(xml_structure(empty), NA) expect_identical(xml_text(empty), character()) - expect_identical(xml_type(empty), character()) expect_identical(xml_url(empty), character()) }) diff --git a/tests/testthat/test-xml_text.R b/tests/testthat/test-xml_text.R index 56e8986..6b7af3b 100644 --- a/tests/testthat/test-xml_text.R +++ b/tests/testthat/test-xml_text.R @@ -6,22 +6,19 @@ test_that("xml_text returns only text without markup", { expect_identical(xml_text(xml_children(x)), "bold!") }) -test_that("xml_text returns only text without markup", { - x <- read_xml("

This is some text. This is bold!

") - - expect_identical(xml_text(x), "This is some text. This is bold!") - - expect_identical(xml_text(xml_children(x)), "bold!") -}) - test_that("xml_text works properly with xml_nodeset objects", { - x <- read_xml("This is some text. This is some nested text.") + x <- read_xml(" +

Some text.

+

Some other.

+

No bold text

+ ") - expect_identical(xml_text(x), "This is some text. This is some nested text.") + children <- xml_children(x) + x <- xml_find_first(children, ".//b|.//i") expect_identical( - xml_text(xml_find_all(x, "//x")), - c("This is some text. This is some nested text.", "This is some nested text.") + xml_text(x), + c("text", "other", NA) ) }) @@ -47,6 +44,12 @@ test_that("xml_text trims whitespace if requested, including non-breaking spaces xml_text(x, trim = TRUE), "Some text \u20ac" ) + + x2 <- read_html("

Some text €  

and more € text  ") + expect_identical( + xml_text(xml_find_all(x2, ".//p"), trim = TRUE), + c("Some text \u20ac", "and more \u20ac text") + ) }) test_that("xml_integer() returns an integer vector", { diff --git a/tests/testthat/test-xml_type.R b/tests/testthat/test-xml_type.R new file mode 100644 index 0000000..eda8230 --- /dev/null +++ b/tests/testthat/test-xml_type.R @@ -0,0 +1,18 @@ +test_that("xml_type() works", { + x <- read_xml(" +

Some text.

+

Some other.

+

No bold text

+ ") + + children <- xml_children(x) + x <- xml_find_first(children, ".//b|.//i") + + expect_equal(xml_type(x[[1]]), "element") + expect_equal(xml_type(x[[3]]), NA_character_) + + expect_equal(xml_type(x), c("element", "element", NA)) + + empty <- xml_children(x) + expect_identical(xml_type(empty), character()) +})