Skip to content

Commit

Permalink
WIP xpath_search()
Browse files Browse the repository at this point in the history
  • Loading branch information
mgirlich committed Aug 29, 2023
1 parent 6768e16 commit c215e94
Show file tree
Hide file tree
Showing 5 changed files with 72 additions and 33 deletions.
30 changes: 1 addition & 29 deletions R/xml_find.R
Expand Up @@ -111,37 +111,9 @@ xml_find_all.xml_nodeset <- function(x, xpath, ns = xml_ns(x), flatten = TRUE, .
#' @export
#' @rdname xml_find_all
xml_find_first <- function(x, xpath, ns = xml_ns(x)) {
UseMethod("xml_find_first")
}

#' @export
xml_find_first.xml_missing <- function(x, xpath, ns = xml_ns(x)) {
xml_missing()
.Call(xpath_search2, x, xpath, ns, 1)
}

#' @export
xml_find_first.xml_node <- function(x, xpath, ns = xml_ns(x)) {
.Call(xpath_search, x$node, x$doc, xpath, ns, 1)
}

#' @export
xml_find_first.xml_nodeset <- function(x, xpath, ns = xml_ns(x)) {
if (length(x) == 0) {
return(xml_nodeset())
}

xml_nodeset(
lapply(
x,
function(x) {
xml_find_first(x, xpath = xpath, ns = ns)
}
),
deduplicate = FALSE
)
}


#' @export
#' @rdname xml_find_all
xml_find_num <- function(x, xpath, ns = xml_ns(x)) {
Expand Down
2 changes: 2 additions & 0 deletions src/init.c
Expand Up @@ -74,6 +74,7 @@ extern SEXP url_unescape_(SEXP);
extern SEXP xml_parse_options_(void);
extern SEXP xml_save_options_(void);
extern SEXP xpath_search(SEXP, SEXP, SEXP, SEXP, SEXP);
extern SEXP xpath_search2(SEXP, SEXP, SEXP, SEXP);

static const R_CallMethodDef CallEntries[] = {
{"doc_has_root", (DL_FUNC) &doc_has_root, 1},
Expand Down Expand Up @@ -139,6 +140,7 @@ static const R_CallMethodDef CallEntries[] = {
{"xml_parse_options_", (DL_FUNC) &xml_parse_options_, 0},
{"xml_save_options_", (DL_FUNC) &xml_save_options_, 0},
{"xpath_search", (DL_FUNC) &xpath_search, 5},
{"xpath_search2", (DL_FUNC) &xpath_search2, 4},
{NULL, NULL, 0}
};

Expand Down
69 changes: 67 additions & 2 deletions src/xml2_xpath.cpp
Expand Up @@ -116,10 +116,74 @@ class XmlSeeker {

};

SEXP xpath_search_impl(SEXP x, const char* xpath, SEXP nsMap_sxp, double num_results) {
NodeType type = getNodeType(x);

SEXP out;

switch(type) {
case NodeType::missing:
Rprintf("-> missing\n");
out = new_xml_missing();
break;
case NodeType::node: {
SEXP node_sxp = VECTOR_ELT(x, 0);
XPtrNode node(node_sxp);
SEXP doc_sxp = VECTOR_ELT(x, 1);
XPtrDoc doc(doc_sxp);

XmlSeeker seeker(doc, node.checked_get());
seeker.registerNamespace(nsMap_sxp);
out = seeker.search(xpath, num_results);
break;
}
default: Rf_error("Unexpected node type");
}

return(out);
}

// [[export]]
extern "C" SEXP xpath_search(SEXP node_sxp, SEXP doc_sxp, SEXP xpath_sxp, SEXP nsMap_sxp, SEXP num_results_sxp) {
extern "C" SEXP xpath_search2(SEXP x, SEXP xpath_sxp, SEXP nsMap_sxp, SEXP num_results_sxp) {
if (TYPEOF(xpath_sxp) != STRSXP) {
Rf_error("XPath must be a string, received %s", Rf_type2char(TYPEOF(xpath_sxp)));
}
const char* xpath = CHAR(STRING_ELT(xpath_sxp, 0));

XPtrNode node(node_sxp);
double num_results = REAL(num_results_sxp)[0];
if (num_results == R_PosInf) {
num_results = INT_MAX;
}

NodeType type = getNodeType(x);

switch(type)
{
case NodeType::missing:
case NodeType::node :
return(xpath_search_impl(x, xpath, nsMap_sxp, num_results));
break;
case NodeType::nodeset: {
int n = Rf_xlength(x);

SEXP out = PROTECT(Rf_allocVector(VECSXP, n));

for (int i = 0; i < n; ++i) {
SEXP x_i = VECTOR_ELT(x, i);
SEXP res_i = xpath_search_impl(x_i, xpath, nsMap_sxp, num_results);
SET_VECTOR_ELT(out, i, res_i);
}

Rf_setAttrib(out, R_ClassSymbol, Rf_mkString("xml_nodeset"));

UNPROTECT(1);
return(out);
};
}
}

// [[export]]
extern "C" SEXP xpath_search(SEXP node_sxp, SEXP doc_sxp, SEXP xpath_sxp, SEXP nsMap_sxp, SEXP num_results_sxp) {
XPtrDoc doc(doc_sxp);
if (TYPEOF(xpath_sxp) != STRSXP) {
Rf_error("XPath must be a string, received %s", Rf_type2char(TYPEOF(xpath_sxp)));
Expand All @@ -128,6 +192,7 @@ extern "C" SEXP xpath_search(SEXP node_sxp, SEXP doc_sxp, SEXP xpath_sxp, SEXP n

double num_results = REAL(num_results_sxp)[0];

XPtrNode node(node_sxp);
if (num_results == R_PosInf) {
num_results = INT_MAX;
}
Expand Down
2 changes: 1 addition & 1 deletion tests/testthat/test-xml_missing.R
Expand Up @@ -27,7 +27,7 @@ test_that("xml_missing methods return properly for all S3 methods", {
expect_equal(xml_find_chr(mss), character())
expect_equal(xml_find_lgl(mss), logical())
expect_equal(xml_find_num(mss), numeric())
expect_equal(xml_find_first(mss), xml_missing())
expect_equal(xml_find_first(mss, "dummy"), xml_missing())
expect_equal(xml_length(mss), 0L)
expect_equal(xml_name(mss), NA_character_)
expect_equal(xml_parent(mss), xml_missing())
Expand Down
2 changes: 1 addition & 1 deletion tests/testthat/test-xml_nodeset.R
Expand Up @@ -35,7 +35,7 @@ test_that("methods work on empty nodesets", {
expect_identical(xml_double(empty), numeric())
expect_identical(xml_find_all(empty), empty)
expect_identical(xml_find_chr(empty), character())
expect_identical(xml_find_first(empty), empty)
expect_identical(xml_find_first(empty, "dummy"), empty)
expect_identical(xml_find_lgl(empty), logical())
expect_identical(xml_find_num(empty), numeric())
expect_identical(xml_integer(empty), integer())
Expand Down

0 comments on commit c215e94

Please sign in to comment.