From df87b19645650293f5c78a0d20bac2c88d259041 Mon Sep 17 00:00:00 2001 From: Jeroen Ooms Date: Sun, 16 Nov 2025 20:03:46 +0100 Subject: [PATCH 1/2] Implement ExternalEntityLoader for http downloads --- R/init.R | 7 +++++++ src/xml2_init.c | 35 ++++++++++++++++++++++++++++++++++- 2 files changed, 41 insertions(+), 1 deletion(-) diff --git a/R/init.R b/R/init.R index ae0f6e6..1ecc7c5 100644 --- a/R/init.R +++ b/R/init.R @@ -9,3 +9,10 @@ xml_parse_options <- function() { xml_save_options <- function() { .Call(xml_save_options_) } + +download_file_callback <- function(url){ + tmp <- tempfile() + on.exit(unlink(tmp)) + download.file(url, tmp, quiet = TRUE, mode = "wb") + readBin(tmp, raw(), file.info(tmp)$size) +} diff --git a/src/xml2_init.c b/src/xml2_init.c index 6451a83..34a1010 100644 --- a/src/xml2_init.c +++ b/src/xml2_init.c @@ -4,6 +4,8 @@ #include #include +static xmlExternalEntityLoader defaultLoader = NULL; + /* * * * Author: Nick Wellnhofer * Date: Tue, 24 Oct 2023 15:02:36 +0200 @@ -49,6 +51,34 @@ void handleGenericError(void *ctx, const char *fmt, ...){ Rf_error("%s", buffer); } +xmlParserInput *download_file_callback(const char *url){ + SEXP arg = PROTECT(Rf_mkString(url)); + SEXP expr = PROTECT(Rf_install("download_file_callback")); + SEXP call = PROTECT(Rf_lang2(expr, arg)); + SEXP env = R_FindNamespace(Rf_mkString("xml2")); + int err = 1; + SEXP out = PROTECT(R_tryEvalSilent(call, env, &err)); + if(err || TYPEOF(out) != RAWSXP){ UNPROTECT(4); return NULL; } /* failed or non-raw result: rebalance the protect stack before bailing out */ + xmlParserInputFlags flags = XML_INPUT_USE_SYS_CATALOG; /* no XML_INPUT_BUF_STATIC: libxml2 must copy RAW(out), which is released by UNPROTECT below */ + xmlParserInput *buf = xmlNewInputFromMemory(url, RAW(out), (size_t) Rf_xlength(out), flags); + //xmlParserInputBuffer *buf = xmlParserInputBufferCreateMem((char*) RAW(out), Rf_length(out), XML_CHAR_ENCODING_UTF8); + UNPROTECT(4); + return buf; +} + +static xmlParserInputPtr myExternalEntityLoader(const char *URL, const char *ID, xmlParserCtxtPtr ctxt){ + if (URL && (strncmp(URL, "http://", 7) == 0 || strncmp(URL, "https://", 8) == 0)) { + //REprintf("Fetching external resource 
%s\n", URL); + xmlParserInput *buf = download_file_callback(URL); + if(buf) return buf; + } + // Fallback to default behavior + if (defaultLoader) + return defaultLoader(URL, ID, ctxt); + return NULL; +} + + void init_libxml2_library(void) { // Check that header and libs are compatible LIBXML_TEST_VERSION @@ -56,5 +86,8 @@ void init_libxml2_library(void) { xmlInitParser(); xmlSetStructuredErrorFunc(NULL, handleStructuredError); xmlSetGenericErrorFunc(NULL, handleGenericError); -} + // Set custom download callback + defaultLoader = xmlGetExternalEntityLoader(); + xmlSetExternalEntityLoader(myExternalEntityLoader); +} From dd3b54f58b89e7a753ddaa5a5ec57cdcb7a812de Mon Sep 17 00:00:00 2001 From: Jeroen Ooms Date: Sun, 16 Nov 2025 21:37:57 +0100 Subject: [PATCH 2/2] Experimental custom myExternalEntityLoader on libxml2 2.15 and up --- DESCRIPTION | 2 +- NEWS.md | 4 ++++ src/xml2_init.c | 6 ++++++ 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 9f5a64d..9f7d53c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: xml2 Title: Parse XML -Version: 1.4.1 +Version: 1.5.0 Authors@R: c( person("Hadley", "Wickham", role = "aut"), person("Jim", "Hester", role = "aut"), diff --git a/NEWS.md b/NEWS.md index 32864d4..01947a3 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,7 @@ +# xml2 1.5.0 + +* Experimental custom myExternalEntityLoader on libxml2 2.15 and up. 
+ # xml2 1.4.1 * Remove a test that broke with libxml2 2.15 diff --git a/src/xml2_init.c b/src/xml2_init.c index 34a1010..1484c9a 100644 --- a/src/xml2_init.c +++ b/src/xml2_init.c @@ -51,6 +51,8 @@ void handleGenericError(void *ctx, const char *fmt, ...){ Rf_error("%s", buffer); } +#if LIBXML_VERSION >= 21500 + xmlParserInput *download_file_callback(const char *url){ SEXP arg = PROTECT(Rf_mkString(url)); SEXP expr = PROTECT(Rf_install("download_file_callback")); @@ -78,6 +80,8 @@ static xmlParserInputPtr myExternalEntityLoader(const char *URL, const char *ID, return NULL; } +#endif + void init_libxml2_library(void) { // Check that header and libs are compatible @@ -88,6 +92,8 @@ void init_libxml2_library(void) { xmlSetGenericErrorFunc(NULL, handleGenericError); // Set custom download callback +#if LIBXML_VERSION >= 21500 defaultLoader = xmlGetExternalEntityLoader(); xmlSetExternalEntityLoader(myExternalEntityLoader); +#endif }