Skip to content

Commit

Permalink
Sanitize libxml2 globals before parsing
Browse files Browse the repository at this point in the history
Fixes GHSA-3qrf-m4j2-pcrr.

To parse a document with libxml2, you first need to create a parsing context.
The parsing context contains parsing options (e.g. XML_NOENT to substitute
entities) that the application (in this case PHP) can set.
Unfortunately, libxml2 also supports providing default set options.
For example, if you call xmlSubstituteEntitiesDefault(1) then the XML_NOENT
option will be added to the parsing options every time you create a parsing
context **even if the application never requested XML_NOENT**.

Third party extensions can override these globals, in particular the
substitute entity global. This causes entity substitution to be
unexpectedly active.

Fix it by setting the parsing options to a sane known value.
For API calls that depend on global state we introduce
PHP_LIBXML_SANITIZE_GLOBALS() and PHP_LIBXML_RESTORE_GLOBALS().
For other APIs that work directly with a context we introduce
php_libxml_sanitize_parse_ctxt_options().
  • Loading branch information
nielsdos authored and derickr committed Jul 31, 2023
1 parent 8031612 commit c283c3a
Show file tree
Hide file tree
Showing 14 changed files with 216 additions and 6 deletions.
15 changes: 15 additions & 0 deletions ext/dom/document.c
Expand Up @@ -1254,6 +1254,7 @@ static xmlDocPtr dom_document_parser(zval *id, int mode, char *source, size_t so
options |= XML_PARSE_NOBLANKS;
}

php_libxml_sanitize_parse_ctxt_options(ctxt);
xmlCtxtUseOptions(ctxt, options);

ctxt->recovery = recover;
Expand Down Expand Up @@ -1548,7 +1549,9 @@ PHP_METHOD(DOMDocument, xinclude)

DOM_GET_OBJ(docp, id, xmlDocPtr, intern);

PHP_LIBXML_SANITIZE_GLOBALS(xinclude);
err = xmlXIncludeProcessFlags(docp, (int)flags);
PHP_LIBXML_RESTORE_GLOBALS(xinclude);

/* XML_XINCLUDE_START and XML_XINCLUDE_END nodes need to be removed as these
are added via xmlXIncludeProcess to mark beginning and ending of xincluded document
Expand Down Expand Up @@ -1586,6 +1589,7 @@ PHP_METHOD(DOMDocument, validate)

DOM_GET_OBJ(docp, id, xmlDocPtr, intern);

PHP_LIBXML_SANITIZE_GLOBALS(validate);
cvp = xmlNewValidCtxt();

cvp->userData = NULL;
Expand All @@ -1597,6 +1601,7 @@ PHP_METHOD(DOMDocument, validate)
} else {
RETVAL_FALSE;
}
PHP_LIBXML_RESTORE_GLOBALS(validate);

xmlFreeValidCtxt(cvp);

Expand Down Expand Up @@ -1631,14 +1636,18 @@ static void _dom_document_schema_validate(INTERNAL_FUNCTION_PARAMETERS, int type

DOM_GET_OBJ(docp, id, xmlDocPtr, intern);

PHP_LIBXML_SANITIZE_GLOBALS(new_parser_ctxt);

switch (type) {
case DOM_LOAD_FILE:
if (CHECK_NULL_PATH(source, source_len)) {
PHP_LIBXML_RESTORE_GLOBALS(new_parser_ctxt);
zend_argument_value_error(1, "must not contain any null bytes");
RETURN_THROWS();
}
valid_file = _dom_get_valid_file_path(source, resolved_path, MAXPATHLEN);
if (!valid_file) {
PHP_LIBXML_RESTORE_GLOBALS(new_parser_ctxt);
php_error_docref(NULL, E_WARNING, "Invalid Schema file source");
RETURN_FALSE;
}
Expand All @@ -1659,6 +1668,7 @@ static void _dom_document_schema_validate(INTERNAL_FUNCTION_PARAMETERS, int type
parser);
sptr = xmlSchemaParse(parser);
xmlSchemaFreeParserCtxt(parser);
PHP_LIBXML_RESTORE_GLOBALS(new_parser_ctxt);
if (!sptr) {
if (!EG(exception)) {
php_error_docref(NULL, E_WARNING, "Invalid Schema");
Expand All @@ -1679,11 +1689,13 @@ static void _dom_document_schema_validate(INTERNAL_FUNCTION_PARAMETERS, int type
valid_opts |= XML_SCHEMA_VAL_VC_I_CREATE;
}

PHP_LIBXML_SANITIZE_GLOBALS(validate);
xmlSchemaSetValidOptions(vptr, valid_opts);
xmlSchemaSetValidErrors(vptr, php_libxml_error_handler, php_libxml_error_handler, vptr);
is_valid = xmlSchemaValidateDoc(vptr, docp);
xmlSchemaFree(sptr);
xmlSchemaFreeValidCtxt(vptr);
PHP_LIBXML_RESTORE_GLOBALS(validate);

if (is_valid == 0) {
RETURN_TRUE;
Expand Down Expand Up @@ -1754,12 +1766,14 @@ static void _dom_document_relaxNG_validate(INTERNAL_FUNCTION_PARAMETERS, int typ
return;
}

PHP_LIBXML_SANITIZE_GLOBALS(parse);
xmlRelaxNGSetParserErrors(parser,
(xmlRelaxNGValidityErrorFunc) php_libxml_error_handler,
(xmlRelaxNGValidityWarningFunc) php_libxml_error_handler,
parser);
sptr = xmlRelaxNGParse(parser);
xmlRelaxNGFreeParserCtxt(parser);
PHP_LIBXML_RESTORE_GLOBALS(parse);
if (!sptr) {
php_error_docref(NULL, E_WARNING, "Invalid RelaxNG");
RETURN_FALSE;
Expand Down Expand Up @@ -1858,6 +1872,7 @@ static void dom_load_html(INTERNAL_FUNCTION_PARAMETERS, int mode) /* {{{ */
ctxt->sax->error = php_libxml_ctx_error;
ctxt->sax->warning = php_libxml_ctx_warning;
}
php_libxml_sanitize_parse_ctxt_options(ctxt);
if (options) {
htmlCtxtUseOptions(ctxt, (int)options);
}
Expand Down
2 changes: 2 additions & 0 deletions ext/dom/documentfragment.c
Expand Up @@ -114,7 +114,9 @@ PHP_METHOD(DOMDocumentFragment, appendXML) {
}

if (data) {
PHP_LIBXML_SANITIZE_GLOBALS(parse);
err = xmlParseBalancedChunkMemory(nodep->doc, NULL, NULL, 0, (xmlChar *) data, &lst);
PHP_LIBXML_RESTORE_GLOBALS(parse);
if (err != 0) {
RETURN_FALSE;
}
Expand Down
36 changes: 36 additions & 0 deletions ext/dom/tests/libxml_global_state_entity_loader_bypass.phpt
@@ -0,0 +1,36 @@
--TEST--
GHSA-3qrf-m4j2-pcrr (libxml global state entity loader bypass)
--SKIPIF--
<?php
if (!extension_loaded('libxml')) die('skip libxml extension not available');
if (!extension_loaded('dom')) die('skip dom extension not available');
if (!extension_loaded('zend-test')) die('skip zend-test extension not available');
?>
--FILE--
<?php

$xml = "<?xml version='1.0'?><!DOCTYPE root [<!ENTITY % bork SYSTEM \"php://nope\"> %bork;]><nothing/>";

libxml_use_internal_errors(true);

function parseXML($xml) {
$doc = new DOMDocument();
@$doc->loadXML($xml);
$doc->createDocumentFragment()->appendXML("&bork;");
foreach (libxml_get_errors() as $error) {
var_dump(trim($error->message));
}
}

parseXML($xml);
zend_test_override_libxml_global_state();
parseXML($xml);

echo "Done\n";

?>
--EXPECT--
string(25) "Entity 'bork' not defined"
string(25) "Entity 'bork' not defined"
string(25) "Entity 'bork' not defined"
Done
36 changes: 36 additions & 0 deletions ext/libxml/php_libxml.h
Expand Up @@ -118,6 +118,42 @@ PHP_LIBXML_API void php_libxml_shutdown(void);
ZEND_TSRMLS_CACHE_EXTERN()
#endif

/* Other extension may override the global state options, these global options
* are copied initially to ctxt->options. Set the options to a known good value.
* See libxml2 globals.c and parserInternals.c.
* The unique_name argument allows multiple sanitizes and restores within the
* same function, even nested is necessary. */
#define PHP_LIBXML_SANITIZE_GLOBALS(unique_name) \
int xml_old_loadsubset_##unique_name = xmlLoadExtDtdDefaultValue; \
xmlLoadExtDtdDefaultValue = 0; \
int xml_old_validate_##unique_name = xmlDoValidityCheckingDefaultValue; \
xmlDoValidityCheckingDefaultValue = 0; \
int xml_old_pedantic_##unique_name = xmlPedanticParserDefault(0); \
int xml_old_substitute_##unique_name = xmlSubstituteEntitiesDefault(0); \
int xml_old_linenrs_##unique_name = xmlLineNumbersDefault(0); \
int xml_old_blanks_##unique_name = xmlKeepBlanksDefault(1);

#define PHP_LIBXML_RESTORE_GLOBALS(unique_name) \
xmlLoadExtDtdDefaultValue = xml_old_loadsubset_##unique_name; \
xmlDoValidityCheckingDefaultValue = xml_old_validate_##unique_name; \
(void) xmlPedanticParserDefault(xml_old_pedantic_##unique_name); \
(void) xmlSubstituteEntitiesDefault(xml_old_substitute_##unique_name); \
(void) xmlLineNumbersDefault(xml_old_linenrs_##unique_name); \
(void) xmlKeepBlanksDefault(xml_old_blanks_##unique_name);

/* Alternative for above, working directly on the context and not setting globals.
* Generally faster because no locking is involved, and this has the advantage that it sets the options to a known good value. */
static zend_always_inline void php_libxml_sanitize_parse_ctxt_options(xmlParserCtxtPtr ctxt)
{
ctxt->loadsubset = 0;
ctxt->validate = 0;
ctxt->pedantic = 0;
ctxt->replaceEntities = 0;
ctxt->linenumbers = 0;
ctxt->keepBlanks = 1;
ctxt->options = 0;
}

#else /* HAVE_LIBXML */
#define libxml_module_ptr NULL
#endif
Expand Down
6 changes: 6 additions & 0 deletions ext/simplexml/simplexml.c
Expand Up @@ -2278,7 +2278,9 @@ PHP_FUNCTION(simplexml_load_file)
RETURN_THROWS();
}

PHP_LIBXML_SANITIZE_GLOBALS(read_file);
docp = xmlReadFile(filename, NULL, (int)options);
PHP_LIBXML_RESTORE_GLOBALS(read_file);

if (!docp) {
RETURN_FALSE;
Expand Down Expand Up @@ -2331,7 +2333,9 @@ PHP_FUNCTION(simplexml_load_string)
RETURN_THROWS();
}

PHP_LIBXML_SANITIZE_GLOBALS(read_memory);
docp = xmlReadMemory(data, (int)data_len, NULL, NULL, (int)options);
PHP_LIBXML_RESTORE_GLOBALS(read_memory);

if (!docp) {
RETURN_FALSE;
Expand Down Expand Up @@ -2380,7 +2384,9 @@ SXE_METHOD(__construct)
RETURN_THROWS();
}

PHP_LIBXML_SANITIZE_GLOBALS(read_file_or_memory);
docp = is_url ? xmlReadFile(data, NULL, (int)options) : xmlReadMemory(data, (int)data_len, NULL, NULL, (int)options);
PHP_LIBXML_RESTORE_GLOBALS(read_file_or_memory);

if (!docp) {
((php_libxml_node_object *)sxe)->document = NULL;
Expand Down
36 changes: 36 additions & 0 deletions ext/simplexml/tests/libxml_global_state_entity_loader_bypass.phpt
@@ -0,0 +1,36 @@
--TEST--
GHSA-3qrf-m4j2-pcrr (libxml global state entity loader bypass)
--SKIPIF--
<?php
if (!extension_loaded('libxml')) die('skip libxml extension not available');
if (!extension_loaded('simplexml')) die('skip simplexml extension not available');
if (!extension_loaded('zend-test')) die('skip zend-test extension not available');
?>
--FILE--
<?php

$xml = "<?xml version='1.0'?><!DOCTYPE root [<!ENTITY % bork SYSTEM \"php://nope\"> %bork;]><nothing/>";

libxml_use_internal_errors(true);
zend_test_override_libxml_global_state();

echo "--- String test ---\n";
simplexml_load_string($xml);
echo "--- Constructor test ---\n";
new SimpleXMLElement($xml);
echo "--- File test ---\n";
file_put_contents("libxml_global_state_entity_loader_bypass.tmp", $xml);
simplexml_load_file("libxml_global_state_entity_loader_bypass.tmp");

echo "Done\n";

?>
--CLEAN--
<?php
@unlink("libxml_global_state_entity_loader_bypass.tmp");
?>
--EXPECT--
--- String test ---
--- Constructor test ---
--- File test ---
Done
2 changes: 2 additions & 0 deletions ext/soap/php_xml.c
Expand Up @@ -91,6 +91,7 @@ xmlDocPtr soap_xmlParseFile(const char *filename)
if (ctxt) {
zend_bool old;

php_libxml_sanitize_parse_ctxt_options(ctxt);
ctxt->keepBlanks = 0;
ctxt->sax->ignorableWhitespace = soap_ignorableWhitespace;
ctxt->sax->comment = soap_Comment;
Expand Down Expand Up @@ -139,6 +140,7 @@ xmlDocPtr soap_xmlParseMemory(const void *buf, size_t buf_size)
if (ctxt) {
zend_bool old;

php_libxml_sanitize_parse_ctxt_options(ctxt);
ctxt->sax->ignorableWhitespace = soap_ignorableWhitespace;
ctxt->sax->comment = soap_Comment;
ctxt->sax->warning = NULL;
Expand Down
2 changes: 2 additions & 0 deletions ext/xml/compat.c
Expand Up @@ -17,6 +17,7 @@
#include "php.h"
#if defined(HAVE_LIBXML) && (defined(HAVE_XML) || defined(HAVE_XMLRPC)) && !defined(HAVE_LIBEXPAT)
#include "expat_compat.h"
#include "ext/libxml/php_libxml.h"

typedef struct _php_xml_ns {
xmlNsPtr nsptr;
Expand Down Expand Up @@ -469,6 +470,7 @@ XML_ParserCreate_MM(const XML_Char *encoding, const XML_Memory_Handling_Suite *m
return NULL;
}

php_libxml_sanitize_parse_ctxt_options(parser->parser);
xmlCtxtUseOptions(parser->parser, XML_PARSE_OLDSAX);

parser->parser->replaceEntities = 1;
Expand Down
9 changes: 9 additions & 0 deletions ext/xmlreader/php_xmlreader.c
Expand Up @@ -284,6 +284,7 @@ static xmlRelaxNGPtr _xmlreader_get_relaxNG(char *source, size_t source_len, siz
return NULL;
}

PHP_LIBXML_SANITIZE_GLOBALS(parse);
if (error_func || warn_func) {
xmlRelaxNGSetParserErrors(parser,
(xmlRelaxNGValidityErrorFunc) error_func,
Expand All @@ -292,6 +293,7 @@ static xmlRelaxNGPtr _xmlreader_get_relaxNG(char *source, size_t source_len, siz
}
sptr = xmlRelaxNGParse(parser);
xmlRelaxNGFreeParserCtxt(parser);
PHP_LIBXML_RESTORE_GLOBALS(parse);

return sptr;
}
Expand Down Expand Up @@ -872,7 +874,9 @@ PHP_METHOD(XMLReader, open)
valid_file = _xmlreader_get_valid_file_path(source, resolved_path, MAXPATHLEN );

if (valid_file) {
PHP_LIBXML_SANITIZE_GLOBALS(reader_for_file);
reader = xmlReaderForFile(valid_file, encoding, options);
PHP_LIBXML_RESTORE_GLOBALS(reader_for_file);
}

if (reader == NULL) {
Expand Down Expand Up @@ -945,7 +949,9 @@ PHP_METHOD(XMLReader, setSchema)

intern = Z_XMLREADER_P(id);
if (intern && intern->ptr) {
PHP_LIBXML_SANITIZE_GLOBALS(schema);
retval = xmlTextReaderSchemaValidate(intern->ptr, source);
PHP_LIBXML_RESTORE_GLOBALS(schema);

if (retval == 0) {
RETURN_TRUE;
Expand Down Expand Up @@ -1068,6 +1074,7 @@ PHP_METHOD(XMLReader, XML)
}
uri = (char *) xmlCanonicPath((const xmlChar *) resolved_path);
}
PHP_LIBXML_SANITIZE_GLOBALS(text_reader);
reader = xmlNewTextReader(inputbfr, uri);

if (reader != NULL) {
Expand All @@ -1086,9 +1093,11 @@ PHP_METHOD(XMLReader, XML)
xmlFree(uri);
}

PHP_LIBXML_RESTORE_GLOBALS(text_reader);
return;
}
}
PHP_LIBXML_RESTORE_GLOBALS(text_reader);
}

if (uri) {
Expand Down
35 changes: 35 additions & 0 deletions ext/xmlreader/tests/libxml_global_state_entity_loader_bypass.phpt
@@ -0,0 +1,35 @@
--TEST--
GHSA-3qrf-m4j2-pcrr (libxml global state entity loader bypass)
--SKIPIF--
<?php
if (!extension_loaded('libxml')) die('skip libxml extension not available');
if (!extension_loaded('xmlreader')) die('skip xmlreader extension not available');
if (!extension_loaded('zend-test')) die('skip zend-test extension not available');
?>
--FILE--
<?php

$xml = "<?xml version='1.0'?><!DOCTYPE root [<!ENTITY % bork SYSTEM \"php://nope\"> %bork;]><nothing/>";

libxml_use_internal_errors(true);
zend_test_override_libxml_global_state();

echo "--- String test ---\n";
$reader = XMLReader::xml($xml);
$reader->read();
echo "--- File test ---\n";
file_put_contents("libxml_global_state_entity_loader_bypass.tmp", $xml);
$reader = XMLReader::open("libxml_global_state_entity_loader_bypass.tmp");
$reader->read();

echo "Done\n";

?>
--CLEAN--
<?php
@unlink("libxml_global_state_entity_loader_bypass.tmp");
?>
--EXPECT--
--- String test ---
--- File test ---
Done

0 comments on commit c283c3a

Please sign in to comment.