Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions src/XML/Container/AbstractTestContainer.php
Original file line number Diff line number Diff line change
Expand Up @@ -31,5 +31,9 @@ abstract public function getLanguageValue(string $language = 'en'): LanguageValu
/**
 * Abstract accessor that implementing classes must provide; returns an XMLAttribute.
 *
 * NOTE(review): the meaning of $x (defaults to 1) is not visible here - confirm against the implementations.
 */
abstract public function getXMLAttribute(int $x = 1): XMLAttribute;


/**
 * Return the DOM node list for the given text.
 *
 * NOTE(review): how $text is mapped to nodes is decided by the implementing class - confirm there.
 *
 * @param non-empty-string $text
 * @return \Dom\NodeList<\Dom\Node>
 */
abstract public function getDOMText(string $text): Dom\NodeList;
}
2 changes: 1 addition & 1 deletion src/XML/Container/XMLSchemaElementsTrait.php
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ trait XMLSchemaElementsTrait
/** @var array<positive-int, \SimpleSAML\XMLSchema\XML\Appinfo> */
protected array $appinfo = [];

/** @var array<non-empty-string, \Dom\Text> */
/** @var array<non-empty-string, \Dom\NodeList> */
protected array $domText = [];


Expand Down
154 changes: 114 additions & 40 deletions src/XML/DOMDocumentFactory.php
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@
use SimpleSAML\XPath\XPath;

use function file_get_contents;
use function func_num_args;
use function restore_error_handler;
use function set_error_handler;
use function sprintf;
use function strpos;

Expand All @@ -22,19 +23,50 @@
final class DOMDocumentFactory
{
/**
* Base libxml options used when parsing XML.
*
* Note: We add LIBXML_NO_XXE automatically when available (libxml >= 2.13.0).
*
* @var non-negative-int
* TODO: Add LIBXML_NO_XXE to the defaults when libxml 2.13.0 becomes generally available
*/
public const int DEFAULT_OPTIONS = \LIBXML_COMPACT | \LIBXML_NOENT | \LIBXML_NONET | \LIBXML_NSCLEAN;
public const int DEFAULT_OPTIONS_BASE = \LIBXML_COMPACT | \LIBXML_NOENT | \LIBXML_NONET | \LIBXML_NSCLEAN;


/**
 * Build the libxml option bitmask used when the caller supplies no options.
 *
 * Starts from {@see self::DEFAULT_OPTIONS_BASE} and ORs in LIBXML_NO_XXE
 * whenever that constant is defined (libxml >= 2.13.0).
 *
 * @return non-negative-int
 */
public static function getDefaultOptions(): int
{
    // The constant is only referenced on the branch where it is known to exist.
    return defined('LIBXML_NO_XXE')
        ? self::DEFAULT_OPTIONS_BASE | \LIBXML_NO_XXE
        : self::DEFAULT_OPTIONS_BASE;
}


/**
* Create a DOM XML document from an XML string.
*
* The input is validated to reject potentially dangerous constructs (e.g. DOCTYPE).
* Parser warnings/notices are converted into {@see \DOMException}.
*
* @param non-empty-string $xml XML document as a string.
* @param non-negative-int|null $options Libxml parser options. If {@see null}, default options will be used
* (including {@see \LIBXML_NO_XXE} when available).
*
* @return \Dom\XMLDocument
*
* @throws \SimpleSAML\Assert\AssertionFailedException If $xml is empty/whitespace-only or contains a DOCTYPE.
* @throws \SimpleSAML\XML\Exception\RuntimeException If dangerous XML is detected (DOCTYPE is not allowed).
* @throws \DOMException If parsing emits warnings/notices or fails.
*/
public static function fromString(
string $xml,
int $options = self::DEFAULT_OPTIONS,
?int $options = null,
): Dom\XMLDocument {
Assert::notWhitespaceOnly($xml);
Assert::notRegex(
Expand All @@ -44,13 +76,25 @@ public static function fromString(
RuntimeException::class,
);

// If LIBXML_NO_XXE is available and option not set
if (func_num_args() === 1 && defined('LIBXML_NO_XXE')) {
$options |= \LIBXML_NO_XXE;
}
$options = $options ?? self::getDefaultOptions();

$domDocument = self::create();
$loaded = $domDocument->createFromString($xml, $options);

// Convert parser warnings/notices into DOMException to avoid PHP warnings leaking into test output
set_error_handler(
/**
* @throws \DOMException
*/
static function (int $severity, string $message): never {
throw new \DOMException($message);
},
);

try {
$loaded = $domDocument->createFromString($xml, $options);
} finally {
restore_error_handler();
}

foreach ($domDocument->childNodes as $child) {
Assert::false(
Expand All @@ -65,12 +109,25 @@ public static function fromString(


/**
* @param string $file
* @param non-negative-int $options
* Create a DOM XML document from an XML file.
*
* The file is read into a string and then parsed using {@see self::fromString()}.
*
* @param non-empty-string $file Path to the XML file.
* @param non-negative-int|null $options Libxml parser options. If {@see null}, default options will be used
* (including {@see \LIBXML_NO_XXE} when available).
*
* @return \Dom\XMLDocument
*
* @throws \SimpleSAML\XML\Exception\IOException If the file cannot be read.
* @throws \SimpleSAML\Assert\AssertionFailedException If the file content is empty/whitespace-only
* or contains a DOCTYPE.
* @throws \SimpleSAML\XML\Exception\RuntimeException If dangerous XML is detected (DOCTYPE is not allowed).
* @throws \DOMException If parsing emits warnings/notices or fails.
*/
public static function fromFile(
string $file,
int $options = self::DEFAULT_OPTIONS,
?int $options = null,
): Dom\XMLDocument {
error_clear_last();
$xml = @file_get_contents($file);
Expand All @@ -82,7 +139,8 @@ public static function fromFile(
}

Assert::notWhitespaceOnly($xml, sprintf('File "%s" does not have content', $file), RuntimeException::class);
return (func_num_args() < 2) ? static::fromString($xml) : static::fromString($xml, $options);

return static::fromString($xml, $options);
}


Expand All @@ -96,18 +154,31 @@ public static function create(string $encoding = 'UTF-8'): Dom\XMLDocument


/**
* @param \Dom\XMLDocument $doc
* Normalize namespace declarations in an XML document.
*
* This method collects namespace declarations required by prefixed elements and moves the corresponding
* {@code xmlns:prefix} declarations to the document root, removing {@code xmlns} / {@code xmlns:*} attributes
* from descendant elements.
*
* Note: this mutates the provided document and is not a substitute for XML canonicalization (C14N).
*
* @param \Dom\XMLDocument $doc The XML document to normalize.
*
* @return \Dom\XMLDocument The same document instance, potentially modified. If the document has no root element
* or no namespace declarations to normalize, it is returned unchanged.
*/
public static function normalizeDocument(Dom\XMLDocument $doc): Dom\XMLDocument
{
// Get the root element
$root = $doc->documentElement;
if ($root === null) {
return $doc;
}

// Collect all xmlns attributes from the document
$xpath = XPath::getXPath($doc);
$xmlnsAttributes = [];

// Register all namespaces to ensure XPath can handle them
// Collect namespace declarations needed for prefixed elements in the document
foreach ($xpath->query('//*[namespace::*]') as $node) {
if ($node instanceof Dom\Element) {
$name = 'xmlns:' . $node->prefix;
Expand All @@ -123,40 +194,45 @@ public static function normalizeDocument(Dom\XMLDocument $doc): Dom\XMLDocument
return $doc;
}

// Remove xmlns attributes from all elements
$nodes = $xpath->query('//*[namespace::*]');
foreach ($nodes as $node) {
if ($node instanceof Dom\Element) {
$attributesToRemove = [];
foreach ($node->attributes as $attr) {
if (strpos($attr->nodeName, 'xmlns') === 0 || $attr->nodeName === 'xmlns') {
$attributesToRemove[] = $attr->namespaceURI;
}
// Remove xmlns attributes from all elements (proper XMLNS namespace removal)
foreach ($xpath->query('//*[namespace::*]') as $node) {
if (!$node instanceof Dom\Element) {
continue;
}

foreach ($node->attributes as $attr) {
if ($attr->namespaceURI === C::NS_XMLNS) {
$node->removeAttributeNS(C::NS_XMLNS, $attr->localName);
continue;
}

foreach ($attributesToRemove as $attrName) {
$node->removeAttribute($attrName);
if (strpos($attr->nodeName, 'xmlns') === 0 || $attr->nodeName === 'xmlns') {
// Fallback for implementations that still expose xmlns attrs without namespaceURI
$node->removeAttribute($attr->nodeName);
}
}
}

// Add all collected xmlns attributes to the root element
foreach ($xmlnsAttributes as $name => $value) {
$root->setAttribute($name, $value);
$root->setAttributeNS(C::NS_XMLNS, $name, $value);
}

// Get the normalized string
/** @var \Dom\XMLDocument $ownerDocument */
$ownerDocument = $root->ownerDocument;

// Return the normalized XML
return static::fromString($ownerDocument->saveXml($ownerDocument->documentElement));
return $doc;
}


/**
* @param \Dom\Element $elt
* @param string|null $prefix
* Resolve a namespace URI for a given prefix in the context of an element.
*
* The reserved prefixes {@code xml} and {@code xmlns} are mapped to their well-known namespace URIs.
* For all other prefixes, this method inspects the in-scope namespaces of the document element.
*
* @param \Dom\Element $elt An element belonging to the document whose in-scope namespaces will be consulted.
* @param string|null $prefix The namespace prefix to resolve. Use {@see null} to resolve the default namespace.
*
* @return string|null The namespace URI associated with the given prefix, or {@see null}
* if the prefix is not bound.
*/
public static function lookupNamespaceURI(Dom\Element $elt, ?string $prefix): ?string
{
Expand All @@ -167,11 +243,9 @@ public static function lookupNamespaceURI(Dom\Element $elt, ?string $prefix): ?s
return C::NS_XMLNS;
}


/** @var \Dom\NamespaceInfo[] $namespaces */
$namespaces = $elt->ownerDocument->documentElement->getInScopeNamespaces();

$xmlnsAttributes = [];
foreach ($namespaces as $ns) {
if ($ns->prefix === $prefix) {
return $ns->namespaceURI;
Expand Down
9 changes: 8 additions & 1 deletion src/XML/ExtendableAttributesTrait.php
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
use Dom;
use RuntimeException;
use SimpleSAML\XML\Assert\Assert;
use SimpleSAML\XML\Attribute;
use SimpleSAML\XML\Constants as C;
use SimpleSAML\XMLSchema\Exception\InvalidDOMAttributeException;
use SimpleSAML\XMLSchema\Exception\SchemaViolationException;
Expand Down Expand Up @@ -109,6 +108,10 @@ protected static function getAttributesNSFromXML(
Assert::oneOf($namespace, NS::$PREDEFINED);

foreach ($xml->attributes as $a) {
if ($a->namespaceURI === C::NS_XMLNS) {
continue;
}

if (
$exclusionList
&& (in_array([$a->namespaceURI, $a->localName], $exclusionList, true)
Expand Down Expand Up @@ -148,6 +151,10 @@ protected static function getAttributesNSFromXML(
}

foreach ($xml->attributes as $a) {
if ($a->namespaceURI === C::NS_XMLNS) {
continue;
}

if (in_array([$a->namespaceURI, $a->localName], $exclusionList, true)) {
continue;
} elseif (!in_array($a->namespaceURI, $namespace, true)) {
Expand Down
22 changes: 20 additions & 2 deletions src/XML/TestUtils/SerializableElementTestTrait.php
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,12 @@
namespace SimpleSAML\XML\TestUtils;

use Dom;
use PHPUnit\Framework\Assert;
use PHPUnit\Framework\Attributes\Depends;
use SimpleSAML\XML\DOMDocumentFactory;

use function class_exists;
use function strval;

/**
* Test for Serializable XML classes to perform default serialization tests.
Expand Down Expand Up @@ -48,7 +51,7 @@ public function testUnmarshalling(): void
} else {
$elt = self::$testedClass::fromXML(self::$xmlRepresentation->documentElement);

$this->assertEquals(
$this->assertXmlStringEquals(
self::$xmlRepresentation->saveXml(self::$xmlRepresentation->documentElement),
strval($elt),
);
Expand All @@ -74,10 +77,25 @@ public function testSerialization(): void
. ':$xmlRepresentation to a DOMDocument representing the XML-class being tested',
);
} else {
$this->assertEquals(
$this->assertXmlStringEquals(
self::$xmlRepresentation->saveXml(self::$xmlRepresentation->documentElement),
strval(unserialize(serialize(self::$testedClass::fromXML(self::$xmlRepresentation->documentElement)))),
);
}
}


/**
 * Assert that two XML strings have identical canonical (C14N) forms.
 *
 * Both strings are parsed with DOMDocumentFactory::fromString(); each resulting
 * document must have a document element, and the C14N output of those elements
 * is compared with assertSame.
 *
 * @param string $expectedXml The reference XML.
 * @param string $actualXml The XML produced by the code under test.
 */
private function assertXmlStringEquals(string $expectedXml, string $actualXml): void
{
    $expectedRoot = DOMDocumentFactory::fromString($expectedXml)->documentElement;
    $actualRoot = DOMDocumentFactory::fromString($actualXml)->documentElement;

    // Both documents must have a root before canonical forms can be compared.
    Assert::assertNotNull($expectedRoot);
    Assert::assertNotNull($actualRoot);

    $expectedCanonical = $expectedRoot->C14N();
    $actualCanonical = $actualRoot->C14N();

    Assert::assertSame($expectedCanonical, $actualCanonical);
}
}
6 changes: 5 additions & 1 deletion src/XPath/XPath.php
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,11 @@ private static function registerAncestorNamespaces(Dom\XPath $xp, Dom\Node $node
$uri = (string) $attr->nodeValue;

if (
$prefix === '' || $prefix === null || $prefix === 'xmlns' || $uri === '' || isset($prefixToUri[$prefix])
$prefix === ''
|| $prefix === null
|| $prefix === 'xmlns'
|| $uri === ''
|| isset($prefixToUri[$prefix])
) {
continue;
}
Expand Down
Loading