diff --git a/composer.json b/composer.json index f0b370cc..2edf5a7c 100644 --- a/composer.json +++ b/composer.json @@ -59,5 +59,21 @@ "phpstan/extension-installer": true, "simplesamlphp/composer-xmlprovider-installer": true } + }, + "scripts": { + "pre-commit": [ + "vendor/bin/phpcs -p", + "vendor/bin/composer-require-checker check --config-file=tools/composer-require-checker.json composer.json", + "vendor/bin/phpstan analyze -c phpstan.neon", + "vendor/bin/phpstan analyze -c phpstan-dev.neon", + "vendor/bin/composer-unused --excludePackage=simplesamlphp/composer-xmlprovider-installer", + "vendor/bin/phpunit --no-coverage --testdox" + ], + "tests": [ + "vendor/bin/phpunit --no-coverage" + ], + "propose-fix": [ + "vendor/bin/phpcs --report=diff" + ] } } diff --git a/src/XML/Constants.php b/src/XML/Constants.php index 96b2f4a0..6903702c 100644 --- a/src/XML/Constants.php +++ b/src/XML/Constants.php @@ -16,6 +16,11 @@ class Constants */ public const NS_XML = 'http://www.w3.org/XML/1998/namespace'; + /** + * The namespace for XMLNS declarations. + */ + public const NS_XMLNS = 'http://www.w3.org/2000/xmlns/'; + /** * The maximum amount of child nodes this library is willing to handle. * By specification, this limit is 150K, but that opens up for denial of service. diff --git a/src/XPath/XPath.php b/src/XPath/XPath.php index 0a400c63..c4d47955 100644 --- a/src/XPath/XPath.php +++ b/src/XPath/XPath.php @@ -5,8 +5,10 @@ namespace SimpleSAML\XPath; use DOMDocument; +use DOMElement; use DOMNode; use DOMXPath; +use RuntimeException; use SimpleSAML\XML\Assert\Assert; use SimpleSAML\XML\Constants as C_XML; use SimpleSAML\XMLSchema\Constants as C_XS; @@ -21,6 +23,12 @@ class XPath /** * Get an instance of DOMXPath associated with a DOMNode * + * - Reuses a cached DOMXPath per document. + * - Registers core XML-related namespaces: 'xml' and 'xs'. + * - Enriches the XPath with all prefixed xmlns declarations found on the + * current node and its ancestors (up to the document element), so + * custom prefixes declared anywhere up the tree can be used in queries. + * * @param \DOMNode $node The associated node * @return \DOMXPath */ @@ -42,10 +50,152 @@ public static function getXPath(DOMNode $node): DOMXPath $xpCache->registerNamespace('xml', C_XML::NS_XML); $xpCache->registerNamespace('xs', C_XS::NS_XS); + // Enrich with ancestor-declared prefixes for this document context. + $prefixToUri = self::registerAncestorNamespaces($xpCache, $node); + + // Single, bounded subtree scan to pick up descendant-only declarations. + self::registerSubtreePrefixes($xpCache, $node, $prefixToUri); + return $xpCache; } + /** + * Walk from the given node up to the document element, registering all prefixed xmlns declarations. + * + * Safety: + * - Only attributes in the XMLNS namespace (http://www.w3.org/2000/xmlns/). + * - Skip default xmlns (localName === 'xmlns') because XPath requires prefixes. + * - Skip empty URIs. + * - Do not override core 'xml' and 'xs' prefixes (already bound). + * - Nearest binding wins during this pass (prefixes are added once). + * + * @param \DOMXPath $xp + * @param \DOMNode $node + * @return array Map of prefix => namespace URI that are bound after this pass + */ + private static function registerAncestorNamespaces(DOMXPath $xp, DOMNode $node): array + { + // Track prefix => uri to feed into subtree scan. Seed with core bindings. + $prefixToUri = [ + 'xml' => C_XML::NS_XML, + 'xs' => C_XS::NS_XS, + ]; + + // Start from the nearest element (or documentElement if a DOMDocument is passed). + $current = $node instanceof DOMDocument + ? $node->documentElement + : ($node instanceof DOMElement ? $node : $node->parentNode); + + $steps = 0; + + while ($current instanceof DOMElement) { + if (++$steps > C_XML::UNBOUNDED_LIMIT) { + throw new RuntimeException(__METHOD__ . ': exceeded ancestor traversal limit'); + } + + if ($current->hasAttributes()) { + foreach ($current->attributes as $attr) { + if ($attr->namespaceURI !== C_XML::NS_XMLNS) { + continue; + } + $prefix = $attr->localName; + $uri = (string) $attr->nodeValue; + + if ( + $prefix === null || $prefix === '' || + $prefix === 'xmlns' || $uri === '' || + isset($prefixToUri[$prefix]) + ) { + continue; + } + + $xp->registerNamespace($prefix, $uri); + $prefixToUri[$prefix] = $uri; + } + } + + $current = $current->parentNode; + } + + return $prefixToUri; + } + + + /** + * Single-pass subtree scan from the context element to bind prefixes used only on descendants. + * - Never rebind an already-registered prefix (collision-safe). + * - Skips 'xmlns' and empty URIs. + * - Bounded by UNBOUNDED_LIMIT. + * + * @param \DOMXPath $xp + * @param \DOMNode $node + * @param array $prefixToUri + */ + private static function registerSubtreePrefixes(DOMXPath $xp, DOMNode $node, array $prefixToUri): void + { + $root = $node instanceof DOMDocument + ? $node->documentElement + : ($node instanceof DOMElement ? $node : $node->parentNode); + + if (!$root instanceof DOMElement) { + return; + } + + $visited = 0; + + /** @var array<\DOMElement> $queue */ + $queue = [$root]; + + while ($queue) { + /** @var \DOMElement $el */ + $el = array_shift($queue); + + if (++$visited > C_XML::UNBOUNDED_LIMIT) { + throw new \RuntimeException(__METHOD__ . ': exceeded subtree traversal limit'); + } + + // Element prefix + if ($el->prefix && !isset($prefixToUri[$el->prefix])) { + $uri = $el->namespaceURI; + if (is_string($uri) && $uri !== '') { + $xp->registerNamespace($el->prefix, $uri); + $prefixToUri[$el->prefix] = $uri; + } + } + + // Attribute prefixes (excluding xmlns) + if ($el->hasAttributes()) { + foreach ($el->attributes as $attr) { + if ( + $attr->prefix && + $attr->prefix !== 'xmlns' && + !isset($prefixToUri[$attr->prefix]) + ) { + $uri = $attr->namespaceURI; + if (is_string($uri) && $uri !== '') { + $xp->registerNamespace($attr->prefix, $uri); + $prefixToUri[$attr->prefix] = $uri; + } + } else { + // Optional: collision detection (same prefix, different URI) + // if ($prefixToUri[$pfx] !== $attr->namespaceURI) { + // // Default: skip rebind; could log a debug message here. + // } + } + } + } + + // Enqueue children (only DOMElement to keep types precise) + foreach ($el->childNodes as $child) { + if ($child instanceof DOMElement) { + $queue[] = $child; + } + } + } + } + + /** * Do an XPath query on an XML node. * diff --git a/tests/XML/ExtendableAttributesTraitTest.php b/tests/XML/ExtendableAttributesTraitTest.php index 36fe803c..77b65936 100644 --- a/tests/XML/ExtendableAttributesTraitTest.php +++ b/tests/XML/ExtendableAttributesTraitTest.php @@ -88,7 +88,6 @@ public function getAttributeNamespace(): array|string public function testEmptyNamespaceArrayThrowsAnException(): void { $this->expectException(AssertionFailedException::class); - // @phpstan-ignore expr.resultUnused new class ([]) extends ExtendableAttributesElement { /** * @return array|string diff --git a/tests/XML/ExtendableElementTraitTest.php b/tests/XML/ExtendableElementTraitTest.php index b7255c28..5eff7adb 100644 --- a/tests/XML/ExtendableElementTraitTest.php +++ b/tests/XML/ExtendableElementTraitTest.php @@ -85,7 +85,6 @@ public static function setUpBeforeClass(): void public function testIllegalNamespaceComboThrowsAnException(): void { $this->expectException(AssertionFailedException::class); - // @phpstan-ignore expr.resultUnused new class ([]) extends ExtendableElement { /** * @return array|string @@ -104,7 +103,6 @@ public function getElementNamespace(): array|string public function testEmptyNamespaceArrayThrowsAnException(): void { $this->expectException(AssertionFailedException::class); - // @phpstan-ignore expr.resultUnused new class ([]) extends ExtendableElement { /** * @return array|string diff --git a/tests/XPath/XPathTest.php b/tests/XPath/XPathTest.php new file mode 100644 index 00000000..dbec0617 --- /dev/null +++ b/tests/XPath/XPathTest.php @@ -0,0 +1,336 @@ +loadXML(<<<'XML' + + + value + +XML); + + // Doc B is different + $docB = new DOMDocument(); + $docB->loadXML(<<<'XML' + + +XML); + + $xpA1 = XPath::getXPath($docA); + $xpA2 = XPath::getXPath($docA); + $xpB = XPath::getXPath($docB); + + // Cached instance reused per same document + $this->assertSame($xpA1, $xpA2); + // Different document => different DOMXPath instance + $this->assertNotSame($xpA1, $xpB); + + // 'xml' prefix registered: query should be valid and return xml:space attribute + $rootA = $docA->documentElement; + $this->assertInstanceOf(DOMElement::class, $rootA); + $attrs = XPath::xpQuery($rootA, '@xml:space', $xpA1); + $this->assertCount(1, $attrs); + $this->assertSame('preserve', $attrs[0]->nodeValue); + } + + + public function testAncestorNamespaceRegistrationAllowsCustomPrefixes(): void + { + // Custom namespace declared on the root; query from a descendant node + $xml = <<<'XML' + + + + + ok + + + +XML; + $doc = new DOMDocument(); + $doc->loadXML($xml); + + // Use a deep context node to ensure ancestor-walk picks up xmlns:foo from root + $context = $doc->getElementsByTagName('b')->item(0); + $this->assertInstanceOf(\DOMElement::class, $context); + + $xp = XPath::getXPath($context); + + $nodes = XPath::xpQuery($context, 'foo:item', $xp); + $this->assertCount(1, $nodes); + $this->assertSame('ok', $nodes[0]->textContent); + } + + + public function testXpQueryThrowsOnMalformedExpression(): void + { + $doc = new DOMDocument(); + $doc->loadXML(''); + $xp = XPath::getXPath($doc); + + // If xpQuery throws a specific exception, put that class here instead of Throwable. + $this->expectException(Throwable::class); + // Keep message assertion resilient to libxml version differences. + $this->expectExceptionMessageMatches('/(XPath|expression).*invalid|malformed|error/i'); + + // Malformed XPath: missing closing bracket + $root = $doc->documentElement; + $this->assertInstanceOf(DOMElement::class, $root); + + // Avoid emitting a PHP warning; let xpQuery surface it as an exception. + libxml_use_internal_errors(true); + try { + XPath::xpQuery($root, '//*[', $xp); + } finally { + $errors = libxml_get_errors(); + self::assertCount(1, $errors); + self::assertEquals("Invalid expression\n", $errors[0]->message); + libxml_clear_errors(); + libxml_use_internal_errors(false); + } + } + + + public function testXmlnsDetectionRegistersPrefixedNamespace(): void + { + // This XML ensures we hit the detection branch and pass the second guard: + // - xmlns:foo="urn:two" should be detected and registered + $xml = <<<'XML' + + + + ok + + +XML; + + $doc = new DOMDocument(); + $doc->loadXML($xml); + + $context = $doc->getElementsByTagName('ctx')->item(0); + $this->assertInstanceOf(DOMElement::class, $context); + + $xp = XPath::getXPath($context); + + // Passing the guards should register 'foo' so this resolves + $nodes = XPath::xpQuery($context, 'foo:item', $xp); + $this->assertCount(1, $nodes); + $this->assertSame('ok', $nodes[0]->textContent); + } + + + public function testNormalizationFromTextNode(): void + { + $xml = <<<'XML' + + + ok + +XML; + $doc = new DOMDocument(); + $doc->loadXML($xml); + + $item = $doc->getElementsByTagNameNS('https://example.org/foo', 'item')->item(0); + $this->assertInstanceOf(DOMElement::class, $item); + + $text = $item->firstChild; // DOMText node inside + $this->assertInstanceOf(DOMText::class, $text); + + // getXPath should handle a non-element node, normalize to the nearest element ancestor, + // and register the 'foo' namespace so a prefixed query works. + $xp = XPath::getXPath($text); + + $nodes = XPath::xpQuery($text, 'ancestor::foo:item', $xp); + $this->assertCount(1, $nodes); + $this->assertSame('ok', $nodes[0]->textContent); + } + + + public function testNormalizationFromAttributeNode(): void + { + $xml = <<<'XML' + + + + +XML; + $doc = new DOMDocument(); + $doc->loadXML($xml); + + $elt = $doc->getElementsByTagNameNS('urn:bar', 'elt')->item(0); + $this->assertInstanceOf(\DOMElement::class, $elt); + + $attr = $elt->getAttributeNodeNS('urn:bar', 'attr'); + $this->assertInstanceOf(\DOMAttr::class, $attr); + + // getXPath should normalize from DOMAttr to the element and ensure 'bar' is registered. + $xp = XPath::getXPath($attr); + + $nodes = XPath::xpQuery($attr, 'ancestor::bar:elt', $xp); + $this->assertCount(1, $nodes); + + // Ensure we have an element before calling element-only methods. + $this->assertInstanceOf(\DOMElement::class, $nodes[0]); + /** @var \DOMElement $el */ + $el = $nodes[0]; + $this->assertSame('v', $el->getAttributeNS('urn:bar', 'attr')); + } + + + public function testSkipsDefaultNamespaceDeclarationDoesNotCreateUsableXmlnsPrefix(): void + { + // Default namespace present; no prefixed declaration. + // The guard should skip registering 'xmlns' as a usable prefix. + $xml = <<<'XML' + + + t + +XML; + $doc = new DOMDocument(); + $doc->loadXML($xml); + + $context = $doc->documentElement?->getElementsByTagName('b')->item(0); + $this->assertInstanceOf(\DOMElement::class, $context); + + $xp = XPath::getXPath($context); + + // Using 'xmlns' as a prefix should fail because the code skips binding it. + libxml_use_internal_errors(true); + try { + $this->expectException(\Throwable::class); + // The XPath helper wraps libxml errors into a generic message: + $this->expectExceptionMessage('Malformed XPath query or invalid contextNode provided.'); + XPath::xpQuery($context, 'xmlns:b', $xp); + } finally { + $errors = libxml_get_errors(); + $this->assertEquals("Undefined namespace prefix\n", $errors[0]->message); + libxml_clear_errors(); + libxml_use_internal_errors(false); + } + } + + + public function testSkipsEmptyUriNamespaceDeclaration(): void + { + // An empty-URI namespace declaration must be ignored by the guard ($uri === ''). + $xml = <<<'XML' + + + t + +XML; + // Attempting to use the 'empty' prefix should fail because it wasn't registered. + libxml_use_internal_errors(true); + try { + $doc = new DOMDocument(); + $doc->loadXML($xml); + $context = $doc->getElementsByTagName('child')->item(0); + $this->assertInstanceOf(\DOMElement::class, $context); + + $xp = XPath::getXPath($context); + $this->expectException(\Throwable::class); + // The XPath helper wraps libxml errors into a generic message: + $this->expectExceptionMessage('Malformed XPath query or invalid contextNode provided.'); + XPath::xpQuery($context, 'empty:whatever', $xp); + } finally { + $errors = libxml_get_errors(); + $this->assertEquals("xmlns:empty: Empty XML namespace is not allowed\n", $errors[0]->message); + $this->assertEquals("Undefined namespace prefix\n", $errors[1]->message); + libxml_clear_errors(); + libxml_use_internal_errors(false); + } + } + + + public function testXmlnsPrefixedDeclarationRegistersNamespaceViaAttributeBranch(): void + { + // Build DOM programmatically to ensure xmlns:foo exists as attribute. + $doc = new DOMDocument('1.0', 'UTF-8'); + $root = $doc->createElement('root'); + $doc->appendChild($root); + + // Add xmlns:foo on the root (attribute-branch should detect and register) + $root->setAttributeNS('http://www.w3.org/2000/xmlns/', 'xmlns:foo', 'https://example.org/foo'); + + // Deep subtree that uses foo prefix but the context node itself is unprefixed + $ctx = $doc->createElement('ctx'); + $root->appendChild($ctx); + + $fooItem = $doc->createElementNS('https://example.org/foo', 'foo:item', 'ok'); + $ctx->appendChild($fooItem); + + // Use the unprefixed context to ensure ancestor-walk is required. + $xp = XPath::getXPath($ctx); + $nodes = XPath::xpQuery($ctx, 'foo:item', $xp); + + // If attribute-branch registered 'foo', the query resolves. + $this->assertCount(1, $nodes); + $this->assertSame('ok', $nodes[0]->textContent); + } + + + /** + * Provides relative file paths for the two XML variants. + * + * @return array + */ + public static function xmlVariantsProviderForTopLevelSlatePerson(): array + { + $base = dirname(__FILE__, 3) . '/tests/resources/xml'; + + return [ + "Register Subtree Prefixes" => [$base . '/success_response_a.xml'], + "Register Ancestor Namespaces" => [$base . '/success_response_b.xml'], + ]; + } + + /** + * Ensure that absolute XPath '/foo:serviceResponse/foo:authenticationSuccess/slate:person' + * finds the same top-level slate:person regardless of whether it appears before or after + * cas:attributes in the document, even when the slate prefix is only declared on the element itself. + */ + #[DataProvider('xmlVariantsProviderForTopLevelSlatePerson')] + public function testAbsoluteXPathFindsTopLevelSlatePerson(string $filePath): void + { + $doc = DOMDocumentFactory::fromFile($filePath); + + $fooNs = 'https://example.org/foo'; + /** @var \DOMElement|null $authn */ + $authn = $doc->getElementsByTagNameNS($fooNs, 'authenticationSuccess')->item(0); + $this->assertNotNull($authn, 'authenticationSuccess element not found'); + + $xp = XPath::getXPath($authn); + $query = '/foo:serviceResponse/foo:authenticationSuccess/slate:person'; + + $nodes = XPath::xpQuery($authn, $query, $xp); + + $this->assertSame(1, count($nodes), 'Expected exactly one top-level slate:person'); + $this->assertSame('12345_top', trim($nodes[0]->textContent)); + } +} diff --git a/tests/resources/xml/success_response_a.xml b/tests/resources/xml/success_response_a.xml new file mode 100644 index 00000000..ab1c3d85 --- /dev/null +++ b/tests/resources/xml/success_response_a.xml @@ -0,0 +1,21 @@ + + + jdoe + + 2025-11-07T22:00:24+02:00 + true + true + Doe + John + jdoe@example.edu + jdoe@example.edu + + 12345 + Fall-2025 + ABC-123 + + + 12345_top + + \ No newline at end of file diff --git a/tests/resources/xml/success_response_b.xml b/tests/resources/xml/success_response_b.xml new file mode 100644 index 00000000..855df978 --- /dev/null +++ b/tests/resources/xml/success_response_b.xml @@ -0,0 +1,20 @@ + + + jdoe + 12345_top + + 2025-11-07T22:00:24+02:00 + true + true + Doe + John + jdoe@example.edu + jdoe@example.edu + + 12345 + Fall-2025 + ABC-123 + + + \ No newline at end of file