Skip to content

Commit

Permalink
[DomCrawler] Added auto-discovery of namespaces in Crawler::filter() …
Browse files Browse the repository at this point in the history
…and Crawler::filterXPath().

Improved content type guessing.
  • Loading branch information
jakzal committed Sep 22, 2013
1 parent b1542f0 commit e5b8abb
Show file tree
Hide file tree
Showing 2 changed files with 61 additions and 1 deletion.
11 changes: 10 additions & 1 deletion src/Symfony/Component/DomCrawler/Crawler.php
Expand Up @@ -92,7 +92,7 @@ public function add($node)
public function addContent($content, $type = null)
{
if (empty($type)) {
$type = 'text/html';
$type = 0 === strpos($content, '<?xml') ? 'application/xml' : 'text/html';
}

// DOM only for HTML/XML content
Expand Down Expand Up @@ -580,6 +580,15 @@ public function filterXPath($xpath)
}

$domxpath = new \DOMXPath($document);
if (preg_match_all('/(?P<prefix>[a-zA-Z_][a-zA-Z_0-9\-\.]+):[^:]/', $xpath, $matches)) {
foreach ($matches['prefix'] as $prefix) {
// ask for one namespace, otherwise we'd get a collection with an item for each node
$namespaces = $domxpath->query(sprintf('(//namespace::*[name()="%s"])[last()]', $prefix));
foreach ($namespaces as $node) {
$domxpath->registerNamespace($node->prefix, $node->nodeValue);
}
}
}

return new static($domxpath->query($xpath), $this->uri);
}
Expand Down
51 changes: 51 additions & 0 deletions src/Symfony/Component/DomCrawler/Tests/CrawlerTest.php
Expand Up @@ -370,11 +370,31 @@ public function testFilterXPath()
$this->assertCount(6, $crawler->filterXPath('//li'), '->filterXPath() filters the node list with the XPath expression');
}

/**
 * Runs an unprefixed XPath expression ("//entry/id") against the XML
 * fixture and expects exactly one matching node.
 */
public function testFilterXPathWithDefaultNamespace()
{
    $fixture = $this->createTestXmlCrawler();
    $matched = $fixture->filterXPath('//entry/id');

    $this->assertCount(1, $matched, '->filterXPath() automatically registers a namespace');
}

/**
 * Runs an XPath expression using the "yt" prefix against the XML fixture,
 * without registering the prefix first, and expects both
 * yt:accessControl elements to be returned.
 */
public function testFilterXPathWithNamespace()
{
    $fixture = $this->createTestXmlCrawler();
    $matched = $fixture->filterXPath('//yt:accessControl');

    $this->assertCount(2, $matched, '->filterXPath() automatically registers a namespace');
}

/**
 * Runs an XPath expression that mixes two prefixes ("media" and "yt")
 * against the XML fixture and expects the single yt:aspectRatio element
 * nested in media:group to be returned.
 */
public function testFilterXPathWithMultipleNamespaces()
{
    $fixture = $this->createTestXmlCrawler();
    $matched = $fixture->filterXPath('//media:group/yt:aspectRatio');

    $this->assertCount(1, $matched, '->filterXPath() automatically registers multiple namespaces');
}

/**
* @covers Symfony\Component\DomCrawler\Crawler::filter
*/
public function testFilter()
{
$this->markSkippedIfCssSelectorNotPresent();

$crawler = $this->createTestCrawler();
$this->assertNotSame($crawler, $crawler->filter('li'), '->filter() returns a new instance of a crawler');
$this->assertInstanceOf('Symfony\\Component\\DomCrawler\\Crawler', $crawler, '->filter() returns a new instance of a crawler');
Expand All @@ -384,6 +404,14 @@ public function testFilter()
$this->assertCount(6, $crawler->filter('li'), '->filter() filters the node list with the CSS selector');
}

/**
 * Runs a namespaced CSS selector ("yt|accessControl") against the XML
 * fixture and expects both yt:accessControl elements to be returned.
 *
 * Skipped when the CssSelector component cannot be found.
 */
public function testFilterWithNamespace()
{
    $this->markSkippedIfCssSelectorNotPresent();

    $fixture = $this->createTestXmlCrawler();
    $matched = $fixture->filter('yt|accessControl');

    $this->assertCount(2, $matched, '->filter() automatically registers namespaces');
}

public function testSelectLink()
{
$crawler = $this->createTestCrawler();
Expand Down Expand Up @@ -656,6 +684,22 @@ public function createTestCrawler($uri = null)
return new Crawler($dom, $uri);
}

/**
 * Builds a Crawler from an inline XML document used by the namespace tests.
 *
 * The root <entry> element declares a default namespace plus the "media"
 * and "yt" prefixes; it contains one <id>, two <yt:accessControl> elements
 * and a <media:group> holding a <media:title> and a <yt:aspectRatio>.
 *
 * @param mixed $uri Passed through as the second Crawler constructor argument
 *
 * @return Crawler
 */
protected function createTestXmlCrawler($uri = null)
{
    $atomEntry = '<?xml version="1.0" encoding="UTF-8"?>
<entry xmlns="http://www.w3.org/2005/Atom" xmlns:media="http://search.yahoo.com/mrss/" xmlns:yt="http://gdata.youtube.com/schemas/2007">
<id>tag:youtube.com,2008:video:kgZRZmEc9j4</id>
<yt:accessControl action="comment" permission="allowed"/>
<yt:accessControl action="videoRespond" permission="moderated"/>
<media:group>
<media:title type="plain">Chordates - CrashCourse Biology #24</media:title>
<yt:aspectRatio>widescreen</yt:aspectRatio>
</media:group>
</entry>';

    $crawler = new Crawler($atomEntry, $uri);

    return $crawler;
}

protected function createDomDocument()
{
$dom = new \DOMDocument();
Expand All @@ -672,4 +716,11 @@ protected function createNodeList()

return $domxpath->query('//div');
}

/**
 * Marks the current test as skipped when the CssSelector class cannot be
 * found; does nothing otherwise.
 */
protected function markSkippedIfCssSelectorNotPresent()
{
    $cssSelectorAvailable = class_exists('Symfony\Component\CssSelector\CssSelector');

    if ($cssSelectorAvailable) {
        return;
    }

    $this->markTestSkipped('The "CssSelector" component is not available');
}
}

0 comments on commit e5b8abb

Please sign in to comment.