Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[DomCrawler] Fix behaviour with <base> tag #13145

Merged
merged 3 commits into from
Dec 29, 2014
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 29 additions & 23 deletions src/Symfony/Component/DomCrawler/Crawler.php
Original file line number Diff line number Diff line change
Expand Up @@ -23,21 +23,27 @@
class Crawler extends \SplObjectStorage
{
/**
* @var string The current URI or the base href value
* @var string The current URI
*/
protected $uri;

/**
* @var string The base href value
*/
private $baseHref;

/**
* Constructor.
*
* @param mixed $node A Node to use as the base for the crawling
* @param string $uri The current URI or the base href value
*
* @param string $currentUri The current URI
* @param string $baseHref The base href value
* @api
*/
public function __construct($node = null, $uri = null)
public function __construct($node = null, $currentUri = null, $baseHref = null)
{
$this->uri = $uri;
$this->uri = $currentUri;
$this->baseHref = $baseHref ?: $currentUri;

$this->add($node);
}
Expand Down Expand Up @@ -176,13 +182,13 @@ public function addHtmlContent($content, $charset = 'UTF-8')

$baseHref = current($base);
if (count($base) && !empty($baseHref)) {
if ($this->uri) {
if ($this->baseHref) {
$linkNode = $dom->createElement('a');
$linkNode->setAttribute('href', $baseHref);
$link = new Link($linkNode, $this->uri);
$this->uri = $link->getUri();
$link = new Link($linkNode, $this->baseHref);
$this->baseHref = $link->getUri();
} else {
$this->uri = $baseHref;
$this->baseHref = $baseHref;
}
}
}
Expand Down Expand Up @@ -294,11 +300,11 @@ public function eq($position)
{
foreach ($this as $i => $node) {
if ($i == $position) {
return new static($node, $this->uri);
return new static($node, $this->uri, $this->baseHref);
}
}

return new static(null, $this->uri);
return new static(null, $this->uri, $this->baseHref);
}

/**
Expand All @@ -323,7 +329,7 @@ public function each(\Closure $closure)
{
$data = array();
foreach ($this as $i => $node) {
$data[] = $closure(new static($node, $this->uri), $i);
$data[] = $closure(new static($node, $this->uri, $this->baseHref), $i);
}

return $data;
Expand All @@ -344,12 +350,12 @@ public function reduce(\Closure $closure)
{
$nodes = array();
foreach ($this as $i => $node) {
if (false !== $closure(new static($node, $this->uri), $i)) {
if (false !== $closure(new static($node, $this->uri, $this->baseHref), $i)) {
$nodes[] = $node;
}
}

return new static($nodes, $this->uri);
return new static($nodes, $this->uri, $this->baseHref);
}

/**
Expand Down Expand Up @@ -391,7 +397,7 @@ public function siblings()
throw new \InvalidArgumentException('The current node list is empty.');
}

return new static($this->sibling($this->getNode(0)->parentNode->firstChild), $this->uri);
return new static($this->sibling($this->getNode(0)->parentNode->firstChild), $this->uri, $this->baseHref);
}

/**
Expand All @@ -409,7 +415,7 @@ public function nextAll()
throw new \InvalidArgumentException('The current node list is empty.');
}

return new static($this->sibling($this->getNode(0)), $this->uri);
return new static($this->sibling($this->getNode(0)), $this->uri, $this->baseHref);
}

/**
Expand All @@ -427,7 +433,7 @@ public function previousAll()
throw new \InvalidArgumentException('The current node list is empty.');
}

return new static($this->sibling($this->getNode(0), 'previousSibling'), $this->uri);
return new static($this->sibling($this->getNode(0), 'previousSibling'), $this->uri, $this->baseHref);
}

/**
Expand All @@ -454,7 +460,7 @@ public function parents()
}
}

return new static($nodes, $this->uri);
return new static($nodes, $this->uri, $this->baseHref);
}

/**
Expand All @@ -474,7 +480,7 @@ public function children()

$node = $this->getNode(0)->firstChild;

return new static($node ? $this->sibling($node) : array(), $this->uri);
return new static($node ? $this->sibling($node) : array(), $this->uri, $this->baseHref);
}

/**
Expand Down Expand Up @@ -601,7 +607,7 @@ public function filterXPath($xpath)

// If we dropped all expressions in the XPath while preparing it, there would be no match
if ('' === $xpath) {
return new static(null, $this->uri);
return new static(null, $this->uri, $this->baseHref);
}

return $this->filterRelativeXPath($xpath);
Expand Down Expand Up @@ -687,7 +693,7 @@ public function link($method = 'get')

$node = $this->getNode(0);

return new Link($node, $this->uri, $method);
return new Link($node, $this->baseHref, $method);
}

/**
Expand All @@ -701,7 +707,7 @@ public function links()
{
$links = array();
foreach ($this as $node) {
$links[] = new Link($node, $this->uri, 'get');
$links[] = new Link($node, $this->baseHref, 'get');
}

return $links;
Expand Down Expand Up @@ -792,7 +798,7 @@ public static function xpathLiteral($s)
*/
private function filterRelativeXPath($xpath)
{
$crawler = new static(null, $this->uri);
$crawler = new static(null, $this->uri, $this->baseHref);

foreach ($this as $node) {
$domxpath = new \DOMXPath($node->ownerDocument);
Expand Down
43 changes: 34 additions & 9 deletions src/Symfony/Component/DomCrawler/Tests/CrawlerTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -351,8 +351,7 @@ public function testText()
public function testHtml()
{
$this->assertEquals('<img alt="Bar">', $this->createTestCrawler()->filterXPath('//a[5]')->html());
$this->assertEquals('<input type="text" value="TextValue" name="TextName"><input type="submit" value="FooValue" name="FooName" id="FooId"><input type="button" value="BarValue" name="BarName" id="BarId"><button value="ButtonValue" name="ButtonName" id="ButtonId"></button>'
, trim($this->createTestCrawler()->filterXPath('//form[@id="FooFormId"]')->html()));
$this->assertEquals('<input type="text" value="TextValue" name="TextName"><input type="submit" value="FooValue" name="FooName" id="FooId"><input type="button" value="BarValue" name="BarName" id="BarId"><button value="ButtonValue" name="ButtonName" id="ButtonId"></button>', trim($this->createTestCrawler()->filterXPath('//form[@id="FooFormId"]')->html()));

try {
$this->createTestCrawler()->filterXPath('//ol')->html();
Expand Down Expand Up @@ -824,16 +823,42 @@ public function testParents()
}
}

public function testBaseTag()
/**
* @dataProvider getBaseTagData
*/
public function testBaseTag($baseValue, $linkValue, $expectedUri, $currentUri = null, $description = null)
{
$crawler = new Crawler('<html><base href="http://base.com"><a href="link"></a></html>');
$this->assertEquals('http://base.com/link', $crawler->filterXPath('//a')->link()->getUri());
$crawler = new Crawler('<html><base href="'.$baseValue.'"><a href="'.$linkValue.'"></a></html>', $currentUri);
$this->assertEquals($expectedUri, $crawler->filterXPath('//a')->link()->getUri(), $description);
}

$crawler = new Crawler('<html><base href="//base.com"><a href="link"></a></html>', 'https://domain.com');
$this->assertEquals('https://base.com/link', $crawler->filterXPath('//a')->link()->getUri(), '<base> tag can use a schema-less URL');
/**
* Data provider for testBaseTag().
*
* Each row supplies, in order: the href of the <base> tag, the href of the
* <a> tag, the URI the link is expected to resolve to, and optionally the
* current URI handed to the Crawler and the assertion message. Rows that
* omit the last two entries rely on the null defaults of testBaseTag().
*
* @return array
*/
public function getBaseTagData()
{
return array(
array('http://base.com', 'link', 'http://base.com/link'),
array('//base.com', 'link', 'https://base.com/link', 'https://domain.com', '<base> tag can use a schema-less URL'),
array('path/', 'link', 'https://domain.com/path/link', 'https://domain.com', '<base> tag can set a path'),
array('http://base.com', '#', 'http://base.com#', 'http://domain.com/path/link', '<base> tag does work with links to an anchor'),
array('http://base.com', '', 'http://base.com', 'http://domain.com/path/link', '<base> tag does work with empty links'),
);
}

$crawler = new Crawler('<html><base href="path/"><a href="link"></a></html>', 'https://domain.com');
$this->assertEquals('https://domain.com/path/link', $crawler->filterXPath('//a')->link()->getUri(), '<base> tag can set a path');
/**
* Checks the URI of a form found in a document that contains a <base> tag.
*
* Builds a small HTML document from the given <base> href and form action,
* creates a Crawler for it with the given current URI, selects the submit
* button and compares the resulting form URI with the expected one.
*
* @param string      $baseValue   The href of the <base> tag
* @param string      $actionValue The action attribute of the form
* @param string      $expectedUri The URI the form is expected to report
* @param string|null $currentUri  The current URI passed to the Crawler
* @param string|null $description The assertion message
*
* @dataProvider getBaseTagWithFormData
*/
public function testBaseTagWithForm($baseValue, $actionValue, $expectedUri, $currentUri = null, $description = null)
{
$crawler = new Crawler('<html><base href="'.$baseValue.'"><form method="post" action="'.$actionValue.'"><button type="submit" name="submit"/></form></html>', $currentUri);
// The form is reached through its submit button, not by selecting the <form> node itself.
$this->assertEquals($expectedUri, $crawler->filterXPath('//button')->form()->getUri(), $description);
}

/**
* Data provider for testBaseTagWithForm().
*
* Each row supplies, in order: the href of the <base> tag, the form action,
* the expected form URI, the current URI handed to the Crawler and the
* assertion message. In every row the expected URI equals the current URI,
* including the rows whose form action is empty.
*
* @return array
*/
public function getBaseTagWithFormData()
{
return array(
array('/basepath', '/registration', 'http://domain.com/registration', 'http://domain.com/registration', '<base> tag does work with a path and form action'),
array('/basepath', '', 'http://domain.com/registration', 'http://domain.com/registration', '<base> tag does work with a path and empty form action'),
array('http://base.com', '', 'http://domain.com/path/form', 'http://domain.com/path/form', '<base> tag does work with a URL and an empty form action'),
);
}

public function createTestCrawler($uri = null)
Expand Down