Skip to content

Commit

Permalink
bug #13145 [DomCrawler] Fix behaviour with <base> tag (dkop, WouterJ)
Browse files Browse the repository at this point in the history
This PR was merged into the 2.3 branch.

Discussion
----------

[DomCrawler] Fix behaviour with <base> tag

Finishes #12283

| Q             | A
| ------------- | ---
| Bug fix?      | yes
| New feature?  | no
| BC breaks?    | no
| Deprecations? | no
| Tests pass?   | yes
| Fixed tickets | #12283, #12143, #12144
| License       | MIT
| Doc PR        | -

Commits
-------

91447e8 Make fabbot happy
1d35e48 Clean up testing
61f22d7 [DomCrawler] fixed bug #12143
  • Loading branch information
fabpot committed Dec 29, 2014
2 parents 459b8b6 + 91447e8 commit 0469ea8
Show file tree
Hide file tree
Showing 2 changed files with 62 additions and 30 deletions.
52 changes: 29 additions & 23 deletions src/Symfony/Component/DomCrawler/Crawler.php
Expand Up @@ -23,21 +23,27 @@
class Crawler extends \SplObjectStorage
{
/**
* @var string The current URI or the base href value
* @var string The current URI
*/
protected $uri;

/**
* @var string The base href value
*/
private $baseHref;

/**
* Constructor.
*
* @param mixed $node A Node to use as the base for the crawling
* @param string $uri The current URI or the base href value
*
* @param string $currentUri The current URI
* @param string $baseHref The base href value
* @api
*/
public function __construct($node = null, $uri = null)
public function __construct($node = null, $currentUri = null, $baseHref = null)
{
$this->uri = $uri;
$this->uri = $currentUri;
$this->baseHref = $baseHref ?: $currentUri;

$this->add($node);
}
Expand Down Expand Up @@ -176,13 +182,13 @@ public function addHtmlContent($content, $charset = 'UTF-8')

$baseHref = current($base);
if (count($base) && !empty($baseHref)) {
if ($this->uri) {
if ($this->baseHref) {
$linkNode = $dom->createElement('a');
$linkNode->setAttribute('href', $baseHref);
$link = new Link($linkNode, $this->uri);
$this->uri = $link->getUri();
$link = new Link($linkNode, $this->baseHref);
$this->baseHref = $link->getUri();
} else {
$this->uri = $baseHref;
$this->baseHref = $baseHref;
}
}
}
Expand Down Expand Up @@ -294,11 +300,11 @@ public function eq($position)
{
foreach ($this as $i => $node) {
if ($i == $position) {
return new static($node, $this->uri);
return new static($node, $this->uri, $this->baseHref);
}
}

return new static(null, $this->uri);
return new static(null, $this->uri, $this->baseHref);
}

/**
Expand All @@ -323,7 +329,7 @@ public function each(\Closure $closure)
{
$data = array();
foreach ($this as $i => $node) {
$data[] = $closure(new static($node, $this->uri), $i);
$data[] = $closure(new static($node, $this->uri, $this->baseHref), $i);
}

return $data;
Expand All @@ -344,12 +350,12 @@ public function reduce(\Closure $closure)
{
$nodes = array();
foreach ($this as $i => $node) {
if (false !== $closure(new static($node, $this->uri), $i)) {
if (false !== $closure(new static($node, $this->uri, $this->baseHref), $i)) {
$nodes[] = $node;
}
}

return new static($nodes, $this->uri);
return new static($nodes, $this->uri, $this->baseHref);
}

/**
Expand Down Expand Up @@ -391,7 +397,7 @@ public function siblings()
throw new \InvalidArgumentException('The current node list is empty.');
}

return new static($this->sibling($this->getNode(0)->parentNode->firstChild), $this->uri);
return new static($this->sibling($this->getNode(0)->parentNode->firstChild), $this->uri, $this->baseHref);
}

/**
Expand All @@ -409,7 +415,7 @@ public function nextAll()
throw new \InvalidArgumentException('The current node list is empty.');
}

return new static($this->sibling($this->getNode(0)), $this->uri);
return new static($this->sibling($this->getNode(0)), $this->uri, $this->baseHref);
}

/**
Expand All @@ -427,7 +433,7 @@ public function previousAll()
throw new \InvalidArgumentException('The current node list is empty.');
}

return new static($this->sibling($this->getNode(0), 'previousSibling'), $this->uri);
return new static($this->sibling($this->getNode(0), 'previousSibling'), $this->uri, $this->baseHref);
}

/**
Expand All @@ -454,7 +460,7 @@ public function parents()
}
}

return new static($nodes, $this->uri);
return new static($nodes, $this->uri, $this->baseHref);
}

/**
Expand All @@ -474,7 +480,7 @@ public function children()

$node = $this->getNode(0)->firstChild;

return new static($node ? $this->sibling($node) : array(), $this->uri);
return new static($node ? $this->sibling($node) : array(), $this->uri, $this->baseHref);
}

/**
Expand Down Expand Up @@ -601,7 +607,7 @@ public function filterXPath($xpath)

// If we dropped all expressions in the XPath while preparing it, there would be no match
if ('' === $xpath) {
return new static(null, $this->uri);
return new static(null, $this->uri, $this->baseHref);
}

return $this->filterRelativeXPath($xpath);
Expand Down Expand Up @@ -687,7 +693,7 @@ public function link($method = 'get')

$node = $this->getNode(0);

return new Link($node, $this->uri, $method);
return new Link($node, $this->baseHref, $method);
}

/**
Expand All @@ -701,7 +707,7 @@ public function links()
{
$links = array();
foreach ($this as $node) {
$links[] = new Link($node, $this->uri, 'get');
$links[] = new Link($node, $this->baseHref, 'get');
}

return $links;
Expand Down Expand Up @@ -792,7 +798,7 @@ public static function xpathLiteral($s)
*/
private function filterRelativeXPath($xpath)
{
$crawler = new static(null, $this->uri);
$crawler = new static(null, $this->uri, $this->baseHref);

foreach ($this as $node) {
$domxpath = new \DOMXPath($node->ownerDocument);
Expand Down
40 changes: 33 additions & 7 deletions src/Symfony/Component/DomCrawler/Tests/CrawlerTest.php
Expand Up @@ -823,16 +823,42 @@ public function testParents()
}
}

public function testBaseTag()
/**
* @dataProvider getBaseTagData
*/
public function testBaseTag($baseValue, $linkValue, $expectedUri, $currentUri = null, $description = null)
{
$crawler = new Crawler('<html><base href="http://base.com"><a href="link"></a></html>');
$this->assertEquals('http://base.com/link', $crawler->filterXPath('//a')->link()->getUri());
$crawler = new Crawler('<html><base href="'.$baseValue.'"><a href="'.$linkValue.'"></a></html>', $currentUri);
$this->assertEquals($expectedUri, $crawler->filterXPath('//a')->link()->getUri(), $description);
}

$crawler = new Crawler('<html><base href="//base.com"><a href="link"></a></html>', 'https://domain.com');
$this->assertEquals('https://base.com/link', $crawler->filterXPath('//a')->link()->getUri(), '<base> tag can use a schema-less URL');
/**
 * Data provider for testBaseTag().
 *
 * Each row supplies, in order: the href value of the <base> tag, the href
 * value of the <a> tag, the URI the link is expected to resolve to and,
 * optionally, the current URI handed to the Crawler and the message shown
 * when the assertion fails.
 *
 * @return array
 */
public function getBaseTagData()
{
return array(
array('http://base.com', 'link', 'http://base.com/link'),
array('//base.com', 'link', 'https://base.com/link', 'https://domain.com', '<base> tag can use a schema-less URL'),
array('path/', 'link', 'https://domain.com/path/link', 'https://domain.com', '<base> tag can set a path'),
array('http://base.com', '#', 'http://base.com#', 'http://domain.com/path/link', '<base> tag does work with links to an anchor'),
array('http://base.com', '', 'http://base.com', 'http://domain.com/path/link', '<base> tag does work with empty links'),
);
}

$crawler = new Crawler('<html><base href="path/"><a href="link"></a></html>', 'https://domain.com');
$this->assertEquals('https://domain.com/path/link', $crawler->filterXPath('//a')->link()->getUri(), '<base> tag can set a path');
/**
 * Checks the URI of a form located in a document that declares a <base> tag.
 *
 * Builds a document made of a <base> element and a POST form holding a
 * submit button, selects the form through that button and compares its URI
 * with the expected one.
 *
 * @param string      $baseValue   The href attribute written into the <base> tag
 * @param string      $actionValue The action attribute written into the <form> tag
 * @param string      $expectedUri The URI the form is expected to report
 * @param string|null $currentUri  The current URI passed to the Crawler
 * @param string|null $description The message shown when the assertion fails
 *
 * @dataProvider getBaseTagWithFormData
 */
public function testBaseTagWithForm($baseValue, $actionValue, $expectedUri, $currentUri = null, $description = null)
{
$crawler = new Crawler('<html><base href="'.$baseValue.'"><form method="post" action="'.$actionValue.'"><button type="submit" name="submit"/></form></html>', $currentUri);
$this->assertEquals($expectedUri, $crawler->filterXPath('//button')->form()->getUri(), $description);
}

/**
 * Data provider for testBaseTagWithForm().
 *
 * Each row supplies, in order: the href value of the <base> tag, the action
 * value of the form, the URI the form is expected to report, the current URI
 * handed to the Crawler and the message shown when the assertion fails.
 *
 * In the rows with an empty action, the expected URI equals the current URI
 * and not the <base> href.
 *
 * @return array
 */
public function getBaseTagWithFormData()
{
return array(
array('/basepath', '/registration', 'http://domain.com/registration', 'http://domain.com/registration', '<base> tag does work with a path and form action'),
array('/basepath', '', 'http://domain.com/registration', 'http://domain.com/registration', '<base> tag does work with a path and empty form action'),
array('http://base.com', '', 'http://domain.com/path/form', 'http://domain.com/path/form', '<base> tag does work with a URL and an empty form action'),
);
}

public function createTestCrawler($uri = null)
Expand Down

0 comments on commit 0469ea8

Please sign in to comment.