Skip to content
Permalink
Browse files

minor #31257 [DomCrawler] fix HTML5 parser integration (nicolas-grekas)

This PR was merged into the 4.3 branch.

Discussion
----------

[DomCrawler] fix HTML5 parser integration

| Q             | A
| ------------- | ---
| Branch?       | master
| Bug fix?      | yes
| New feature?  | no
| BC breaks?    | no
| Deprecations? | no
| Tests pass?   | yes
| Fixed tickets | -
| License       | MIT
| Doc PR        | -

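Spotted while reviewing #30892.
The current logic is context-dependent: by changing the order of calls, you can get different behaviors. For example, once `addHtmlContent()` has seen content starting with `<!doctype html>`, the HTML5 parser stays set on the crawler and is then used for all later content as well, whether or not that content is HTML5.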

Commits
-------

ba83bda [DomCrawler] fix HTML5 parser integration
  • Loading branch information...
nicolas-grekas committed May 9, 2019
2 parents cbbf8b7 + ba83bda commit 4f290d784c3c2222a79d5043b1472a8b62a67c0f
Showing with 8 additions and 15 deletions.
  1. +8 −15 src/Symfony/Component/DomCrawler/Crawler.php
@@ -69,6 +69,7 @@ public function __construct($node = null, string $uri = null, string $baseHref =
{
$this->uri = $uri;
$this->baseHref = $baseHref ?: $uri;
$this->html5Parser = class_exists(HTML5::class) ? new HTML5(['disable_html_ns' => true]) : null;
$this->add($node);
}
@@ -190,13 +191,7 @@ public function addContent($content, $type = null)
public function addHtmlContent($content, $charset = 'UTF-8')
{
// Use HTML5 parser if the content is HTML5 and the library is available
if (!$this->html5Parser
&& class_exists(HTML5::class)
&& '<!doctype html>' === strtolower(substr(ltrim($content), 0, 15))) {
$this->html5Parser = new HTML5(['disable_html_ns' => true]);
}
$dom = null !== $this->html5Parser ? $this->parseHtml5($content, $charset) : $this->parseXhtml($content, $charset);
$dom = null !== $this->html5Parser && strspn($content, " \t\r\n") === stripos($content, '<!doctype html>') ? $this->parseHtml5($content, $charset) : $this->parseXhtml($content, $charset);
$this->addDocument($dom);
$base = $this->filterRelativeXPath('descendant-or-self::base')->extract(['href']);
@@ -599,18 +594,16 @@ public function html(/* $default = null */)
throw new \InvalidArgumentException('The current node list is empty.');
}
if (null !== $this->html5Parser) {
$html = '';
foreach ($this->getNode(0)->childNodes as $child) {
$html .= $this->html5Parser->saveHTML($child);
}
$node = $this->getNode(0);
$owner = $node->ownerDocument;
return $html;
if (null !== $this->html5Parser && '<!DOCTYPE html>' === $owner->saveXML($owner->childNodes[0])) {
$owner = $this->html5Parser;
}
$html = '';
foreach ($this->getNode(0)->childNodes as $child) {
$html .= $child->ownerDocument->saveHTML($child);
foreach ($node->childNodes as $child) {
$html .= $owner->saveHTML($child);
}
return $html;

0 comments on commit 4f290d7

Please sign in to comment.
You can’t perform that action at this time.