Skip to content

Commit

Permalink
Merge branch '2.8'
Browse files Browse the repository at this point in the history
* 2.8: (21 commits)
  Fix merge
  Fix typo
  Various fixes esp. on Windows
  Fix the validation of form resources to register the default theme
  Fix the retrieval of the value with property path when using a loader
  [appveyor] minor enhancements
  [Process] Disable failing tests on Windows
  [Translation] Fix the string casting in the XliffFileLoader
  Windows and Intl fixes
  Add appveyor.yml for C.I. on Windows
  [VarDumper] fixed HtmlDumper to specifically target the head tag
  [travis] merge php: nightly and deps=high test-matrix lines
  consistently use str_replace to unify directory separators
  Support omitting the <target> node in an .xlf file.
  Fix the handling of values for multiple choice types
  moved PHP nightly to PHP 7.0
  fixed tests using deprecation features
  [Form] made deprecation notice more precise
  fixed CS
  Fix BC break after split of ACL from core
  ...

Conflicts:
	.travis.yml
	composer.json
	src/Symfony/Bundle/TwigBundle/DependencyInjection/Configuration.php
	src/Symfony/Component/Intl/DateFormatter/IntlDateFormatter.php
	src/Symfony/Component/Intl/Tests/DateFormatter/AbstractIntlDateFormatterTest.php
	src/Symfony/Component/Locale/Tests/LocaleTest.php
  • Loading branch information
nicolas-grekas committed Aug 27, 2015
2 parents 3d14b9f + be577b3 commit ea8b77e
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 12 deletions.
45 changes: 34 additions & 11 deletions Crawler.php
Expand Up @@ -166,20 +166,43 @@ public function addHtmlContent($content, $charset = 'UTF-8')
$dom = new \DOMDocument('1.0', $charset);
$dom->validateOnParse = true;

if (function_exists('mb_convert_encoding')) {
$hasError = false;
set_error_handler(function () use (&$hasError) {
$hasError = true;
});
$tmpContent = @mb_convert_encoding($content, 'HTML-ENTITIES', $charset);

restore_error_handler();

if (!$hasError) {
$content = $tmpContent;
set_error_handler(function () {throw new \Exception();});

try {
// Convert charset to HTML-entities to work around bugs in DOMDocument::loadHTML()

if (function_exists('mb_convert_encoding')) {
$content = mb_convert_encoding($content, 'HTML-ENTITIES', $charset);
} elseif (function_exists('iconv')) {
$content = preg_replace_callback(
'/[\x80-\xFF]+/',
function ($m) {
// Callback for preg_replace_callback(): $m[0] is one matched run of bytes in
// the 0x80-0xFF range. The run is decoded as UTF-8 multi-byte sequences and
// returned as a string of decimal numeric character references ("&#NNN;").
// NOTE(review): the subject is the output of iconv(..., 'UTF-8', ...), so the
// bytes are presumably well-formed UTF-8 — no validation is done here.

// unpack('C*') yields the bytes as unsigned integers in an array whose keys
// start at 1, which is why the cursor $i starts at 1.
$m = unpack('C*', $m[0]);
$i = 1;
$entities = '';

while (isset($m[$i])) {
// The lead byte selects the sequence length; each branch strips the marker
// bits from the lead byte and from every continuation byte (0x80), then
// shifts the payloads into place to rebuild the code point. The $i++
// operands are evaluated left to right, advancing the cursor past the
// whole sequence.
if (0xF0 <= $m[$i]) {
// Lead byte >= 0xF0: 4-byte sequence.
$c = (($m[$i++] - 0xF0) << 18) + (($m[$i++] - 0x80) << 12) + (($m[$i++] - 0x80) << 6) + $m[$i++] - 0x80;
} elseif (0xE0 <= $m[$i]) {
// Lead byte >= 0xE0: 3-byte sequence.
$c = (($m[$i++] - 0xE0) << 12) + (($m[$i++] - 0x80) << 6) + $m[$i++] - 0x80;
} else {
// Any other byte is treated as the lead byte of a 2-byte sequence.
$c = (($m[$i++] - 0xC0) << 6) + $m[$i++] - 0x80;
}

// Append the code point as a decimal numeric character reference.
$entities .= '&#'.$c.';';
}

return $entities;
},
iconv($charset, 'UTF-8', $content)
);
}
} catch (\Exception $e) {
}

restore_error_handler();

if ('' !== trim($content)) {
@$dom->loadHTML($content);
}
Expand Down
4 changes: 3 additions & 1 deletion Tests/CrawlerTest.php
Expand Up @@ -81,6 +81,7 @@ public function testAddHtmlContent()

/**
* @covers Symfony\Component\DomCrawler\Crawler::addHtmlContent
* @requires extension mbstring
*/
public function testAddHtmlContentCharset()
{
Expand Down Expand Up @@ -115,6 +116,7 @@ public function testAddHtmlContentUnsupportedCharset()

/**
* @covers Symfony\Component\DomCrawler\Crawler::addHtmlContent
* @requires extension mbstring
*/
public function testAddHtmlContentCharsetGbk()
{
Expand Down Expand Up @@ -235,7 +237,7 @@ public function testAddContent()
$this->assertEquals('中文', $crawler->filterXPath('//span')->text(), '->addContent() guess wrong charset');

$crawler = new Crawler();
$crawler->addContent(mb_convert_encoding('<html><head><meta charset="Shift_JIS"></head><body>日本語</body></html>', 'SJIS', 'UTF-8'));
$crawler->addContent(iconv('UTF-8', 'SJIS', '<html><head><meta charset="Shift_JIS"></head><body>日本語</body></html>'));
$this->assertEquals('日本語', $crawler->filterXPath('//body')->text(), '->addContent() can recognize "Shift_JIS" in html5 meta charset tag');
}

Expand Down

0 comments on commit ea8b77e

Please sign in to comment.