Browse files

[DomCrawler] fixed encoding when using addHtmlContent() (fixes #3881)

  • Loading branch information...
1 parent a5b7adc commit 49d80fac919321c1e255757fe47b97910f667271 @fabpot fabpot committed May 7, 2012
Showing with 26 additions and 0 deletions.
  1. +4 −0 Crawler.php
  2. +22 −0 Tests/CrawlerTest.php
View
4 Crawler.php
@@ -129,6 +129,10 @@ public function addHtmlContent($content, $charset = 'UTF-8')
$dom = new \DOMDocument('1.0', $charset);
$dom->validateOnParse = true;
+ if (function_exists('mb_convert_encoding')) {
+ $content = mb_convert_encoding($content, 'HTML-ENTITIES', $charset);
+ }
+
$current = libxml_use_internal_errors(true);
@$dom->loadHTML($content);
libxml_use_internal_errors($current);
View
22 Tests/CrawlerTest.php
@@ -72,6 +72,17 @@ public function testAddHtmlContent()
/**
* @covers Symfony\Component\DomCrawler\Crawler::addHtmlContent
*/
+ public function testAddHtmlContentCharset()
+ {
+ // Multi-byte UTF-8 text is fed through the HTML loading path with an explicit charset.
+ $crawler = new Crawler();
+ $crawler->addHtmlContent('<html><div class="foo">Tiếng Việt</html>', 'UTF-8');
+
+ // Strict comparison: the extracted node text must be the identical string,
+ // so any mangling of the multi-byte characters fails the test.
+ $this->assertSame('Tiếng Việt', $crawler->filterXPath('//div')->text());
+ }
+
+ /**
+ * @covers Symfony\Component\DomCrawler\Crawler::addHtmlContent
+ */
public function testAddHtmlContentWithErrors()
{
libxml_use_internal_errors(true);
@@ -111,6 +122,17 @@ public function testAddXmlContent()
/**
* @covers Symfony\Component\DomCrawler\Crawler::addXmlContent
*/
+ public function testAddXmlContentCharset()
+ {
+ // Same multi-byte UTF-8 text as the HTML charset test, loaded through the XML path.
+ $crawler = new Crawler();
+ $crawler->addXmlContent('<html><div class="foo">Tiếng Việt</div></html>', 'UTF-8');
+
+ // Strict comparison: the extracted node text must be the identical string,
+ // so any mangling of the multi-byte characters fails the test.
+ $this->assertSame('Tiếng Việt', $crawler->filterXPath('//div')->text());
+ }
+
+ /**
+ * @covers Symfony\Component\DomCrawler\Crawler::addXmlContent
+ */
public function testAddXmlContentWithErrors()
{
libxml_use_internal_errors(true);

0 comments on commit 49d80fa

Please sign in to comment.