Skip to content

Commit

Permalink
Merge branch 'LukasRos-master'
Browse files Browse the repository at this point in the history
  • Loading branch information
paquettg committed Nov 8, 2015
2 parents 3cd5a70 + 45ff7fe commit fc41886
Show file tree
Hide file tree
Showing 4 changed files with 95 additions and 5 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
vendor
composer.lock
22 changes: 17 additions & 5 deletions src/PHPHtmlParser/Dom.php
Original file line number Diff line number Diff line change
Expand Up @@ -359,6 +359,12 @@ protected function isLoaded()
*/
protected function clean($str)
{
if ($this->options->get('cleanupInput') != true)
{
// skip entire cleanup step
return $str;
}

// clean out the \n\r
$str = str_replace(["\r\n", "\r", "\n"], ' ', $str);

Expand All @@ -372,14 +378,20 @@ protected function clean($str)
$str = mb_eregi_replace("<!\[CDATA\[(.*?)\]\]>", '', $str);

// strip out <script> tags
$str = mb_eregi_replace("<\s*script[^>]*[^/]>(.*?)<\s*/\s*script\s*>", '', $str);
$str = mb_eregi_replace("<\s*script\s*>(.*?)<\s*/\s*script\s*>", '', $str);
if ($this->options->get('removeScripts') == true)
{
$str = mb_eregi_replace("<\s*script[^>]*[^/]>(.*?)<\s*/\s*script\s*>", '', $str);
$str = mb_eregi_replace("<\s*script\s*>(.*?)<\s*/\s*script\s*>", '', $str);
}

// strip out <style> tags
$str = mb_eregi_replace("<\s*style[^>]*[^/]>(.*?)<\s*/\s*style\s*>", '', $str);
$str = mb_eregi_replace("<\s*style\s*>(.*?)<\s*/\s*style\s*>", '', $str);
if ($this->options->get('removeStyles') == true)
{
$str = mb_eregi_replace("<\s*style[^>]*[^/]>(.*?)<\s*/\s*style\s*>", '', $str);
$str = mb_eregi_replace("<\s*style\s*>(.*?)<\s*/\s*style\s*>", '', $str);
}

// strip out pre-formatted tags
// strip out preformatted tags
$str = mb_eregi_replace("<\s*(?:code)[^>]*>(.*?)<\s*/\s*(?:code)\s*>", '', $str);

// strip out server side scripts
Expand Down
3 changes: 3 additions & 0 deletions src/PHPHtmlParser/Options.php
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ class Options {
'whitespaceTextNode' => true,
'strict' => false,
'enforceEncoding' => null,
'cleanupInput' => true,
'removeScripts' => true,
'removeStyles' => true
];

/**
Expand Down
73 changes: 73 additions & 0 deletions tests/Options/CleanupTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
<?php

use PHPHtmlParser\Dom;

/**
 * Exercises the input-cleanup options of the Dom parser: 'cleanupInput',
 * 'removeStyles' and 'removeScripts'.
 *
 * Every test parses the same fixture file and checks how many <style> and
 * <script> elements remain in the parsed document.
 */
class CleanupTest extends PHPUnit_Framework_TestCase {

    /**
     * Creates a parser, applies the given options and loads the shared fixture.
     *
     * @param array $options Option overrides handed to Dom::setOptions().
     * @return Dom The parser after loading the fixture file.
     */
    private function parseFixture(array $options)
    {
        $parser = new Dom();
        $parser->setOptions($options);
        $parser->loadFromFile('tests/files/horrible.html');

        return $parser;
    }

    /**
     * With 'cleanupInput' enabled, neither style nor script elements are found.
     */
    public function testCleanupInputTrue()
    {
        $parser = $this->parseFixture(['cleanupInput' => true]);

        $styleCount = count($parser->find('style'));
        $this->assertEquals(0, $styleCount);

        $scriptCount = count($parser->find('script'));
        $this->assertEquals(0, $scriptCount);
    }

    /**
     * With 'cleanupInput' disabled, exactly one style and one script element
     * are expected to be found.
     */
    public function testCleanupInputFalse()
    {
        $parser = $this->parseFixture(['cleanupInput' => false]);

        $styleCount = count($parser->find('style'));
        $this->assertEquals(1, $styleCount);

        $scriptCount = count($parser->find('script'));
        $this->assertEquals(1, $scriptCount);
    }

    /**
     * With 'removeStyles' enabled, no style element is found.
     */
    public function testRemoveStylesTrue()
    {
        $parser = $this->parseFixture(['removeStyles' => true]);

        $styleCount = count($parser->find('style'));
        $this->assertEquals(0, $styleCount);
    }

    /**
     * With 'removeStyles' disabled, one style element is found and its
     * 'type' attribute is readable.
     */
    public function testRemoveStylesFalse()
    {
        $parser = $this->parseFixture(['removeStyles' => false]);

        $styleCount = count($parser->find('style'));
        $this->assertEquals(1, $styleCount);

        $styleType = $parser->find('style')->getAttribute('type');
        $this->assertEquals('text/css', $styleType);
    }

    /**
     * With 'removeScripts' enabled, no script element is found.
     */
    public function testRemoveScriptsTrue()
    {
        $parser = $this->parseFixture(['removeScripts' => true]);

        $scriptCount = count($parser->find('script'));
        $this->assertEquals(0, $scriptCount);
    }

    /**
     * With 'removeScripts' disabled, one script element is found and its
     * 'type' attribute is readable.
     */
    public function testRemoveScriptsFalse()
    {
        $parser = $this->parseFixture(['removeScripts' => false]);

        $scriptCount = count($parser->find('script'));
        $this->assertEquals(1, $scriptCount);

        $scriptType = $parser->find('script')->getAttribute('type');
        $this->assertEquals('text/JavaScript', $scriptType);
    }

}

0 comments on commit fc41886

Please sign in to comment.