Skip to content

Commit

Permalink
Added support for non-escaped quotes in attribute value
Browse files Browse the repository at this point in the history
fixes #37
  • Loading branch information
paquettg committed Oct 26, 2015
1 parent 5dc813c commit 6900951
Show file tree
Hide file tree
Showing 3 changed files with 58 additions and 18 deletions.
40 changes: 27 additions & 13 deletions src/PHPHtmlParser/Content.php
Original file line number Diff line number Diff line change
Expand Up @@ -34,23 +34,13 @@ class Content {
protected $slash = " />\r\n\t";
protected $attr = ' >';

/**
 * Initialises the content block from the raw string to be parsed.
 *
 * Stores the string, caches its byte length (strlen) and places the
 * read cursor at the very beginning.
 *
 * @param $content
 */
public function __construct($content)
{
    // start reading from the first byte
    $this->pos = 0;

    // keep the raw content together with its length in bytes
    $this->content = $content;
    $this->size = strlen($this->content);
}

/**
* Returns the current position of the parser.
*
* @return int
*/
public function getPosition()
{
return $this->pos;
Expand Down Expand Up @@ -163,19 +153,43 @@ public function copyUntil($string, $char = false, $escape = false)
$this->pos = $this->size;
return $return;
}

if ($position == $this->pos)
{
// we are at the right place
return '';
}

$return = substr($this->content, $this->pos, $position - $this->pos);
// set the new position
$this->pos = $position;
return $return;
}

/**
 * Moves one step forward and copies the content up to the next $string,
 * but only accepts the copy when it contains none of the characters
 * listed in $unless.
 *
 * On success the returned value is $string followed by the copied text.
 * If any character of $unless occurs in the copied text, the position is
 * restored to where it was before the call and an empty string is returned.
 *
 * @param string $string
 * @param string $unless
 * @return string
 */
public function copyUntilUnless($string, $unless)
{
    // remember the starting point so the read can be undone
    $startPos = $this->pos;

    $this->fastForward(1);
    $chunk = $this->copyUntil($string, true, true);

    // strcspn gives the length of the leading run free of any $unless
    // character; a shorter run than the chunk means one of them was hit
    if (strcspn($chunk, $unless) != strlen($chunk))
    {
        // rewind changes and return nothing
        $this->pos = $startPos;
        return '';
    }

    return $string.$chunk;
}

/**
* Copies the content until it reaches the token string.
*
Expand All @@ -202,7 +216,7 @@ public function skip($string, $copy = false)
{
$len = strspn($this->content, $string, $this->pos);

// make it chain-able if they don't want a copy
// make it chainable if they don't want a copy
$return = $this;
if ($copy)
{
Expand Down
22 changes: 17 additions & 5 deletions src/PHPHtmlParser/Dom.php
Original file line number Diff line number Diff line change
Expand Up @@ -498,12 +498,12 @@ protected function parseTag()
$space = $this->content->skipByToken('blank', true);
if (empty($space))
{
break;
$this->content->fastForward(1);
continue;
}

$name = $this->content->copyByToken('equal', true);
if ($name == '/' OR
empty($name))
if ($name == '/')
{
break;
}
Expand All @@ -525,14 +525,26 @@ protected function parseTag()
case '"':
$attr['doubleQuote'] = true;
$this->content->fastForward(1);
$attr['value'] = $this->content->copyUntil('"', false, true);
$string = $this->content->copyUntil('"', true, true);
do
{
$moreString = $this->content->copyUntilUnless('"', '=>');
$string .= $moreString;
} while( ! empty($moreString));
$attr['value'] = $string;
$this->content->fastForward(1);
$node->getTag()->$name = $attr;
break;
case "'":
$attr['doubleQuote'] = false;
$this->content->fastForward(1);
$attr['value'] = $this->content->copyUntil("'", false, true);
$string = $this->content->copyUntil("'", true, true);
do
{
$moreString = $this->content->copyUntilUnless("'", '=>');
$string .= $moreString;
} while( ! empty($moreString));
$attr['value'] = $string;
$this->content->fastForward(1);
$node->getTag()->$name = $attr;
break;
Expand Down
14 changes: 14 additions & 0 deletions tests/DomTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -271,4 +271,18 @@ public function testScriptCleanerScriptTag()
<p>....</p>');
$this->assertEquals('....', $dom->getElementsByTag('p')[1]->innerHtml);
}

/**
 * A double-quoted attribute value that itself contains unescaped double
 * quotes must be returned in full.
 */
public function testMultipleDoubleQuotes()
{
    $html = '<a title="This is a "test" of double quotes" href="http://www.example.com">Hello</a>';

    $dom = new Dom();
    $dom->load($html);

    $anchor = $dom->getElementsByTag('a')[0];
    $this->assertEquals('This is a "test" of double quotes', $anchor->title);
}

/**
 * A single-quoted attribute value that itself contains an unescaped
 * single quote must be returned in full.
 */
public function testMultipleSingleQuotes()
{
    $html = "<a title='Ain't this the best' href=\"http://www.example.com\">Hello</a>";

    $dom = new Dom();
    $dom->load($html);

    $anchor = $dom->getElementsByTag('a')[0];
    $this->assertEquals("Ain't this the best", $anchor->title);
}
}

0 comments on commit 6900951

Please sign in to comment.