Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Browse files

API Make HTMLValue replace-able via DI

Extracted common code out to SS_HTMLValue and made abstract, then
put HTML 4 specific code in SS_HTML4Value. Its now possible to
replace HTMLValue with one designed for HTML 5 or XHTML

Requires a code change from new SS_HTMLValue to
Injector::inst()->create(HTMLValue)
  • Loading branch information...
commit 168f07149975e76de6dbf17039f3a5279da05e65 1 parent 8b2a911
@hafriedlander hafriedlander authored sminnee committed
View
3  _config/html.yml
@@ -0,0 +1,3 @@
+Injector:
+ HTMLValue:
+ class: SS_HTML4Value
View
2  core/Diff.php
@@ -686,7 +686,7 @@ public static function cleanHTML($content, $cleaner=null) {
$content = $cleaner->cleanHTML($content);
} else {
// At most basic level of cleaning, use DOMDocument to save valid XML.
- $doc = new SS_HTMLValue($content);
+ $doc = Injector::inst()->create('HTMLValue', $content);
$content = $doc->getContent();
}
View
2  core/HTMLCleaner.php
@@ -69,7 +69,7 @@ class PurifierHTMLCleaner extends HTMLCleaner {
public function cleanHTML($content) {
$html = new HTMLPurifier();
- $doc = new SS_HTMLValue($html->purify($content));
+ $doc = Injector::inst()->create('HTMLValue', $html->purify($content));
return $doc->getContent();
}
}
View
6 forms/HtmlEditorField.php
@@ -56,8 +56,8 @@ public function __construct($name, $title = null, $value = '') {
*/
public function Field($properties = array()) {
// mark up broken links
- $value = new SS_HTMLValue($this->value);
-
+ $value = Injector::inst()->create('HTMLValue', $this->value);
+
if($links = $value->getElementsByTagName('a')) foreach($links as $link) {
$matches = array();
@@ -103,7 +103,7 @@ public function saveInto(DataObjectInterface $record) {
$linkedPages = array();
$linkedFiles = array();
- $htmlValue = new SS_HTMLValue($this->value);
+ $htmlValue = Injector::inst()->create('HTMLValue', $this->value);
if(class_exists('SiteTree')) {
// Populate link tracking for internal links & links to asset files.
View
184 model/HTMLValue.php
@@ -1,89 +1,99 @@
<?php
+
/**
- * This class acts as a wrapper around the built in DOMDocument class in order to use it to manage a HTML snippet,
- * rather than a whole document, while still exposing the DOMDocument API.
+ * This class handles the converting of HTML fragments between a string and a DOMDocument based
+ * representation.
+ *
+ * It's designed to allow dependancy injection to replace the standard HTML4 version with one that
+ * handles XHTML or HTML5 instead
*
* @package framework
* @subpackage integration
*/
-class SS_HTMLValue extends ViewableData {
-
- /**
- * @var DOMDocument
- */
- protected $document;
-
- /**
- * @param string $content
- */
- public function __construct($content = null) {
- $this->setDocument(new DOMDocument('1.0', 'UTF-8'));
- $this->setScrictErrorChecking(false);
- $this->setOutputFormatting(false);
- $this->setContent($content);
+abstract class SS_HTMLValue extends ViewableData {
+ public function __construct($fragment = null) {
+ if ($fragment) $this->setContent($fragment);
parent::__construct();
}
- /**
- * Should strict error checking be used?
- * @param boolean $bool
- */
- public function setScrictErrorChecking($bool) {
- $this->getDocument()->scrictErrorChecking = $bool;
- }
-
- /**
- * Should the output be formatted?
- * @param boolean $bool
- */
- public function setOutputFormatting($bool) {
- $this->getDocument()->formatOutput = $bool;
- }
+ abstract public function setContent($fragment);
/**
+ * @param string $content
* @return string
*/
public function getContent() {
- // strip any surrounding tags before the <body> and after the </body> which are automatically added by
- // DOMDocument. Note that we can't use the argument to saveHTML() as it's only supported in PHP 5.3.6+,
- // we support 5.3.2 as a minimum in addition to the above, trim any surrounding newlines from the output
-
- // shortcodes use square brackets which get escaped into HTML entities by saveHTML()
- // this manually replaces them back to square brackets so that the shortcodes still work correctly
- // we can't use urldecode() here, as valid characters like "+" will be incorrectly replaced with spaces
- return trim(
- preg_replace(
- array(
- '/(.*)<body>/is',
- '/<\/body>(.*)/is',
- ),
- '',
- str_replace(array('%5B', '%5D'), array('[', ']'), $this->getDocument()->saveHTML())
- )
+ $doc = clone $this->getDocument();
+ $xp = new DOMXPath($doc);
+
+ // If there's no body, the content is empty string
+ if (!$doc->getElementsByTagName('body')->length) return '';
+
+ // saveHTML Percentage-encodes any URI-based attributes. We don't want this, since it interferes with
+ // shortcodes. So first, save all the attribute values for later restoration.
+ $attrs = array(); $i = 0;
+
+ foreach ($xp->query('//body//@*') as $attr) {
+ $key = "__HTMLVALUE_".($i++);
+ $attrs[$key] = $attr->value;
+ $attr->value = $key;
+ }
+
+ // Then, call saveHTML & extract out the content from the body tag
+ $res = preg_replace(
+ array(
+ '/^(.*?)<body>/is',
+ '/<\/body>(.*?)$/isD',
+ ),
+ '',
+ $doc->saveHTML()
);
+
+ // Then replace the saved attributes with their original versions
+ $res = preg_replace_callback('/__HTMLVALUE_(\d+)/', function($matches) use ($attrs) {
+ return $attrs[$matches[0]];
+ }, $res);

This is unescaping attributes.

DBField::create_field('HTMLText', '<a href="&quot;">Test</a>')->forTemplate() is ending up as <a href=""">Test</a>

@hafriedlander Owner

Good catch. We still need XML escaping, just not the URL escaping that saveHTML does. Fixed in #1773

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
+
+ return $res;
+ }
+
+ /** @see HTMLValue::getContent() */
+ public function forTemplate() {
+ return $this->getContent();
}
+ /** @var DOMDocument */
+ private $document = null;
+ /** @var bool */
+ private $valid = true;
+
/**
- * @param string $content
- * @return bool
+ * Get the DOMDocument for the passed content
+ * @return DOMDocument | false - Return false if HTML not valid, the DOMDocument instance otherwise
*/
- public function setContent($content) {
- // Ensure that \r (carriage return) characters don't get replaced with "&#13;" entity by DOMDocument
- // This behaviour is apparently XML spec, but we don't want this because it messes up the HTML
- $content = str_replace(chr(13), '', $content);
+ public function getDocument() {
+ if (!$this->valid) {
+ return false;
+ }
+ else if ($this->document) {
+ return $this->document;
+ }
+ else {
+ $this->document = new DOMDocument('1.0', 'UTF-8');
+ $this->document->strictErrorChecking = false;
+ $this->document->formatOutput = false;
- return @$this->getDocument()->loadHTML(
- '<html><head><meta http-equiv="content-type" content="text/html; charset=utf-8"></head>' .
- "<body>$content</body></html>"
- );
+ return $this->document;
+ }
}
/**
- * @return DOMDocument
+ * Is this HTMLValue in an errored state?
+ * @return bool
*/
- public function getDocument() {
- return $this->document;
+ public function isValid() {
+ return $this->valid;
}
/**
@@ -91,23 +101,57 @@ public function getDocument() {
*/
public function setDocument($document) {
$this->document = $document;
+ $this->valid = true;
+ }
+
+ public function setInvalid() {
+ $this->document = $this->valid = false;
+ }
+
+ /**
+ * Pass through any missed method calls to DOMDocument (if they exist)
+ * so that HTMLValue can be treated mostly like an instance of DOMDocument
+ */
+ public function __call($method, $arguments) {
+ $doc = $this->getDocument();
+
+ if(method_exists($doc, $method)) {
+ return call_user_func_array(array($doc, $method), $arguments);
+ }
+ else {
+ return parent::__call($method, $arguments);
+ }
}
/**
- * A simple convenience wrapper around DOMDocument::getElementsByTagName().
+ * Make an xpath query against this HTML
*
- * @param string $name
+ * @param $query string - The xpath query string
* @return DOMNodeList
*/
- public function getElementsByTagName($name) {
- return $this->getDocument()->getElementsByTagName($name);
+ public function query($query) {
+ $xp = new DOMXPath($this->getDocument());
+ return $xp->query($query);
}
-
+}
+
+class SS_HTML4Value extends SS_HTMLValue {
+
/**
- * @see HTMLValue::getContent()
+ * @param string $content
+ * @return bool
*/
- public function forTemplate() {
- return $this->getContent();
+ public function setContent($content) {
+ // Ensure that \r (carriage return) characters don't get replaced with "&#13;" entity by DOMDocument
+ // This behaviour is apparently XML spec, but we don't want this because it messes up the HTML
+ $content = str_replace(chr(13), '', $content);
+
+ // Reset the document if we're in an invalid state for some reason
+ if (!$this->isValid()) $this->setDocument(null);
+
+ return @$this->getDocument()->loadHTML(
+ '<html><head><meta http-equiv="content-type" content="text/html; charset=utf-8"></head>' .
+ "<body>$content</body></html>"
+ );
}
-
}
View
22 tests/integration/HTMLValueTest.php → tests/integration/HTML4ValueTest.php
@@ -3,10 +3,10 @@
* @package framework
* @subpackage tests
*/
-class SS_HTMLValueTest extends SapphireTest {
-
+class SS_HTML4ValueTest extends SapphireTest {
public function testInvalidHTMLSaving() {
- $value = new SS_HTMLValue();
+ $value = new SS_HTML4Value();
+
$invalid = array (
'<p>Enclosed Value</p></p>' => '<p>Enclosed Value</p>',
'<meta content="text/html"></meta>' => '<meta content="text/html">',
@@ -22,20 +22,15 @@ public function testInvalidHTMLSaving() {
}
public function testUtf8Saving() {
- $value = new SS_HTMLValue();
+ $value = new SS_HTML4Value();
+
$value->setContent('<p>ö ß ā い 家</p>');
$this->assertEquals('<p>ö ß ā い 家</p>', $value->getContent());
}
- public function testOutputFormatting() {
- $value = new SS_HTMLValue();
- $value->setOutputFormatting(true);
- $value->setContent('<meta content="text/html">');
- $this->assertEquals('<meta content="text/html">', $value->getContent(), 'Formatted output works');
- }
-
public function testInvalidHTMLTagNames() {
- $value = new SS_HTMLValue();
+ $value = new SS_HTML4Value();
+
$invalid = array(
'<p><div><a href="test-link"></p></div>',
'<html><div><a href="test-link"></a></a></html_>',
@@ -53,7 +48,8 @@ public function testInvalidHTMLTagNames() {
}
public function testMixedNewlines() {
- $value = new SS_HTMLValue();
+ $value = new SS_HTML4Value();
+
$value->setContent("<p>paragraph</p>\n<ul><li>1</li>\r\n</ul>");
$this->assertEquals(
"<p>paragraph</p>\n<ul><li>1</li>\n</ul>",
Please sign in to comment.
Something went wrong with that request. Please try again.