Browse files

MDL-25826 integrate HTMLPurifier 4.3.0 and improve performance

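The new HTMLPurifier finally caches the schema properly, eliminating both extra CPU cycles and disk writes. The directory-existence check that was previously repeated on every call might cause problems on NFS shares, so it now runs only when a purifier instance is first created.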
  • Loading branch information...
1 parent 8adf19e commit fd8fa6f9d0ee8090b5763612fabd581df5fef6d0 @skodak skodak committed Apr 9, 2011
Showing with 15 additions and 13 deletions.
  1. 0 lib/simpletest/testpurifier.php
  2. +1 −1 lib/thirdpartylibs.xml
  3. +14 −12 lib/weblib.php
View
0 lib/simpletest/testpurifier.php
No changes.
View
2 lib/thirdpartylibs.xml
@@ -67,7 +67,7 @@
<location>htmlpurifier</location>
<name>HTML Purifier</name>
<license>LGPL</license>
- <version>4.2.0</version>
+ <version>4.3.0</version>
<licenseversion>2.1+</licenseversion>
</library>
<library>
View
26 lib/weblib.php
@@ -1509,20 +1509,21 @@ function clean_text($text, $format = FORMAT_HTML, $options = array()) {
function purify_html($text, $options = array()) {
global $CFG;
- // this can not be done only once because we sometimes need to reset the cache
- $cachedir = $CFG->dataroot.'/cache/htmlpurifier';
- check_dir_exists($cachedir);
-
$type = !empty($options['allowid']) ? 'allowid' : 'normal';
static $purifiers = array();
if (empty($purifiers[$type])) {
+
+ // make sure the serializer dir exists, it should be fine if it disappears later during cache reset
+ $cachedir = $CFG->dataroot.'/cache/htmlpurifier';
+ check_dir_exists($cachedir);
+
require_once $CFG->libdir.'/htmlpurifier/HTMLPurifier.safe-includes.php';
$config = HTMLPurifier_Config::createDefault();
$config->set('HTML.DefinitionID', 'moodlehtml');
- $config->set('HTML.DefinitionRev', 1);
+ $config->set('HTML.DefinitionRev', 2);
$config->set('Cache.SerializerPath', $cachedir);
- //$config->set('Cache.SerializerPermission', $CFG->directorypermissions); // it would be nice to get this upstream
+ $config->set('Cache.SerializerPermissions', $CFG->directorypermissions);
$config->set('Core.NormalizeNewlines', false);
$config->set('Core.ConvertDocumentToFragment', true);
$config->set('Core.Encoding', 'UTF-8');
@@ -1540,12 +1541,13 @@ function purify_html($text, $options = array()) {
$config->set('Attr.EnableID', true);
}
- $def = $config->getHTMLDefinition(true);
- $def->addElement('nolink', 'Block', 'Flow', array()); // skip our filters inside
- $def->addElement('tex', 'Inline', 'Inline', array()); // tex syntax, equivalent to $$xx$$
- $def->addElement('algebra', 'Inline', 'Inline', array()); // algebra syntax, equivalent to @@xx@@
- $def->addElement('lang', 'Block', 'Flow', array(), array('lang'=>'CDATA')); // old anf future style multilang - only our hacked lang attribute
- $def->addAttribute('span', 'xxxlang', 'CDATA'); // current problematic multilang
+ if ($def = $config->maybeGetRawHTMLDefinition()) {
+ $def->addElement('nolink', 'Block', 'Flow', array()); // skip our filters inside
+ $def->addElement('tex', 'Inline', 'Inline', array()); // tex syntax, equivalent to $$xx$$
+ $def->addElement('algebra', 'Inline', 'Inline', array()); // algebra syntax, equivalent to @@xx@@
+ $def->addElement('lang', 'Block', 'Flow', array(), array('lang'=>'CDATA')); // old and future style multilang - only our hacked lang attribute
+ $def->addAttribute('span', 'xxxlang', 'CDATA'); // current problematic multilang
+ }
$purifier = new HTMLPurifier($config);
$purifiers[$type] = $purifier;

0 comments on commit fd8fa6f

Please sign in to comment.