Permalink
Browse files

MDL-25826 integrate HTMLPurifier 4.3.0 and improve performance

The new HTMLPurifier finally caches the schema properly, eliminating both extra CPU cycles and disk writes. The repeated directory-existence checks, which might cause problems on NFS shares, now run only when a purifier instance is first created.
  • Loading branch information...
1 parent 88efb58 commit 7df50029b447957d52227c9a8019538eb8e8c68e @skodak skodak committed Apr 9, 2011
Showing with 15 additions and 13 deletions.
  1. 0 lib/simpletest/testpurifier.php
  2. +1 −1 lib/thirdpartylibs.xml
  3. +14 −12 lib/weblib.php
No changes.
@@ -67,7 +67,7 @@
<location>htmlpurifier</location>
<name>HTML Purifier</name>
<license>LGPL</license>
- <version>4.2.0</version>
+ <version>4.3.0</version>
<licenseversion>2.1+</licenseversion>
</library>
<library>
View
@@ -1509,20 +1509,21 @@ function clean_text($text, $format = FORMAT_HTML, $options = array()) {
function purify_html($text, $options = array()) {
global $CFG;
- // this can not be done only once because we sometimes need to reset the cache
- $cachedir = $CFG->dataroot.'/cache/htmlpurifier';
- check_dir_exists($cachedir);
-
$type = !empty($options['allowid']) ? 'allowid' : 'normal';
static $purifiers = array();
if (empty($purifiers[$type])) {
+
+ // make sure the serializer dir exists, it should be fine if it disappears later during cache reset
+ $cachedir = $CFG->dataroot.'/cache/htmlpurifier';
+ check_dir_exists($cachedir);
+
require_once $CFG->libdir.'/htmlpurifier/HTMLPurifier.safe-includes.php';
$config = HTMLPurifier_Config::createDefault();
$config->set('HTML.DefinitionID', 'moodlehtml');
- $config->set('HTML.DefinitionRev', 1);
+ $config->set('HTML.DefinitionRev', 2);
$config->set('Cache.SerializerPath', $cachedir);
- //$config->set('Cache.SerializerPermission', $CFG->directorypermissions); // it would be nice to get this upstream
+ $config->set('Cache.SerializerPermissions', $CFG->directorypermissions);
$config->set('Core.NormalizeNewlines', false);
$config->set('Core.ConvertDocumentToFragment', true);
$config->set('Core.Encoding', 'UTF-8');
@@ -1540,12 +1541,13 @@ function purify_html($text, $options = array()) {
$config->set('Attr.EnableID', true);
}
- $def = $config->getHTMLDefinition(true);
- $def->addElement('nolink', 'Block', 'Flow', array()); // skip our filters inside
- $def->addElement('tex', 'Inline', 'Inline', array()); // tex syntax, equivalent to $$xx$$
- $def->addElement('algebra', 'Inline', 'Inline', array()); // algebra syntax, equivalent to @@xx@@
- $def->addElement('lang', 'Block', 'Flow', array(), array('lang'=>'CDATA')); // old anf future style multilang - only our hacked lang attribute
- $def->addAttribute('span', 'xxxlang', 'CDATA'); // current problematic multilang
+ if ($def = $config->maybeGetRawHTMLDefinition()) {
+ $def->addElement('nolink', 'Block', 'Flow', array()); // skip our filters inside
+ $def->addElement('tex', 'Inline', 'Inline', array()); // tex syntax, equivalent to $$xx$$
+ $def->addElement('algebra', 'Inline', 'Inline', array()); // algebra syntax, equivalent to @@xx@@
+ $def->addElement('lang', 'Block', 'Flow', array(), array('lang'=>'CDATA')); // old and future style multilang - only our hacked lang attribute
+ $def->addAttribute('span', 'xxxlang', 'CDATA'); // current problematic multilang
+ }
$purifier = new HTMLPurifier($config);
$purifiers[$type] = $purifier;

0 comments on commit 7df5002

Please sign in to comment.