@@ -97,7 +97,20 @@ class rcube_washtml
9797 'tbody ' , 'td ' , 'tfoot ' , 'th ' , 'thead ' , 'tr ' , 'tt ' , 'u ' , 'ul ' , 'var ' , 'wbr ' , 'img ' ,
9898 'video ' , 'source ' ,
9999 // form elements
100- 'button ' , 'input ' , 'textarea ' , 'select ' , 'option ' , 'optgroup '
100+ 'button ' , 'input ' , 'textarea ' , 'select ' , 'option ' , 'optgroup ' ,
101+ // SVG
102+ 'svg ' , 'altglyph ' , 'altglyphdef ' , 'altglyphitem ' , 'animate ' , 'animatecolor ' ,
103+ 'animatemotion ' , 'animatetransform ' , 'circle ' , 'clippath ' , 'defs ' , 'desc ' ,
104+ 'ellipse ' , 'font ' , 'g ' , 'glyph ' , 'glyphref ' , 'hkern ' , 'image ' , 'line ' ,
105+ 'lineargradient ' , 'marker ' , 'mask ' , 'mpath ' , 'path ' , 'pattern ' ,
106+ 'polygon ' , 'polyline ' , 'radialgradient ' , 'rect ' , 'set ' , 'stop ' , 'switch ' , 'symbol ' ,
107+ 'text ' , 'textpath ' , 'tref ' , 'tspan ' , 'use ' , 'view ' , 'vkern ' , 'filter ' ,
108+ // SVG Filters
109+ 'feblend ' , 'fecolormatrix ' , 'fecomponenttransfer ' , 'fecomposite ' ,
110+ 'feconvolvematrix ' , 'fediffuselighting ' , 'fedisplacementmap ' ,
111+ 'feflood ' , 'fefunca ' , 'fefuncb ' , 'fefuncg ' , 'fefuncr ' , 'fegaussianblur ' ,
112+ 'feimage ' , 'femerge ' , 'femergenode ' , 'femorphology ' , 'feoffset ' ,
113+ 'fespecularlighting ' , 'fetile ' , 'feturbulence ' ,
101114 );
102115
103116 /* Ignore these HTML tags and their content */
@@ -111,14 +124,53 @@ class rcube_washtml
111124 'abbr ' , 'char ' , 'charoff ' , 'clear ' , 'compact ' , 'coords ' , 'vspace ' , 'hspace ' ,
112125 'cellborder ' , 'size ' , 'lang ' , 'dir ' , 'usemap ' , 'shape ' , 'media ' ,
113126 // attributes of form elements
114- 'type ' , 'rows ' , 'cols ' , 'disabled ' , 'readonly ' , 'checked ' , 'multiple ' , 'value '
127+ 'type ' , 'rows ' , 'cols ' , 'disabled ' , 'readonly ' , 'checked ' , 'multiple ' , 'value ' ,
128+ // SVG
129+ 'accent-height ' , 'accumulate ' , 'additivive ' , 'alignment-baseline ' ,
130+ 'ascent ' , 'attributename ' , 'attributetype ' , 'azimuth ' , 'basefrequency ' , 'baseprofile ' ,
131+ 'baseline-shift ' , 'begin ' , 'bias ' , 'by ' , 'clip ' , 'clip-path ' , 'clip-rule ' ,
132+ 'color ' , 'color-interpolation ' , 'color-interpolation-filters ' , 'color-profile ' ,
133+ 'color-rendering ' , 'cx ' , 'cy ' , 'd ' , 'dx ' , 'dy ' , 'diffuseconstant ' , 'direction ' ,
134+ 'display ' , 'divisor ' , 'dur ' , 'edgemode ' , 'elevation ' , 'end ' , 'fill ' , 'fill-opacity ' ,
135+ 'fill-rule ' , 'filter ' , 'flood-color ' , 'flood-opacity ' , 'font-family ' , 'font-size ' ,
136+ 'font-size-adjust ' , 'font-stretch ' , 'font-style ' , 'font-variant ' , 'font-weight ' ,
137+ 'fx ' , 'fy ' , 'g1 ' , 'g2 ' , 'glyph-name ' , 'glyphref ' , 'gradientunits ' , 'gradienttransform ' ,
138+ 'image-rendering ' , 'in ' , 'in2 ' , 'k ' , 'k1 ' , 'k2 ' , 'k3 ' , 'k4 ' , 'kerning ' , 'keypoints ' ,
139+ 'keysplines ' , 'keytimes ' , 'lengthadjust ' , 'letter-spacing ' , 'kernelmatrix ' ,
140+ 'kernelunitlength ' , 'lighting-color ' , 'local ' , 'marker-end ' , 'marker-mid ' ,
141+ 'marker-start ' , 'markerheight ' , 'markerunits ' , 'markerwidth ' , 'maskcontentunits ' ,
142+ 'maskunits ' , 'max ' , 'mask ' , 'mode ' , 'min ' , 'numoctaves ' , 'offset ' , 'operator ' ,
143+ 'opacity ' , 'order ' , 'orient ' , 'orientation ' , 'origin ' , 'overflow ' , 'paint-order ' ,
144+ 'path ' , 'pathlength ' , 'patterncontentunits ' , 'patterntransform ' , 'patternunits ' ,
145+ 'points ' , 'preservealpha ' , 'r ' , 'rx ' , 'ry ' , 'radius ' , 'refx ' , 'refy ' , 'repeatcount ' ,
146+ 'repeatdur ' , 'restart ' , 'rotate ' , 'scale ' , 'seed ' , 'shape-rendering ' , 'specularconstant ' ,
147+ 'specularexponent ' , 'spreadmethod ' , 'stddeviation ' , 'stitchtiles ' , 'stop-color ' ,
148+ 'stop-opacity ' , 'stroke-dasharray ' , 'stroke-dashoffset ' , 'stroke-linecap ' ,
149+ 'stroke-linejoin ' , 'stroke-miterlimit ' , 'stroke-opacity ' , 'stroke ' , 'stroke-width ' ,
150+ 'surfacescale ' , 'targetx ' , 'targety ' , 'transform ' , 'text-anchor ' , 'text-decoration ' ,
151+ 'text-rendering ' , 'textlength ' , 'u1 ' , 'u2 ' , 'unicode ' , 'values ' , 'viewbox ' ,
152+ 'visibility ' , 'vert-adv-y ' , 'version ' , 'vert-origin-x ' , 'vert-origin-y ' , 'word-spacing ' ,
153+ 'wrap ' , 'writing-mode ' , 'xchannelselector ' , 'ychannelselector ' , 'x ' , 'x1 ' , 'x2 ' ,
154+ 'xmlns ' , 'y ' , 'y1 ' , 'y2 ' , 'z ' , 'zoomandpan ' ,
155+ // XML
156+ 'xml:id ' , 'xlink:title '
115157 );
116158
117159 /* Elements which could be empty and be returned in short form (<tag />) */
118160 static $ void_elements = array ('area ' , 'base ' , 'br ' , 'col ' , 'command ' , 'embed ' , 'hr ' ,
119- 'img ' , 'input ' , 'keygen ' , 'link ' , 'meta ' , 'param ' , 'source ' , 'track ' , 'wbr '
161+ 'img ' , 'input ' , 'keygen ' , 'link ' , 'meta ' , 'param ' , 'source ' , 'track ' , 'wbr ' ,
162+ // SVG
163+ 'altglyph ' , 'altglyphdef ' , 'altglyphitem ' , 'animate ' , 'animatecolor ' ,
164+ 'animatemotion ' , 'animatetransform ' , 'circle ' , 'clippath ' , 'defs ' , 'desc ' ,
165+ 'ellipse ' , 'font ' , 'g ' , 'glyph ' , 'glyphref ' , 'hkern ' , 'image ' , 'line ' ,
166+ 'lineargradient ' , 'marker ' , 'mask ' , 'mpath ' , 'path ' , 'pattern ' ,
167+ 'polygon ' , 'polyline ' , 'radialgradient ' , 'rect ' , 'set ' , 'stop ' , 'switch ' , 'symbol ' ,
168+ 'text ' , 'textpath ' , 'tref ' , 'tspan ' , 'use ' , 'view ' , 'vkern ' , 'filter ' ,
120169 );
121170
171+ /* Attributes that may contain insecure content */
172+ static $ insecure_attribs = array ('href ' , 'to ' , 'from ' );
173+
122174 /* State for linked objects in HTML */
123175 public $ extlinks = false ;
124176
@@ -149,10 +201,11 @@ class rcube_washtml
149201 */
150202 public function __construct ($ p = array ())
151203 {
152- $ this ->_html_elements = array_flip ((array )$ p ['html_elements ' ]) + array_flip (self ::$ html_elements ) ;
153- $ this ->_html_attribs = array_flip ((array )$ p ['html_attribs ' ]) + array_flip (self ::$ html_attribs );
154- $ this ->_ignore_elements = array_flip ((array )$ p ['ignore_elements ' ]) + array_flip (self ::$ ignore_elements );
155- $ this ->_void_elements = array_flip ((array )$ p ['void_elements ' ]) + array_flip (self ::$ void_elements );
204+ $ this ->_html_elements = array_flip ((array )$ p ['html_elements ' ]) + array_flip (self ::$ html_elements );
205+ $ this ->_html_attribs = array_flip ((array )$ p ['html_attribs ' ]) + array_flip (self ::$ html_attribs );
206+ $ this ->_insecure_attribs = array_flip ((array )$ p ['insecure_attribs ' ]) + array_flip (self ::$ insecure_attribs );
207+ $ this ->_ignore_elements = array_flip ((array )$ p ['ignore_elements ' ]) + array_flip (self ::$ ignore_elements );
208+ $ this ->_void_elements = array_flip ((array )$ p ['void_elements ' ]) + array_flip (self ::$ void_elements );
156209
157210 unset($ p ['html_elements ' ], $ p ['html_attribs ' ], $ p ['ignore_elements ' ], $ p ['void_elements ' ]);
158211
@@ -232,25 +285,26 @@ private function wash_style($style)
232285 */
233286 private function wash_attribs ($ node )
234287 {
235- $ t = '' ;
288+ $ t = '' ;
236289 $ washed = '' ;
237290
238- foreach ($ node ->attributes as $ key => $ plop ) {
239- $ key = strtolower ($ key );
240- $ value = $ node -> getAttribute ( $ key ) ;
291+ foreach ($ node ->attributes as $ name => $ attr ) {
292+ $ key = strtolower ($ name );
293+ $ value = $ attr -> nodeValue ;
241294
242295 if (isset ($ this ->_html_attribs [$ key ]) ||
243- ($ key == 'href ' && ($ value = trim ($ value ))
296+ (isset ($ this ->_insecure_attribs [$ key ])
297+ && ($ value = trim ($ value ))
244298 && !preg_match ('!^(javascript|vbscript|data:text)!i ' , $ value )
245299 && preg_match ('!^([a-z][a-z0-9.+-]+:|//|#).+!i ' , $ value ))
246300 ) {
247- $ t .= ' ' . $ key . '=" ' . htmlspecialchars ($ value , ENT_QUOTES ) . '" ' ;
301+ $ t .= ' ' . $ attr -> nodeName . '=" ' . htmlspecialchars ($ value , ENT_QUOTES ) . '" ' ;
248302 }
249303 else if ($ key == 'style ' && ($ style = $ this ->wash_style ($ value ))) {
250304 // replace double quotes to prevent syntax error and XSS issues (#1490227)
251305 $ t .= ' style=" ' . str_replace ('" ' , '" ' , $ style ) . '" ' ;
252306 }
253- else if ($ key == 'background '
307+ else if ($ key == 'background ' || $ key == ' href '
254308 || ($ key == 'src ' && preg_match ('/^(img|source)$/i ' , $ node ->tagName ))
255309 || ($ key == 'poster ' && strtolower ($ node ->tagName ) == 'video ' )
256310 ) {
@@ -275,7 +329,7 @@ private function wash_attribs($node)
275329 }
276330 }
277331 else {
278- $ washed .= ($ washed ? ' ' : '' ) . $ key ;
332+ $ washed .= ($ washed ? ' ' : '' ) . $ attr -> nodeName ;
279333 }
280334 }
281335
@@ -322,8 +376,25 @@ private function dumpHtml($node, $level = 20)
322376 }
323377 else if (isset ($ this ->_html_elements [$ tagName ])) {
324378 $ content = $ this ->dumpHtml ($ node , $ level );
325- $ dump .= '< ' . $ tagName . $ this ->wash_attribs ($ node ) .
326- ($ content === '' && isset ($ this ->_void_elements [$ tagName ]) ? ' /> ' : "> $ content</ $ tagName> " );
379+ $ dump .= '< ' . $ tagName ;
380+
381+ if ($ tagName == 'svg ' ) {
382+ $ xpath = new DOMXPath ($ node ->ownerDocument );
383+ foreach ($ xpath ->query ('namespace::* ' ) as $ ns ) {
384+ if ($ ns ->nodeName != 'xmlns:xml ' ) {
385+ $ dump .= ' ' . $ ns ->nodeName . '=" ' . $ ns ->nodeValue . '" ' ;
386+ }
387+ }
388+ }
389+
390+ $ dump .= $ this ->wash_attribs ($ node );
391+
392+ if ($ content === '' && isset ($ this ->_void_elements [$ tagName ])) {
393+ $ dump .= ' /> ' ;
394+ }
395+ else {
396+ $ dump .= "> $ content</ $ tagName> " ;
397+ }
327398 }
328399 else if (isset ($ this ->_ignore_elements [$ tagName ])) {
329400 $ dump .= '<!-- ' . htmlspecialchars ($ tagName , ENT_QUOTES ) . ' not allowed --> ' ;
@@ -375,14 +446,18 @@ public function wash($html)
375446 // Detect max nesting level (for dumpHTML) (#1489110)
376447 $ this ->max_nesting_level = (int ) @ini_get ('xdebug.max_nesting_level ' );
377448
449+ // SVG needs to be parsed as XML
450+ $ xml = stripos ($ html , '<svg ' ) !== false || stripos ($ html , '<?xml ' ) !== false ;
451+ $ method = $ xml ? 'loadXML ' : 'loadHTML ' ;
452+ $ options = 0 ;
453+
378454 // Use optimizations if supported
379455 if (PHP_VERSION_ID >= 50400 ) {
380- @$ node ->loadHTML ($ html , LIBXML_PARSEHUGE | LIBXML_COMPACT );
381- }
382- else {
383- @$ node ->loadHTML ($ html );
456+ $ options = LIBXML_PARSEHUGE | LIBXML_COMPACT | LIBXML_NONET ;
384457 }
385458
459+ @$ node ->{$ method }($ html , $ options );
460+
386461 return $ this ->dumpHtml ($ node );
387462 }
388463
@@ -399,6 +474,8 @@ public function get_config($prop)
399474 */
400475 private function cleanup ($ html )
401476 {
477+ $ html = trim ($ html );
478+
402479 // special replacements (not properly handled by washtml class)
403480 $ html_search = array (
404481 // space(s) between <NOBR>
@@ -420,17 +497,19 @@ private function cleanup($html)
420497 '' ,
421498 '<html> ' ,
422499 );
500+
423501 $ html = preg_replace ($ html_search , $ html_replace , trim ($ html ));
424502
425- //-> Replace all of those weird MS Word quotes and other high characters
503+ // Replace all of those weird MS Word quotes and other high characters
426504 $ badwordchars = array (
427505 "\xe2\x80\x98" , // left single quote
428506 "\xe2\x80\x99" , // right single quote
429507 "\xe2\x80\x9c" , // left double quote
430508 "\xe2\x80\x9d" , // right double quote
431509 "\xe2\x80\x94" , // em dash
432- "\xe2\x80\xa6" // elipses
510+ "\xe2\x80\xa6" // elipses
433511 );
512+
434513 $ fixedwordchars = array (
435514 "' " ,
436515 "' " ,
@@ -439,6 +518,7 @@ private function cleanup($html)
439518 '— ' ,
440519 '... '
441520 );
521+
442522 $ html = str_replace ($ badwordchars , $ fixedwordchars , $ html );
443523
444524 // PCRE error handling (#1486856); should we do something like this for every preg_* call?
@@ -484,7 +564,7 @@ public static function html_tag_callback($matches)
484564 $ tagname = $ matches [2 ];
485565 $ tagname = preg_replace (array (
486566 '/:.*$/ ' , // Microsoft's Smart Tags <st1:xxxx>
487- '/[^a-z0-9_\[\]\!-]/i ' , // forbidden characters
567+ '/[^a-z0-9_\[\]\!? -]/i ' , // forbidden characters
488568 ), '' , $ tagname );
489569
490570 // fix invalid closing tags - remove any attributes (#1489446)
0 commit comments