Skip to content
Permalink
Browse files

MDL-49564 atto: Improve empty span removal

Paste from MS word, followed by cleaning, may leave many many unused
spans. Try to remove them.
  • Loading branch information...
merrill-oakland committed Mar 18, 2015
1 parent 0582523 commit b365d5cc2ddb6531e240a4d0a029caf3a1c07b23
@@ -870,9 +870,7 @@ EditorClean.prototype = {
// Remove Apple- classes in class attributes. Only removes one or more that appear in succession.
{regex: /(<[^>]*?class\s*?=\s*?"[^>"]*?)(?:[\s]*Apple-[_a-zA-Z0-9\-]*)+/gi, replace: "$1"},
// Remove OLE_LINK# anchors that may litter the code.
{regex: /<a [^>]*?name\s*?=\s*?"OLE_LINK\d*?"[^>]*?>\s*?<\/a>/gi, replace: ""},
// Remove empty spans, but not ones from Rangy.
{regex: /<span(?![^>]*?rangySelectionBoundary[^>]*?)[^>]*>(&nbsp;|\s)*<\/span>/gi, replace: ""}
{regex: /<a [^>]*?name\s*?=\s*?"OLE_LINK\d*?"[^>]*?>\s*?<\/a>/gi, replace: ""}
];

// Apply the rules.
@@ -881,7 +879,63 @@ EditorClean.prototype = {
// Reapply the standard cleaner to the content.
content = this._cleanHTML(content);

// Clean unused spans out of the content.
content = this._cleanSpans(content);

return content;
},

/**
* Clean empty or unused spans from passed HTML.
*
* This code intentionally doesn't use YUI Nodes. YUI was quite a bit slower at this, so using raw DOM objects instead.
*
* @method _cleanSpans
* @private
* @param {String} content The content to clean
* @return {String} The cleaned HTML
*/
_cleanSpans: function(content) {
// Return an empty string if passed an invalid or empty object.
if (!content || content.length === 0) {
return "";
}
// Check if the string is empty or only contains whitespace.
if (content.length === 0 || !content.match(/\S/)) {
return content;
}

var rules = [
// Remove unused class, style, or id attributes. This will make empty tag detection easier later.
{regex: /(<[^>]*?)(?:[\s]*(?:class|style|id)\s*?=\s*?"\s*?")+/gi, replace: "$1"}
];
// Apply the rules.
content = this._filterContentWithRules(content, rules);

// Reference: "http://stackoverflow.com/questions/8131396/remove-nested-span-without-id"

// This is better to run detached from the DOM, so the browser doesn't try to update on each change.
var holder = document.createElement('div');
holder.innerHTML = content;
var spans = holder.getElementsByTagName('span');

// Since we will be removing elements from the list, we should copy it to an array, making it static.
var spansarr = Array.prototype.slice.call(spans, 0);

spansarr.forEach(function(span) {
if (!span.hasAttributes()) {
// If no attributes (id, class, style, etc), this span is has no effect.
// Move each child (if they exist) to the parent in place of this span.
while (span.firstChild) {
span.parentNode.insertBefore(span.firstChild, span);
}

// Remove the now empty span.
span.parentNode.removeChild(span);
}
});

return holder.innerHTML;
}
};

0 comments on commit b365d5c

Please sign in to comment.
You can’t perform that action at this time.