Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: improve default html sanitizer (fix #733) #1010

Merged
merged 2 commits into from
Jun 3, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
19 changes: 0 additions & 19 deletions apps/editor/src/js/convertor.js
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,6 @@ class Convertor {
* @private
*/
_markdownToHtmlWithCodeHighlight(markdown) {
markdown = this._replaceImgAttrToDataProp(markdown);

return this.renderHTML(this.mdReader.parse(markdown));
}

Expand All @@ -68,27 +66,10 @@ class Convertor {
markdown = markdown.replace(HTML_TAG_RX, (match, $1, $2, $3) =>
match[0] !== '\\' ? `${$1}${$2} data-tomark-pass ${$3}` : match
);
markdown = this._replaceImgAttrToDataProp(markdown);

return this.renderHTML(this.mdReader.parse(markdown));
}

/**
* Replace 'onerror' attribute of img tag to data property string
* @param {string} markdown markdown text
* @returns {string} replaced markdown text
* @private
*/
_replaceImgAttrToDataProp(markdown) {
const onerrorStripeRegex = /(<img[^>]*)(onerror\s*=\s*[\\"']?[^\\"']*[\\"']?)(.*)/i;

while (onerrorStripeRegex.exec(markdown)) {
markdown = markdown.replace(onerrorStripeRegex, '$1$3');
}

return markdown;
}

/**
* Remove BR's data-tomark-pass attribute text when br in code element
* @param {string} renderedHTML Rendered HTML string from markdown editor
Expand Down
88 changes: 44 additions & 44 deletions apps/editor/src/js/htmlSanitizer.js
Original file line number Diff line number Diff line change
Expand Up @@ -36,15 +36,14 @@ const SVG_ATTR_LIST_RX = new RegExp(
'g'
);

const ATTR_VALUE_BLACK_LIST_RX = {
href: /^(javascript:).*/g
};
const XSS_ATTR_RX = /href|src|background/gi;
const XSS_VALUE_RX = /((java|vb|live)script|x):/gi;

/**
* htmlSanitizer
* @param {string|Node} html html or Node
* @param {boolean} [needHtmlText] pass true if need html text
* @returns {string|DocumentFragment} result
* @param {string|Node} html - html or Node
* @param {boolean} [needHtmlText] - pass true if need html text
* @returns {string|DocumentFragment} - result
* @ignore
*/
function htmlSanitizer(html, needHtmlText) {
Expand All @@ -59,20 +58,19 @@ function htmlSanitizer(html, needHtmlText) {

removeUnnecessaryTags(root);
leaveOnlyWhitelistAttribute(root);
removeInvalidAttributeValues(root);

return domUtils.finalizeHtml(root, needHtmlText);
}

/**
* Remove unnecessary tags
* Removes unnecessary tags.
* @param {HTMLElement} html - root element
* @private
* @param {HTMLElement} html root element
*/
function removeUnnecessaryTags(html) {
const removedTags = domUtils.findAll(
html,
'script, iframe, textarea, form, button, select, meta, style, link, title, embed, object, details, summary'
'script, iframe, textarea, form, button, select, input, meta, style, link, title, embed, object, details, summary'
);

removedTags.forEach(node => {
Expand All @@ -81,49 +79,51 @@ function removeUnnecessaryTags(html) {
}

/**
* Leave only white list attributes
* Checks whether the attribute and value that causing XSS or not.
* @param {string} attrName - name of attribute
* @param {string} attrValue - value of attirbute
* @param {boolean} state
* @private
* @param {HTMLElement} html root element
*/
function leaveOnlyWhitelistAttribute(html) {
domUtils.findAll(html, '*').forEach(node => {
const attrs = node.attributes;
const blacklist = toArray(attrs).filter(attr => {
const isHTMLAttr = attr.name.match(HTML_ATTR_LIST_RX);
const isSVGAttr = attr.name.match(SVG_ATTR_LIST_RX);

return !isHTMLAttr && !isSVGAttr;
});
function isXSSAttribute(attrName, attrValue) {
return attrName.match(XSS_ATTR_RX) && attrValue.match(XSS_VALUE_RX);
seonim-ryu marked this conversation as resolved.
Show resolved Hide resolved
}

toArray(blacklist).forEach(attr => {
// Edge svg attribute name returns uppercase bug. error guard.
// https://developer.microsoft.com/en-us/microsoft-edge/platform/issues/5579311/
if (attrs.getNamedItem(attr.name)) {
attrs.removeNamedItem(attr.name);
}
});
/**
* Removes attributes of blacklist.
* @param {NamedNodeMap} nodeAttrs - all attributes of node
* @param {NamedNodeMap} blacklistAttrs - attributes of blacklist
* @private
*/
function removeBlacklistAttributes(nodeAttrs, blacklistAttrs) {
toArray(blacklistAttrs).forEach(({ name }) => {
// Edge svg attribute name returns uppercase bug. error guard.
// https://developer.microsoft.com/en-us/microsoft-edge/platform/issues/5579311/
if (nodeAttrs.getNamedItem(name)) {
nodeAttrs.removeNamedItem(name);
}
});
}

/**
* Remove invalid attribute values
* Leaves only white list attributes.
* @param {HTMLElement} html - root element
* @private
* @param {HTMLElement} html root element
*/
function removeInvalidAttributeValues(html) {
for (const attr in ATTR_VALUE_BLACK_LIST_RX) {
if (ATTR_VALUE_BLACK_LIST_RX.hasOwnProperty(attr)) {
domUtils.findAll(html, `[${attr}]`).forEach(node => {
const attrs = node.attributes;
const valueBlackListRX = ATTR_VALUE_BLACK_LIST_RX[attr];
const attrItem = attrs.getNamedItem(attr);

if (valueBlackListRX && attrItem && attrItem.value.toLowerCase().match(valueBlackListRX)) {
attrs.removeNamedItem(attr);
}
});
}
}
function leaveOnlyWhitelistAttribute(html) {
domUtils.findAll(html, '*').forEach(node => {
const { attributes } = node;
const blacklist = toArray(attributes).filter(attr => {
const { name, value } = attr;
const htmlAttr = name.match(HTML_ATTR_LIST_RX);
const svgAttr = name.match(SVG_ATTR_LIST_RX);
const xssAttr = htmlAttr && isXSSAttribute(name, value);

return (!htmlAttr && !svgAttr) || xssAttr;
});

removeBlacklistAttributes(attributes, blacklist);
});
}

export default htmlSanitizer;
69 changes: 52 additions & 17 deletions apps/editor/test/unit/htmlSanitizer.spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,18 +8,19 @@ import htmlSanitizer from '@/htmlSanitizer';

describe('htmlSanitizer', function() {
describe('tags', function() {
it('Remove unnecessary tags', function() {
it('removes unnecessary tags', function() {
expect(htmlSanitizer('<script>alert("test");</script>', true)).toBe('');
expect(htmlSanitizer('<embed>child alive</embed>', true)).toBe('child alive');
expect(htmlSanitizer('<object>child die</object>', true)).toBe('');
expect(htmlSanitizer('<details><summary>foo</summary></details>', true)).toBe('');
expect(htmlSanitizer('<input type="image" />', true)).toBe('');
});
});

describe('attributes', () => {
it('Remove all attritube but white list', () => {
it('removes all attritube but white list', () => {
const html =
'<img style="display:inline" class="V" title="V" data-custom="V" src="http://www.nhn.com/renewal/img/ci_nhn.png" onload="dd=1" />';
'<img style="display:inline" class="V" title="V" data-custom="V" src="http://www.nhn.com/renewal/img/ci_nhn.png" onload="dd=1" onerror="javascript:alert();"/>';
const dom = htmlSanitizer(html);

const $img = $(dom).find('img');
Expand All @@ -31,9 +32,10 @@ describe('htmlSanitizer', function() {
expect($img.attr('data-custom')).toBeTruthy();

expect($img.attr('onload')).not.toBeDefined();
expect($img.attr('onerror')).not.toBeDefined();
});

it('Leave svg attributes', function() {
it('leaves svg attributes', () => {
const html =
'<svg width="100" height="100"><circle cx="50" cy="50" r="40" stroke="green" stroke-width="4" fill="yellow" /></svg>';
const dom = htmlSanitizer(html);
Expand All @@ -47,19 +49,52 @@ describe('htmlSanitizer', function() {
expect($circle.attr('fill')).toBeTruthy();
});

it('Remove attributes with invalid value', function() {
expect(htmlSanitizer('<a href="javascript:alert();">xss</a>', true)).toBe('<a>xss</a>');
expect(htmlSanitizer('<a href="JaVaScRiPt:alert();">xss</a>', true)).toBe('<a>xss</a>');
expect(htmlSanitizer('<a href="#">benign</a>', true)).toBe('<a href="#">benign</a>');
expect(htmlSanitizer('<a href="http://example.com">http</a>', true)).toBe(
'<a href="http://example.com">http</a>'
);
expect(htmlSanitizer('<a href="https://example.com">https</a>', true)).toBe(
'<a href="https://example.com">https</a>'
);
expect(htmlSanitizer('<a href="ftp://example.com">ftp</a>', true)).toBe(
'<a href="ftp://example.com">ftp</a>'
);
describe('removes attributes with invalid value including xss script', () => {
it('table', () => {
expect(htmlSanitizer(`<TABLE BACKGROUND="javascript:alert('XSS')">`, true)).toBe(
'<table></table>'
);
expect(htmlSanitizer(`<TABLE><TD BACKGROUND="javascript:alert('XSS')">`, true)).toBe(
'<table><tbody><tr><td></td></tr></tbody></table>'
);
});

it('href attribute with a tag', () => {
expect(htmlSanitizer('<a href="javascript:alert();">xss</a>', true)).toBe('<a>xss</a>');
expect(htmlSanitizer('<a href=" JaVaScRiPt: alert();">xss</a>', true)).toBe('<a>xss</a>');
expect(htmlSanitizer('<a href="vbscript:alert();">xss</a>', true)).toBe('<a>xss</a>');
expect(htmlSanitizer('<a href=" VBscript: alert(); ">xss</a>', true)).toBe('<a>xss</a>');
expect(htmlSanitizer('<a href="livescript:alert();">xss</a>', true)).toBe('<a>xss</a>');
expect(htmlSanitizer('<a href=" LIVEScript: alert() ;">xss</a>', true)).toBe('<a>xss</a>');
expect(htmlSanitizer(`123<a href=' javascript:alert();'>xss</a>`, true)).toBe(
'123<a>xss</a>'
);
expect(htmlSanitizer(`<a href='javas<!-- -->cript:alert()'>xss</a>`, true)).toBe(
'<a>xss</a>'
);
});

it('src attribute with img tag', () => {
expect(htmlSanitizer('<img src="javascript:alert();">', true)).toBe('<img>');
expect(htmlSanitizer('<img src=" JaVaScRiPt: alert();">', true)).toBe('<img>');
expect(htmlSanitizer('<img src="vbscript:alert();">', true)).toBe('<img>');
expect(htmlSanitizer('<img src=" VBscript: alert(); ">', true)).toBe('<img>');
expect(htmlSanitizer('<img src=" LIVEScript: alert() ;">', true)).toBe('<img>');
expect(htmlSanitizer('<img src="java<!-- -->script:alert();">', true)).toBe('<img>');
});

it('src and onerror attribute with img tag', () => {
expect(
htmlSanitizer(
'<img src = x onerror = "javascript: window.onerror = alert; throw XSS">',
true
)
).toBe('<img src="x">');
expect(htmlSanitizer('"><img src="x:x" onerror="alert(XSS)">', true)).toBe('"&gt;<img>');
expect(htmlSanitizer('<img src=x:alert(alt) onerror=eval(src) alt=0>', true)).toBe(
'<img alt="0">'
);
});
});
});
});