Skip to content

Commit

Permalink
Update URL sanitizer to allow more protocols (twbs#38531)
Browse files Browse the repository at this point in the history
Co-authored-by: XhmikosR <xhmikosr@gmail.com>
  • Loading branch information
2 people authored and romankupchak93 committed Jan 5, 2024
1 parent 501c11f commit 6110bb3
Show file tree
Hide file tree
Showing 2 changed files with 104 additions and 52 deletions.
78 changes: 36 additions & 42 deletions js/src/util/sanitizer.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,47 +5,6 @@
* --------------------------------------------------------------------------
*/

const uriAttributes = new Set([
'background',
'cite',
'href',
'itemtype',
'longdesc',
'poster',
'src',
'xlink:href'
])

/**
* A pattern that recognizes a commonly useful subset of URLs that are safe.
*
* Shout-out to Angular https://github.com/angular/angular/blob/12.2.x/packages/core/src/sanitization/url_sanitizer.ts
*/
const SAFE_URL_PATTERN = /^(?:(?:https?|mailto|ftp|tel|file|sms):|[^#&/:?]*(?:[#/?]|$))/i

/**
* A pattern that matches safe data URLs. Only matches image, video and audio types.
*
* Shout-out to Angular https://github.com/angular/angular/blob/12.2.x/packages/core/src/sanitization/url_sanitizer.ts
*/
const DATA_URL_PATTERN = /^data:(?:image\/(?:bmp|gif|jpeg|jpg|png|tiff|webp)|video\/(?:mpeg|mp4|ogg|webm)|audio\/(?:mp3|oga|ogg|opus));base64,[\d+/a-z]+=*$/i

const allowedAttribute = (attribute, allowedAttributeList) => {
const attributeName = attribute.nodeName.toLowerCase()

if (allowedAttributeList.includes(attributeName)) {
if (uriAttributes.has(attributeName)) {
return Boolean(SAFE_URL_PATTERN.test(attribute.nodeValue) || DATA_URL_PATTERN.test(attribute.nodeValue))
}

return true
}

// Check if a regular expression validates the attribute.
return allowedAttributeList.filter(attributeRegex => attributeRegex instanceof RegExp)
.some(regex => regex.test(attributeName))
}

// js-docs-start allow-list
const ARIA_ATTRIBUTE_PATTERN = /^aria-[\w-]*$/i

Expand Down Expand Up @@ -84,6 +43,42 @@ export const DefaultAllowlist = {
}
// js-docs-end allow-list

const uriAttributes = new Set([
'background',
'cite',
'href',
'itemtype',
'longdesc',
'poster',
'src',
'xlink:href'
])

/**
* A pattern that recognizes URLs that are safe wrt. XSS in URL navigation
* contexts.
*
* Shout-out to Angular https://github.com/angular/angular/blob/15.2.8/packages/core/src/sanitization/url_sanitizer.ts#L38
*/
// eslint-disable-next-line unicorn/better-regex
const SAFE_URL_PATTERN = /^(?!javascript:)(?:[a-z0-9+.-]+:|[^&:/?#]*(?:[/?#]|$))/i

const allowedAttribute = (attribute, allowedAttributeList) => {
const attributeName = attribute.nodeName.toLowerCase()

if (allowedAttributeList.includes(attributeName)) {
if (uriAttributes.has(attributeName)) {
return Boolean(SAFE_URL_PATTERN.test(attribute.nodeValue))
}

return true
}

// Check if a regular expression validates the attribute.
return allowedAttributeList.filter(attributeRegex => attributeRegex instanceof RegExp)
.some(regex => regex.test(attributeName))
}

export function sanitizeHtml(unsafeHtml, allowList, sanitizeFunction) {
if (!unsafeHtml.length) {
return unsafeHtml
Expand All @@ -102,7 +97,6 @@ export function sanitizeHtml(unsafeHtml, allowList, sanitizeFunction) {

if (!Object.keys(allowList).includes(elementName)) {
element.remove()

continue
}

Expand Down
78 changes: 68 additions & 10 deletions js/tests/unit/util/sanitizer.spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,17 +10,75 @@ describe('Sanitizer', () => {
expect(result).toEqual(empty)
})

it('should sanitize template by removing tags with XSS', () => {
const template = [
'<div>',
' <a href="javascript:alert(7)">Click me</a>',
' <span>Some content</span>',
'</div>'
].join('')

const result = sanitizeHtml(template, DefaultAllowlist, null)
it('should retain tags with valid URLs', () => {
const validUrls = [
'',
'http://abc',
'HTTP://abc',
'https://abc',
'HTTPS://abc',
'ftp://abc',
'FTP://abc',
'mailto:me@example.com',
'MAILTO:me@example.com',
'tel:123-123-1234',
'TEL:123-123-1234',
'sip:me@example.com',
'SIP:me@example.com',
'#anchor',
'/page1.md',
'http://JavaScript/my.js',
'data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/', // Truncated.
'data:video/webm;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/',
'data:audio/opus;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/',
'unknown-scheme:abc'
]

for (const url of validUrls) {
const template = [
'<div>',
` <a href="${url}">Click me</a>`,
' <span>Some content</span>',
'</div>'
].join('')

const result = sanitizeHtml(template, DefaultAllowlist, null)

expect(result).toContain(`href="${url}"`)
}
})

expect(result).not.toContain('href="javascript:alert(7)')
it('should sanitize template by removing tags with XSS', () => {
const invalidUrls = [
// eslint-disable-next-line no-script-url
'javascript:alert(7)',
// eslint-disable-next-line no-script-url
'javascript:evil()',
// eslint-disable-next-line no-script-url
'JavaScript:abc',
' javascript:abc',
' \n Java\n Script:abc',
'&#106;&#97;&#118;&#97;&#115;&#99;&#114;&#105;&#112;&#116;&#58;',
'&#106&#97;&#118;&#97;&#115;&#99;&#114;&#105;&#112;&#116;&#58;',
'&#106 &#97;&#118;&#97;&#115;&#99;&#114;&#105;&#112;&#116;&#58;',
'&#0000106&#0000097&#0000118&#0000097&#0000115&#0000099&#0000114&#0000105&#0000112&#0000116&#0000058',
'&#x6A&#x61&#x76&#x61&#x73&#x63&#x72&#x69&#x70&#x74&#x3A;',
'jav&#x09;ascript:alert();',
'jav\u0000ascript:alert();'
]

for (const url of invalidUrls) {
const template = [
'<div>',
` <a href="${url}">Click me</a>`,
' <span>Some content</span>',
'</div>'
].join('')

const result = sanitizeHtml(template, DefaultAllowlist, null)

expect(result).not.toContain(`href="${url}"`)
}
})

it('should sanitize template and work with multiple regex', () => {
Expand Down

0 comments on commit 6110bb3

Please sign in to comment.