Skip to content

Commit

Permalink
Merge pull request #2785 from zerline/HTMLSanitizer
Browse files Browse the repository at this point in the history
HTML sanitizer for descriptions.
  • Loading branch information
ibdafna committed Aug 3, 2021
2 parents 2de28de + df33205 commit 8a2b049
Show file tree
Hide file tree
Showing 16 changed files with 849 additions and 19 deletions.
6 changes: 5 additions & 1 deletion ipywidgets/widgets/widget_bool.py
Expand Up @@ -53,9 +53,13 @@ class ToggleButton(_Bool):
value : {True,False}
value of the toggle button: True-pressed, False-unpressed
description : str
description displayed next to the button
description displayed on the button
icon: str
font-awesome icon name
style: instance of DescriptionStyle
styling customizations
button_style: enum
button predefined styling
"""
_view_name = Unicode('ToggleButtonView').tag(sync=True)
_model_name = Unicode('ToggleButtonModel').tag(sync=True)
Expand Down
2 changes: 1 addition & 1 deletion ipywidgets/widgets/widget_button.py
Expand Up @@ -35,7 +35,7 @@ class Button(DOMWidget, CoreWidget):
Parameters
----------
description: str
description displayed next to the button
description displayed on the button
icon: str
font-awesome icon names, without the 'fa-' prefix
disabled: bool
Expand Down
3 changes: 2 additions & 1 deletion ipywidgets/widgets/widget_description.py
Expand Up @@ -3,7 +3,7 @@

"""Contains the DOMWidget class"""

from traitlets import Unicode
from traitlets import Bool, Unicode
from .widget import Widget, widget_serialization, register
from .trait_types import InstanceDict
from .widget_style import Style
Expand All @@ -21,6 +21,7 @@ class DescriptionWidget(DOMWidget, CoreWidget):
"""Widget that has a description label to the side."""
_model_name = Unicode('DescriptionModel').tag(sync=True)
description = Unicode('', help="Description of the control.").tag(sync=True)
description_allow_html = Bool(False, help="Accept HTML in the description.").tag(sync=True)
style = InstanceDict(DescriptionStyle, help="Styling customizations").tag(sync=True, **widget_serialization)

def _repr_keys(self):
Expand Down
4 changes: 3 additions & 1 deletion packages/base-manager/package.json
Expand Up @@ -35,14 +35,16 @@
"@jupyter-widgets/base": "^5.0.0-alpha.3",
"@jupyterlab/services": "^6.0.0",
"@lumino/coreutils": "^1.4.2",
"base64-js": "^1.2.1"
"base64-js": "^1.2.1",
"sanitize-html": "^1.20"
},
"devDependencies": {
"@types/base64-js": "^1.2.5",
"@types/chai": "^4.1.7",
"@types/chai-as-promised": "^7.1.0",
"@types/expect.js": "^0.3.29",
"@types/mocha": "^8.2.2",
"@types/sanitize-html": "^1.20",
"@types/sinon": "^10.0.2",
"@types/sinon-chai": "^3.2.2",
"chai": "^4.0.0",
Expand Down
193 changes: 193 additions & 0 deletions packages/base-manager/src/latex.ts
@@ -0,0 +1,193 @@
/*-----------------------------------------------------------------------------
| Copyright (c) Jupyter Development Team.
| Distributed under the terms of the Modified BSD License.
|----------------------------------------------------------------------------*/
// Some magic for deferring mathematical expressions to MathJax
// by hiding them from the Markdown parser.
// Some of the code here is adapted with permission from Davide Cervone
// under the terms of the Apache2 license governing the MathJax project.
// Other minor modifications are also due to StackExchange and are used with
// permission.

const inline = '$'; // the inline math delimiter; also what a masked `~D` is turned back into

// MATHSPLIT contains the pattern for math delimiters and special symbols
// needed for searching for math in the text input. Its alternatives are:
//   \$\$?                          `$` or `$$`
//   \\(?:begin|end)\{[a-z]*\*?\}   `\begin{name}` / `\end{name}`, name optionally ending in `*`
//   \\[{}$]                        a backslash-escaped `{`, `}` or `$`
//   [{}]                           a bare brace
//   (?:\n\s*)+                     one or more newlines, each followed by optional whitespace
//   @@\d+@@                        text that looks like a math placeholder
//   \\\\(?:\(|\)|\[|\])            two backslashes followed by `(`, `)`, `[` or `]`
// The whole pattern is a single capturing group, so String.prototype.split
// keeps every matched delimiter in the result, at the odd indices.
// The `i` flag makes the match case-insensitive.
const MATHSPLIT =
  /(\$\$?|\\(?:begin|end)\{[a-z]*\*?\}|\\[{}$]|[{}]|(?:\n\s*)+|@@\d+@@|\\\\(?:\(|\)|\[|\]))/i;

/**
 * Break up the text into its component parts and search
 * through them for math delimiters, braces, linebreaks, etc.
 * Math delimiters must match and braces must balance.
 * Don't allow math to pass through a double linebreak
 * (which will be a paragraph).
 *
 * Every piece of math that is found is moved into the returned `math`
 * array and replaced in the returned `text` by a placeholder of the form
 * `@@<index>@@`, where `<index>` is the position of that piece in `math`.
 *
 * @param text - the source text to scan
 * @returns the text with placeholders in place of math, and the extracted
 *   math strings in placeholder order
 */
export function removeMath(text: string): { text: string; math: string[] } {
  const math: string[] = []; // stores math strings for later
  let start: number | null = null; // index in `blocks` of the opening delimiter; null when not inside math
  let end: string | null = null; // closing delimiter that matches the current opening delimiter
  let last: number | null = null; // index of the latest closing delimiter seen while braces were unbalanced
  let braces = 0; // number of `{` currently open inside the math
  let deTilde: (text: string) => string; // undoes the code-span masking applied below

  // Except for extreme edge cases, this should catch precisely those pieces of the markdown
  // source that will later be turned into code spans. While MathJax will not TeXify code spans,
  // we still have to consider them at this point; the following issue has happened several times:
  //
  // `$foo` and `$bar` are variables. --> <code>$foo ` and `$bar</code> are variables.
  const hasCodeSpans = /`/.test(text);
  if (hasCodeSpans) {
    // Mask: every `~` becomes `~T`, then every `$` inside a single-line
    // backtick-delimited span (opening backticks not preceded by a backslash,
    // closed by the same run of backticks) becomes `~D`, so that it is not
    // seen as a math delimiter by the scan below.
    text = text
      .replace(/~/g, '~T')
      .replace(/(^|[^\\])(`+)([^\n]*?[^`\n])\2(?!`)/gm, (wholematch) =>
        wholematch.replace(/\$/g, '~D')
      );
    // Unmask: `~T` back to `~`, `~D` back to the inline delimiter.
    deTilde = (text: string) => {
      return text.replace(/~([TD])/g, (wholematch, character) =>
        character === 'T' ? '~' : inline
      );
    };
  } else {
    // Nothing was masked, so there is nothing to undo.
    deTilde = (text: string) => {
      return text;
    };
  }

  // Normalize `\r\n` and lone `\r` to `\n`, then split on MATHSPLIT.
  let blocks = text.replace(/\r\n?/g, '\n').split(MATHSPLIT);

  // Only the odd indices are visited: those hold the delimiters captured by
  // MATHSPLIT, the even indices hold the text between them.
  for (let i = 1, m = blocks.length; i < m; i += 2) {
    const block = blocks[i];
    if (block.charAt(0) === '@') {
      //
      // Things that look like our math markers will get
      // stored and then retrieved along with the math.
      //
      blocks[i] = '@@' + math.length + '@@';
      math.push(block);
    } else if (start !== null) {
      //
      // If we are in math, look for the end delimiter,
      // but don't go past double line breaks,
      // and balance braces within the math.
      //
      if (block === end) {
        if (braces) {
          // Closing delimiter inside unbalanced braces: remember it as a
          // fallback end position and keep scanning.
          last = i;
        } else {
          blocks = processMath(start, i, deTilde, math, blocks);
          start = null;
          end = null;
          last = null;
        }
      } else if (block.match(/\n.*\n/)) {
        // At least two newlines in this delimiter: the math may not continue
        // past it. Fall back to the remembered closing delimiter, if any;
        // rewinding `i` makes the scan resume right after that delimiter.
        if (last !== null) {
          i = last;
          blocks = processMath(start, i, deTilde, math, blocks);
        }
        start = null;
        end = null;
        last = null;
        braces = 0;
      } else if (block === '{') {
        braces++;
      } else if (block === '}' && braces) {
        braces--;
      }
    } else {
      //
      // Look for math start delimiters and when
      // found, set up the end delimiter.
      //
      if (block === inline || block === '$$') {
        // `$...$` and `$$...$$` close with the same delimiter that opened them.
        start = i;
        end = block;
        braces = 0;
      } else if (block === '\\\\(' || block === '\\\\[') {
        // Two backslashes plus `(` or `[` close with the matching `)` or `]`.
        start = i;
        end = block.slice(-1) === '(' ? '\\\\)' : '\\\\]';
        braces = 0;
      } else if (block.substr(1, 5) === 'begin') {
        // `\begin{name}` closes with `\end{name}` (same `{name}` suffix).
        start = i;
        end = '\\end' + block.substr(6);
        braces = 0;
      }
    }
  }
  // The input ended inside math: use the remembered closing delimiter, if any.
  if (start !== null && last !== null) {
    blocks = processMath(start, last, deTilde, math, blocks);
    start = null;
    end = null;
    last = null;
  }
  return { text: deTilde(blocks.join('')), math };
}

/**
 * Put back the math strings that were saved.
 *
 * Every `@@<index>@@` placeholder in `text` is replaced by `math[<index>]`.
 * A placeholder whose index has no entry in `math` is left in the text
 * unchanged. The `math` array itself is not modified.
 *
 * @param text - text containing `@@<index>@@` placeholders
 * @param math - the saved math strings, indexed by placeholder number
 * @returns the text with the placeholders substituted
 */
export function replaceMath(text: string, math: string[]): string {
  /**
   * Replace a math placeholder with its corresponding group.
   * The math delimiters "\\(", "\\[", "\\)" and "\\]" are replaced
   * removing one backslash in order to be interpreted correctly by MathJax.
   */
  const process = (match: string, n: string): string => {
    // The capture arrives as a string of digits; convert it before indexing.
    const group: string | undefined = math[Number(n)];
    if (group === undefined) {
      // No saved math for this index: keep the placeholder text as it is
      // instead of failing on an undefined group.
      return match;
    }
    if (group.startsWith('\\\\(') && group.endsWith('\\\\)')) {
      return '\\(' + group.slice(3, group.length - 3) + '\\)';
    }
    if (group.startsWith('\\\\[') && group.endsWith('\\\\]')) {
      return '\\[' + group.slice(3, group.length - 3) + '\\]';
    }
    return group;
  };
  // Replace all the math group placeholders in the text
  // with the saved strings.
  return text.replace(/@@(\d+)@@/g, process);
}

/**
 * Process math blocks.
 *
 * The math is in blocks i through j, so
 * collect it into one block and clear the others.
 * Replace &, <, and > by named entities.
 * For IE, put <br> at the ends of comments since IE removes \n.
 * Store a placeholder (`@@<index>@@`) at position i, then push the math
 * string onto the storage array at that index.
 *
 * @param i - index in `blocks` of the first block of the math
 * @param j - index in `blocks` of the last block of the math (inclusive)
 * @param preProcess - applied to the escaped math string before it is stored
 * @param math - storage array; the math string is appended to it
 * @param blocks - the split text; modified in place
 * @returns the same `blocks` array that was passed in
 */
function processMath(
  i: number,
  j: number,
  preProcess: (input: string) => string,
  math: string[],
  blocks: string[]
): string[] {
  let block = blocks
    .slice(i, j + 1)
    .join('')
    .replace(/&/g, '&amp;') // use HTML entity for &
    .replace(/</g, '&lt;') // use HTML entity for <
    .replace(/>/g, '&gt;'); // use HTML entity for >
  // `navigator` is not declared in every runtime; referencing an undeclared
  // identifier throws a ReferenceError, so probe it with `typeof` first.
  if (
    typeof navigator !== 'undefined' &&
    navigator &&
    navigator.appName === 'Microsoft Internet Explorer'
  ) {
    block = block.replace(/(%[^\n]*)\n/g, '$1<br/>\n');
  }
  // Blank out every block of the math except the first one.
  for (let k = j; k > i; k--) {
    blocks[k] = '';
  }
  blocks[i] = '@@' + math.length + '@@'; // replace the current block text with a unique tag to find later
  if (preProcess) {
    block = preProcess(block);
  }
  math.push(block);
  return blocks;
}
39 changes: 39 additions & 0 deletions packages/base-manager/src/manager-base.ts
Expand Up @@ -26,9 +26,41 @@ import {
} from '@jupyter-widgets/base';

import { base64ToBuffer, bufferToBase64, hexToBuffer } from './utils';
import { removeMath, replaceMath } from './latex';
import sanitize from 'sanitize-html';

const PROTOCOL_MAJOR_VERSION = PROTOCOL_VERSION.split('.', 1)[0];

/**
 * Sanitize HTML-formatted descriptions.
 *
 * Passes `s` to `sanitize` with a fixed list of allowed tags and a fixed
 * map of allowed attributes, and returns the result.
 *
 * @param s - the HTML string to sanitize
 * @returns whatever `sanitize` returns for `s` with these options
 */
function default_inline_sanitize(s: string): string {
  return sanitize(s, {
    allowedTags: [
      'a',
      'abbr',
      'b',
      'code',
      'em',
      'i',
      'img',
      'li',
      'ol',
      'span',
      'strong',
      'ul',
    ],
    allowedAttributes: {
      // Attributes accepted on every allowed tag.
      '*': ['aria-*', 'style', 'title'],
      a: ['href'],
      img: ['src'],
      // NOTE(review): `style` is not in allowedTags above, so this entry
      // looks like it can never apply — confirm before removing.
      style: ['media', 'type'],
    },
  });
}

export interface IState extends PartialJSONObject {
buffers?: IBase64Buffers[];
model_name: string;
Expand Down Expand Up @@ -467,6 +499,13 @@ export abstract class ManagerBase implements IWidgetManager {
return Promise.resolve(url);
}

/**
 * Sanitize a string for inline HTML output while leaving math untouched.
 *
 * The math is taken out with `removeMath`, the remaining text goes through
 * `default_inline_sanitize`, and the math is put back with `replaceMath`.
 */
inline_sanitize(source: string): string {
  const { text, math } = removeMath(source);
  // Only the text outside the math goes through the tag sanitizer.
  return replaceMath(default_inline_sanitize(text), math);
}

/**
* The comm target name to register
*/
Expand Down
2 changes: 2 additions & 0 deletions packages/base/src/manager.ts
Expand Up @@ -190,4 +190,6 @@ export interface IWidgetManager {
* The default implementation just returns the original url.
*/
resolveUrl(url: string): Promise<string>;

/**
 * Return the string to use when `s` is inserted as inline HTML.
 *
 * Implementations are presumably expected to strip unsafe markup;
 * what exactly is removed is up to the implementation.
 */
inline_sanitize(s: string): string;
}
4 changes: 4 additions & 0 deletions packages/base/test/src/dummy-manager.ts
Expand Up @@ -316,6 +316,10 @@ export class DummyManager implements widgets.IWidgetManager {
return Promise.resolve(url);
}

/**
 * Stub implementation: returns `s` unchanged, no sanitizing is done.
 */
inline_sanitize(s: string): string {
  return s;
}

/**
* Dictionary of model ids and model instance promises
*/
Expand Down
7 changes: 6 additions & 1 deletion packages/controls/src/widget_bool.ts
Expand Up @@ -79,7 +79,12 @@ export class CheckboxView extends DescriptionView {
return;
}
const description = this.model.get('description');
this.descriptionSpan.innerHTML = description;
if (this.model.get('description_allow_html')) {
this.descriptionSpan.innerHTML =
this.model.widget_manager.inline_sanitize(description);
} else {
this.descriptionSpan.textContent = description;
}
this.typeset(this.descriptionSpan);
this.descriptionSpan.title = description;
this.checkbox.title = description;
Expand Down
13 changes: 12 additions & 1 deletion packages/controls/src/widget_description.ts
Expand Up @@ -41,6 +41,7 @@ export class DescriptionModel extends DOMWidgetModel {
_view_module_version: JUPYTER_CONTROLS_VERSION,
_model_module_version: JUPYTER_CONTROLS_VERSION,
description: '',
description_allow_html: false,
};
}
}
Expand All @@ -53,6 +54,11 @@ export class DescriptionView extends DOMWidgetView {
this.label.style.display = 'none';

this.listenTo(this.model, 'change:description', this.updateDescription);
this.listenTo(
this.model,
'change:description_allow_html',
this.updateDescription
);
this.listenTo(this.model, 'change:tabbable', this.updateTabindex);
this.updateDescription();
this.updateTabindex();
Expand All @@ -68,7 +74,12 @@ export class DescriptionView extends DOMWidgetView {
if (description.length === 0) {
this.label.style.display = 'none';
} else {
this.label.innerHTML = description;
if (this.model.get('description_allow_html')) {
this.label.innerHTML =
this.model.widget_manager.inline_sanitize(description);
} else {
this.label.textContent = description;
}
this.typeset(this.label);
this.label.style.display = '';
}
Expand Down
1 change: 1 addition & 0 deletions packages/html-manager/package.json
Expand Up @@ -52,6 +52,7 @@
"devDependencies": {
"@types/mocha": "^8.2.2",
"@types/node": "^15.12.2",
"@types/sanitize-html": "^1.20",
"chai": "^4.0.0",
"css-loader": "^5.2.6",
"file-loader": "^6.2.0",
Expand Down

0 comments on commit 8a2b049

Please sign in to comment.