Skip to content

Commit

Permalink
Make lone high surrogates trigger parse errors too
Browse files Browse the repository at this point in the history
  • Loading branch information
mathiasbynens committed May 24, 2014
1 parent 6468c23 commit 74fd7cd
Show file tree
Hide file tree
Showing 5 changed files with 58 additions and 2 deletions.
2 changes: 1 addition & 1 deletion he.js

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions scripts/export-data.js
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ module.exports = {
return '&(' + readJSON('decode-legacy-named-references').join('|') +
')([=a-zA-Z0-9])?';
}()),
'regexLoneSurrogate': '[\\uD800-\\uDBFF](?:[^\\uDC00-\\uDFFF]|$)|(?:[^\\uD800-\uDBFF]|^)[\\uDC00-\\uDFFF]',
'testData': fs.readFileSync('data/entities.json', 'utf-8').trim(),
'version': JSON.parse(fs.readFileSync('package.json', 'utf-8')).version
};
9 changes: 8 additions & 1 deletion src/he.js
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,14 @@
};

// Matches the start of a malformed numeric character reference: `&#` followed
// either by `x`/`X` and a character that is not a hexadecimal digit, or by a
// character that is neither a decimal digit nor `x`/`X`.
var regexInvalidEntity = /&#(?:[xX][^a-fA-F0-9]|[^0-9xX])/;
var regexInvalidRawCodePoint = /<%= invalidRawCodePoints %>/;
var regexInvalidRawCodePoint = /<%=
invalidRawCodePoints
%>|<%=
// http://whatwg.org/html/parsing.html#preprocessing-the-input-stream
// “Any character that is a not a Unicode character, i.e. any isolated
// surrogate, is a parse error.”
regexLoneSurrogate
%>/;
var regexDecode = /<%=
regexDecimalEscapeSource
%>|<%=
Expand Down
24 changes: 24 additions & 0 deletions tests/tests.js
Original file line number Diff line number Diff line change
Expand Up @@ -8660,6 +8660,30 @@
'&#x61;&amp;&#x62;&#x31;&#x32;&#x33;&semi;&plus;&copy;&nvgt;&nvlt;&NewLine;&fjlig;&#x61;',
'All kinds of symbols when `encodeEverything: true, useNamedReferences: true`'
);
// U+DC00 lies in the low (trailing) surrogate range U+DC00–U+DFFF. Without
// a preceding high surrogate it is unpaired: the default mode is expected to
// emit it as a hexadecimal character reference, strict mode to throw.
equal(
	he.encode('foo\uDC00bar'),
	'foo&#xDC00;bar',
	'Lone low surrogate'
);
raises(
	function() {
		he.encode('foo\uDC00bar', { 'strict': true });
	},
	Error,
	'Lone low surrogate triggers parse error when `strict: true`'
);
// U+D800 lies in the high (leading) surrogate range U+D800–U+DBFF. Without
// a following low surrogate it is unpaired, with the same expectations.
equal(
	he.encode('foo\uD800bar'),
	'foo&#xD800;bar',
	'Lone high surrogate'
);
raises(
	function() {
		he.encode('foo\uD800bar', { 'strict': true });
	},
	Error,
	'Lone high surrogate triggers parse error when `strict: true`'
);
// TODO: This should change as per issue #19.
equal(
he.encode('\0\x01\x02\x03\x04\x05\x06\x07\b\x0B\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F\x7F\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8A\x8B\x8C\x8D\x8E\x8F\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9A\x9B\x9C\x9D\x9E\x9F\uFDD0\uFDD1\uFDD2\uFDD3\uFDD4\uFDD5\uFDD6\uFDD7\uFDD8\uFDD9\uFDDA\uFDDB\uFDDC\uFDDD\uFDDE\uFDDF\uFDE0\uFDE1\uFDE2\uFDE3\uFDE4\uFDE5\uFDE6\uFDE7\uFDE8\uFDE9\uFDEA\uFDEB\uFDEC\uFDED\uFDEE\uFDEF\uFFFE\uFFFF\uD83F\uDFFE\uD83F\uDFFF\uD87F\uDFFE\uD87F\uDFFF\uD8BF\uDFFE\uD8BF\uDFFF\uD8FF\uDFFE\uD8FF\uDFFF\uD93F\uDFFE\uD93F\uDFFF\uD97F\uDFFE\uD97F\uDFFF\uD9BF\uDFFE\uD9BF\uDFFF\uD9FF\uDFFE\uD9FF\uDFFF\uDA3F\uDFFE\uDA3F\uDFFF\uDA7F\uDFFE\uDA7F\uDFFF\uDABF\uDFFE\uDABF\uDFFF\uDAFF\uDFFE\uDAFF\uDFFF\uDB3F\uDFFE\uDB3F\uDFFF\uDB7F\uDFFE\uDB7F\uDFFF\uDBBF\uDFFE\uDBBF\uDFFF\uDBFF\uDFFE\uDBFF\uDFFF'),
Expand Down
24 changes: 24 additions & 0 deletions tests/tests.src.js
Original file line number Diff line number Diff line change
Expand Up @@ -6428,6 +6428,30 @@
'&#x61;&amp;&#x62;&#x31;&#x32;&#x33;&semi;&plus;&copy;&nvgt;&nvlt;&NewLine;&fjlig;&#x61;',
'All kinds of symbols when `encodeEverything: true, useNamedReferences: true`'
);
// U+DC00 lies in the low (trailing) surrogate range U+DC00–U+DFFF. Without
// a preceding high surrogate it is unpaired: the default mode is expected to
// emit it as a hexadecimal character reference, strict mode to throw.
equal(
	he.encode('foo\uDC00bar'),
	'foo&#xDC00;bar',
	'Lone low surrogate'
);
raises(
	function() {
		he.encode('foo\uDC00bar', { 'strict': true });
	},
	Error,
	'Lone low surrogate triggers parse error when `strict: true`'
);
// U+D800 lies in the high (leading) surrogate range U+D800–U+DBFF. Without
// a following low surrogate it is unpaired, with the same expectations.
equal(
	he.encode('foo\uD800bar'),
	'foo&#xD800;bar',
	'Lone high surrogate'
);
raises(
	function() {
		he.encode('foo\uD800bar', { 'strict': true });
	},
	Error,
	'Lone high surrogate triggers parse error when `strict: true`'
);
// TODO: This should change as per issue #19.
equal(
he.encode(<%= invalidCodePointsString %>),
Expand Down

0 comments on commit 74fd7cd

Please sign in to comment.