Permalink
Browse files

Add `es6` option

Enabling this option causes `jsesc` to use ES6 Unicode code point escapes for any astral Unicode symbols in the input.

Closes #13.
  • Loading branch information...
1 parent 0aa982d commit f0e59eefb936392a6758582c324f939913e108d0 @mathiasbynens committed Oct 25, 2013
Showing with 135 additions and 3 deletions.
  1. +22 −0 README.md
  2. +5 −0 bin/jsesc
  3. +22 −1 jsesc.js
  4. +5 −1 man/jsesc.1
  5. +22 −1 src/jsesc.js
  6. +59 −0 tests/tests.js
View
@@ -157,6 +157,28 @@ jsesc('Lorem ipsum "dolor" sit \'amet\' etc.', {
// → "\"Lorem ipsum \\\"dolor\\\" sit \'amet\' etc.\""
```
+#### `es6`
+
+The `es6` option takes a boolean value (`true` or `false`), and defaults to `false` (disabled). When enabled, any astral Unicode symbols in the input will be escaped using [ECMAScript 6 Unicode code point escape sequences](http://mathiasbynens.be/notes/javascript-escapes#unicode-code-point) instead of using separate escape sequences for each surrogate half. Only complete surrogate pairs (a high surrogate immediately followed by a low surrogate) are affected; a lone surrogate is escaped exactly as it would be with the option disabled. If backwards compatibility with ES5 environments is a concern, don’t enable this setting.
+
+```js
+// By default, the `es6` option is disabled:
+jsesc('foo 𝌆 bar 💩 baz');
+// → 'foo \\uD834\\uDF06 bar \\uD83D\\uDCA9 baz'
+
+// To explicitly disable it:
+jsesc('foo 𝌆 bar 💩 baz', {
+ 'es6': false
+});
+// → 'foo \\uD834\\uDF06 bar \\uD83D\\uDCA9 baz'
+
+// To enable it:
+jsesc('foo 𝌆 bar 💩 baz', {
+ 'es6': true
+});
+// → 'foo \\u{1D306} bar \\u{1F4A9} baz'
+```
+
#### `escapeEverything`
The `escapeEverything` option takes a boolean value (`true` or `false`), and defaults to `false` (disabled). When enabled, all the symbols in the output will be escaped, even printable ASCII symbols.
View
@@ -26,6 +26,7 @@
'\tjsesc [-d | --double-quotes] [string]',
'\tjsesc [-w | --wrap] [string]',
'\tjsesc [-e | --escape-everything] [string]',
+ '\tjsesc [-6 | --es6] [string]',
'\tjsesc [-j | --json] [string]',
'\tjsesc [-o | --object] [stringified_object]', // `JSON.parse()` the argument
'\tjsesc [-p | --pretty] [string]', // `compact: false`
@@ -60,6 +61,10 @@
options.wrap = true;
return;
}
+ if (/^(?:-6|--es6)$/.test(string)) {
+ options.es6 = true;
+ return;
+ }
if (/^(?:-e|--escape-everything)$/.test(string)) {
options.escapeEverything = true;
return;
View
@@ -85,6 +85,7 @@
'escapeEverything': false,
'quotes': 'single',
'wrap': false,
+ 'es6': false,
'json': false,
'compact': true,
'indent': '\t',
@@ -129,7 +130,8 @@
} else if (!isObject(argument)) {
if (json) {
// For some values (e.g. `undefined`, `function` objects),
- // `JSON.stringify(value)` returns `undefined` instead of `'null'`,
+ // `JSON.stringify(value)` returns `undefined` (which isn’t valid
+ // JSON) instead of `'null'`.
return JSON.stringify(argument) || 'null';
}
return String(argument);
@@ -160,9 +162,28 @@
// Loop over each code unit in the string and escape it
var index = -1;
var length = string.length;
+ var first;
+ var second;
+ var codePoint;
result = '';
while (++index < length) {
var character = string.charAt(index);
+ if (options.es6) {
+ first = string.charCodeAt(index);
+ if ( // check if it’s the start of a surrogate pair
+ first >= 0xD800 && first <= 0xDBFF && // high surrogate
+ length > index + 1 // there is a next code unit
+ ) {
+ second = string.charCodeAt(index + 1);
+ if (second >= 0xDC00 && second <= 0xDFFF) { // low surrogate
+ // http://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
+ codePoint = (first - 0xD800) * 0x400 + second - 0xDC00 + 0x10000;
+ result += '\\u{' + codePoint.toString(16).toUpperCase() + '}';
+ index++;
+ continue;
+ }
+ }
+ }
if (!options.escapeEverything) {
if (regexWhitelist.test(character)) {
// It’s a printable ASCII character that is not `"`, `'` or `\`,
View
@@ -1,4 +1,4 @@
-.Dd July 27, 2013
+.Dd October 25, 2013
.Dt jsesc 1
.Sh NAME
.Nm jsesc
@@ -11,6 +11,8 @@
.br
.Op Fl w | -wrap Ar string
.br
+.Op Fl 6 | -es6 Ar string
+.br
.Op Fl e | -escape-everything Ar string
.br
.Op Fl j | -json Ar string
@@ -37,6 +39,8 @@ Make sure the output is a valid JavaScript string literal wrapped in quotes. The
or
.Ar -d | --double-quotes
settings.
+.It Sy "-6, --es6"
+Escape any astral Unicode symbols using ECMAScript 6 Unicode code point escape sequences.
.It Sy "-e, --escape-everything"
Escape all the symbols in the output, even printable ASCII symbols.
.It Sy "-j, --json"
View
@@ -85,6 +85,7 @@
'escapeEverything': false,
'quotes': 'single',
'wrap': false,
+ 'es6': false,
'json': false,
'compact': true,
'indent': '\t',
@@ -129,7 +130,8 @@
} else if (!isObject(argument)) {
if (json) {
// For some values (e.g. `undefined`, `function` objects),
- // `JSON.stringify(value)` returns `undefined` instead of `'null'`,
+ // `JSON.stringify(value)` returns `undefined` (which isn’t valid
+ // JSON) instead of `'null'`.
return JSON.stringify(argument) || 'null';
}
return String(argument);
@@ -160,9 +162,28 @@
// Loop over each code unit in the string and escape it
var index = -1;
var length = string.length;
+ var first;
+ var second;
+ var codePoint;
result = '';
while (++index < length) {
var character = string.charAt(index);
+ if (options.es6) {
+ first = string.charCodeAt(index);
+ if ( // check if it’s the start of a surrogate pair
+ first >= 0xD800 && first <= 0xDBFF && // high surrogate
+ length > index + 1 // there is a next code unit
+ ) {
+ second = string.charCodeAt(index + 1);
+ if (second >= 0xDC00 && second <= 0xDFFF) { // low surrogate
+ // http://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
+ codePoint = (first - 0xD800) * 0x400 + second - 0xDC00 + 0x10000;
+ result += '\\u{' + codePoint.toString(16).toUpperCase() + '}';
+ index++;
+ continue;
+ }
+ }
+ }
if (!options.escapeEverything) {
if (regexWhitelist.test(character)) {
// It’s a printable ASCII character that is not `"`, `'` or `\`,
View
@@ -94,6 +94,28 @@
'escapeEverything'
);
equal(
+ jsesc('a\uD834\uDF06b', {
+ 'es6': true
+ }),
+ 'a\\u{1D306}b',
+ 'es6'
+ );
+ equal(
+ jsesc('a\uD834\uDF06b\uD83D\uDCA9c', {
+ 'es6': true
+ }),
+ 'a\\u{1D306}b\\u{1F4A9}c',
+ 'es6'
+ );
+ equal(
+ jsesc('a\uD834\uDF06b\uD83D\uDCA9c', {
+ 'es6': true,
+ 'escapeEverything': true
+ }),
+ '\\x61\\u{1D306}\\x62\\u{1F4A9}\\x63',
+ 'es6 + escapeEverything'
+ );
+ equal(
jsesc({}, {
'compact': true
}),
@@ -514,6 +536,43 @@
}
},
{
+ 'description': '-6 option',
+ 'command': './bin/jsesc -6 a\uD834\uDF06b\uD83D\uDCA9c',
+ 'expected': {
+ 'stdout': 'a\\u{1D306}b\\u{1F4A9}c\n',
+ 'stderr': '',
+ 'exitStatus': 0
+ }
+ },
+ {
+ 'description': '-6 option, piping content',
+ 'command': 'echo a\uD834\uDF06b\uD83D\uDCA9c | ./bin/jsesc -6',
+ 'expected': {
+ 'stdout': 'a\\u{1D306}b\\u{1F4A9}c\n',
+ 'stderr': '',
+ 'exitStatus': 0
+ }
+ },
+
+ {
+ 'description': '--es6 option',
+ 'command': './bin/jsesc --es6 a\uD834\uDF06b\uD83D\uDCA9c',
+ 'expected': {
+ 'stdout': 'a\\u{1D306}b\\u{1F4A9}c\n',
+ 'stderr': '',
+ 'exitStatus': 0
+ }
+ },
+ {
+ 'description': '--es6 option, piping content',
+ 'command': 'echo a\uD834\uDF06b\uD83D\uDCA9c | ./bin/jsesc --es6',
+ 'expected': {
+ 'stdout': 'a\\u{1D306}b\\u{1F4A9}c\n',
+ 'stderr': '',
+ 'exitStatus': 0
+ }
+ },
+ {
'description': '-e option',
'command': './bin/jsesc -e f\xF6o\\ \u2665\\ \\\'\\"\\\'\\"\\ b\xE5r\\ \uD834\uDF06\\ baz',
'expected': {

0 comments on commit f0e59ee

Please sign in to comment.