diff --git a/lib/combinators.mjs b/lib/combinators.mjs index 1a52312..c0da4b6 100644 --- a/lib/combinators.mjs +++ b/lib/combinators.mjs @@ -31,7 +31,7 @@ export function regex(re) { } export function charset(range) { - return regex(new RegExp(`[${range}]`)); + return regex(`[${range}]`); } export function eof() { diff --git a/lib/stream.mjs b/lib/stream.mjs index 1731125..92aa386 100644 --- a/lib/stream.mjs +++ b/lib/stream.mjs @@ -22,7 +22,11 @@ export default class Stream { // Execute a regex on the iterable. exec(re) { - const sticky = new RegExp(re, "y"); + // The "u" flag is a feature of ES2015 which makes regexes Unicode-aware. + // See https://mathiasbynens.be/notes/es6-unicode-regex. + // The "y" flag makes the regex sticky. The match must start at the + // offset specified by the regex's lastIndex property. + let sticky = new RegExp(re, "uy"); sticky.lastIndex = this.cursor; return sticky.exec(this.iterable); } diff --git a/lib/visitor.mjs b/lib/visitor.mjs index a0553c6..933a50b 100644 --- a/lib/visitor.mjs +++ b/lib/visitor.mjs @@ -79,8 +79,9 @@ export default { function escape(str) { return str - .replace("\\", "\\\\") - .replace("\"", "\\\"") + // Escape backslash and double quote, which are special in EBNF. + .replace(/\\/g, "\\\\") + .replace(/"/g, "\\\"") // Replace all Control and non-Basic Latin characters. .replace(/([^\u0021-\u007E])/g, unicode_sequence); } diff --git a/spec/CHANGELOG.md b/spec/CHANGELOG.md index a1d4975..9c0a0b7 100644 --- a/spec/CHANGELOG.md +++ b/spec/CHANGELOG.md @@ -1,5 +1,12 @@ # Changelog +## Unreleased + + - Support astral Unicode characters. (#179) + + Unicode characters from outside of the Basic Multilingual Plane can now + be used in values of `TextElements` and `StringLiterals`. This means all + characters in the U+10000 to U+10FFFF range. 🎉 ## 0.7.0 (October 15, 2018) diff --git a/spec/fluent.ebnf b/spec/fluent.ebnf index 16697cd..f0a1fa8 100644 --- a/spec/fluent.ebnf +++ b/spec/fluent.ebnf @@ -92,9 +92,8 @@ quote ::= "\"" /* Any Unicode character from BMP excluding C0 control characters, space, * surrogate blocks and non-characters (U+FFFE, U+FFFF). * Cf. https://www.w3.org/TR/REC-xml/#NT-Char - * TODO Add characters from other planes: U+10000 to U+10FFFF. */ -regular_char ::= [!-\uD7FF\uE000-\uFFFD] +regular_char ::= [\\u{21}-\\u{D7FF}\\u{E000}-\\u{FFFD}\\u{10000}-\\u{10FFFF}] text_char ::= blank_inline | "\u0009" | /\\u[0-9a-fA-F]{4}/ diff --git a/syntax/grammar.mjs b/syntax/grammar.mjs index cfb9b2f..17a35f4 100644 --- a/syntax/grammar.mjs +++ b/syntax/grammar.mjs @@ -386,10 +386,9 @@ let quote = string("\""); /* Any Unicode character from BMP excluding C0 control characters, space, * surrogate blocks and non-characters (U+FFFE, U+FFFF). * Cf. https://www.w3.org/TR/REC-xml/#NT-Char - * TODO Add characters from other planes: U+10000 to U+10FFFF. */ let regular_char = - charset("\u0021-\uD7FF\uE000-\uFFFD"); + charset("\\u{21}-\\u{D7FF}\\u{E000}-\\u{FFFD}\\u{10000}-\\u{10FFFF}"); let text_char = defer(() => either( diff --git a/test/fixtures/astral.ftl b/test/fixtures/astral.ftl new file mode 100644 index 0000000..b77e32e --- /dev/null +++ b/test/fixtures/astral.ftl @@ -0,0 +1,20 @@ +face-with-tears-of-joy = 😂 +tetragram-for-centre = 𝌆 + +surrogates-in-text = \uD83D\uDE02 +surrogates-in-string = {"\uD83D\uDE02"} +surrogates-in-adjacent-strings = {"\uD83D"}{"\uDE02"} + +emoji-in-text = A face 😂 with tears of joy. +emoji-in-string = {"A face 😂 with tears of joy."} + +# ERROR Invalid identifier +err-😂 = Value + +# ERROR Invalid expression +err-invalid-expression = { 😂 } + +# ERROR Invalid variant key +err-invalid-variant-key = { $sel -> + *[😂] Value +} diff --git a/test/fixtures/astral.json b/test/fixtures/astral.json new file mode 100644 index 0000000..6056fb7 --- /dev/null +++ b/test/fixtures/astral.json @@ -0,0 +1,174 @@ +{ + "type": "Resource", + "body": [ + { + "type": "Message", + "id": { + "type": "Identifier", + "name": "face-with-tears-of-joy" + }, + "value": { + "type": "Pattern", + "elements": [ + { + "type": "TextElement", + "value": "😂" + } + ] + }, + "attributes": [], + "comment": null + }, + { + "type": "Message", + "id": { + "type": "Identifier", + "name": "tetragram-for-centre" + }, + "value": { + "type": "Pattern", + "elements": [ + { + "type": "TextElement", + "value": "𝌆" + } + ] + }, + "attributes": [], + "comment": null + }, + { + "type": "Message", + "id": { + "type": "Identifier", + "name": "surrogates-in-text" + }, + "value": { + "type": "Pattern", + "elements": [ + { + "type": "TextElement", + "value": "\\uD83D\\uDE02" + } + ] + }, + "attributes": [], + "comment": null + }, + { + "type": "Message", + "id": { + "type": "Identifier", + "name": "surrogates-in-string" + }, + "value": { + "type": "Pattern", + "elements": [ + { + "type": "Placeable", + "expression": { + "type": "StringLiteral", + "value": "\\uD83D\\uDE02" + } + } + ] + }, + "attributes": [], + "comment": null + }, + { + "type": "Message", + "id": { + "type": "Identifier", + "name": "surrogates-in-adjacent-strings" + }, + "value": { + "type": "Pattern", + "elements": [ + { + "type": "Placeable", + "expression": { + "type": "StringLiteral", + "value": "\\uD83D" + } + }, + { + "type": "Placeable", + "expression": { + "type": "StringLiteral", + "value": "\\uDE02" + } + } + ] + }, + "attributes": [], + "comment": null + }, + { + "type": "Message", + "id": { + "type": "Identifier", + "name": "emoji-in-text" + }, + "value": { + "type": "Pattern", + "elements": [ + { + "type": "TextElement", + "value": "A face 😂 with tears of joy." + } + ] + }, + "attributes": [], + "comment": null + }, + { + "type": "Message", + "id": { + "type": "Identifier", + "name": "emoji-in-string" + }, + "value": { + "type": "Pattern", + "elements": [ + { + "type": "Placeable", + "expression": { + "type": "StringLiteral", + "value": "A face 😂 with tears of joy." + } + } + ] + }, + "attributes": [], + "comment": null + }, + { + "type": "Comment", + "content": "ERROR Invalid identifier" + }, + { + "type": "Junk", + "annotations": [], + "content": "err-😂 = Value\n" + }, + { + "type": "Comment", + "content": "ERROR Invalid expression" + }, + { + "type": "Junk", + "annotations": [], + "content": "err-invalid-expression = { 😂 }\n" + }, + { + "type": "Comment", + "content": "ERROR Invalid variant key" + }, + { + "type": "Junk", + "annotations": [], + "content": "err-invalid-variant-key = { $sel ->\n *[😂] Value\n}\n" + } + ] +}