projectfluent · stasm · Nov 30, 2018 · Nov 28, 2018 · Nov 28, 2018 · Nov 28, 2018
diff --git a/fluent-syntax/src/errors.js b/fluent-syntax/src/errors.js
@@ -69,8 +69,8 @@ function getErrorMessage(code, args) {
       return `Unknown escape sequence: \\${char}.`;
     }
     case "E0026": {
-      const [char] = args;
-      return `Invalid Unicode escape sequence: \\u${char}.`;
+      const [sequence] = args;
+      return `Invalid Unicode escape sequence: ${sequence}.`;
     }
     case "E0027":
       return "Unbalanced closing brace in TextElement.";

diff --git a/fluent-syntax/src/parser.js b/fluent-syntax/src/parser.js
@@ -6,10 +6,6 @@ import { ParseError } from "./errors";
 
 
 const trailingWSRe = /[ \t\n\r]+$/;
-// The Fluent Syntax spec uses /.*/ to parse comment lines. It matches all
-// characters except the following ones, which are considered line endings by
-// the regex engine.
-const COMMENT_EOL = ["\n", "\r", "\u2028", "\u2029"];
 
 
 function withSpan(fn) {
@@ -194,10 +190,10 @@ export default class FluentParser {
         level = i;
       }
 
-      if (!COMMENT_EOL.includes(ps.currentChar)) {
+      if (ps.currentChar !== EOL) {
         ps.expectChar(" ");
         let ch;
-        while ((ch = ps.takeChar(x => !COMMENT_EOL.includes(x)))) {
+        while ((ch = ps.takeChar(x => x !== EOL))) {
           content += ch;
         }
       }
@@ -231,7 +227,7 @@ export default class FluentParser {
     ps.skipBlankInline();
     ps.expectChar("=");
 
-    const value = this.maybeGetValue(ps, {allowVariantList: false});
+    const value = this.maybeGetPattern(ps);
     const attrs = this.getAttributes(ps);
 
     if (value === null && attrs.length === 0) {
@@ -248,11 +244,9 @@ export default class FluentParser {
     ps.skipBlankInline();
     ps.expectChar("=");
 
-    // XXX Once https://github.com/projectfluent/fluent/pull/220 lands,
-    // getTerm will be the only place where VariantLists are still legal. Move
-    // the code from getPatternOrVariantList up to here then, and remove the
-    // allowVariantList switch.
-    const value = this.maybeGetValue(ps, {allowVariantList: true});
+    // Syntax 0.8 compat: VariantLists are supported but deprecated. They can
+    // only be found as values of Terms. Nested VariantLists are not allowed.
+    const value = this.maybeGetVariantList(ps) || this.maybeGetPattern(ps);
     if (value === null) {
       throw new ParseError("E0006", id.name);
     }
@@ -269,7 +263,7 @@ export default class FluentParser {
     ps.skipBlankInline();
     ps.expectChar("=");
 
-    const value = this.maybeGetValue(ps, {allowVariantList: false});
+    const value = this.maybeGetPattern(ps);
     if (value === null) {
       throw new ParseError("E0012");
     }
@@ -316,7 +310,7 @@ export default class FluentParser {
     return this.getIdentifier(ps);
   }
 
-  getVariant(ps, {hasDefault, allowVariantList}) {
+  getVariant(ps, {hasDefault}) {
     let defaultIndex = false;
 
     if (ps.currentChar === "*") {
@@ -337,23 +331,21 @@ export default class FluentParser {
     ps.skipBlank();
     ps.expectChar("]");
 
-    // XXX We need to pass allowVariantList all the way down to here because
-    // nested VariantLists in Terms are legal for now.
-    const value = this.maybeGetValue(ps, {allowVariantList});
+    const value = this.maybeGetPattern(ps);
     if (value === null) {
       throw new ParseError("E0012");
     }
 
     return new AST.Variant(key, value, defaultIndex);
   }
 
-  getVariants(ps, {allowVariantList}) {
+  getVariants(ps) {
     const variants = [];
     let hasDefault = false;
 
     ps.skipBlank();
     while (ps.isVariantStart()) {
-      const variant = this.getVariant(ps, {allowVariantList, hasDefault});
+      const variant = this.getVariant(ps, {hasDefault});
 
       if (variant.default) {
         hasDefault = true;
@@ -409,34 +401,34 @@ export default class FluentParser {
     return new AST.NumberLiteral(num);
   }
 
-  // maybeGetValue distinguishes between patterns which start on the same line
+  // maybeGetPattern distinguishes between patterns which start on the same line
   // as the identifier (a.k.a. inline singleline patterns and inline multiline
   // patterns) and patterns which start on a new line (a.k.a. block multiline
   // patterns). The distinction is important for the dedentation logic: the
   // indent of the first line of a block pattern must be taken into account when
   // calculating the maximum common indent.
-  maybeGetValue(ps, {allowVariantList}) {
+  maybeGetPattern(ps) {
     ps.peekBlankInline();
     if (ps.isValueStart()) {
       ps.skipToPeek();
-      return this.getPatternOrVariantList(
-        ps, {isBlock: false, allowVariantList});
+      return this.getPattern(ps, {isBlock: false});
     }
 
     ps.peekBlankBlock();
     if (ps.isValueContinuation()) {
       ps.skipToPeek();
-      return this.getPatternOrVariantList(
-        ps, {isBlock: true, allowVariantList});
+      return this.getPattern(ps, {isBlock: true});
     }
 
     return null;
   }
 
-  // Parse a VariantList (if allowed) or a Pattern.
-  getPatternOrVariantList(ps, {isBlock, allowVariantList}) {
-    ps.peekBlankInline();
-    if (allowVariantList && ps.currentPeek === "{") {
+  // Deprecated in Syntax 0.8. VariantLists are only allowed as values of Terms.
+  // Values of Messages, Attributes and Variants must be Patterns. This method
+  // is only used in getTerm.
+  maybeGetVariantList(ps) {
+    ps.peekBlank();
+    if (ps.currentPeek === "{") {
       const start = ps.peekOffset;
       ps.peek();
       ps.peekBlankInline();
@@ -445,19 +437,18 @@ export default class FluentParser {
         if (ps.isVariantStart()) {
           ps.resetPeek(start);
           ps.skipToPeek();
-          return this.getVariantList(ps, {allowVariantList});
+          return this.getVariantList(ps);
         }
       }
     }
 
     ps.resetPeek();
-    const pattern = this.getPattern(ps, {isBlock});
-    return pattern;
+    return null;
   }
 
   getVariantList(ps) {
     ps.expectChar("{");
-    var variants = this.getVariants(ps, {allowVariantList: true});
+    var variants = this.getVariants(ps);
     ps.expectChar("}");
     return new AST.VariantList(variants);
   }
@@ -599,37 +590,44 @@ export default class FluentParser {
   getEscapeSequence(ps) {
     const next = ps.currentChar;
 
-    if (next === "\\" || next === "\"") {
-      ps.next();
-      return [`\\${next}`, next];
+    switch (next) {
+      case "\\":
+      case "\"":
+        ps.next();
+        return [`\\${next}`, next];
+      case "u":
+        return this.getUnicodeEscapeSequence(ps, next, 4);
+      case "U":
+        return this.getUnicodeEscapeSequence(ps, next, 6);
+      default:
+        throw new ParseError("E0025", next);
     }
+  }
 
-    if (next === "u") {
-      let sequence = "";
-      ps.next();
-
-      for (let i = 0; i < 4; i++) {
-        const ch = ps.takeHexDigit();
+  getUnicodeEscapeSequence(ps, u, digits) {
+    ps.expectChar(u);
 
-        if (!ch) {
-          throw new ParseError("E0026", sequence + ps.currentChar);
-        }
+    let sequence = "";
+    for (let i = 0; i < digits; i++) {
+      const ch = ps.takeHexDigit();
 
-        sequence += ch;
+      if (!ch) {
+        throw new ParseError(
+          "E0026", `\\${u}${sequence}${ps.currentChar}`);
       }
 
-      const codepoint = parseInt(sequence, 16);
-      const unescaped = codepoint <= 0xD7FF || 0xE000 <= codepoint
-        // It's a Unicode scalar value.
-        ? String.fromCodePoint(codepoint)
-        // Escape sequences reresenting surrogate code points are well-formed
-        // but invalid in Fluent. Replace them with U+FFFD REPLACEMENT
-        // CHARACTER.
-        : "�";
-      return [`\\u${sequence}`, unescaped];
+      sequence += ch;
     }
 
-    throw new ParseError("E0025", next);
+    const codepoint = parseInt(sequence, 16);
+    const unescaped = codepoint <= 0xD7FF || 0xE000 <= codepoint
+      // It's a Unicode scalar value.
+      ? String.fromCodePoint(codepoint)
+      // Escape sequences representing surrogate code points are well-formed
+      // but invalid in Fluent. Replace them with U+FFFD REPLACEMENT
+      // CHARACTER.
+      : "�";
+    return [`\\${u}${sequence}`, unescaped];
   }
 
   getPlaceable(ps) {
@@ -676,7 +674,7 @@ export default class FluentParser {
       ps.skipBlankInline();
       ps.expectLineEnd();
 
-      const variants = this.getVariants(ps, {allowVariantList: false});
+      const variants = this.getVariants(ps);
       return new AST.SelectExpression(selector, variants);
     }
 

diff --git a/fluent-syntax/test/fixtures_behavior/escape_sequences.ftl b/fluent-syntax/test/fixtures_behavior/escape_sequences.ftl
@@ -12,5 +12,5 @@ key08 = {"Escaped \u0041 A"}
 # ~ERROR E0025, pos 232, args "A"
 key09 = {"\A"}
 
-# ~ERROR E0026, pos 252, args "000z"
+# ~ERROR E0026, pos 252, args "\u000z"
 key10 = {"\u000z"}
diff --git a/fluent-syntax/test/fixtures_behavior/variant_lists.ftl b/fluent-syntax/test/fixtures_behavior/variant_lists.ftl
@@ -17,6 +17,7 @@ message2 =
         *[one] One
     }
 
+# ~ERROR E0014, pos 211
 -term2 =
     {
         *[one] {

diff --git a/fluent-syntax/test/fixtures_reference/cr.json b/fluent-syntax/test/fixtures_reference/cr.json
@@ -2,9 +2,8 @@
     "type": "Resource",
     "body": [
         {
-            "type": "Junk",
-            "annotations": [],
-            "content": "### This entire file uses CR as EOL.\r\rerr01 = Value 01\rerr02 = Value 02\r\rerr03 =\r\r    Value 03\r    Continued\r\r    .title = Title\r\rerr04 = { \"str\r\rerr05 = { $sel -> }\r"
+            "type": "ResourceComment",
+            "content": "This entire file uses CR as EOL.\r\rerr01 = Value 01\rerr02 = Value 02\r\rerr03 =\r\r    Value 03\r    Continued\r\r    .title = Title\r\rerr04 = { \"str\r\rerr05 = { $sel -> }\r"
         }
     ]
 }
diff --git a/fluent-syntax/test/fixtures_reference/escaped_characters.ftl b/fluent-syntax/test/fixtures_reference/escaped_characters.ftl
@@ -14,8 +14,20 @@ mismatched-quote = {"\\""}
 unknown-escape = {"\x"}
 
 ## Unicode escapes
-string-unicode-sequence = {"\u0041"}
-string-escaped-unicode = {"\\u0041"}
+string-unicode-4digits = {"\u0041"}
+escape-unicode-4digits = {"\\u0041"}
+string-unicode-6digits = {"\U01F602"}
+escape-unicode-6digits = {"\\U01F602"}
+
+# OK The trailing "00" is part of the literal value.
+string-too-many-4digits = {"\u004100"}
+# OK The trailing "00" is part of the literal value.
+string-too-many-6digits = {"\U01F60200"}
+
+# ERROR Too few hex digits after \u.
+string-too-few-4digits = {"\u41"}
+# ERROR Too few hex digits after \U.
+string-too-few-6digits = {"\U1F602"}
 
 ## Literal braces
 brace-open = An opening {"{"} brace.
-Original file line number
+Diff line change
@@ Expand Up / @@ -17,6 +17,7 @@ message2 = @@
             *[one] One
         }
+    # ~ERROR E0014, pos 211
     -term2 =
         {
             *[one] {
@@ Expand Down @@