Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions fluent-syntax/src/errors.js
Original file line number Diff line number Diff line change
Expand Up @@ -69,8 +69,8 @@ function getErrorMessage(code, args) {
return `Unknown escape sequence: \\${char}.`;
}
case "E0026": {
const [char] = args;
return `Invalid Unicode escape sequence: \\u${char}.`;
const [sequence] = args;
return `Invalid Unicode escape sequence: ${sequence}.`;
}
case "E0027":
return "Unbalanced closing brace in TextElement.";
Expand Down
112 changes: 55 additions & 57 deletions fluent-syntax/src/parser.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,6 @@ import { ParseError } from "./errors";


const trailingWSRe = /[ \t\n\r]+$/;
// The Fluent Syntax spec uses /.*/ to parse comment lines. It matches all
// characters except the following ones, which are considered line endings by
// the regex engine.
const COMMENT_EOL = ["\n", "\r", "\u2028", "\u2029"];


function withSpan(fn) {
Expand Down Expand Up @@ -194,10 +190,10 @@ export default class FluentParser {
level = i;
}

if (!COMMENT_EOL.includes(ps.currentChar)) {
if (ps.currentChar !== EOL) {
ps.expectChar(" ");
let ch;
while ((ch = ps.takeChar(x => !COMMENT_EOL.includes(x)))) {
while ((ch = ps.takeChar(x => x !== EOL))) {
content += ch;
}
}
Expand Down Expand Up @@ -231,7 +227,7 @@ export default class FluentParser {
ps.skipBlankInline();
ps.expectChar("=");

const value = this.maybeGetValue(ps, {allowVariantList: false});
const value = this.maybeGetPattern(ps);
const attrs = this.getAttributes(ps);

if (value === null && attrs.length === 0) {
Expand All @@ -248,11 +244,9 @@ export default class FluentParser {
ps.skipBlankInline();
ps.expectChar("=");

// XXX Once https://github.com/projectfluent/fluent/pull/220 lands,
// getTerm will be the only place where VariantLists are still legal. Move
// the code from getPatternOrVariantList up to here then, and remove the
// allowVariantList switch.
const value = this.maybeGetValue(ps, {allowVariantList: true});
// Syntax 0.8 compat: VariantLists are supported but deprecated. They can
// only be found as values of Terms. Nested VariantLists are not allowed.
const value = this.maybeGetVariantList(ps) || this.maybeGetPattern(ps);
if (value === null) {
throw new ParseError("E0006", id.name);
}
Expand All @@ -269,7 +263,7 @@ export default class FluentParser {
ps.skipBlankInline();
ps.expectChar("=");

const value = this.maybeGetValue(ps, {allowVariantList: false});
const value = this.maybeGetPattern(ps);
if (value === null) {
throw new ParseError("E0012");
}
Expand Down Expand Up @@ -316,7 +310,7 @@ export default class FluentParser {
return this.getIdentifier(ps);
}

getVariant(ps, {hasDefault, allowVariantList}) {
getVariant(ps, {hasDefault}) {
let defaultIndex = false;

if (ps.currentChar === "*") {
Expand All @@ -337,23 +331,21 @@ export default class FluentParser {
ps.skipBlank();
ps.expectChar("]");

// XXX We need to pass allowVariantList all the way down to here because
// nested VariantLists in Terms are legal for now.
const value = this.maybeGetValue(ps, {allowVariantList});
const value = this.maybeGetPattern(ps);
if (value === null) {
throw new ParseError("E0012");
}

return new AST.Variant(key, value, defaultIndex);
}

getVariants(ps, {allowVariantList}) {
getVariants(ps) {
const variants = [];
let hasDefault = false;

ps.skipBlank();
while (ps.isVariantStart()) {
const variant = this.getVariant(ps, {allowVariantList, hasDefault});
const variant = this.getVariant(ps, {hasDefault});

if (variant.default) {
hasDefault = true;
Expand Down Expand Up @@ -409,34 +401,34 @@ export default class FluentParser {
return new AST.NumberLiteral(num);
}

// maybeGetValue distinguishes between patterns which start on the same line
// maybeGetPattern distinguishes between patterns which start on the same line
// as the identifier (a.k.a. inline singleline patterns and inline multiline
// patterns) and patterns which start on a new line (a.k.a. block multiline
// patterns). The distinction is important for the dedentation logic: the
// indent of the first line of a block pattern must be taken into account when
// calculating the maximum common indent.
maybeGetValue(ps, {allowVariantList}) {
maybeGetPattern(ps) {
ps.peekBlankInline();
if (ps.isValueStart()) {
ps.skipToPeek();
return this.getPatternOrVariantList(
ps, {isBlock: false, allowVariantList});
return this.getPattern(ps, {isBlock: false});
}

ps.peekBlankBlock();
if (ps.isValueContinuation()) {
ps.skipToPeek();
return this.getPatternOrVariantList(
ps, {isBlock: true, allowVariantList});
return this.getPattern(ps, {isBlock: true});
}

return null;
}

// Parse a VariantList (if allowed) or a Pattern.
getPatternOrVariantList(ps, {isBlock, allowVariantList}) {
ps.peekBlankInline();
if (allowVariantList && ps.currentPeek === "{") {
// Deprecated in Syntax 0.8. VariantLists are only allowed as values of Terms.
// Values of Messages, Attributes and Variants must be Patterns. This method
// is only used in getTerm.
maybeGetVariantList(ps) {
ps.peekBlank();
if (ps.currentPeek === "{") {
const start = ps.peekOffset;
ps.peek();
ps.peekBlankInline();
Expand All @@ -445,19 +437,18 @@ export default class FluentParser {
if (ps.isVariantStart()) {
ps.resetPeek(start);
ps.skipToPeek();
return this.getVariantList(ps, {allowVariantList});
return this.getVariantList(ps);
}
}
}

ps.resetPeek();
const pattern = this.getPattern(ps, {isBlock});
return pattern;
return null;
}

getVariantList(ps) {
ps.expectChar("{");
var variants = this.getVariants(ps, {allowVariantList: true});
var variants = this.getVariants(ps);
ps.expectChar("}");
return new AST.VariantList(variants);
}
Expand Down Expand Up @@ -599,37 +590,44 @@ export default class FluentParser {
getEscapeSequence(ps) {
const next = ps.currentChar;

if (next === "\\" || next === "\"") {
ps.next();
return [`\\${next}`, next];
switch (next) {
case "\\":
case "\"":
ps.next();
return [`\\${next}`, next];
case "u":
return this.getUnicodeEscapeSequence(ps, next, 4);
case "U":
return this.getUnicodeEscapeSequence(ps, next, 6);
default:
throw new ParseError("E0025", next);
}
}

if (next === "u") {
let sequence = "";
ps.next();

for (let i = 0; i < 4; i++) {
const ch = ps.takeHexDigit();
getUnicodeEscapeSequence(ps, u, digits) {
ps.expectChar(u);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We already checked this in the switch statement, so I'd just call ps.next() here and drop the u argument.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd prefer to keep expectChar. In fact, I'm planning a cleanup after 0.8 where we get rid of most of ps.next(). I always seem to forget what it's supposed to consume, something expectChar is very explicit about.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, and we need the u arg for the error message. That's the real reason why it's there.


if (!ch) {
throw new ParseError("E0026", sequence + ps.currentChar);
}
let sequence = "";
for (let i = 0; i < digits; i++) {
const ch = ps.takeHexDigit();

sequence += ch;
if (!ch) {
throw new ParseError(
"E0026", `\\${u}${sequence}${ps.currentChar}`);
}

const codepoint = parseInt(sequence, 16);
const unescaped = codepoint <= 0xD7FF || 0xE000 <= codepoint
// It's a Unicode scalar value.
? String.fromCodePoint(codepoint)
// Escape sequences representing surrogate code points are well-formed
// but invalid in Fluent. Replace them with U+FFFD REPLACEMENT
// CHARACTER.
: "�";
return [`\\u${sequence}`, unescaped];
sequence += ch;
}

throw new ParseError("E0025", next);
const codepoint = parseInt(sequence, 16);
const unescaped = codepoint <= 0xD7FF || 0xE000 <= codepoint
// It's a Unicode scalar value.
? String.fromCodePoint(codepoint)
// Escape sequences representing surrogate code points are well-formed
// but invalid in Fluent. Replace them with U+FFFD REPLACEMENT
// CHARACTER.
: "�";
return [`\\${u}${sequence}`, unescaped];
}

getPlaceable(ps) {
Expand Down Expand Up @@ -676,7 +674,7 @@ export default class FluentParser {
ps.skipBlankInline();
ps.expectLineEnd();

const variants = this.getVariants(ps, {allowVariantList: false});
const variants = this.getVariants(ps);
return new AST.SelectExpression(selector, variants);
}

Expand Down
2 changes: 1 addition & 1 deletion fluent-syntax/test/fixtures_behavior/escape_sequences.ftl
Original file line number Diff line number Diff line change
Expand Up @@ -12,5 +12,5 @@ key08 = {"Escaped \u0041 A"}
# ~ERROR E0025, pos 232, args "A"
key09 = {"\A"}

# ~ERROR E0026, pos 252, args "000z"
# ~ERROR E0026, pos 252, args "\u000z"
key10 = {"\u000z"}
1 change: 1 addition & 0 deletions fluent-syntax/test/fixtures_behavior/variant_lists.ftl
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ message2 =
*[one] One
}

# ~ERROR E0014, pos 211
-term2 =
{
*[one] {
Expand Down
5 changes: 2 additions & 3 deletions fluent-syntax/test/fixtures_reference/cr.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,8 @@
"type": "Resource",
"body": [
{
"type": "Junk",
"annotations": [],
"content": "### This entire file uses CR as EOL.\r\rerr01 = Value 01\rerr02 = Value 02\r\rerr03 =\r\r Value 03\r Continued\r\r .title = Title\r\rerr04 = { \"str\r\rerr05 = { $sel -> }\r"
"type": "ResourceComment",
"content": "This entire file uses CR as EOL.\r\rerr01 = Value 01\rerr02 = Value 02\r\rerr03 =\r\r Value 03\r Continued\r\r .title = Title\r\rerr04 = { \"str\r\rerr05 = { $sel -> }\r"
}
]
}
16 changes: 14 additions & 2 deletions fluent-syntax/test/fixtures_reference/escaped_characters.ftl
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,20 @@ mismatched-quote = {"\\""}
unknown-escape = {"\x"}

## Unicode escapes
string-unicode-sequence = {"\u0041"}
string-escaped-unicode = {"\\u0041"}
string-unicode-4digits = {"\u0041"}
escape-unicode-4digits = {"\\u0041"}
string-unicode-6digits = {"\U01F602"}
escape-unicode-6digits = {"\\U01F602"}

# OK The trailing "00" is part of the literal value.
string-too-many-4digits = {"\u004100"}
# OK The trailing "00" is part of the literal value.
string-too-many-6digits = {"\U01F60200"}

# ERROR Too few hex digits after \u.
string-too-few-4digits = {"\u41"}
# ERROR Too few hex digits after \U.
string-too-few-6digits = {"\U1F602"}

## Literal braces
brace-open = An opening {"{"} brace.
Expand Down
Loading