Skip to content

Commit

Permalink
Support currently implemented sigils
Browse files Browse the repository at this point in the history
At time of writing we have the default sigil (no prefix), 's', 'b', 'S'
and 'B'. The way that the sigil is parsed depends on the sigil's prefix,
so we can't generically parse all future sigils.

For example the 'r' sigil for regular expressions is proposed to read
the contents verbatim even though it's lowercase, breaking with the
pattern defined in Elixir and carried forward with 's' and 'b'.

So for now we follow upstream and only parse what they have implemented.
  • Loading branch information
the-mikedavis committed Feb 9, 2024
1 parent 9f08ab2 commit 1574248
Show file tree
Hide file tree
Showing 4 changed files with 189 additions and 0 deletions.
102 changes: 102 additions & 0 deletions grammar.js
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,7 @@ module.exports = grammar({
_expression_without_call: ($) =>
choice(
$._identifier,
$.sigil,
$._strings,
alias($._triple_quoted_string, $.string),
$.tripledot,
Expand Down Expand Up @@ -500,6 +501,39 @@ module.exports = grammar({

// Contents of a triple quoted string: one or more pieces, each of which is either
// a single `"` character or a run of characters other than `"`.
_triple_quoted_string_contents: ($) => repeat1(choice('"', /[^"]+/)),

// See <https://www.erlang.org/eeps/eep-0066#sigil>.
sigil: ($) =>
seq(
"~",
choice(
// Default sigil (no prefix), escape sequences allowed.
$._sigil_body,
// `s` and `b` sigils, escape sequences allowed.
seq(alias(token.immediate(/[sb]/), $.sigil_prefix), $._sigil_body),
// `S` and `B` sigils, verbatim.
seq(
alias(token.immediate(/[SB]/), $.sigil_prefix),
$._sigil_body_verbatim
)
),
optional(alias($._sigil_affix, $.sigil_suffix))
),

// <https://www.erlang.org/eeps/eep-0066#sigil-prefix>:
// > Sigil Type which is a name composed of a sequence of characters that are allowed
// > as the second or later characters in a variable or an atom.
//
// This token matches such a name generically. The `sigil` rule does NOT use it for
// prefixes: how we parse the sigil depends on the sigil name - whether we accept
// escape sequences or not - so `sigil` only accepts the prefixes it knows how to
// parse. This token is only used there, aliased as `sigil_suffix`, for the optional
// name after the body. To make matters worse, triple quoted strings never allow
// escape sequences, all contents are verbatim. This makes parsing arbitrary sigils
// properly really hard, and we would probably need to add a scanner to do so.
_sigil_affix: ($) => token.immediate(/[a-zA-ZÀ-ÿ0-9_@]+/),

// Delimited body used by the default, `s` and `b` sigils: built by `sigilBodies`
// with `verbatim = false`, so `$.escape_sequence` is accepted inside the body.
_sigil_body: ($) => choice(...sigilBodies($, false)),
// Delimited body used by the `S` and `B` sigils: built by `sigilBodies` with
// `verbatim = true`.
_sigil_body_verbatim: ($) => choice(...sigilBodies($, true)),

// Used in typespecs:
// -type t :: [integer(), ...].
tripledot: ($) => "...",
Expand Down Expand Up @@ -551,3 +585,71 @@ function binaryOp($, precedence, assoc, operator, left = null, right = null) {
)
);
}

/**
 * Builds the alternatives for the delimited body of a sigil.
 *
 * One rule is produced per delimiter pair (in the order of the table below),
 * followed by the triple quoted string rule.
 *
 * @param $ object whose properties are used as rule references
 *   (`$.quoted_content`, `$.escape_sequence`, `$._triple_quoted_string`).
 * @param verbatim when truthy, a backslash followed by anything other than the
 *   closing delimiter stays part of `quoted_content`; when falsy,
 *   `$.escape_sequence` is accepted inside the body as well.
 * @returns an array of rules, meant to be spread into `choice(...)`.
 */
function sigilBodies($, verbatim) {
  // <https://www.erlang.org/eeps/eep-0066#string-delimiters>
  // Single-char delimiters: `/ | ' " ` #`
  // Start-end pairs: `() [] {} <>`
  const delimiter_pairs = [
    ["/", "/"],
    ["|", "|"],
    ['"', '"'],
    ["'", "'"],
    ["`", "`"],
    ["#", "#"],
    ["(", ")"],
    ["[", "]"],
    ["{", "}"],
    ["<", ">"],
  ];

  const delimited = delimiter_pairs.map(([opener, closer]) => {
    // Backslash-escape closers that would otherwise be regex syntax.
    const closer_pattern = closer.replace(/[{}()|\[\]]/, "\\$&");

    // Verbatim bodies swallow `\x` pairs into the content; escaping bodies stop
    // at every backslash so that escape sequences become their own nodes.
    const content_pattern = verbatim
      ? new RegExp(`([^${closer_pattern}\\\\]|\\\\[^${closer_pattern}])+`)
      : new RegExp(`[^${closer_pattern}\\\\]+`);

    const pieces = [
      alias(content_pattern, $.quoted_content),
      // A backslash-escaped closing delimiter does not end the body.
      alias(new RegExp(`\\\\${closer_pattern}`), $.escape_sequence),
    ];
    if (!verbatim) {
      pieces.push($.escape_sequence);
    }

    return seq(
      field("quoted_start", opener),
      repeat(choice(...pieces)),
      field("quoted_end", closer)
    );
  });

  // Triple quoted string is also allowed and treats contents as usual: verbatim.
  return [...delimited, $._triple_quoted_string];
}

/**
 * Builds one delimited sigil body that accepts escape sequences.
 *
 * The body is `start`, then any number of content runs, backslash-escaped
 * closing delimiters or `$.escape_sequence` nodes, then `end`.
 *
 * NOTE(review): no caller of this helper is visible in this part of the file;
 * the `_sigil_body*` rules are built with `sigilBodies` instead. Confirm
 * whether this helper is still needed.
 *
 * @param $ object whose properties are used as rule references
 *   (`$.quoted_content`, `$.escape_sequence`).
 * @param start opening delimiter, stored in the `quoted_start` field.
 * @param end closing delimiter, stored in the `quoted_end` field.
 * @returns the `seq(...)` rule for the body.
 */
function sigilBody($, start, end) {
  // Escaped form for use outside a character class (the `\<end>` pattern).
  const escaped_end = end.replace(/[{}()|\[\]"]/, "\\$&");
  // Escaped form for use inside a character class, where only `]`, `\`, `^`
  // and `-` are special. Interpolating a raw `]` would produce `[^]\\]+`,
  // which closes the class early instead of excluding the delimiter.
  const class_end = end.replace(/[\]\\^-]/, "\\$&");
  return seq(
    field("quoted_start", start),
    repeat(
      choice(
        alias(new RegExp(`[^${class_end}\\\\]+`), $.quoted_content),
        alias(new RegExp(`\\\\${escaped_end}`), $.escape_sequence),
        $.escape_sequence
      )
    ),
    field("quoted_end", end)
  );
}
2 changes: 2 additions & 0 deletions queries/highlights.scm
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,9 @@
(variable) @variable
(atom) @string.special.symbol
(string) @string
(sigil) @string
(character) @constant
(escape_sequence) @constant.character.escape

[
(integer)
Expand Down
65 changes: 65 additions & 0 deletions test/corpus/sigils.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
================================================================================
Default sigil
================================================================================

~"Hello UTF8!"

--------------------------------------------------------------------------------

(source
(sigil
(quoted_content)))

================================================================================
Empty default sigil
================================================================================

~""

--------------------------------------------------------------------------------

(source
(sigil))

================================================================================
Lowercase sigil allows escapes
================================================================================

~b"Escapes\n"

--------------------------------------------------------------------------------

(source
(sigil
(sigil_prefix)
(quoted_content)
(escape_sequence)))

================================================================================
Uppercase sigil reads verbatim
================================================================================

~B"Escapes\n"

--------------------------------------------------------------------------------

(source
(sigil
(sigil_prefix)
(quoted_content)))

================================================================================
Triple quoted sigil
================================================================================

~B"""
This is a triple quoted string binary.
All contents are verbatim, even this: ~r"abc\d+"i.
"""

--------------------------------------------------------------------------------

(source
(sigil
(sigil_prefix)
(quoted_contents)))
20 changes: 20 additions & 0 deletions test/highlight/expressions.erl
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,26 @@ case Foo of
end
%% <- keyword

~"Hello world"
%% ^ string
%% ^ string
%% ^string
%% ^ string

~b"Hello \nworld"
%% ^ string
%% ^ string
%% ^ string
%% ^ constant.character.escape
%% ^ string

~B"Hello \nworld"
%% ^ string
%% ^ string
%% ^ string
%% ^ string
%% ^ string

-doc """
Foos the bar
""".
Expand Down

0 comments on commit 1574248

Please sign in to comment.