/
expression_tokenizer.ex
138 lines (118 loc) · 3.13 KB
/
expression_tokenizer.ex
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
defmodule Expug.ExpressionTokenizer do
@moduledoc ~S"""
Tokenizes an expression.
This is used by `Expug.Tokenizer` to match attribute values and to support multiline expressions.
`expression/2` is used to capture an expression token.
state
|> Expug.ExpressionTokenizer.expression(:attribute_value)
## Valid expressions
Expressions are a combination of one or more of these:
- a word without spaces
- a balanced `(` ... `)` pair (or `[`, or `{`)
- a string with single quotes `'...'` or double quotes `"..."`
A balanced pair can have balanced pairs, words, and strings inside them.
Double-quote strings can have `#{...}` interpolation inside them.
## Examples
These are valid expressions:
hello
hello(1 + 2)
"Hello world" # strings
(hello world) # balanced (...) pair
These aren't:
hello world # spaces
hello(world[) # pairs not balanced
"hello #{foo(}" # not balanced inside an interpolation
"""
import Expug.TokenizerTools
@doc """
Captures an expression under the token named `token_name`.

Calls `start_empty/2` with `token_name`, then passes `expression_fragment/1`
to `many_of/2` so that the matched fragments make up the expression.
"""
def expression(state, token_name) do
  started = start_empty(state, token_name)
  many_of(started, &expression_fragment/1)
end
@doc """
Matches a single fragment of an expression.

The alternatives are handed to `one_of/2` in this order: balanced `(...)`,
`{...}` and `[...]` pairs, double-quoted and single-quoted strings, and
finally a plain term.
"""
def expression_fragment(state) do
  alternatives = [
    &balanced_parentheses/1,
    &balanced_braces/1,
    &balanced_brackets/1,
    &double_quote_string/1,
    &single_quote_string/1,
    &expression_term/1
  ]

  one_of(state, alternatives)
end
@doc """
Matches a simple, unbroken term such as `xyz` or `a+b`.

A term is a run of one or more characters that are not brackets of any kind,
quotes, commas, spaces, newlines or tabs.
"""
def expression_term(state) do
  append(state, ~r/^[^\(\)\[\]\{\}"', \n\t]+/)
end
@doc """
Matches a simple term like `xyz` for use inside a balanced pair.

Unlike `expression_term/1`, whitespace and commas are accepted here; only
brackets and quotes end the match.
"""
def expression_term_inside(state), do: append(state, ~r/^[^\(\)\[\]\{\}"']+/)
@doc """
Matches a balanced `(...)` fragment.

Delegates to `balanced_pairs/3` with `(` as the opening delimiter and `)` as
the closing one.
"""
def balanced_parentheses(state) do
  balanced_pairs(state, ~r/^\(/, ~r/^\)/)
end
@doc """
Matches a balanced `{...}` fragment.

Delegates to `balanced_pairs/3` with `{` as the opening delimiter and `}` as
the closing one.
"""
def balanced_braces(state) do
  balanced_pairs(state, ~r/^\{/, ~r/^\}/)
end
@doc """
Matches a balanced `[...]` fragment.

Delegates to `balanced_pairs/3` with `[` as the opening delimiter and `]` as
the closing one.
"""
def balanced_brackets(state) do
  balanced_pairs(state, ~r/^\[/, ~r/^\]/)
end
@doc """
Shared implementation behind the `balanced_*` functions.

Appends `left`, then optionally any number of inner pieces, then `right`.
Each inner piece is tried first as an `expression_fragment/1` and then as an
`expression_term_inside/1`. `left` and `right` are passed straight to
`append/2`; the callers in this module supply regexes.
"""
def balanced_pairs(state, left, right) do
  # One piece of content between the delimiters.
  inner_piece = fn s ->
    one_of(s, [
      &expression_fragment/1,
      &expression_term_inside/1
    ])
  end

  # One or more pieces; wrapped in `optional/2` below so an empty pair also matches.
  contents = fn s -> many_of(s, inner_piece) end

  opened = append(state, left)

  opened
  |> optional(contents)
  |> append(right)
end
@doc ~S"""
Matches an entire double-quoted string, taking care of interpolation and escaping.

Between the opening and closing `"`, the body is consumed piece by piece.
Each piece is the first of these that matches:

- an interpolation: `#` immediately followed by a balanced `{...}` fragment
- an escape sequence: a backslash together with the character that follows it
- any single character other than `"` or a backslash

Consuming a backslash together with its escaped character means that `\\`
directly before the closing quote (as in `"a\\"`) is read as an escaped
backslash rather than as a backslash followed by an escaped quote.
"""
def double_quote_string(state) do
  state
  |> append(~r/^"/)
  |> optional_many_of(fn s ->
    one_of(s, [
      fn s -> s |> append(~r/^#/) |> balanced_braces() end,
      # `\\[\s\S]` eats the backslash AND the character it escapes (newline
      # included), so `\\` can never leave a stray backslash that pairs up
      # with the closing quote.
      fn s -> append(s, ~r/^(?:\\[\s\S]|[^"\\])/) end
    ])
  end)
  |> append(~r/^"/)
end
@doc ~S"""
Matches an entire single-quoted string, taking care of escaping.

Between the opening and closing `'`, the body is consumed piece by piece.
Each piece is either:

- an escape sequence: a backslash together with the character that follows it
- any single character other than `'` or a backslash

Consuming a backslash together with its escaped character means that `\\`
directly before the closing quote (as in `'a\\'`) is read as an escaped
backslash rather than as a backslash followed by an escaped quote.
"""
def single_quote_string(state) do
  state
  |> append(~r/^'/)
  # `\\[\s\S]` eats the backslash AND the character it escapes, so `\\` can
  # never leave a stray backslash that pairs up with the closing quote.
  |> optional_many_of(fn s -> append(s, ~r/^(?:\\[\s\S]|[^'\\])/) end)
  |> append(~r/^'/)
end
end