From 055eef2e22fa3218af3099cda43598d8a532b157 Mon Sep 17 00:00:00 2001 From: Jasha10 <8935917+Jasha10@users.noreply.github.com> Date: Tue, 19 Oct 2021 15:47:11 -0500 Subject: [PATCH] Interpolation grammar: allow pipe `|` in unquoted strings (#799) --- news/799.feature | 1 + omegaconf/grammar/OmegaConfGrammarLexer.g4 | 2 +- omegaconf/grammar/OmegaConfGrammarParser.g4 | 4 +-- omegaconf/grammar_parser.py | 5 +++- tests/test_grammar.py | 27 +++++++++++++++------ 5 files changed, 27 insertions(+), 12 deletions(-) create mode 100644 news/799.feature diff --git a/news/799.feature b/news/799.feature new file mode 100644 index 000000000..6494fef8c --- /dev/null +++ b/news/799.feature @@ -0,0 +1 @@ +Enable the use of the pipe symbol `|` in unquoted strings when parsing interpolations. diff --git a/omegaconf/grammar/OmegaConfGrammarLexer.g4 b/omegaconf/grammar/OmegaConfGrammarLexer.g4 index 73f616b7c..30c9458fe 100644 --- a/omegaconf/grammar/OmegaConfGrammarLexer.g4 +++ b/omegaconf/grammar/OmegaConfGrammarLexer.g4 @@ -65,7 +65,7 @@ BOOL: NULL: [Nn][Uu][Ll][Ll]; -UNQUOTED_CHAR: [/\-\\+.$%*@?]; // other characters allowed in unquoted strings +UNQUOTED_CHAR: [/\-\\+.$%*@?|]; // other characters allowed in unquoted strings ID: (CHAR|'_') (CHAR|DIGIT|'_')*; ESC: (ESC_BACKSLASH | '\\(' | '\\)' | '\\[' | '\\]' | '\\{' | '\\}' | '\\:' | '\\=' | '\\,' | '\\ ' | '\\\t')+; diff --git a/omegaconf/grammar/OmegaConfGrammarParser.g4 b/omegaconf/grammar/OmegaConfGrammarParser.g4 index f0f256c9a..0628db7b8 100644 --- a/omegaconf/grammar/OmegaConfGrammarParser.g4 +++ b/omegaconf/grammar/OmegaConfGrammarParser.g4 @@ -70,7 +70,7 @@ primitive: | INT // 0, 10, -20, 1_000_000 | FLOAT // 3.14, -20.0, 1e-1, -10e3 | BOOL // true, TrUe, false, False - | UNQUOTED_CHAR // /, -, \, +, ., $, %, *, @ + | UNQUOTED_CHAR // /, -, \, +, ., $, %, *, @, ?, | | COLON // : | ESC // \\, \(, \), \[, \], \{, \}, \:, \=, \ , \\t, \, | WS // whitespaces @@ -84,7 +84,7 @@ dictKey: | INT // 0, 10, -20, 1_000_000 | FLOAT // 3.14, -20.0, 1e-1, -10e3 | BOOL // true, TrUe, false, False - | UNQUOTED_CHAR // /, -, \, +, ., $, %, *, @ + | UNQUOTED_CHAR // /, -, \, +, ., $, %, *, @, ?, | | ESC // \\, \(, \), \[, \], \{, \}, \:, \=, \ , \\t, \, | WS // whitespaces )+; \ No newline at end of file diff --git a/omegaconf/grammar_parser.py b/omegaconf/grammar_parser.py index ed6e4ee4a..1a5925aa2 100644 --- a/omegaconf/grammar_parser.py +++ b/omegaconf/grammar_parser.py @@ -27,7 +27,7 @@ _node_inter = f"\\${{\\s*{_node_path}\\s*}}" # node interpolation ${foo.bar} _id = "[a-zA-Z_]\\w*" # foo, foo_bar, abc123 _resolver_name = f"({_id}(\\.{_id})*)?" # foo, ns.bar3, ns_1.ns_2.b0z -_arg = "[a-zA-Z_0-9/\\-\\+.$%*@]+" # string representing a resolver argument +_arg = "[a-zA-Z_0-9/\\-\\+.$%*@?|]+" # string representing a resolver argument _args = f"{_arg}(\\s*,\\s*{_arg})*" # list of resolver arguments _resolver_inter = f"\\${{\\s*{_resolver_name}\\s*:\\s*{_args}?\\s*}}" # ${foo:bar} _inter = f"({_node_inter}|{_resolver_inter})" # any kind of interpolation @@ -35,6 +35,9 @@ SIMPLE_INTERPOLATION_PATTERN = re.compile( f"({_outer})?({_inter}({_outer})?)+$", flags=re.ASCII ) +# NOTE: SIMPLE_INTERPOLATION_PATTERN must not generate false positive matches: +# it must not accept anything that isn't a valid interpolation (per the +# interpolation grammar defined in `omegaconf/grammar/*.g4`). class OmegaConfErrorListener(ErrorListener): # type: ignore diff --git a/tests/test_grammar.py b/tests/test_grammar.py index 03c222303..52f98df2a 100644 --- a/tests/test_grammar.py +++ b/tests/test_grammar.py @@ -29,6 +29,7 @@ # Characters that are not allowed by the grammar in config key names. INVALID_CHARS_IN_KEY_NAMES = r"""\{}()[].:"' """ +UNQUOTED_SPECIAL = r"/-\+.$%*@?|" # special characters allowed in unquoted strings # A fixed config that may be used (but not modified!) by tests. BASE_TEST_CFG = OmegaConf.create( @@ -106,7 +107,11 @@ ("float_minus_nan", "-nan", math.nan), # Unquoted strings. # Note: raw strings do not allow trailing \, adding a space and stripping it. - ("str_legal", r" a/-\+.$*@?\\ ".strip(), r" a/-\+.$*@?\ ".strip()), + ( + "str_legal", + (r" a" + UNQUOTED_SPECIAL + r"\\ ").strip(), + (r" a" + UNQUOTED_SPECIAL + r"\ ").strip(), + ), ("str_illegal_1", "a,=b", GrammarParseError), ("str_illegal_2", f"{chr(200)}", GrammarParseError), ("str_illegal_3", f"{chr(129299)}", GrammarParseError), @@ -128,8 +133,8 @@ ("str_esc_illegal_1", r"\#", GrammarParseError), ("str_esc_illegal_2", r""" \'\" """.strip(), GrammarParseError), # Quoted strings. - ("str_quoted_single", "'!@#$%^&*()[]:.,\"'", '!@#$%^&*()[]:.,"'), - ("str_quoted_double", '"!@#$%^&*()[]:.,\'"', "!@#$%^&*()[]:.,'"), + ("str_quoted_single", "'!@#$%^&*|()[]:.,\"'", '!@#$%^&*|()[]:.,"'), + ("str_quoted_double", '"!@#$%^&*|()[]:.,\'"', "!@#$%^&*|()[]:.,'"), ("str_quoted_outer_ws_single", "' a \t'", " a \t"), ("str_quoted_outer_ws_double", '" a \t"', " a \t"), ("str_quoted_int", "'123'", "123"), @@ -181,8 +186,10 @@ ), ( "dict_unquoted_key", - fr"{{a0-null-1-3.14-NaN- {TAB}-true-False-/\+.$%*@\(\)\[\]\{{\}}\:\=\ \{TAB}\,:0}}", - {fr"a0-null-1-3.14-NaN- {TAB}-true-False-/\+.$%*@()[]{{}}:= {TAB},": 0}, + fr"{{a0-null-1-3.14-NaN- {TAB}-true-False-{UNQUOTED_SPECIAL}\(\)\[\]\{{\}}\:\=\ \{TAB}\,:0}}", + { + fr"a0-null-1-3.14-NaN- {TAB}-true-False-{UNQUOTED_SPECIAL}()[]{{}}:= {TAB},": 0 + }, ), ( "dict_quoted", @@ -364,7 +371,11 @@ ("str_top_middle_quote_double", 'I"d like ${str}', 'I"d like hi'), ("str_top_middle_quotes_single", "I like '${str}'", "I like 'hi'"), ("str_top_middle_quotes_double", 'I like "${str}"', 'I like "hi"'), - ("str_top_any_char", r"${str} !@\#$%^&*})][({,/?;", r"hi !@\#$%^&*})][({,/?;"), + ( + "str_top_any_char", + r"${str} " + UNQUOTED_SPECIAL + r"^!#&})][({,;", + r"hi " + UNQUOTED_SPECIAL + r"^!#&})][({,;", + ), ("str_top_esc_inter", r"Esc: \${str}", "Esc: ${str}"), ("str_top_esc_inter_wrong_1", r"Wrong: $\{str\}", r"Wrong: $\{str\}"), ("str_top_esc_inter_wrong_2", r"Wrong: \${str\}", r"Wrong: ${str\}"), @@ -602,7 +613,7 @@ def visit() -> Any: "$ ${foo} ${bar} ${boz} $", "${foo:bar}", "${foo : bar, baz, boz}", - "${foo:bar,0,a-b+c*d/$.%@}", + "${foo:bar,0,a-b+c*d/$.%@?|}", r"\${foo}", "${foo.bar:boz}", "${$foo.bar$.x$y}", @@ -729,7 +740,7 @@ def callback(inter_key: Any, memo: Optional[Set[int]]) -> Any: def test_custom_resolver_param_supported_chars() -> None: - supported_chars = r"abc123_/:-\+.$%*@" + supported_chars = r"abc123_:" + UNQUOTED_SPECIAL c = OmegaConf.create({"dir1": "${copy:" + supported_chars + "}"}) OmegaConf.register_new_resolver("copy", lambda x: x)