Skip to content

Commit 2b1d764

Browse files
committed
parse new boundary and character class syntax
1 parent 7503e20 commit 2b1d764

File tree

1 file changed

+49
-11
lines changed

1 file changed

+49
-11
lines changed

STD.pm6

Lines changed: 49 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -4694,6 +4694,9 @@ grammar Regex is STD {
46944694
# Declares the 'quantifier' variant of the category token, followed by the
# proto token 'quantifier' whose body is the bare {*} dispatch placeholder.
token category:quantifier { <sym> }
46954695
proto token quantifier {*}
46964696

4697+
# Declares the 'cclass_elem' variant of the category token and the matching
# proto token. The variants visible in this grammar are cclass_elem:name,
# cclass_elem:sym<[ ]>, cclass_elem:sym<( )> and cclass_elem:property.
token category:cclass_elem { <sym> }
4698+
proto token cclass_elem {*}
4699+
46974700
# Declares the 'mod_internal' variant of the category token, followed by the
# proto token 'mod_internal' whose body is the bare {*} dispatch placeholder.
token category:mod_internal { <sym> }
46984701
proto token mod_internal {*}
46994702

@@ -4958,6 +4961,7 @@ grammar Regex is STD {
49584961
# Two assertion variants that consist of nothing but their own symbol
# ('???' and '!!!'); no further text is parsed after <sym>.
token assertion:sym<???> { <sym> }
49594962
token assertion:sym<!!!> { <sym> }
49604963

4964+
# After the '|' symbol there are two alternatives: stop right in front of the
# closing '>' (lookahead only), or, when the next character is a word
# character, parse a nested <assertion>.
token assertion:sym<|> { <sym> [ <?before '>'> | <?before \w> <assertion> ] } # assertion-like syntax, anyway
49614965
# '?' and '!' share one shape: the symbol, then either a lookahead for the
# closing '>' or a nested <assertion> (no word-character guard, unlike '|').
token assertion:sym<?> { <sym> [ <?before '>'> | <assertion> ] }
49624966
token assertion:sym<!> { <sym> [ <?before '>'> | <assertion> ] }
49634967
# '*' differs in its second alternative: whitespace via <.ws> followed by a <nibbler>.
token assertion:sym<*> { <sym> [ <?before '>'> | <.ws> <nibbler> ] }
@@ -4993,27 +4997,61 @@ grammar Regex is STD {
49934997
]?
49944998
}
49954999

4996-
token assertion:sym<:> { <?before ':'<alpha>> <cclass_elem>+ }
4997-
token assertion:sym<[> { <?before '['> <cclass_elem>+ }
4998-
token assertion:sym<+> { <?before '+'> <cclass_elem>+ }
4999-
token assertion:sym<-> { <?before '-'> <cclass_elem>+ }
5000+
# Character-class assertions. None of these variants matches <sym>; each one
# only looks ahead for its lead text (':' followed by <alpha>, '[', '+' or
# '-') and then hands the whole input, lead character included, to
# <cclass_expr>.
token assertion:sym<:> { <?before ':'<alpha>> <cclass_expr> }
5001+
token assertion:sym<[> { <?before '['> <cclass_expr> }
5002+
token assertion:sym<+> { <?before '+'> <cclass_expr> }
5003+
token assertion:sym<-> { <?before '-'> <cclass_expr> }
50005004
# '.' and ',' are assertions made of their symbol alone.
token assertion:sym<.> { <sym> }
50015005
token assertion:sym<,> { <sym> }
50025006
# '~~' is followed by one of: a lookahead for the closing '>', a run of
# digits, or a <desigilname>.
token assertion:sym<~~> { <sym> [ <?before '>'> | \d+ | <desigilname> ] }
50035007

50045008
# This variant's only action is to call panic with the message below.
# NOTE(review): presumably the fallback when no other assertion variant
# applies -- confirm against the proto dispatch rules.
token assertion:bogus { <.panic: "Unrecognized regex assertion"> }
50055009

50065010
# Matches a literal '+' or '-'; the third alternative, <?>, is the null
# assertion, which lets the sign be absent.
token sign { '+' | '-' | <?> }
5007-
token cclass_elem {
5011+
# Outermost level of a character class expression, and the entry point used by
# the ':', '[', '+' and '-' assertion variants. In order: optional
# normspace, a <sign> (which may be empty), then <cclass_union> operands
# separated by '|' or '^', each separator being captured as $<op>.
token cclass_expr {
5012+
::
5013+
<.normspace>?
5014+
<sign>
5015+
<cclass_union> ** [$<op>=[ '|' | '^' ]]
5016+
}
5017+
5018+
# Middle level of a character class expression: optional normspace, then
# <cclass_add> operands separated by '&' (captured as $<op>).
# NOTE(review): despite the name, the separator at this level is '&'; the
# '|' and '^' separators are handled one level up, in cclass_expr.
token cclass_union {
5019+
<.normspace>?
5020+
<cclass_add> ** [$<op>=[ '&' ]]
5021+
}
5022+
5023+
# Innermost operator level of a character class expression: optional
# normspace, then <cclass_elem> operands separated by '+' or '-' (captured
# as $<op>).
token cclass_add {
5024+
<.normspace>?
5025+
<cclass_elem> ** [$<op>=[ '+' | '-' ]]
5026+
}
5027+
5028+
# Character class element written as a <name>, with optional normspace
# allowed on both sides. The :dba string labels this rule as
# 'character class element'.
token cclass_elem:name {
5029+
:dba('character class element')
5030+
<.normspace>?
5031+
<name>
5032+
<.normspace>?
5033+
}
5034+
5035+
token cclass_elem:sym<[ ]> {
50085036
:my $*CCSTATE = '';
50095037
:dba('character class element')
5010-
<sign>
50115038
<.normspace>?
5012-
[
5013-
| <name>
5014-
| <before '['> <quibble($¢.cursor_fresh( %*LANG<Q> ).tweak(:cc))>
5015-
| [:lang(%*LANG<MAIN>) <colonpair> ]
5016-
]
5039+
<before '['> <quibble($¢.cursor_fresh( %*LANG<Q> ).tweak(:cc))>
5040+
<.normspace>?
5041+
}
5042+
5043+
# Parenthesised character class element: a complete nested <cclass_expr>
# between '(' and ')' (written with the '~' goal-matching form), with
# optional normspace before and after. A fresh dynamic $*CCSTATE, initialised
# to the empty string, is declared for the duration of this token.
token cclass_elem:sym<( )> {
5044+
:my $*CCSTATE = '';
5045+
:dba('character class element')
5046+
<.normspace>?
5047+
'(' ~ ')' <cclass_expr>
5048+
<.normspace>?
5049+
}
5050+
5051+
# Character class element written as a <colonpair>. The colonpair is parsed
# inside a bracket group that switches the language to %*LANG<MAIN> via
# :lang; optional normspace is allowed on both sides.
token cclass_elem:property {
5052+
:dba('character class element')
5053+
<.normspace>?
5054+
[:lang(%*LANG<MAIN>) <colonpair> ]
50175055
<.normspace>?
50185056
}
50195057

0 commit comments

Comments
 (0)