Skip to content

Commit 26dd621

Browse files
committed
Always evaluate char ranges at codepoint level.
In some cases, building them from normalized strings causes the range to end up reversed, or causes similar problems.
1 parent 9e2b706 commit 26dd621

File tree

1 file changed

+12
-10
lines changed

1 file changed

+12
-10
lines changed

src/QRegex/P6Regex/Actions.nqp

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -451,19 +451,23 @@ class QRegex::P6Regex::Actions is HLL::Actions {
451451
method cclass_backslash:sym<o>($/) {
452452
my $octlit :=
453453
HLL::Actions.ints_to_string( $<octint> || $<octints><octint> );
454-
make $<sym> eq 'O'
454+
my $ast := $<sym> eq 'O'
455455
?? QAST::Regex.new( $octlit, :rxtype('enumcharlist'),
456456
:negate(1), :node($/) )
457457
!! QAST::Regex.new( $octlit, :rxtype('literal'), :node($/) );
458+
$ast.annotate('codepoint', $<octint> ?? $<octint>.ast !! $<octints><octint>[0].ast);
459+
make $ast;
458460
}
459461

460462
method cclass_backslash:sym<x>($/) {
461463
my $hexlit :=
462464
HLL::Actions.ints_to_string( $<hexint> || $<hexints><hexint> );
463-
make $<sym> eq 'X'
465+
my $ast := $<sym> eq 'X'
464466
?? QAST::Regex.new( $hexlit, :rxtype('enumcharlist'),
465467
:negate(1), :node($/) )
466468
!! QAST::Regex.new( $hexlit, :rxtype('literal'), :node($/) );
469+
$ast.annotate('codepoint', $<hexint> ?? $<hexint>.ast !! $<hexints><hexint>[0].ast);
470+
make $ast;
467471
}
468472

469473
method cclass_backslash:sym<c>($/) {
@@ -645,30 +649,28 @@ class QRegex::P6Regex::Actions is HLL::Actions {
645649
for $<charspec> {
646650
if $_[1] {
647651
my $node;
648-
my $lhs;
649-
my $rhs;
652+
my $ord0;
653+
my $ord1;
650654
if $_[0]<cclass_backslash> {
651655
$node := $_[0]<cclass_backslash>.ast;
652656
$/.CURSOR.panic("Illegal range endpoint in regex: " ~ ~$_)
653657
if $node.rxtype ne 'literal' && $node.rxtype ne 'enumcharlist'
654658
|| $node.negate || nqp::chars($node[0]) != 1;
655-
$lhs := $node[0];
659+
$ord0 := $node.ann('codepoint') // nqp::ord($node[0]);
656660
}
657661
else {
658-
$lhs := ~$_[0][0];
662+
$ord0 := nqp::ord(~$_[0][0]);
659663
}
660664
if $_[1][0]<cclass_backslash> {
661665
$node := $_[1][0]<cclass_backslash>.ast;
662666
$/.CURSOR.panic("Illegal range endpoint in regex: " ~ ~$_)
663667
if $node.rxtype ne 'literal' && $node.rxtype ne 'enumcharlist'
664668
|| $node.negate || nqp::chars($node[0]) != 1;
665-
$rhs := $node[0];
669+
$ord1 := $node.ann('codepoint') // nqp::ord($node[0]);
666670
}
667671
else {
668-
$rhs := ~$_[1][0][0];
672+
$ord1 := nqp::ord(~$_[1][0][0]);
669673
}
670-
my $ord0 := nqp::ord($lhs);
671-
my $ord1 := nqp::ord($rhs);
672674
$/.CURSOR.panic("Illegal reversed character range in regex: " ~ ~$_)
673675
if $ord0 > $ord1;
674676
@alts.push(QAST::Regex.new(

0 commit comments

Comments
 (0)