Skip to content

Commit 3629cd6

Browse files
committed
Handle case-insensitive literals in LTM (longest token matching).
1 parent 3cfcb96 commit 3629cd6

File tree

2 files changed

+64
-24
lines changed

2 files changed

+64
-24
lines changed

src/QRegex/NFA.nqp

Lines changed: 38 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,17 @@
11
use QASTNode;
22

33
class QRegex::NFA {
4-
our $EDGE_FATE := 0;
5-
our $EDGE_EPSILON := 1;
6-
our $EDGE_CODEPOINT := 2;
7-
our $EDGE_CODEPOINT_NEG := 3;
8-
our $EDGE_CHARCLASS := 4;
9-
our $EDGE_CHARCLASS_NEG := 5;
10-
our $EDGE_CHARLIST := 6;
11-
our $EDGE_CHARLIST_NEG := 7;
12-
our $EDGE_SUBRULE := 8;
4+
our $EDGE_FATE := 0;
5+
our $EDGE_EPSILON := 1;
6+
our $EDGE_CODEPOINT := 2;
7+
our $EDGE_CODEPOINT_NEG := 3;
8+
our $EDGE_CHARCLASS := 4;
9+
our $EDGE_CHARCLASS_NEG := 5;
10+
our $EDGE_CHARLIST := 6;
11+
our $EDGE_CHARLIST_NEG := 7;
12+
our $EDGE_SUBRULE := 8;
13+
our $EDGE_CODEPOINT_I := 9;
14+
our $EDGE_CODEPOINT_I_NEG := 10;
1315

1416
has $!states;
1517
has $!edges;
@@ -104,15 +106,28 @@ class QRegex::NFA {
104106
}
105107

106108
method literal($node, int $from, int $to) {
107-
my str $litconst := $node[0];
108-
my int $litlen := nqp::chars($litconst) - 1;
109+
my int $litlen := nqp::chars($node[0]) - 1;
109110
my int $i := 0;
110111
if $litlen >= 0 {
111-
while $i < $litlen {
112-
$from := self.addedge($from, -1, $EDGE_CODEPOINT, nqp::ord($litconst, $i));
113-
$i := $i + 1;
112+
if $node.subtype eq 'ignorecase' {
113+
my str $litconst_lc := nqp::lc($node[0]);
114+
my str $litconst_uc := nqp::uc($node[0]);
115+
while $i < $litlen {
116+
$from := self.addedge($from, -1, $EDGE_CODEPOINT_I,
117+
[nqp::ord($litconst_lc, $i), nqp::ord($litconst_uc, $i)]);
118+
$i := $i + 1;
119+
}
120+
self.addedge($from, $to, $EDGE_CODEPOINT_I,
121+
[nqp::ord($litconst_lc, $i), nqp::ord($litconst_uc, $i)]);
122+
}
123+
else {
124+
my str $litconst := $node[0];
125+
while $i < $litlen {
126+
$from := self.addedge($from, -1, $EDGE_CODEPOINT, nqp::ord($litconst, $i));
127+
$i := $i + 1;
128+
}
129+
self.addedge($from, $to, $EDGE_CODEPOINT, nqp::ord($litconst, $i));
114130
}
115-
self.addedge($from, $to, $EDGE_CODEPOINT, nqp::ord($litconst, $i));
116131
}
117132
else {
118133
self.addedge($from, $to, $EDGE_EPSILON, 0);
@@ -248,7 +263,14 @@ class QRegex::NFA {
248263
for $!states -> @values {
249264
my $list := QAST::Op.new(:op<list>);
250265
for @values {
251-
if $_ ~~ QAST::SVal {
266+
if nqp::islist($_) {
267+
my $arglist := QAST::Op.new( :op('list_i') );
268+
for $_ -> $i {
269+
$arglist.push(QAST::IVal.new( :value($i) ));
270+
}
271+
$list.push($arglist);
272+
}
273+
elsif $_ ~~ QAST::SVal {
252274
$list.push($_);
253275
}
254276
elsif +$_ eq $_ {

src/ops/nqp.ops

Lines changed: 26 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -39,14 +39,16 @@ static PMC *nqpevent_fh = NULL;
3939
static INTVAL nqpdebflags_i = 0;
4040

4141
/* NFA constants. */
42-
#define EDGE_FATE 0
43-
#define EDGE_EPSILON 1
44-
#define EDGE_CODEPOINT 2
45-
#define EDGE_CODEPOINT_NEG 3
46-
#define EDGE_CHARCLASS 4
47-
#define EDGE_CHARCLASS_NEG 5
48-
#define EDGE_CHARLIST 6
49-
#define EDGE_CHARLIST_NEG 7
42+
#define EDGE_FATE 0
43+
#define EDGE_EPSILON 1
44+
#define EDGE_CODEPOINT 2
45+
#define EDGE_CODEPOINT_NEG 3
46+
#define EDGE_CHARCLASS 4
47+
#define EDGE_CHARCLASS_NEG 5
48+
#define EDGE_CHARLIST 6
49+
#define EDGE_CHARLIST_NEG 7
50+
#define EDGE_CODEPOINT_I 9
51+
#define EDGE_CODEPOINT_I_NEG 10
5052

5153
/* Objects we use every time we run an NFA; since we always run one NFA
5254
* at a time, we can re-use these. */
@@ -251,6 +253,22 @@ static INTVAL * nqp_nfa_run(PARROT_INTERP, PMC *states, STRING *target, INTVAL o
251253
if (STRING_index(interp, arg, chr, 0) < 0)
252254
VTABLE_push_integer(interp, nextst, to);
253255
}
256+
else if (act == EDGE_CODEPOINT_I) {
257+
PMC *arg = VTABLE_get_pmc_keyed_int(interp, edge_info, i + 1);
258+
UINTVAL lc_arg = VTABLE_get_integer_keyed_int(interp, arg, 0);
259+
UINTVAL uc_arg = VTABLE_get_integer_keyed_int(interp, arg, 1);
260+
UINTVAL ord = STRING_ord(interp, target, offset);
261+
if (ord == lc_arg || ord == uc_arg)
262+
VTABLE_push_integer(interp, nextst, to);
263+
}
264+
else if (act == EDGE_CODEPOINT_I_NEG) {
265+
PMC *arg = VTABLE_get_pmc_keyed_int(interp, edge_info, i + 1);
266+
UINTVAL lc_arg = VTABLE_get_integer_keyed_int(interp, arg, 0);
267+
UINTVAL uc_arg = VTABLE_get_integer_keyed_int(interp, arg, 1);
268+
UINTVAL ord = STRING_ord(interp, target, offset);
269+
if (ord != lc_arg && ord != uc_arg)
270+
VTABLE_push_integer(interp, nextst, to);
271+
}
254272
}
255273
}
256274

0 commit comments

Comments
 (0)