Skip to content

Commit 97d40c8

Browse files
committed
Use a better data structure for Match setup info
So we can calculate once what we would previously re-calculate on every Match object construction.
1 parent 116ef73 commit 97d40c8

File tree

2 files changed

+118
-126
lines changed

2 files changed

+118
-126
lines changed

src/QRegex/Cursor.nqp

Lines changed: 24 additions & 120 deletions
Original file line numberDiff line numberDiff line change
@@ -343,64 +343,6 @@ role NQPMatchRole is export {
343343
# self;
344344
# }
345345

346-
my $NO_CAPS := nqp::hash();
347-
method CAPHASH() {
348-
my $caps := nqp::hash();
349-
my %caplist := $NO_CAPS;
350-
my $iter;
351-
my str $curcap;
352-
my $cs;
353-
my int $csi;
354-
my int $cselems;
355-
my $subcur;
356-
my $submatch;
357-
my str $name;
358-
359-
if !nqp::isnull($!regexsub) && nqp::defined($!regexsub) {
360-
%caplist := nqp::can($!regexsub, 'CAPS') ?? $!regexsub.CAPS() !! nqp::null();
361-
if !nqp::isnull(%caplist) && %caplist {
362-
$iter := nqp::iterator(%caplist);
363-
while $iter {
364-
$curcap := nqp::iterkey_s(nqp::shift($iter));
365-
$caps{$curcap} := nqp::list() if nqp::atkey(%caplist, $curcap) >= 2;
366-
}
367-
}
368-
}
369-
if !nqp::isnull($!cstack) && $!cstack {
370-
$cs := $!cstack;
371-
$cselems := nqp::elems($cs);
372-
while $csi < $cselems {
373-
$subcur := nqp::atpos($cs, $csi);
374-
$submatch := $subcur.MATCH;
375-
$name := nqp::getattr_s($subcur, $?CLASS, '$!name');
376-
unless nqp::isnull_s($name) {
377-
if nqp::index($name, '=') < 0 {
378-
%caplist{$name} >= 2
379-
?? nqp::push($caps{$name}, $submatch)
380-
!! nqp::bindkey($caps, $name, $submatch);
381-
}
382-
else {
383-
for nqp::split('=', $name) -> $name {
384-
%caplist{$name} >= 2
385-
?? nqp::push($caps{$name}, $submatch)
386-
!! nqp::bindkey($caps, $name, $submatch);
387-
}
388-
}
389-
}
390-
++$csi;
391-
}
392-
}
393-
394-
# Once we've produced the captures, and if we know we're finished and
395-
# will never be backtracked into, we can release cstack and regexsub.
396-
unless nqp::defined($!bstack) {
397-
$!cstack := nqp::null();
398-
$!regexsub := nqp::null();
399-
}
400-
401-
$caps;
402-
}
403-
404346
method !cursor_init($orig, :$p = 0, :$c, :$shared, :$braid, :$build, :$fail_cursor, *%ignore) {
405347
my $new := $build ?? self !! self.CREATE();
406348
unless $shared {
@@ -1137,53 +1079,44 @@ role NQPMatchRole is export {
11371079

11381080
class NQPMatch is NQPCapture does NQPMatchRole {
11391081
my @EMPTY_LIST := [];
1140-
my $NO_CAPS := nqp::hash();
1082+
my %EMPTY_HASH := nqp::hash();
11411083
my $DID_MATCH := nqp::create(NQPdidMATCH);
11421084
method MATCH() {
11431085
my $match := nqp::getattr(self, NQPMatch, '$!match');
11441086
if nqp::isnull($match) || !nqp::istype($match, NQPdidMATCH) {
11451087
# Set up basic state of (old) Match.
11461088
my $list;
1147-
my $hash := nqp::hash();
1089+
my $hash;
11481090
nqp::bindattr(self, NQPMatch, '$!match',
11491091
nqp::getattr_i(self, NQPMatch, '$!pos') >= nqp::getattr_i(self, NQPMatch, '$!from')
11501092
?? $DID_MATCH
11511093
!! nqp::null());
11521094

11531095
# For captures with lists, initialize the lists.
1154-
my %caplist := $NO_CAPS;
1155-
my $rxsub := nqp::getattr(self, NQPMatch, '$!regexsub');
1156-
my str $onlyname := '';
1157-
my int $namecount := 0;
1096+
my $rxsub := nqp::getattr(self, NQPMatch, '$!regexsub');
1097+
my str $onlyname;
1098+
my int $hascaps;
11581099
if !nqp::isnull($rxsub) && nqp::defined($rxsub) {
1159-
%caplist := nqp::can($rxsub, 'CAPS') ?? $rxsub.CAPS() !! nqp::null();
1160-
if !nqp::isnull(%caplist) && nqp::istrue(%caplist) {
1161-
my $iter := nqp::iterator(%caplist);
1162-
while $iter {
1163-
my $curcap := nqp::shift($iter);
1164-
my str $name := nqp::iterkey_s($curcap);
1165-
++$namecount;
1166-
if nqp::iterval($curcap) >= 2 {
1167-
$onlyname := $name if $namecount == 1;
1168-
nqp::ord($name) < 58
1169-
?? nqp::bindpos(
1170-
nqp::defor($list, $list := nqp::list()),
1171-
$name, nqp::list())
1172-
!! nqp::bindkey($hash, $name, nqp::list());
1173-
}
1100+
my $capdesc := nqp::can($rxsub, 'CAPS') ?? $rxsub.CAPS() !! nqp::null();
1101+
if !nqp::isnull($capdesc) {
1102+
$hascaps := $capdesc.has-captures();
1103+
if $hascaps {
1104+
$list := $capdesc.prepare-list();
1105+
$hash := $capdesc.prepare-hash();
1106+
$onlyname := $capdesc.onlyname();
11741107
}
11751108
}
11761109
}
11771110

11781111
# Walk the Cursor stack and populate the Cursor.
11791112
my $cs := nqp::getattr(self, NQPMatch, '$!cstack');
11801113
if nqp::isnull($cs) || !nqp::istrue($cs) {}
1181-
elsif $namecount == 1 && $onlyname ne '' && !nqp::eqat($onlyname, '$!', 0) {
1114+
elsif $onlyname ne '' {
11821115
# If there's only one destination, avoid repeated hash lookups
11831116
my int $cselems := nqp::elems($cs);
11841117
my int $csi;
11851118
my $dest;
1186-
if nqp::ord($onlyname) < 58 {
1119+
if nqp::ord($onlyname) != 38 && nqp::ord($onlyname) < 58 {
11871120
$dest := nqp::atpos($list, $onlyname);
11881121
}
11891122
else {
@@ -1197,78 +1130,49 @@ class NQPMatch is NQPCapture does NQPMatchRole {
11971130
++$csi;
11981131
}
11991132
}
1200-
elsif !nqp::isnull(%caplist) && %caplist {
1133+
elsif $hascaps {
12011134
my int $cselems := nqp::elems($cs);
12021135
my int $csi;
1203-
# note($cselems);
12041136
while $csi < $cselems {
12051137
my $subcur := nqp::atpos($cs, $csi);
12061138
my str $name := nqp::getattr_s($subcur, $?CLASS, '$!name');
1207-
if !nqp::isnull_s($name) && nqp::defined($name) && $name ne '' {
1139+
if !nqp::isnull_s($name) && $name ne '' {
12081140
my $submatch := $subcur.MATCH();
12091141
if nqp::ord($name) == 36 && ($name eq '$!from' || $name eq '$!to') {
12101142
nqp::bindattr_i(self, NQPMatch, $name, $submatch.from);
12111143
}
12121144
elsif nqp::index($name, '=') < 0 {
1213-
my int $needs_list := %caplist{$name} >= 2;
12141145
if nqp::ord($name) < 58 {
1215-
$list := nqp::list() unless nqp::isconcrete($list);
1216-
$needs_list
1146+
nqp::islist(nqp::atpos($list, $name))
12171147
?? nqp::push(nqp::atpos($list, $name), $submatch)
12181148
!! nqp::bindpos($list, $name, $submatch);
12191149
}
12201150
else {
1221-
$needs_list
1222-
?? nqp::push($hash{$name}, $submatch)
1151+
nqp::islist(nqp::atkey($hash, $name))
1152+
?? nqp::push(nqp::atkey($hash, $name), $submatch)
12231153
!! nqp::bindkey($hash, $name, $submatch);
12241154
}
12251155
}
12261156
else {
12271157
for nqp::split('=', $name) -> $name {
1228-
my int $needs_list := %caplist{$name} >= 2;
12291158
if nqp::ord($name) < 58 {
1230-
$list := nqp::list() unless nqp::isconcrete($list);
1231-
$needs_list
1159+
nqp::islist(nqp::atkey($hash, $name))
12321160
?? nqp::push(nqp::atpos($list, $name), $submatch)
12331161
!! nqp::bindpos($list, $name, $submatch);
12341162
}
12351163
else {
1236-
$needs_list
1237-
?? nqp::push($hash{$name}, $submatch)
1164+
nqp::islist(nqp::atkey($hash, $name))
1165+
?? nqp::push(nqp::atkey($hash, $name), $submatch)
12381166
!! nqp::bindkey($hash, $name, $submatch);
12391167
}
12401168
}
12411169
}
12421170
}
12431171
++$csi;
12441172
}
1245-
# {
1246-
# my $iter := nqp::iterator(%caplist);
1247-
# while $iter {
1248-
# my $curcap := nqp::shift($iter);
1249-
# my str $name := nqp::iterkey_s($curcap);
1250-
# my int $iv := nqp::iterval($curcap);
1251-
# if $iv >= 2 {
1252-
# if nqp::iscclass(nqp::const::CCLASS_NUMERIC, $name, 0) {
1253-
# stderr().print("\t" ~ $name ~ "\t" ~ nqp::elems(nqp::atpos($list, $name)));
1254-
# }
1255-
# else {
1256-
# stderr().print("\t" ~ $name ~ "\t" ~ nqp::elems(nqp::atkey($hash, $name)));
1257-
# }
1258-
# }
1259-
# elsif $iv >= 1 {
1260-
# if nqp::iscclass(nqp::const::CCLASS_NUMERIC, $name, 0) {
1261-
# stderr().print("\t" ~ $name ~ "\t" ~ nqp::defined(nqp::atpos($list, $name)));
1262-
# }
1263-
# else {
1264-
# stderr().print("\t" ~ $name ~ "\t" ~ nqp::defined(nqp::atkey($hash, $name)));
1265-
# }
1266-
# }
1267-
# }
1268-
# }
12691173
}
1270-
nqp::bindattr(self, NQPCapture, '@!array', nqp::isconcrete($list) ?? $list !! @EMPTY_LIST);
1271-
nqp::bindattr(self, NQPCapture, '%!hash', $hash);
1174+
nqp::bindattr(self, NQPCapture, '@!array', nqp::defor($list, @EMPTY_LIST));
1175+
nqp::bindattr(self, NQPCapture, '%!hash', nqp::defor($hash, %EMPTY_HASH));
12721176

12731177
# Once we've produced the captures, and if we know we're finished and
12741178
# will never be backtracked into, we can release cstack and regexsub.

src/core/NQPRoutine.nqp

Lines changed: 94 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -363,6 +363,98 @@ my knowhow NQPSignature {
363363
method definednesses() { $!definednesses }
364364
}
365365

366+
# Data on the captures that a particular rule has.
367+
my knowhow RegexCaptures {
368+
# An integer array of positional capture counts.
369+
has @!pos-capture-counts;
370+
371+
# A string array of named capture names and a matching integer array of
372+
# capture counts.
373+
has @!named-capture-names;
374+
has @!named-capture-counts;
375+
376+
# Form this data structure from a capnames hash.
377+
method from-capnames(%capnames) {
378+
nqp::create(self).'!from-capnames'(%capnames)
379+
}
380+
381+
method !from-capnames(%capnames) {
382+
# Initialize.
383+
@!pos-capture-counts := nqp::list_i();
384+
@!named-capture-names := nqp::list_s();
385+
@!named-capture-counts := nqp::list_i();
386+
387+
# Go over the captures and build up the data structure.
388+
for %capnames {
389+
my $name := nqp::iterkey_s($_);
390+
if $name ne '' {
391+
my $count := nqp::iterval($_);
392+
if nqp::ord($name) != 36 && nqp::ord($name) < 58 {
393+
nqp::bindpos_i(@!pos-capture-counts, +$name, $count);
394+
}
395+
else {
396+
nqp::push_s(@!named-capture-names, $name);
397+
nqp::push_i(@!named-capture-counts, $count);
398+
}
399+
}
400+
}
401+
402+
self
403+
}
404+
405+
# Are there any captures?
406+
method has-captures() {
407+
nqp::elems(@!named-capture-counts) || nqp::elems(@!pos-capture-counts)
408+
}
409+
410+
# Build a list of positional captures, or return a shared empty list if
411+
# there are none. This only populates the slots which need an array.
412+
my $EMPTY-LIST := nqp::list();
413+
my $EMPTY-HASH := nqp::hash(); # shared empty *hash*: prepare-hash returns it when there are no named captures
414+
method prepare-list() {
415+
my int $n := nqp::elems(@!pos-capture-counts);
416+
if $n > 0 {
417+
my $result := nqp::list();
418+
my int $i := 0;
419+
while $i < $n {
420+
nqp::bindpos($result, $i, nqp::list())
421+
if nqp::atpos_i(@!pos-capture-counts, $i) >= 2;
422+
$i++;
423+
}
424+
$result
425+
}
426+
else {
427+
$EMPTY-LIST
428+
}
429+
}
430+
431+
# Build a hash of named captures, or return a shared empty hash if there
432+
# are none. This only populates the slots that need an array.
433+
method prepare-hash() {
434+
my int $n := nqp::elems(@!named-capture-counts);
435+
if $n > 0 {
436+
my $result := nqp::hash();
437+
my int $i := 0;
438+
while $i < $n {
439+
if nqp::atpos_i(@!named-capture-counts, $i) >= 2 {
440+
nqp::bindkey($result,
441+
nqp::atpos_s(@!named-capture-names, $i),
442+
nqp::list());
443+
}
444+
$i++;
445+
}
446+
$result
447+
}
448+
else {
449+
$EMPTY-HASH
450+
}
451+
}
452+
453+
# Get the name of the only capture, if there is only one.
454+
method onlyname() { '' }
455+
}
456+
457+
366458
my knowhow NQPRegex {
367459
has $!do;
368460
has $!caps;
@@ -371,12 +463,8 @@ my knowhow NQPRegex {
371463
has $!generic_nfa;
372464
has @!nested_codes;
373465
has $!clone_callback;
374-
method SET_CAPS($caps) {
375-
my %h_caps;
376-
for $caps {
377-
%h_caps{$_.key} := $_.value unless $_.key eq '';
378-
}
379-
$!caps := %h_caps;
466+
method SET_CAPS(%capnames) {
467+
$!caps := RegexCaptures.from-capnames(%capnames);
380468
}
381469
method SET_NFA($nfa) {
382470
$!nfa := self.'!hllize_nfa'($nfa);

0 commit comments

Comments
 (0)