Skip to content

Commit

Permalink
Cheapen Match object construction
Browse files Browse the repository at this point in the history
Move towards a better data structure for the Match initialization of
arrays for quantified captures.
  • Loading branch information
jnthn committed Jun 10, 2019
1 parent 9dad99b commit 36fa16b
Show file tree
Hide file tree
Showing 3 changed files with 135 additions and 62 deletions.
101 changes: 96 additions & 5 deletions src/Perl6/Metamodel/BOOTSTRAP.nqp
Expand Up @@ -3195,22 +3195,113 @@ BEGIN {
Submethod.HOW.compose_repr(Submethod);
Submethod.HOW.compose_invocation(Submethod);

# Capture store for SET_CAPS.
my class RegexCaptures {
# An integer array of positional capture counts.
has @!pos-capture-counts;

# A string array of named capture names and a matching integer array of
# capture counts.
has @!named-capture-names;
has @!named-capture-counts;

# Constructor: allocate a fresh instance of the invocant's type and let the
# private initializer fill it in from the given capnames hash. Returns the
# value produced by that initializer.
method from-capnames(%capnames) {
    my $instance := nqp::create(self);
    $instance.'!from-capnames'(%capnames)
}

# Private initializer. Resets the three native arrays and then sorts every
# entry of %capnames into either the positional or the named storage.
# Returns the invocant.
method !from-capnames(%capnames) {
    # Start from empty native arrays.
    @!pos-capture-counts := nqp::list_i();
    @!named-capture-names := nqp::list_s();
    @!named-capture-counts := nqp::list_i();

    for %capnames -> $entry {
        my $key := nqp::iterkey_s($entry);

        # The entry with the empty name is not recorded at all.
        unless $key eq '' {
            my $uses := nqp::iterval($entry);
            my int $lead := nqp::ord($key);

            # Code point 36 is '$' and the ASCII digits all sit below 58, so a
            # name starting with '$' or with anything at or above ':' is kept
            # as a named capture; everything else is used as a numeric index.
            if $lead == 36 || $lead >= 58 {
                nqp::push_s(@!named-capture-names, $key);
                nqp::push_i(@!named-capture-counts, $uses);
            }
            else {
                nqp::bindpos_i(@!pos-capture-counts, +$key, $uses);
            }
        }
    }

    self
}

# Truthy when at least one capture was recorded. The named captures are
# checked first; the positional ones are only consulted when there are no
# named captures. The value returned is the element count that was found.
method has-captures() {
    my int $named := nqp::elems(@!named-capture-counts);
    $named || nqp::elems(@!pos-capture-counts)
}

# Build a list of positional captures, or return a shared empty list if
# there are none. This only populates the slots which need an array.
# Shared, pre-built empty containers handed out by prepare-list and
# prepare-hash when there is nothing to populate, which avoids allocating a
# fresh container each time.
my $EMPTY-LIST := nqp::list();
# This must be a real hash: prepare-hash returns it as the named-capture
# storage, so a list here would break every keyed operation on the result.
my $EMPTY-HASH := nqp::hash();

This comment has been minimized.

Copy link
@mlschroe

mlschroe Jun 12, 2019

Contributor

Shouldn't that be nqp::hash()?

This comment has been minimized.

Copy link
@lizmat

lizmat Jun 12, 2019

Contributor

Indeed... it probably should

This comment has been minimized.

Copy link
@lizmat

lizmat Jun 12, 2019

Contributor

Thanks!, fixed with 95a7866

# Returns the positional capture storage: the shared empty list when no
# positional counts were recorded, otherwise a new list in which every index
# whose recorded count is at least 2 already holds a fresh Array.
method prepare-list() {
    my int $slots := nqp::elems(@!pos-capture-counts);
    if $slots <= 0 {
        $EMPTY-LIST
    }
    else {
        my $list := nqp::list();
        my int $idx := 0;
        while $idx < $slots {
            # Only slots with a count of 2 or more get an Array up front;
            # the remaining slots are left unset.
            if nqp::atpos_i(@!pos-capture-counts, $idx) >= 2 {
                nqp::bindpos($list, $idx, nqp::create(Array));
            }
            $idx++;
        }
        $list
    }
}

# Build a hash of named captures, or return a shared empty hash if there
# are none. This only populates the keys that need an array, i.e. those whose
# recorded count is at least 2.
method prepare-hash() {
    my int $total := nqp::elems(@!named-capture-counts);
    if $total <= 0 {
        $EMPTY-HASH
    }
    else {
        my $table := nqp::hash();
        my int $idx := 0;
        while $idx < $total {
            # Names and counts live in parallel arrays, so the same index
            # addresses both.
            nqp::bindkey($table,
                nqp::atpos_s(@!named-capture-names, $idx),
                nqp::create(Array))
                if nqp::atpos_i(@!named-capture-counts, $idx) >= 2;
            $idx++;
        }
        $table
    }
}

# Get the name of the only capture, if there is only one.
# NOTE(review): this currently always answers the empty string, regardless of
# how many captures were recorded. Presumably a placeholder; confirm before
# relying on it.
method onlyname() {
    ''
}
}
# class Regex is Method {
# has @!caps;
# has $!caps;
# has Mu $!nfa;
# has %!alt_nfas;
# has str $!source;
# has $!topic;
# has $!slash;
Regex.HOW.add_parent(Regex, Method);
Regex.HOW.add_attribute(Regex, scalar_attr('@!caps', List, Regex));
Regex.HOW.add_attribute(Regex, scalar_attr('$!caps', Mu, Regex));
Regex.HOW.add_attribute(Regex, scalar_attr('$!nfa', Mu, Regex));
Regex.HOW.add_attribute(Regex, scalar_attr('%!alt_nfas', Hash, Regex));
Regex.HOW.add_attribute(Regex, scalar_attr('$!source', str, Regex));
Regex.HOW.add_attribute(Regex, scalar_attr('$!topic', Mu, Regex));
Regex.HOW.add_attribute(Regex, scalar_attr('$!slash', Mu, Regex));
Regex.HOW.add_method(Regex, 'SET_CAPS', nqp::getstaticcode(sub ($self, $caps) {
nqp::bindattr(nqp::decont($self), Regex, '@!caps', $caps)
Regex.HOW.add_method(Regex, 'SET_CAPS', nqp::getstaticcode(sub ($self, $capnames) {
nqp::bindattr(nqp::decont($self), Regex, '$!caps',
RegexCaptures.from-capnames($capnames))
}));
Regex.HOW.add_method(Regex, 'SET_NFA', nqp::getstaticcode(sub ($self, $nfa) {
nqp::bindattr(nqp::decont($self), Regex, '$!nfa', $nfa)
Expand All @@ -3224,7 +3315,7 @@ BEGIN {
nqp::bindkey(%alts, $name, $nfa);
}));
Regex.HOW.add_method(Regex, 'CAPS', nqp::getstaticcode(sub ($self) {
nqp::getattr(nqp::decont($self), Regex, '@!caps')
nqp::getattr(nqp::decont($self), Regex, '$!caps')
}));
Regex.HOW.add_method(Regex, 'NFA', nqp::getstaticcode(sub ($self) {
nqp::getattr(nqp::decont($self), Regex, '$!nfa')
Expand Down
94 changes: 38 additions & 56 deletions src/core/Match.pm6
@@ -1,5 +1,6 @@
my class Match is Capture is Cool does NQPMatchRole {
my Mu $EMPTY_LIST := nqp::list();
my Mu $EMPTY_HASH := nqp::hash();
my Mu $NO_CAPS := nqp::hash();
my Mu $DID_MATCH := nqp::create(NQPdidMATCH);

Expand Down Expand Up @@ -29,51 +30,47 @@ my class Match is Capture is Cool does NQPMatchRole {
nqp::getattr_i(self,Match,'$!pos'),
nqp::getattr_i(self, Match, '$!from'))
?? self!MATCH-PASS()
!! self!MATCH-FAIL()
!! self!MATCH-EMPTY()
}

method !MATCH-FAIL() {
method !MATCH-EMPTY() {
nqp::bindattr(self, Capture, '@!list', $EMPTY_LIST);
nqp::bindattr(self, Capture, '%!hash', nqp::hash());
nqp::bindattr(self, Capture, '%!hash', $EMPTY_HASH);
nqp::bindattr(self, Match, '$!match', $DID_MATCH);
self
}

method !MATCH-PASS() {
my Mu $list := Nil;
my Mu $hash := nqp::hash();

# For captures with lists, initialize the lists.
my $caplist := $NO_CAPS;
my $rxsub := nqp::getattr(self, Match, '$!regexsub');
my str $onlyname = '';
my int $namecount = 0;

if nqp::not_i(nqp::isnull($rxsub)) {
$caplist := nqp::can($rxsub, 'CAPS') ?? nqp::findmethod($rxsub, 'CAPS')($rxsub) !! nqp::null();
if nqp::not_i(nqp::isnull($caplist)) && nqp::istrue($caplist) {
my $iter := nqp::iterator($caplist);
my str $name;
while $iter {
$namecount = nqp::add_i($namecount, 1);
if nqp::iterval(nqp::shift($iter)) >= 2 {
$name = nqp::iterkey_s($iter);
nqp::iscclass(nqp::const::CCLASS_NUMERIC, $name, 0)
?? nqp::bindpos(
nqp::if(nqp::isconcrete($list), $list, ($list := nqp::list())),
nqp::fromstr_I($name, Int), nqp::create(Array))
!! nqp::bindkey($hash, $name, nqp::create(Array));
}
}
$onlyname = $name if nqp::iseq_i($namecount, 1);
}
# Build captures if needed.
my $rxsub := nqp::getattr(self, Match, '$!regexsub');
nqp::isnull($rxsub) ||
nqp::isnull(my $cap-meth := nqp::tryfindmethod($rxsub, 'CAPS')) ||
nqp::isnull(my $caps := $cap-meth($rxsub)) || !$caps.has-captures()
?? self!MATCH-EMPTY()
!! self!MATCH-CAPTURES();

# Once we've produced the captures, and if we know we're finished and
# will never be backtracked into, we can release cstack and regexsub.
unless nqp::defined(nqp::getattr(self, Match, '$!bstack')) {
nqp::bindattr(self, Match, '$!cstack', nqp::null());
nqp::bindattr(self, Match, '$!regexsub', nqp::null());
}

self
}

method !MATCH-CAPTURES() {
# Initialize capture lists.
my $rxsub := nqp::getattr(self, Match, '$!regexsub');
my $capdesc := nqp::findmethod($rxsub, 'CAPS')($rxsub);
my $list := nqp::findmethod($capdesc, 'prepare-list')($capdesc);
my $hash := nqp::findmethod($capdesc, 'prepare-hash')($capdesc);
my str $onlyname = $capdesc.onlyname();

# Walk the capture stack and populate the Match.
my Mu $cs := nqp::getattr(self, Match, '$!cstack');
if nqp::isnull($cs) || nqp::not_i(nqp::istrue($cs)) {}
elsif nqp::not_i(nqp::istrue($caplist)) {}
elsif nqp::iseq_i($namecount, 1) && nqp::isgt_i(nqp::chars($onlyname), 0) && nqp::eqat($onlyname, '$!', 0) {
elsif nqp::isgt_i(nqp::chars($onlyname), 0) {
# If there's only one destination, avoid repeated hash lookups
my int $cselems = nqp::elems($cs);
my int $csi = -1;
Expand Down Expand Up @@ -106,37 +103,31 @@ my class Match is Capture is Cool does NQPMatchRole {
nqp::bindattr_i(self, Match, $name, $submatch.from);
}
elsif nqp::islt_i(nqp::index($name, '='), 0) {
my Mu $capval := nqp::atkey($caplist, $name);
my int $needs_list = nqp::isconcrete($capval) && $capval >= 2;
if nqp::iscclass(nqp::const::CCLASS_NUMERIC, $name, 0) {
$list := nqp::list() unless nqp::isconcrete($list);
$needs_list
?? nqp::atpos($list, nqp::fromstr_I($name, Int)).append($submatch)
!! nqp::bindpos($list, nqp::fromstr_I($name, Int), $submatch);
my $idx := nqp::fromstr_I($name, Int);
nqp::istype(nqp::atpos($list, $idx), Array)
?? nqp::atpos($list, $idx).append($submatch)
!! nqp::bindpos($list, $idx, $submatch);
}
else {
$needs_list
nqp::istype(nqp::atkey($hash, $name), Array)
?? nqp::atkey($hash, $name).append($submatch)
!! nqp::bindkey($hash, $name, $submatch);
}
}
else {
my $names := nqp::split('=', $name);
my $iter := nqp::iterator($names);
my Mu $capval;
my int $needs_list;
while $iter {
$name = nqp::shift($iter);
$capval := nqp::atkey($caplist, $name);
$needs_list = nqp::isconcrete($capval) && $capval >= 2;
$name = nqp::shift($iter);
if nqp::iscclass(nqp::const::CCLASS_NUMERIC, $name, 0) {
$list := nqp::list() unless nqp::isconcrete($list);
$needs_list
my $idx := nqp::fromstr_I($name, Int);
nqp::istype(nqp::atpos($list, $idx), Array)
?? nqp::atpos($list, nqp::fromstr_I($name, Int)).append($submatch)
!! nqp::bindpos($list, nqp::fromstr_I($name, Int), $submatch);
}
else {
$needs_list
nqp::istype(nqp::atkey($hash, $name), Array)
?? nqp::atkey($hash, $name).append($submatch)
!! nqp::bindkey($hash, $name, $submatch);
}
Expand All @@ -148,15 +139,6 @@ my class Match is Capture is Cool does NQPMatchRole {
nqp::bindattr(self, Capture, '@!list', nqp::isconcrete($list) ?? $list !! $EMPTY_LIST);
nqp::bindattr(self, Capture, '%!hash', $hash);
nqp::bindattr(self, Match, '$!match', $DID_MATCH);

# Once we've produced the captures, and if we know we're finished and
# will never be backtracked into, we can release cstack and regexsub.
unless nqp::defined(nqp::getattr(self, Match, '$!bstack')) {
nqp::bindattr(self, Match, '$!cstack', nqp::null());
nqp::bindattr(self, Match, '$!regexsub', nqp::null());
}

self
}

method CURSOR_NEXT() { # from !cursor_next in nqp
Expand Down
2 changes: 1 addition & 1 deletion src/core/Regex.pm6
@@ -1,6 +1,6 @@
my class Regex { # declared in BOOTSTRAP
# class Regex is Method
# has @!caps;
# has $!caps;
# has Mu $!nfa;
# has %!alt_nfas;
# has str $!source;
Expand Down

0 comments on commit 36fa16b

Please sign in to comment.