Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Fetching contributors…

Cannot retrieve contributors at this time

597 lines (536 sloc) 20.427 kb
class QRegex::P5Regex::Actions is HLL::Actions {
method TOP($/) {
make qbuildsub($<nibbler>.ast, :anon(1), :addself(1));
}
method nibbler($/) { make $<alternation>.ast }
method alternation($/) {
my $qast := $<sequence>[0].ast;
if +$<sequence> > 1 {
$qast := QAST::Regex.new( :rxtype<altseq>, :node($/) );
for $<sequence> { $qast.push($_.ast); }
}
make $qast;
}
method sequence($/) {
if $<quantified_atom> {
my $qast := QAST::Regex.new( :rxtype<concat>, :node($/) );
my $lastlit := 0;
for $<quantified_atom> {
my $ast := $_.ast;
if $ast {
if $lastlit && $ast.rxtype eq 'literal'
&& !QAST::Node.ACCEPTS($ast[0]) {
$lastlit[0] := $lastlit[0] ~ $ast[0];
}
else {
$qast.push($_.ast);
$lastlit := $ast.rxtype eq 'literal'
&& !QAST::Node.ACCEPTS($ast[0])
?? $ast !! 0;
}
}
}
make $qast;
}
else {
make QAST::Regex.new( :rxtype<anchor>, :name<pass>, :node($/) );
}
}
method quantified_atom($/) {
my $qast := $<atom>.ast;
if $<quantifier> {
my $ast := $<quantifier>[0].ast;
$ast.unshift($qast || QAST::Regex.new( :rxtype<anchor>, :name<pass> ));
$qast := $ast;
}
$qast.backtrack('r') if $qast && !$qast.backtrack && %*RX<r>;
make $qast;
}
method atom($/) {
if $<metachar> {
make $<metachar>.ast;
}
elsif $<esc> {
my $qast := QAST::Regex.new( ~$<esc>, :rxtype<literal>, :node($/));
make $qast;
}
else {
my $qast := QAST::Regex.new( ~$/, :rxtype<literal>, :node($/));
$qast.subtype('ignorecase') if %*RX<i>;
make $qast;
}
}
method p5metachar:sym<bs>($/) {
make $<backslash>.ast;
}
method p5metachar:sym<.>($/) {
make %*RX<s>
?? QAST::Regex.new( :rxtype<cclass>, :subtype<.>, :node($/) )
!! QAST::Regex.new( :rxtype<cclass>, :subtype<nl>, :negate(1), :node($/) );
}
method p5metachar:sym<^>($/) {
make QAST::Regex.new( :rxtype<anchor>, :subtype(%*RX<m> ?? 'bol' !! 'bos'), :node($/) );
}
method p5metachar:sym<$>($/) {
make QAST::Regex.new(
:rxtype('concat'),
QAST::Regex.new(
:rxtype('quant'), :min(0), :max(1),
QAST::Regex.new( :rxtype('literal'), "\n" )
),
QAST::Regex.new( :rxtype<anchor>, :subtype(%*RX<m> ?? 'eol' !! 'eos'), :node($/) )
);
}
method p5metachar:sym<(? )>($/) {
make $<assertion>.ast;
}
method p5metachar:sym<( )>($/) {
make QAST::Regex.new( :rxtype<subcapture>, :node($/),
$<nibbler>.ast );
}
method p5metachar:sym<[ ]>($/) {
make $<cclass>.ast;
}
method cclass($/) {
my $str := '';
my $qast;
my @alts;
for $<charspec> {
if $_[1] {
my $node;
my $lhs;
my $rhs;
if $_[0]<backslash> {
$node := $_[0]<backslash>.ast;
$/.CURSOR.panic("Illegal range endpoint in regex: " ~ ~$_)
if $node.rxtype ne 'literal' && $node.rxtype ne 'enumcharlist'
|| $node.negate || nqp::chars($node[0]) != 1;
$lhs := $node[0];
}
else {
$lhs := ~$_[0][0];
}
if $_[1][0]<backslash> {
$node := $_[1][0]<backslash>.ast;
$/.CURSOR.panic("Illegal range endpoint in regex: " ~ ~$_)
if $node.rxtype ne 'literal' && $node.rxtype ne 'enumcharlist'
|| $node.negate || nqp::chars($node[0]) != 1;
$rhs := $node[0];
}
else {
$rhs := ~$_[1][0][0];
}
sub add_range($from, $to) {
my int $ord0 := nqp::ord($from);
my int $ord1 := nqp::ord($to);
$/.CURSOR.panic("Illegal reversed character range in regex: " ~ ~$_)
if $ord0 > $ord1;
$str := nqp::concat($str, nqp::chr($ord0++)) while $ord0 <= $ord1;
}
if %*RX<i> {
add_range(nqp::lc($lhs), nqp::lc($rhs));
add_range(nqp::uc($lhs), nqp::uc($rhs));
}
else {
add_range($lhs, $rhs);
}
}
elsif $_[0]<backslash> {
my $bs := $_[0]<backslash>.ast;
$bs.negate(!$bs.negate) if $<sign> eq '^';
@alts.push($bs);
}
else {
my $c := ~$_[0];
$str := $str ~ (%*RX<i> ?? nqp::lc($c) ~ nqp::uc($c) !! $c);
}
}
@alts.push(QAST::Regex.new( $str, :rxtype<enumcharlist>, :node($/), :negate( $<sign> eq '^' ) ))
if nqp::chars($str);
$qast := +@alts == 1 ?? @alts[0] !!
$<sign> eq '^' ??
QAST::Regex.new( :rxtype<concat>, :node($/),
QAST::Regex.new( :rxtype<conj>, :subtype<zerowidth>, |@alts ),
QAST::Regex.new( :rxtype<cclass>, :subtype<.> ) ) !!
QAST::Regex.new( :rxtype<altseq>, |@alts );
make $qast;
}
method p5backslash:sym<A>($/) {
make QAST::Regex.new( :rxtype<anchor>, :subtype<bos>, :node($/) );
}
method p5backslash:sym<s>($/) {
make QAST::Regex.new(:rxtype<cclass>, '.CCLASS_WHITESPACE',
:subtype($<sym> eq 'n' ?? 'nl' !! ~$<sym>),
:negate($<sym> le 'Z'), :node($/));
}
method p5backslash:sym<b>($/) {
make QAST::Regex.new(:rxtype<subrule>, :subtype<zerowidth>,
:node($/), :negate($<sym> eq 'B'), :name(''),
QAST::Node.new( QAST::SVal.new( :value('wb') ) ));
}
method p5backslash:sym<z>($/) {
make QAST::Regex.new( :rxtype<anchor>, :subtype<eos>, :node($/) );
}
method p5backslash:sym<Z>($/) {
make QAST::Regex.new(
:rxtype('concat'),
QAST::Regex.new(
:rxtype('quant'), :min(0), :max(1),
QAST::Regex.new( :rxtype('literal'), "\n" )
),
QAST::Regex.new( :rxtype<anchor>, :subtype('eos'), :node($/) )
);
}
method p5backslash:sym<misc>($/) {
if $<litchar> {
make QAST::Regex.new( ~$<litchar> , :rxtype('literal'), :node($/) );
}
else {
make QAST::Regex.new( :rxtype<subrule>, :subtype<method>, :node($/),
QAST::Node.new(
QAST::SVal.new( :value('!BACKREF') ),
QAST::SVal.new( :value(~$<number> - 1) ) ) );
}
}
method p5assertion:sym«<»($/) {
if $<nibbler> {
make QAST::Regex.new(
:rxtype<subrule>, :subtype<zerowidth>, :negate($<neg> eq '!'), :node($/),
QAST::Node.new(
QAST::SVal.new( :value('after') ),
qbuildsub(self.flip_ast($<nibbler>.ast), :anon(1), :addself(1))
));
}
else {
make 0;
}
}
method p5assertion:sym<=>($/) {
if $<nibbler> {
make QAST::Regex.new(
:rxtype<subrule>, :subtype<zerowidth>, :node($/),
QAST::Node.new(
QAST::SVal.new( :value('before') ),
qbuildsub($<nibbler>.ast, :anon(1), :addself(1))
));
}
else {
make 0;
}
}
method p5assertion:sym<!>($/) {
if $<nibbler> {
make QAST::Regex.new(
:rxtype<subrule>, :subtype<zerowidth>, :negate(1), :node($/),
QAST::Node.new(
QAST::SVal.new( :value('before') ),
qbuildsub($<nibbler>.ast, :anon(1), :addself(1))
));
}
else {
make 0;
}
}
method p5mods($/) {
for nqp::split('', ~$<on>) {
%*RX{$_} := 1;
}
if $<off> {
for nqp::split('', ~$<off>[0]) {
%*RX{$_} := 0;
}
}
}
method p5assertion:sym<mod>($/) {
if $<nibbler> {
make $<nibbler>[0].ast;
}
else {
for %*RX {
%*OLDRX{$_.key} := $_.value;
}
make 0;
}
}
method p5quantifier:sym<*>($/) {
my $qast := QAST::Regex.new( :rxtype<quant>, :min(0), :max(-1), :node($/) );
make quantmod($qast, $<quantmod>);
}
method p5quantifier:sym<+>($/) {
my $qast := QAST::Regex.new( :rxtype<quant>, :min(1), :max(-1), :node($/) );
make quantmod($qast, $<quantmod>);
}
method p5quantifier:sym<?>($/) {
my $qast := QAST::Regex.new( :rxtype<quant>, :min(0), :max(1), :node($/) );
make quantmod($qast, ~$<quantmod>);
}
method p5quantifier:sym<{ }>($/) {
my $qast;
$qast := QAST::Regex.new( :rxtype<quant>, :min(+$<start>), :node($/) );
if $<end> { $qast.max(+$<end>[0]); }
elsif $<comma> { $qast.max(-1); }
else { $qast.max($qast.min); }
make quantmod($qast, $<quantmod>);
}
sub quantmod($ast, $mod) {
if $mod eq '?' { $ast.backtrack('f') }
elsif $mod eq '+' { $ast.backtrack('g') }
$ast;
}
our sub qbuildsub($qast, $block = QAST::Block.new(), :$anon, :$addself) {
my $blockid := $block.cuid;
my $hashpast := QAST::Op.new( :op<hash> );
for capnames($qast, 0) {
if $_.key gt '' {
$hashpast.push(QAST::SVal.new( :value($_.key) ));
$hashpast.push(QAST::IVal.new( :value(
nqp::iscclass(pir::const::CCLASS_NUMERIC, $_.key, 0) + ($_.value > 1) * 2) ));
}
}
my $initpast := QAST::Stmts.new();
if $addself {
$initpast.push(QAST::Var.new( :name('self'), :scope('local'), :decl('param') ));
}
my $capblock := QAST::BlockMemo.new( :name($blockid ~ '_caps'), $hashpast );
$initpast.push(QAST::Stmt.new($capblock));
my $nfapast := QRegex::NFA.new.addnode($qast).qast;
if $nfapast {
my $nfablock := QAST::BlockMemo.new( :name($blockid ~ '_nfa'), $nfapast);
$initpast.push(QAST::Stmt.new($nfablock));
}
unless $block.symbol('') {
$initpast.push(QAST::Var.new(:name<$¢>, :scope<lexical>, :decl('var')));
$block.symbol('', :scope<lexical>);
}
$block<orig_qast> := $qast;
$qast := QAST::Regex.new( :rxtype<concat>,
QAST::Regex.new( :rxtype<scan> ),
$qast,
($anon ??
QAST::Regex.new( :rxtype<pass> ) !!
QAST::Regex.new( :rxtype<pass>, :name(%*RX<name>) )));
$block.push($initpast);
$block.push($qast);
$block;
}
sub capnames($ast, $count) {
my %capnames;
my $rxtype := $ast.rxtype;
if $rxtype eq 'concat' {
for $ast.list {
my %x := capnames($_, $count);
for %x { %capnames{$_.key} := +%capnames{$_.key} + $_.value; }
$count := %x{''};
}
}
elsif $rxtype eq 'altseq' || $rxtype eq 'alt' {
my $max := $count;
for $ast.list {
my %x := capnames($_, $count);
for %x {
%capnames{$_.key} := +%capnames{$_.key} < 2 && %x{$_.key} == 1 ?? 1 !! 2;
}
$count := %x{''};
}
}
elsif $rxtype eq 'subrule' && $ast.subtype eq 'capture' {
my $name := $ast.name;
if $name eq '' { $name := $count; $ast.name($name); }
my @names := nqp::split('=', $name);
for @names {
if $_ eq '0' || $_ > 0 { $count := $_ + 1; }
%capnames{$_} := 1;
}
}
elsif $rxtype eq 'subcapture' {
my $name := $ast.name;
if $name eq '' { $name := $count; $ast.name($name); }
for nqp::split(' ', $name) {
if $_ eq '0' || $_ > 0 { $count := $_ + 1; }
%capnames{$_} := 1;
}
my %x := capnames($ast[0], $count);
for %x { %capnames{$_.key} := +%capnames{$_.key} + %x{$_.key} }
$count := %x{''};
}
elsif $rxtype eq 'quant' {
my %astcap := capnames($ast[0], $count);
$count := %astcap{''};
}
%capnames{''} := $count;
nqp::deletekey(%capnames, '$!from');
nqp::deletekey(%capnames, '$!to');
%capnames;
}
method flip_ast($qast) {
return $qast unless nqp::istype($qast, QAST::Regex);
if $qast.rxtype eq 'literal' {
$qast[0] := $qast[0].reverse();
}
elsif $qast.rxtype eq 'concat' {
my @tmp;
while +@($qast) { @tmp.push(@($qast).shift) }
while @tmp { @($qast).push(self.flip_ast(@tmp.pop)) }
}
else {
for @($qast) { self.flip_ast($_) }
}
$qast
}
# XXX Below here copied from p6regex; needs review
method metachar:sym<'>($/) {
my $quote := $<quote_EXPR>.ast;
if QAST::SVal.ACCEPTS($quote) { $quote := $quote.value; }
my $qast := QAST::Regex.new( $quote, :rxtype<literal>, :node($/) );
$qast.subtype('ignorecase') if %*RX<i>;
make $qast;
}
method metachar:sym<">($/) {
my $quote := $<quote_EXPR>.ast;
if QAST::SVal.ACCEPTS($quote) { $quote := $quote.value; }
my $qast := QAST::Regex.new( $quote, :rxtype<literal>, :node($/) );
$qast.subtype('ignorecase') if %*RX<i>;
make $qast;
}
method metachar:sym<lwb>($/) {
make QAST::Regex.new( :rxtype<anchor>, :subtype<lwb>, :node($/) );
}
method metachar:sym<rwb>($/) {
make QAST::Regex.new( :rxtype<anchor>, :subtype<rwb>, :node($/) );
}
method metachar:sym<from>($/) {
make QAST::Regex.new( :rxtype<subrule>, :subtype<capture>,
:backtrack<r>, :name<$!from>, :node($/),
QAST::Node.new(
QAST::SVal.new( :value('!LITERAL') ),
QAST::SVal.new( :value('') ) ) );
}
method metachar:sym<to>($/) {
make QAST::Regex.new( :rxtype<subrule>, :subtype<capture>,
:backtrack<r>, :name<$!to>, :node($/),
QAST::Node.new(
QAST::SVal.new( :value('!LITERAL') ),
QAST::SVal.new( :value('') ) ) );
}
method metachar:sym<assert>($/) {
make $<assertion>.ast;
}
method metachar:sym<var>($/) {
my $qast;
my $name := $<pos> ?? +$<pos> !! ~$<name>;
if $<quantified_atom> {
$qast := $<quantified_atom>[0].ast;
if $qast.rxtype eq 'quant' && $qast[0].rxtype eq 'subrule' {
self.subrule_alias($qast[0], $name);
}
elsif $qast.rxtype eq 'subrule' {
self.subrule_alias($qast, $name);
}
else {
$qast := QAST::Regex.new( $qast, :name($name),
:rxtype<subcapture>, :node($/) );
}
}
else {
$qast := QAST::Regex.new( :rxtype<subrule>, :subtype<method>, :node($/),
QAST::Node.new(
QAST::SVal.new( :value('!BACKREF') ),
QAST::SVal.new( :value($name) ) ) );
}
make $qast;
}
method backslash:sym<e>($/) {
my $qast := QAST::Regex.new( "\c[27]", :rxtype('enumcharlist'),
:negate($<sym> eq 'E'), :node($/) );
make $qast;
}
method backslash:sym<f>($/) {
my $qast := QAST::Regex.new( "\c[12]", :rxtype('enumcharlist'),
:negate($<sym> eq 'F'), :node($/) );
make $qast;
}
method backslash:sym<h>($/) {
my $qast := QAST::Regex.new( "\x[09,20,a0,1680,180e,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,200a,202f,205f,3000]", :rxtype('enumcharlist'),
:negate($<sym> eq 'H'), :node($/) );
make $qast;
}
method backslash:sym<r>($/) {
my $qast := QAST::Regex.new( "\r", :rxtype('enumcharlist'),
:negate($<sym> eq 'R'), :node($/) );
make $qast;
}
method backslash:sym<t>($/) {
my $qast := QAST::Regex.new( "\t", :rxtype('enumcharlist'),
:negate($<sym> eq 'T'), :node($/) );
make $qast;
}
method backslash:sym<v>($/) {
my $qast := QAST::Regex.new( "\x[0a,0b,0c,0d,85,2028,2029]",
:rxtype('enumcharlist'),
:negate($<sym> eq 'V'), :node($/) );
make $qast;
}
method backslash:sym<o>($/) {
my $octlit :=
HLL::Actions.ints_to_string( $<octint> || $<octints><octint> );
make $<sym> eq 'O'
?? QAST::Regex.new( $octlit, :rxtype('enumcharlist'),
:negate(1), :node($/) )
!! QAST::Regex.new( $octlit, :rxtype('literal'), :node($/) );
}
method backslash:sym<x>($/) {
my $hexlit :=
HLL::Actions.ints_to_string( $<hexint> || $<hexints><hexint> );
make $<sym> eq 'X'
?? QAST::Regex.new( $hexlit, :rxtype('enumcharlist'),
:negate(1), :node($/) )
!! QAST::Regex.new( $hexlit, :rxtype('literal'), :node($/) );
}
method backslash:sym<c>($/) {
make QAST::Regex.new( $<charspec>.ast, :rxtype('literal'), :node($/) );
}
method assertion:sym<name>($/) {
my $name := ~$<longname>;
my $qast;
if $<assertion> {
$qast := $<assertion>[0].ast;
self.subrule_alias($qast, $name);
}
elsif $name eq 'sym' {
my $loc := nqp::index(%*RX<name>, ':sym<');
$loc := nqp::index(%*RX<name>, ':sym«')
if $loc < 0;
my $rxname := nqp::substr(%*RX<name>, $loc + 5);
$rxname := nqp::substr($rxname, 0, nqp::chars($rxname) - 1);
$qast := QAST::Regex.new(:name('sym'), :rxtype<subcapture>, :node($/),
QAST::Regex.new(:rxtype<literal>, $rxname, :node($/)));
}
else {
$qast := QAST::Regex.new(:rxtype<subrule>, :subtype<capture>,
:node($/), :name($name),
QAST::Node.new(QAST::SVal.new( :value($name) )));
if $<arglist> {
for $<arglist>[0].ast.list { $qast[0].push( $_ ) }
}
elsif $<nibbler> {
$name eq 'after' ??
$qast[0].push(qbuildsub(self.flip_ast($<nibbler>[0].ast), :anon(1), :addself(1))) !!
$qast[0].push(qbuildsub($<nibbler>[0].ast, :anon(1), :addself(1)));
}
}
make $qast;
}
method arg($/) {
make $<quote_EXPR> ?? $<quote_EXPR>.ast !! +$<val>;
}
method arglist($/) {
my $past := QAST::Op.new( :op('list') );
for $<arg> { $past.push( $_.ast ); }
make $past;
}
method subrule_alias($ast, $name) {
if $ast.name gt '' { $ast.name( $name ~ '=' ~ $ast.name ); }
else { $ast.name($name); }
$ast.subtype('capture');
}
}
Jump to Line
Something went wrong with that request. Please try again.