Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Copy P6Regex to get a P5Regex. Start making a few of the basic change…

…s needed, looking to STD_P5 for inspiration.
  • Loading branch information...
commit b1a3a4611be9ddbfc1006b7714005a47fec45d39 1 parent dd9cb19
Jonathan Worthington jnthn authored
674 src/QRegex/P5Regex/Actions.nqp
View
@@ -0,0 +1,674 @@
+class QRegex::P5Regex::Actions is HLL::Actions {
+ method TOP($/) {
+ make buildsub($<nibbler>.ast);
+ }
+
+ method nibbler($/) { make $<termaltseq>.ast }
+
+ method termaltseq($/) {
+ my $qast := $<termish>[0].ast;
+ if +$<termish> > 1 {
+ $qast := QAST::Regex.new( :rxtype<altseq>, :node($/) );
+ for $<termish> { $qast.push($_.ast); }
+ }
+ make $qast;
+ }
+
+ method termish($/) {
+ my $qast := QAST::Regex.new( :rxtype<concat>, :node($/) );
+ my $lastlit := 0;
+ for $<noun> {
+ my $ast := $_.ast;
+ if $ast {
+ if $lastlit && $ast.rxtype eq 'literal'
+ && !QAST::Node.ACCEPTS($ast[0]) {
+ $lastlit[0] := $lastlit[0] ~ $ast[0];
+ }
+ else {
+ $qast.push($_.ast);
+ $lastlit := $ast.rxtype eq 'literal'
+ && !QAST::Node.ACCEPTS($ast[0])
+ ?? $ast !! 0;
+ }
+ }
+ }
+ make $qast;
+ }
+
+ method quantified_atom($/) {
+ my $qast := $<atom>.ast;
+ if $<quantifier> {
+ my $ast := $<quantifier>[0].ast;
+ $ast.unshift($qast);
+ $qast := $ast;
+ }
+ $qast.backtrack('r') if $qast && !$qast.backtrack &&
+ (%*RX<r> || $<backmod> && ~$<backmod>[0] eq ':');
+ make $qast;
+ }
+
+ method atom($/) {
+ if $<metachar> {
+ make $<metachar>.ast;
+ }
+ else {
+ my $qast := QAST::Regex.new( ~$/, :rxtype<literal>, :node($/));
+ $qast.subtype('ignorecase') if %*RX<i>;
+ make $qast;
+ }
+ }
+
+ method quantifier:sym<*>($/) {
+ my $qast := QAST::Regex.new( :rxtype<quant>, :min(0), :max(-1), :node($/) );
+ make backmod($qast, $<backmod>);
+ }
+
+ method quantifier:sym<+>($/) {
+ my $qast := QAST::Regex.new( :rxtype<quant>, :min(1), :max(-1), :node($/) );
+ make backmod($qast, $<backmod>);
+ }
+
+ method quantifier:sym<?>($/) {
+ my $qast := QAST::Regex.new( :rxtype<quant>, :min(0), :max(1), :node($/) );
+ make backmod($qast, $<backmod>);
+ }
+
+ method quantifier:sym<**>($/) {
+ my $qast;
+ $qast := QAST::Regex.new( :rxtype<quant>, :min(+$<min>), :max(-1), :node($/) );
+ if ! $<max> { $qast.max(+$<min>) }
+ elsif $<max>[0] ne '*' { $qast.max(+$<max>[0]); }
+ make backmod($qast, $<backmod>);
+ }
+
+ method metachar:sym<ws>($/) {
+ my $qast := %*RX<s>
+ ?? QAST::Regex.new(PAST::Node.new('ws'), :rxtype<ws>, :subtype<method>, :node($/))
+ !! 0;
+ make $qast;
+ }
+
+ method metachar:sym<[ ]>($/) {
+ make $<nibbler>.ast;
+ }
+
+ method metachar:sym<( )>($/) {
+ my $subpast := PAST::Node.new(buildsub($<nibbler>.ast, :anon(1)));
+ my $qast := QAST::Regex.new( $subpast, $<nibbler>.ast, :rxtype('subrule'),
+ :subtype('capture'), :node($/) );
+ make $qast;
+ }
+
+ method metachar:sym<'>($/) {
+ my $quote := $<quote_EXPR>.ast;
+ if PAST::Val.ACCEPTS($quote) { $quote := $quote.value; }
+ if QAST::SVal.ACCEPTS($quote) { $quote := $quote.value; }
+ my $qast := QAST::Regex.new( $quote, :rxtype<literal>, :node($/) );
+ $qast.subtype('ignorecase') if %*RX<i>;
+ make $qast;
+ }
+
+ method metachar:sym<">($/) {
+ my $quote := $<quote_EXPR>.ast;
+ if PAST::Val.ACCEPTS($quote) { $quote := $quote.value; }
+ if QAST::SVal.ACCEPTS($quote) { $quote := $quote.value; }
+ my $qast := QAST::Regex.new( $quote, :rxtype<literal>, :node($/) );
+ $qast.subtype('ignorecase') if %*RX<i>;
+ make $qast;
+ }
+
+ method metachar:sym<.>($/) {
+ make QAST::Regex.new( :rxtype<cclass>, :subtype<.>, :node($/) );
+ }
+
+ method metachar:sym<^>($/) {
+ make QAST::Regex.new( :rxtype<anchor>, :subtype<bos>, :node($/) );
+ }
+
+ method metachar:sym<^^>($/) {
+ make QAST::Regex.new( :rxtype<anchor>, :subtype<bol>, :node($/) );
+ }
+
+ method metachar:sym<$>($/) {
+ make QAST::Regex.new( :rxtype<anchor>, :subtype<eos>, :node($/) );
+ }
+
+ method metachar:sym<$$>($/) {
+ make QAST::Regex.new( :rxtype<anchor>, :subtype<eol>, :node($/) );
+ }
+
+ method metachar:sym<lwb>($/) {
+ make QAST::Regex.new( :rxtype<anchor>, :subtype<lwb>, :node($/) );
+ }
+
+ method metachar:sym<rwb>($/) {
+ make QAST::Regex.new( :rxtype<anchor>, :subtype<rwb>, :node($/) );
+ }
+
+ method metachar:sym<from>($/) {
+ make QAST::Regex.new( :rxtype<subrule>, :subtype<capture>,
+ :backtrack<r>,
+ :name<$!from>, PAST::Node.new('!LITERAL', ''), :node($/) );
+ }
+
+ method metachar:sym<to>($/) {
+ make QAST::Regex.new( :rxtype<subrule>, :subtype<capture>,
+ :backtrack<r>,
+ :name<$!to>, PAST::Node.new('!LITERAL', ''), :node($/) );
+ }
+
+ method metachar:sym<bs>($/) {
+ make $<backslash>.ast;
+ }
+
+ method metachar:sym<assert>($/) {
+ make $<assertion>.ast;
+ }
+
+ method metachar:sym<var>($/) {
+ my $qast;
+ my $name := $<pos> ?? +$<pos> !! ~$<name>;
+ if $<quantified_atom> {
+ $qast := $<quantified_atom>[0].ast;
+ if $qast.rxtype eq 'quant' && $qast[0].rxtype eq 'subrule' {
+ self.subrule_alias($qast[0], $name);
+ }
+ elsif $qast.rxtype eq 'subrule' {
+ self.subrule_alias($qast, $name);
+ }
+ else {
+ $qast := QAST::Regex.new( $qast, :name($name),
+ :rxtype<subcapture>, :node($/) );
+ }
+ }
+ else {
+ $qast := QAST::Regex.new( PAST::Node.new('!BACKREF', $name),
+ :rxtype<subrule>, :subtype<method>, :node($/));
+ }
+ make $qast;
+ }
+
+ method metachar:sym<~>($/) {
+ make QAST::Regex.new(
+ $<EXPR>.ast,
+ QAST::Regex.new(
+ $<GOAL>.ast,
+ QAST::Regex.new( PAST::Node.new('FAILGOAL', ~$<GOAL>),
+ :rxtype<subrule>, :subtype<method> ),
+ :rxtype<altseq>
+ ),
+ :rxtype<concat>
+ );
+ }
+
+ method backslash:sym<s>($/) {
+ make QAST::Regex.new(:rxtype<cclass>, '.CCLASS_WHITESPACE',
+ :subtype($<sym> eq 'n' ?? 'nl' !! ~$<sym>),
+ :negate($<sym> le 'Z'), :node($/));
+ }
+
+ method backslash:sym<b>($/) {
+ my $qast := QAST::Regex.new( "\b", :rxtype('enumcharlist'),
+ :negate($<sym> eq 'B'), :node($/) );
+ make $qast;
+ }
+
+ method backslash:sym<e>($/) {
+ my $qast := QAST::Regex.new( "\c[27]", :rxtype('enumcharlist'),
+ :negate($<sym> eq 'E'), :node($/) );
+ make $qast;
+ }
+
+ method backslash:sym<f>($/) {
+ my $qast := QAST::Regex.new( "\c[12]", :rxtype('enumcharlist'),
+ :negate($<sym> eq 'F'), :node($/) );
+ make $qast;
+ }
+
+ method backslash:sym<h>($/) {
+ my $qast := QAST::Regex.new( "\x[09,20,a0,1680,180e,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,200a,202f,205f,3000]", :rxtype('enumcharlist'),
+ :negate($<sym> eq 'H'), :node($/) );
+ make $qast;
+ }
+
+ method backslash:sym<r>($/) {
+ my $qast := QAST::Regex.new( "\r", :rxtype('enumcharlist'),
+ :negate($<sym> eq 'R'), :node($/) );
+ make $qast;
+ }
+
+ method backslash:sym<t>($/) {
+ my $qast := QAST::Regex.new( "\t", :rxtype('enumcharlist'),
+ :negate($<sym> eq 'T'), :node($/) );
+ make $qast;
+ }
+
+ method backslash:sym<v>($/) {
+ my $qast := QAST::Regex.new( "\x[0a,0b,0c,0d,85,2028,2029]",
+ :rxtype('enumcharlist'),
+ :negate($<sym> eq 'V'), :node($/) );
+ make $qast;
+ }
+
+ method backslash:sym<o>($/) {
+ my $octlit :=
+ HLL::Actions.ints_to_string( $<octint> || $<octints><octint> );
+ make $<sym> eq 'O'
+ ?? QAST::Regex.new( $octlit, :rxtype('enumcharlist'),
+ :negate(1), :node($/) )
+ !! QAST::Regex.new( $octlit, :rxtype('literal'), :node($/) );
+ }
+
+ method backslash:sym<x>($/) {
+ my $hexlit :=
+ HLL::Actions.ints_to_string( $<hexint> || $<hexints><hexint> );
+ make $<sym> eq 'X'
+ ?? QAST::Regex.new( $hexlit, :rxtype('enumcharlist'),
+ :negate(1), :node($/) )
+ !! QAST::Regex.new( $hexlit, :rxtype('literal'), :node($/) );
+ }
+
+ method backslash:sym<c>($/) {
+ make QAST::Regex.new( $<charspec>.ast, :rxtype('literal'), :node($/) );
+ }
+
+ method backslash:sym<misc>($/) {
+ my $qast := QAST::Regex.new( ~$/ , :rxtype('literal'), :node($/) );
+ make $qast;
+ }
+
+ method assertion:sym<?>($/) {
+ my $qast;
+ if $<assertion> {
+ $qast := $<assertion>.ast;
+ $qast.subtype('zerowidth');
+ }
+ else {
+ $qast := QAST::Regex.new( :rxtype<anchor>, :subtype<pass>, :node($/) );
+ }
+ make $qast;
+ }
+
+ method assertion:sym<!>($/) {
+ my $qast;
+ if $<assertion> {
+ $qast := $<assertion>.ast;
+ $qast.negate( !$qast.negate );
+ $qast.subtype('zerowidth');
+ }
+ else {
+ $qast := QAST::Regex.new( :rxtype<anchor>, :subtype<fail>, :node($/) );
+ }
+ make $qast;
+ }
+
+ method assertion:sym<|>($/) {
+ my $qast;
+ my $name := ~$<identifier>;
+ if $name eq 'c' {
+ # codepoint boundaries alway match in
+ # our current Unicode abstraction level
+ $qast := 0;
+ }
+ elsif $name eq 'w' {
+ $qast := QAST::Regex.new(:rxtype<subrule>, :subtype<method>,
+ :node($/), PAST::Node.new('wb'),
+ :name('') );
+ }
+ make $qast;
+ }
+
+ method assertion:sym<method>($/) {
+ my $qast := $<assertion>.ast;
+ $qast.subtype('method');
+ $qast.name('');
+ make $qast;
+ }
+
+ method assertion:sym<name>($/) {
+ my $name := ~$<longname>;
+ my $qast;
+ if $<assertion> {
+ $qast := $<assertion>[0].ast;
+ self.subrule_alias($qast, $name);
+ }
+ elsif $name eq 'sym' {
+ my $loc := nqp::index(%*RX<name>, ':sym<');
+ $loc := nqp::index(%*RX<name>, ':sym«')
+ if $loc < 0;
+ my $rxname := pir::chopn__Ssi(nqp::substr(%*RX<name>, $loc + 5), 1);
+ $qast := QAST::Regex.new(:name('sym'), :rxtype<subcapture>, :node($/),
+ QAST::Regex.new(:rxtype<literal>, $rxname, :node($/)));
+ }
+ else {
+ $qast := QAST::Regex.new(:rxtype<subrule>, :subtype<capture>,
+ :node($/), PAST::Node.new($name),
+ :name($name) );
+ if $<arglist> {
+ for $<arglist>[0].ast.list { $qast[0].push( $_ ) }
+ }
+ elsif $<nibbler> {
+ $name eq 'after' ??
+ $qast[0].push(buildsub(self.flip_ast($<nibbler>[0].ast), :anon(1))) !!
+ $qast[0].push(buildsub($<nibbler>[0].ast, :anon(1)));
+ }
+ }
+ make $qast;
+ }
+
+ method assertion:sym<[>($/) {
+ my $clist := $<cclass_elem>;
+ my $qast := $clist[0].ast;
+ if $qast.negate && $qast.rxtype eq 'subrule' {
+ $qast.subtype('zerowidth');
+ $qast := QAST::Regex.new(:rxtype<concat>, :node($/),
+ $qast,
+ QAST::Regex.new( :rxtype<cclass>, :subtype<.> ));
+ }
+ my $i := 1;
+ my $n := +$clist;
+ while $i < $n {
+ my $ast := $clist[$i].ast;
+ if $ast.negate {
+ $ast.subtype('zerowidth');
+ $qast := QAST::Regex.new( $ast, $qast, :rxtype<concat>, :node($/));
+ }
+ else {
+ $qast := QAST::Regex.new( $qast, $ast, :rxtype<altseq>, :node($/));
+ }
+ $i++;
+ }
+ make $qast;
+ }
+
+ method arg($/) {
+ make $<quote_EXPR> ?? $<quote_EXPR>.ast !! +$<val>;
+ }
+
+ method arglist($/) {
+ my $past := PAST::Op.new( :pasttype('list') );
+ for $<arg> { $past.push( $_.ast ); }
+ make $past;
+ }
+
+ method cclass_elem($/) {
+ my $str := '';
+ my $qast;
+ if $<name> {
+ my $name := ~$<name>;
+ $qast := QAST::Regex.new( PAST::Node.new($name), :rxtype<subrule>, :subtype<method>,
+ :negate( $<sign> eq '-' ), :node($/) );
+ }
+ elsif $<uniprop> {
+ my $uniprop := ~$<uniprop>;
+ $qast := QAST::Regex.new( $uniprop, :rxtype<uniprop>,
+ :negate( $<sign> eq '-' && $<invert> ne '!' # $<sign> ^^ $<invert>
+ || $<sign> ne '-' && $<invert> eq '!' ), :node($/) );
+ }
+ else {
+ my @alts;
+ for $<charspec> {
+ if $_[1] {
+ my $node;
+ my $lhs;
+ my $rhs;
+ if $_[0]<backslash> {
+ $node := $_[0]<backslash>.ast;
+ $/.CURSOR.panic("Illegal range endpoint in regex: " ~ ~$_)
+ if $node.rxtype ne 'literal' && $node.rxtype ne 'enumcharlist'
+ || $node.negate || nqp::chars($node[0]) != 1;
+ $lhs := $node[0];
+ }
+ else {
+ $lhs := ~$_[0][0];
+ }
+ if $_[1][0]<backslash> {
+ $node := $_[1][0]<backslash>.ast;
+ $/.CURSOR.panic("Illegal range endpoint in regex: " ~ ~$_)
+ if $node.rxtype ne 'literal' && $node.rxtype ne 'enumcharlist'
+ || $node.negate || nqp::chars($node[0]) != 1;
+ $rhs := $node[0];
+ }
+ else {
+ $rhs := ~$_[1][0][0];
+ }
+ my $ord0 := nqp::ord($lhs);
+ my $ord1 := nqp::ord($rhs);
+ $/.CURSOR.panic("Illegal reversed character range in regex: " ~ ~$_)
+ if $ord0 > $ord1;
+ $str := nqp::concat($str, nqp::chr($ord0++)) while $ord0 <= $ord1;
+ }
+ elsif $_[0]<backslash> {
+ my $bs := $_[0]<backslash>.ast;
+ $bs.negate(!$bs.negate) if $<sign> eq '-';
+ @alts.push($bs);
+ }
+ else { $str := $str ~ ~$_[0]; }
+ }
+ @alts.push(QAST::Regex.new( $str, :rxtype<enumcharlist>, :node($/), :negate( $<sign> eq '-' ) ))
+ if nqp::chars($str);
+ $qast := +@alts == 1 ?? @alts[0] !!
+ $<sign> eq '-' ??
+ QAST::Regex.new( :rxtype<concat>, :node($/),
+ QAST::Regex.new( :rxtype<conj>, :subtype<zerowidth>, |@alts ),
+ QAST::Regex.new( :rxtype<cclass>, :subtype<.> ) ) !!
+ QAST::Regex.new( :rxtype<altseq>, |@alts );
+ }
+ #$qast.negate( $<sign> eq '-' );
+ make $qast;
+ }
+
+ method mod_internal($/) {
+ my $n := $<n>[0] gt '' ?? +$<n>[0] !! 1;
+ %*RX{ ~$<mod_ident><sym> } := $n;
+ make 0;
+ }
+
+ sub backmod($ast, $backmod) {
+ if $backmod eq ':' { $ast.backtrack('r') }
+ elsif $backmod eq ':?' || $backmod eq '?' { $ast.backtrack('f') }
+ elsif $backmod eq ':!' || $backmod eq '!' { $ast.backtrack('g') }
+ $ast;
+ }
+
+ our sub buildsub($qast, $block = PAST::Block.new(:blocktype<method>), :$anon) {
+ my $blockid := $block.subid;
+ my $hashpast := PAST::Op.new( :pasttype<hash> );
+ for capnames($qast, 0) {
+ if $_.key gt '' {
+ $hashpast.push($_.key);
+ $hashpast.push(
+ nqp::iscclass(pir::const::CCLASS_NUMERIC, $_.key, 0) + ($_.value > 1) * 2);
+ }
+ }
+ my $initpast := PAST::Stmts.new();
+ my $capblock := PAST::Block.new( :hll<nqp>, :namespace(['Sub']), :lexical(0),
+ :name($blockid ~ '_caps'), $hashpast );
+ $initpast.push(PAST::Stmt.new($capblock));
+
+ my $nfapast := QRegex::NFA.new.addnode($qast).past;
+ if $nfapast {
+ my $nfablock := PAST::Block.new(
+ :hll<nqp>, :namespace(['Sub']), :lexical(0),
+ :name($blockid ~ '_nfa'), $nfapast);
+ $initpast.push(PAST::Stmt.new($nfablock));
+ }
+ alt_nfas($qast, $blockid, $initpast);
+
+ unless $block.symbol('$¢') {
+ $initpast.push(PAST::Var.new(:name<$¢>, :scope<lexical>, :isdecl(1)));
+ $block.symbol('$¢', :scope<lexical>);
+ }
+
+ $block<orig_qast> := $qast;
+
+ $qast := QAST::Regex.new( :rxtype<concat>,
+ QAST::Regex.new( :rxtype<scan> ),
+ $qast,
+ ($anon ??
+ QAST::Regex.new( :rxtype<pass> ) !!
+ QAST::Regex.new( :rxtype<pass>, :name(%*RX<name>) )));
+ $block.push($initpast);
+ $block.push(PAST::QAST.new($qast));
+ $block;
+ }
+
+ our sub qbuildsub($qast, $block = QAST::Block.new(), :$anon, :$addself) {
+ my $blockid := $block.cuid;
+ my $hashpast := QAST::Op.new( :op<hash> );
+ for capnames($qast, 0) {
+ if $_.key gt '' {
+ $hashpast.push(QAST::SVal.new( :value($_.key) ));
+ $hashpast.push(QAST::IVal.new( :value(
+ nqp::iscclass(pir::const::CCLASS_NUMERIC, $_.key, 0) + ($_.value > 1) * 2) ));
+ }
+ }
+ my $initpast := QAST::Stmts.new();
+ if $addself {
+ $initpast.push(QAST::Var.new( :name('self'), :scope('local'), :decl('param') ));
+ }
+ my $capblock := QAST::BlockMemo.new( :name($blockid ~ '_caps'), $hashpast );
+ $initpast.push(QAST::Stmt.new($capblock));
+
+ my $nfapast := QRegex::NFA.new.addnode($qast).qast;
+ if $nfapast {
+ my $nfablock := QAST::BlockMemo.new( :name($blockid ~ '_nfa'), $nfapast);
+ $initpast.push(QAST::Stmt.new($nfablock));
+ }
+ qalt_nfas($qast, $blockid, $initpast);
+
+ unless $block.symbol('$¢') {
+ $initpast.push(QAST::Var.new(:name<$¢>, :scope<lexical>, :decl('var')));
+ $block.symbol('$¢', :scope<lexical>);
+ }
+
+ $block<orig_qast> := $qast;
+
+ $qast := QAST::Regex.new( :rxtype<concat>,
+ QAST::Regex.new( :rxtype<scan> ),
+ $qast,
+ ($anon ??
+ QAST::Regex.new( :rxtype<pass> ) !!
+ QAST::Regex.new( :rxtype<pass>, :name(%*RX<name>) )));
+ $block.push($initpast);
+ $block.push($qast);
+ $block;
+ }
+
+ sub capnames($ast, $count) {
+ my %capnames;
+ my $rxtype := $ast.rxtype;
+ if $rxtype eq 'concat' {
+ for $ast.list {
+ my %x := capnames($_, $count);
+ for %x { %capnames{$_.key} := +%capnames{$_.key} + $_.value; }
+ $count := %x{''};
+ }
+ }
+ elsif $rxtype eq 'altseq' || $rxtype eq 'alt' {
+ my $max := $count;
+ for $ast.list {
+ my %x := capnames($_, $count);
+ for %x {
+ %capnames{$_.key} := +%capnames{$_.key} < 2 && %x{$_.key} == 1 ?? 1 !! 2;
+ }
+ $max := %x{''} if %x{''} > $max;
+ }
+ $count := $max;
+ }
+ elsif $rxtype eq 'subrule' && $ast.subtype eq 'capture' {
+ my $name := $ast.name;
+ if $name eq '' { $name := $count; $ast.name($name); }
+ my @names := nqp::split('=', $name);
+ for @names {
+ if $_ eq '0' || $_ > 0 { $count := $_ + 1; }
+ %capnames{$_} := 1;
+ }
+ }
+ elsif $rxtype eq 'subcapture' {
+ for nqp::split(' ', $ast.name) {
+ if $_ eq '0' || $_ > 0 { $count := $_ + 1; }
+ %capnames{$_} := 1;
+ }
+ my %x := capnames($ast[0], $count);
+ for %x { %capnames{$_.key} := +%capnames{$_.key} + %x{$_.key} }
+ $count := %x{''};
+ }
+ elsif $rxtype eq 'quant' {
+ my %astcap := capnames($ast[0], $count);
+ for %astcap { %capnames{$_} := 2 }
+ $count := %astcap{''};
+ }
+ %capnames{''} := $count;
+ nqp::deletekey(%capnames, '$!from');
+ nqp::deletekey(%capnames, '$!to');
+ %capnames;
+ }
+
+ sub alt_nfas($ast, $subid, $initpast) {
+ my $rxtype := $ast.rxtype;
+ if $rxtype eq 'alt' {
+ my $nfapast := PAST::Op.new( :pasttype('list') );
+ $ast.name(PAST::Node.unique('alt_nfa_') ~ '_' ~ ~nqp::time_n());
+ for $ast.list {
+ alt_nfas($_, $subid, $initpast);
+ $nfapast.push(QRegex::NFA.new.addnode($_).past(:non_empty));
+ }
+ my $nfablock := PAST::Block.new(
+ :hll<nqp>, :namespace(['Sub']), :lexical(0),
+ :name($subid ~ '_' ~ $ast.name), $nfapast);
+ $initpast.push(PAST::Stmt.new($nfablock));
+ }
+ elsif $rxtype eq 'subcapture' || $rxtype eq 'quant' {
+ alt_nfas($ast[0], $subid, $initpast)
+ }
+ elsif $rxtype eq 'concat' || $rxtype eq 'altseq' || $rxtype eq 'conj' || $rxtype eq 'conjseq' {
+ for $ast.list { alt_nfas($_, $subid, $initpast) }
+ }
+ }
+
+ sub qalt_nfas($ast, $subid, $initpast) {
+ my $rxtype := $ast.rxtype;
+ if $rxtype eq 'alt' {
+ my $nfapast := QAST::Op.new( :op('list') );
+ $ast.name(QAST::Node.unique('alt_nfa_') ~ '_' ~ ~nqp::time_n());
+ for $ast.list {
+ qalt_nfas($_, $subid, $initpast);
+ $nfapast.push(QRegex::NFA.new.addnode($_).qast(:non_empty));
+ }
+ my $nfablock := QAST::BlockMemo.new( :name($subid ~ '_' ~ $ast.name), $nfapast);
+ $initpast.push(QAST::Stmt.new($nfablock));
+ }
+ elsif $rxtype eq 'subcapture' || $rxtype eq 'quant' {
+ qalt_nfas($ast[0], $subid, $initpast)
+ }
+ elsif $rxtype eq 'concat' || $rxtype eq 'altseq' || $rxtype eq 'conj' || $rxtype eq 'conjseq' {
+ for $ast.list { qalt_nfas($_, $subid, $initpast) }
+ }
+ }
+
+ method subrule_alias($ast, $name) {
+ if $ast.name gt '' { $ast.name( $name ~ '=' ~ $ast.name ); }
+ else { $ast.name($name); }
+ $ast.subtype('capture');
+ }
+
+ method flip_ast($qast) {
+ return $qast unless nqp::istype($qast, QAST::Regex);
+ if $qast.rxtype eq 'literal' {
+ $qast[0] := $qast[0].reverse();
+ }
+ elsif $qast.rxtype eq 'concat' {
+ my @tmp;
+ while +@($qast) { @tmp.push(@($qast).shift) }
+ while @tmp { @($qast).push(self.flip_ast(@tmp.pop)) }
+ }
+ elsif $qast.rxtype eq 'pastnode' {
+ # Don't go exploring these
+ }
+ else {
+ for @($qast) { self.flip_ast($_) }
+ }
+ $qast
+ }
+}
11 src/QRegex/P5Regex/Compiler.nqp
View
@@ -0,0 +1,11 @@
+class QRegex::P5Regex::Compiler is HLL::Compiler {
+}
+
+my $p5regex := QRegex::P5Regex::Compiler.new();
+$p5regex.language('QRegex::P5Regex');
+$p5regex.parsegrammar(QRegex::P5Regex::Grammar);
+$p5regex.parseactions(QRegex::P5Regex::Actions);
+
+sub MAIN(@ARGS) {
+ $p5regex.command_line(@ARGS, :encoding('utf8'), :transcode('ucs4'));
+}
222 src/QRegex/P5Regex/Grammar.nqp
View
@@ -0,0 +1,222 @@
+use QRegex;
+use NQPHLL;
+use QAST;
+use PASTRegex;
+
+grammar QRegex::P5Regex::Grammar is HLL::Grammar {
+ token TOP {
+ :my %*RX;
+ <nibbler>
+ [ $ || <.panic: 'Confused'> ]
+ }
+
+ token nibbler {
+ :my $OLDRX := pir::find_dynamic_lex__Ps('%*RX');
+ :my %*RX;
+ {
+ for $OLDRX { %*RX{$_.key} := $_.value; }
+ }
+ [ <.ws> ['||'|'|'|'&&'|'&'] ]?
+ <termaltseq>
+ }
+
+ token termaltseq {
+ <termish>
+ [ '|' <![|]> [ <termish> || <.panic: 'Null pattern not allowed'> ] ]*
+ }
+
+ token termish {
+ <.ws> # XXX assuming old /x here?
+ <noun=.quantified_atom>+
+ }
+
+ token quantified_atom {
+ <atom>
+ [ <.ws> <quantifier=p5quantifier> ]?
+ <.ws>
+ }
+
+ proto token p5metachar { <...> }
+
+ proto token p5backslash { <...> }
+
+ proto token p5assertion { <...> }
+
+ proto token p5quantifier { <...> }
+
+ proto token p5mod_internal { <...> }
+
+ token ws { [ \s+ | '#' \N* ]* }
+
+ # XXX Below here is straight from P6Regex and unreviewed.
+
+ token normspace { <?before \s | '#' > <.ws> }
+
+ token identifier { <.ident> [ <[\-']> <.ident> ]* }
+
+ token arg {
+ [
+ | <?[']> <quote_EXPR: ':q'>
+ | <?["]> <quote_EXPR: ':qq'>
+ | $<val>=[\d+]
+ ]
+ }
+
+ rule arglist { <arg> [ ',' <arg>]* }
+
+ token atom {
+ # :dba('regex atom')
+ [
+ | \w [ \w+! <?before \w> ]?
+ | <metachar>
+ ]
+ }
+
+ proto token quantifier { <...> }
+ token quantifier:sym<*> { <sym> <backmod> }
+ token quantifier:sym<+> { <sym> <backmod> }
+ token quantifier:sym<?> { <sym> <backmod> }
+ token quantifier:sym<{N,M}> { {} '{' (\d+) (','?) (\d*) '}'
+ <.obs: '{N,M} as general quantifier', '** N..M (or ** N..*)'>
+ }
+ token quantifier:sym<**> {
+ <sym> <normspace>? <backmod> <normspace>?
+ [
+ || $<min>=[\d+]
+ [ '..'
+ $<max>=[
+ || \d+
+ || '*'
+ || <.panic: "Only integers or '*' allowed as range quantifier endpoint">
+ ]
+ ]?
+ ]
+ }
+
+ token backmod { ':'? [ '?' | '!' | <!before ':'> ] }
+
+ proto token metachar { <...> }
+ token metachar:sym<ws> { <.normspace> }
+ token metachar:sym<[ ]> { '[' <nibbler> ']' }
+ token metachar:sym<( )> { '(' <nibbler> ')' }
+ token metachar:sym<'> { <?[']> <quote_EXPR: ':q'> }
+ token metachar:sym<"> { <?["]> <quote_EXPR: ':qq'> }
+ token metachar:sym<.> { <sym> }
+ token metachar:sym<^> { <sym> }
+ token metachar:sym<^^> { <sym> }
+ token metachar:sym<$> { <sym> }
+ token metachar:sym<$$> { <sym> }
+ token metachar:sym<:::> { <sym> <.panic: '::: not yet implemented'> }
+ token metachar:sym<::> { <sym> <.panic: ':: not yet implemented'> }
+ token metachar:sym<lwb> { $<sym>=['<<'|'«'] }
+ token metachar:sym<rwb> { $<sym>=['>>'|'»'] }
+ token metachar:sym<from> { '<(' }
+ token metachar:sym<to> { ')>' }
+ token metachar:sym<bs> { \\ <backslash> }
+ token metachar:sym<mod> { <mod_internal> }
+ token metachar:sym<quantifier> {
+ <quantifier> <.panic: 'Quantifier quantifies nothing'>
+ }
+
+ ## we cheat here, really should be regex_infix:sym<~>
+ token metachar:sym<~> {
+ <sym>
+ <.ws> <GOAL=.quantified_atom>
+ <.ws> <EXPR=.quantified_atom>
+ }
+
+ token metachar:sym<{*}> {
+ <sym>
+ [ \h* '#= ' \h* $<key>=[\S+ [\h+ \S+]*] ]?
+ }
+ token metachar:sym<assert> {
+ '<' <assertion>
+ [ '>' || <.panic: 'regex assertion not terminated by angle bracket'> ]
+ }
+
+ token metachar:sym<var> {
+ [
+ | '$<' $<name>=[<-[>]>+] '>'
+ | '$' $<pos>=[\d+]
+ ]
+
+ [ <.ws> '=' <.ws> <quantified_atom> ]?
+ }
+
+ token metachar:sym<PIR> {
+ ':PIR{{' $<pir>=[.*?] '}}'
+ }
+
+ proto token backslash { <...> }
+ token backslash:sym<s> { $<sym>=[<[dDnNsSwW]>] }
+ token backslash:sym<b> { $<sym>=[<[bB]>] }
+ token backslash:sym<e> { $<sym>=[<[eE]>] }
+ token backslash:sym<f> { $<sym>=[<[fF]>] }
+ token backslash:sym<h> { $<sym>=[<[hH]>] }
+ token backslash:sym<r> { $<sym>=[<[rR]>] }
+ token backslash:sym<t> { $<sym>=[<[tT]>] }
+ token backslash:sym<v> { $<sym>=[<[vV]>] }
+ token backslash:sym<o> { $<sym>=[<[oO]>] [ <octint> | '[' <octints> ']' ] }
+ token backslash:sym<x> { $<sym>=[<[xX]>] [ <hexint> | '[' <hexints> ']' ] }
+ token backslash:sym<c> { $<sym>=[<[cC]>] <charspec> }
+ token backslash:sym<A> { 'A' <.obs: '\\A as beginning-of-string matcher', '^'> }
+ token backslash:sym<z> { 'z' <.obs: '\\z as end-of-string matcher', '$'> }
+ token backslash:sym<Z> { 'Z' <.obs: '\\Z as end-of-string matcher', '\\n?$'> }
+ token backslash:sym<Q> { 'Q' <.obs: '\\Q as quotemeta', 'quotes or literal variable match'> }
+ token backslash:sym<unrec> { {} \w <.panic: 'Unrecognized backslash sequence'> }
+ token backslash:sym<misc> { \W }
+
+ proto token assertion { <...> }
+
+ token assertion:sym<?> { '?' [ <?before '>' > | <assertion> ] }
+ token assertion:sym<!> { '!' [ <?before '>' > | <assertion> ] }
+ token assertion:sym<|> { '|' <identifier> }
+
+ token assertion:sym<method> {
+ '.' <assertion>
+ }
+
+ token assertion:sym<name> {
+ <longname=.identifier>
+ [
+ | <?before '>'>
+ | '=' <assertion>
+ | ':' <arglist>
+ | '(' <arglist> ')'
+ | <.normspace> <nibbler>
+ ]?
+ }
+
+ token assertion:sym<[> { <?before '['|'+'|'-'|':'> <cclass_elem>+ }
+
+ token cclass_elem {
+ $<sign>=['+'|'-'|<?>]
+ <.normspace>?
+ [
+ | '[' $<charspec>=(
+ || \s* '-' <!before \s* ']'> <.obs: '- as character range','.. for range, for explicit - in character class, escape it or place as last thing'>
+ || \s* ( '\\' <backslash> || (<-[\]\\]>) )
+ [
+ \s* '..' \s*
+ ( '\\' <backslash> || (<-[\]\\]>) )
+ ]?
+ )*
+ \s* ']'
+ | $<name>=[\w+]
+ | ':' $<invert>=['!'|<?>] $<uniprop>=[\w+]
+ ]
+ <.normspace>?
+ }
+
+ token mod_internal {
+ [
+ | ':' $<n>=('!' | \d+)**1 <mod_ident> »
+ | ':' <mod_ident> [ '(' $<n>=[\d+] ')' ]?
+ ]
+ }
+
+ proto token mod_ident { <...> }
+ token mod_ident:sym<ignorecase> { $<sym>=[i] 'gnorecase'? }
+ token mod_ident:sym<ratchet> { $<sym>=[r] 'atchet'? }
+ token mod_ident:sym<sigspace> { $<sym>=[s] 'igspace'? }
+}
19 tools/build/Makefile.in
View
@@ -99,6 +99,11 @@ P6QREGEX_SOURCES = \
src/QRegex/P6Regex/Actions.nqp \
src/QRegex/P6Regex/Compiler.nqp \
+P5QREGEX_SOURCES = \
+ src/QRegex/P5Regex/Grammar.nqp \
+ src/QRegex/P5Regex/Actions.nqp \
+ src/QRegex/P5Regex/Compiler.nqp \
+
QREGEX_COMBINED = QRegex.nqp
QREGEX_PIR = QRegex.pir
QREGEX_PBC = QRegex.pbc
@@ -151,6 +156,10 @@ P6QREGEX_PBC = NQPP6QRegex.pbc
P6QREGEX_COMBINED = gen/NQPP6QRegex.pm
P6QREGEX_COMBINED_PIR = gen/NQPP6QRegex.pir
+P5QREGEX_PBC = NQPP5QRegex.pbc
+P5QREGEX_COMBINED = gen/NQPP5QRegex.pm
+P5QREGEX_COMBINED_PIR = gen/NQPP5QRegex.pir
+
NQP_COMBINED = gen/NQP.pm
NQP_COMBINED_PIR = gen/NQP.pir
NQP_PBC = nqp.pbc
@@ -518,7 +527,7 @@ CLEANUPS = \
3rdparty/sha1/*$(O) \
$(DYNEXT_DIR)/*$(LOAD_EXT) \
-all: $(NQP_EXE)
+all: $(NQP_EXE) $(P5QREGEX_PBC)
install: all
$(MKPATH) $(DESTDIR)$(PARROT_LIBRARY_DIR)
@@ -527,6 +536,7 @@ install: all
$(MKPATH) $(DESTDIR)$(NQP_LANG_DIR)/lib
$(CP) $(QAST_PBC) $(DESTDIR)$(NQP_LANG_DIR)/lib/$(QAST_PBC)
$(CP) $(P6QREGEX_PBC) $(DESTDIR)$(NQP_LANG_DIR)/lib/$(P6QREGEX_PBC)
+ $(CP) $(P5QREGEX_PBC) $(DESTDIR)$(NQP_LANG_DIR)/lib/$(P5QREGEX_PBC)
$(CP) $(HLL_PBC) $(DESTDIR)$(NQP_LANG_DIR)/lib/$(HLL_PBC)
$(CP) $(CORE_SETTING_PBC) $(DESTDIR)$(NQP_LANG_DIR)/lib/$(CORE_SETTING_PBC)
$(CP) $(NQP_MO_PBC) $(DESTDIR)$(NQP_LANG_DIR)/lib/$(NQP_MO_PBC)
@@ -715,6 +725,13 @@ $(ALL_PBCS): $(STAGE2_PBCS)
$(NQP_EXE): $(NQP_PBC) $(PARROT_DLL_COPY)
$(PBC_TO_EXE) $(NQP_PBC)
+$(P5QREGEX_PBC): $(NQP_EXE) $(P5QREGEX_SOURCES)
+ $(MKPATH) $(STAGE2)/gen
+ $(PERL) tools/build/gen-cat.pl $(P5QREGEX_SOURCES) > $(STAGE2)/$(P5QREGEX_COMBINED)
+ $(NQP_EXE) --target=pir --output=$(STAGE2)/$(P5QREGEX_COMBINED_PIR) \
+ $(STAGE2)/$(P5QREGEX_COMBINED)
+ $(PARROT) -o $(P5QREGEX_PBC) $(STAGE2)/$(P5QREGEX_COMBINED_PIR)
+
@make_dllcopy@
$(DYNEXT_TARGET): $(DYNPMC) $(DYNOPS) $(OPS_DIR)/$(BIGINT_OPS)$(LOAD_EXT) $(OPS_DIR)/$(DYNCALL_OPS)$(LOAD_EXT)
Please sign in to comment.
Something went wrong with that request. Please try again.