Skip to content
This repository
Browse code

Copy P6Regex to get a P5Regex. Start making a few of the basic change…

…s needed, looking to STD_P5 for inspiration.
  • Loading branch information...
commit b1a3a4611be9ddbfc1006b7714005a47fec45d39 1 parent dd9cb19
Jonathan Worthington authored August 23, 2012
674  src/QRegex/P5Regex/Actions.nqp
... ...
@@ -0,0 +1,674 @@
  1
+class QRegex::P5Regex::Actions is HLL::Actions {
  2
+    method TOP($/) {
  3
+        make buildsub($<nibbler>.ast);
  4
+    }
  5
+
  6
+    method nibbler($/) { make $<termaltseq>.ast }
  7
+
  8
+    method termaltseq($/) {
  9
+        my $qast := $<termish>[0].ast;
  10
+        if +$<termish> > 1 {
  11
+            $qast := QAST::Regex.new( :rxtype<altseq>, :node($/) );
  12
+            for $<termish> { $qast.push($_.ast); }
  13
+        }
  14
+        make $qast;
  15
+    }
  16
+
  17
+    method termish($/) {
  18
+        my $qast := QAST::Regex.new( :rxtype<concat>, :node($/) );
  19
+        my $lastlit := 0;
  20
+        for $<noun> {
  21
+            my $ast := $_.ast;
  22
+            if $ast {
  23
+                if $lastlit && $ast.rxtype eq 'literal'
  24
+                        && !QAST::Node.ACCEPTS($ast[0]) {
  25
+                    $lastlit[0] := $lastlit[0] ~ $ast[0];
  26
+                }
  27
+                else {
  28
+                    $qast.push($_.ast);
  29
+                    $lastlit := $ast.rxtype eq 'literal' 
  30
+                                && !QAST::Node.ACCEPTS($ast[0])
  31
+                                  ?? $ast !! 0;
  32
+                }
  33
+            }
  34
+        }
  35
+        make $qast;
  36
+    }
  37
+
  38
+    method quantified_atom($/) {
  39
+        my $qast := $<atom>.ast;
  40
+        if $<quantifier> {
  41
+            my $ast := $<quantifier>[0].ast;
  42
+            $ast.unshift($qast);
  43
+            $qast := $ast;
  44
+        }
  45
+        $qast.backtrack('r') if $qast && !$qast.backtrack &&
  46
+            (%*RX<r> || $<backmod> && ~$<backmod>[0] eq ':');
  47
+        make $qast;
  48
+    }
  49
+
  50
+    method atom($/) {
  51
+        if $<metachar> {
  52
+            make $<metachar>.ast;
  53
+        }
  54
+        else {
  55
+            my $qast := QAST::Regex.new( ~$/, :rxtype<literal>, :node($/));
  56
+            $qast.subtype('ignorecase') if %*RX<i>;
  57
+            make $qast;
  58
+        }
  59
+    }
  60
+
  61
+    method quantifier:sym<*>($/) {
  62
+        my $qast := QAST::Regex.new( :rxtype<quant>, :min(0), :max(-1), :node($/) );
  63
+        make backmod($qast, $<backmod>);
  64
+    }
  65
+
  66
+    method quantifier:sym<+>($/) {
  67
+        my $qast := QAST::Regex.new( :rxtype<quant>, :min(1), :max(-1), :node($/) );
  68
+        make backmod($qast, $<backmod>);
  69
+    }
  70
+
  71
+    method quantifier:sym<?>($/) {
  72
+        my $qast := QAST::Regex.new( :rxtype<quant>, :min(0), :max(1), :node($/) );
  73
+        make backmod($qast, $<backmod>);
  74
+    }
  75
+
  76
+    method quantifier:sym<**>($/) {
  77
+        my $qast;
  78
+        $qast := QAST::Regex.new( :rxtype<quant>, :min(+$<min>), :max(-1), :node($/) );
  79
+        if ! $<max> { $qast.max(+$<min>) }
  80
+        elsif $<max>[0] ne '*' { $qast.max(+$<max>[0]); }
  81
+        make backmod($qast, $<backmod>);
  82
+    }
  83
+
  84
+    method metachar:sym<ws>($/) {
  85
+        my $qast := %*RX<s>
  86
+                    ?? QAST::Regex.new(PAST::Node.new('ws'), :rxtype<ws>, :subtype<method>, :node($/))
  87
+                    !! 0;
  88
+        make $qast;
  89
+    }
  90
+
  91
+    method metachar:sym<[ ]>($/) {
  92
+        make $<nibbler>.ast;
  93
+    }
  94
+
  95
+    method metachar:sym<( )>($/) {
  96
+        my $subpast := PAST::Node.new(buildsub($<nibbler>.ast, :anon(1)));
  97
+        my $qast := QAST::Regex.new( $subpast, $<nibbler>.ast, :rxtype('subrule'),
  98
+                                     :subtype('capture'), :node($/) );
  99
+        make $qast;
  100
+    }
  101
+
  102
+    method metachar:sym<'>($/) {
  103
+        my $quote := $<quote_EXPR>.ast;
  104
+        if PAST::Val.ACCEPTS($quote) { $quote := $quote.value; }
  105
+        if QAST::SVal.ACCEPTS($quote) { $quote := $quote.value; }
  106
+        my $qast := QAST::Regex.new( $quote, :rxtype<literal>, :node($/) );
  107
+        $qast.subtype('ignorecase') if %*RX<i>;
  108
+        make $qast;
  109
+    }
  110
+
  111
+    method metachar:sym<">($/) {
  112
+        my $quote := $<quote_EXPR>.ast;
  113
+        if PAST::Val.ACCEPTS($quote) { $quote := $quote.value; }
  114
+        if QAST::SVal.ACCEPTS($quote) { $quote := $quote.value; }
  115
+        my $qast := QAST::Regex.new( $quote, :rxtype<literal>, :node($/) );
  116
+        $qast.subtype('ignorecase') if %*RX<i>;
  117
+        make $qast;
  118
+    }
  119
+
  120
+    method metachar:sym<.>($/) {
  121
+        make QAST::Regex.new( :rxtype<cclass>, :subtype<.>, :node($/) );
  122
+    }
  123
+
  124
+    method metachar:sym<^>($/) {
  125
+        make QAST::Regex.new( :rxtype<anchor>, :subtype<bos>, :node($/) );
  126
+    }
  127
+
  128
+    method metachar:sym<^^>($/) {
  129
+        make QAST::Regex.new( :rxtype<anchor>, :subtype<bol>, :node($/) );
  130
+    }
  131
+
  132
+    method metachar:sym<$>($/) {
  133
+        make QAST::Regex.new( :rxtype<anchor>, :subtype<eos>, :node($/) );
  134
+    }
  135
+
  136
+    method metachar:sym<$$>($/) {
  137
+        make QAST::Regex.new( :rxtype<anchor>, :subtype<eol>, :node($/) );
  138
+    }
  139
+
  140
+    method metachar:sym<lwb>($/) {
  141
+        make QAST::Regex.new( :rxtype<anchor>, :subtype<lwb>, :node($/) );
  142
+    }
  143
+
  144
+    method metachar:sym<rwb>($/) {
  145
+        make QAST::Regex.new( :rxtype<anchor>, :subtype<rwb>, :node($/) );
  146
+    }
  147
+
  148
+    method metachar:sym<from>($/) {
  149
+        make QAST::Regex.new( :rxtype<subrule>, :subtype<capture>,
  150
+            :backtrack<r>,
  151
+            :name<$!from>, PAST::Node.new('!LITERAL', ''), :node($/) );
  152
+    }
  153
+
  154
+    method metachar:sym<to>($/) {
  155
+        make QAST::Regex.new( :rxtype<subrule>, :subtype<capture>,
  156
+            :backtrack<r>,
  157
+            :name<$!to>, PAST::Node.new('!LITERAL', ''), :node($/) );
  158
+    }
  159
+
  160
+    method metachar:sym<bs>($/) {
  161
+        make $<backslash>.ast;
  162
+    }
  163
+
  164
+    method metachar:sym<assert>($/) {
  165
+        make $<assertion>.ast;
  166
+    }
  167
+
  168
+    method metachar:sym<var>($/) {
  169
+        my $qast;
  170
+        my $name := $<pos> ?? +$<pos> !! ~$<name>;
  171
+        if $<quantified_atom> {
  172
+            $qast := $<quantified_atom>[0].ast;
  173
+            if $qast.rxtype eq 'quant' && $qast[0].rxtype eq 'subrule' {
  174
+                self.subrule_alias($qast[0], $name);
  175
+            }
  176
+            elsif $qast.rxtype eq 'subrule' { 
  177
+                self.subrule_alias($qast, $name); 
  178
+            }
  179
+            else {
  180
+                $qast := QAST::Regex.new( $qast, :name($name), 
  181
+                                          :rxtype<subcapture>, :node($/) );
  182
+            }
  183
+        }
  184
+        else {
  185
+            $qast := QAST::Regex.new( PAST::Node.new('!BACKREF', $name),
  186
+                         :rxtype<subrule>, :subtype<method>, :node($/));
  187
+        }
  188
+        make $qast;
  189
+    }
  190
+
  191
+    method metachar:sym<~>($/) {
  192
+        make QAST::Regex.new(
  193
+            $<EXPR>.ast,
  194
+            QAST::Regex.new(
  195
+                $<GOAL>.ast,
  196
+                QAST::Regex.new( PAST::Node.new('FAILGOAL', ~$<GOAL>),
  197
+                                 :rxtype<subrule>, :subtype<method> ),
  198
+                :rxtype<altseq>
  199
+            ),
  200
+            :rxtype<concat>
  201
+        );
  202
+    }
  203
+
  204
+    method backslash:sym<s>($/) {
  205
+        make QAST::Regex.new(:rxtype<cclass>, '.CCLASS_WHITESPACE', 
  206
+                             :subtype($<sym> eq 'n' ?? 'nl' !! ~$<sym>), 
  207
+                             :negate($<sym> le 'Z'), :node($/));
  208
+    }
  209
+
  210
+    method backslash:sym<b>($/) {
  211
+        my $qast := QAST::Regex.new( "\b", :rxtype('enumcharlist'),
  212
+                        :negate($<sym> eq 'B'), :node($/) );
  213
+        make $qast;
  214
+    }
  215
+
  216
+    method backslash:sym<e>($/) {
  217
+        my $qast := QAST::Regex.new( "\c[27]", :rxtype('enumcharlist'),
  218
+                        :negate($<sym> eq 'E'), :node($/) );
  219
+        make $qast;
  220
+    }
  221
+
  222
+    method backslash:sym<f>($/) {
  223
+        my $qast := QAST::Regex.new( "\c[12]", :rxtype('enumcharlist'),
  224
+                        :negate($<sym> eq 'F'), :node($/) );
  225
+        make $qast;
  226
+    }
  227
+
  228
+    method backslash:sym<h>($/) {
  229
+        my $qast := QAST::Regex.new( "\x[09,20,a0,1680,180e,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,200a,202f,205f,3000]", :rxtype('enumcharlist'),
  230
+                        :negate($<sym> eq 'H'), :node($/) );
  231
+        make $qast;
  232
+    }
  233
+
  234
+    method backslash:sym<r>($/) {
  235
+        my $qast := QAST::Regex.new( "\r", :rxtype('enumcharlist'),
  236
+                        :negate($<sym> eq 'R'), :node($/) );
  237
+        make $qast;
  238
+    }
  239
+
  240
+    method backslash:sym<t>($/) {
  241
+        my $qast := QAST::Regex.new( "\t", :rxtype('enumcharlist'),
  242
+                        :negate($<sym> eq 'T'), :node($/) );
  243
+        make $qast;
  244
+    }
  245
+
  246
+    method backslash:sym<v>($/) {
  247
+        my $qast := QAST::Regex.new( "\x[0a,0b,0c,0d,85,2028,2029]",
  248
+                        :rxtype('enumcharlist'),
  249
+                        :negate($<sym> eq 'V'), :node($/) );
  250
+        make $qast;
  251
+    }
  252
+
  253
+    method backslash:sym<o>($/) {
  254
+        my $octlit :=
  255
+            HLL::Actions.ints_to_string( $<octint> || $<octints><octint> );
  256
+        make $<sym> eq 'O'
  257
+             ?? QAST::Regex.new( $octlit, :rxtype('enumcharlist'),
  258
+                                  :negate(1), :node($/) )
  259
+             !! QAST::Regex.new( $octlit, :rxtype('literal'), :node($/) );
  260
+    }
  261
+
  262
+    method backslash:sym<x>($/) {
  263
+        my $hexlit :=
  264
+            HLL::Actions.ints_to_string( $<hexint> || $<hexints><hexint> );
  265
+        make $<sym> eq 'X'
  266
+             ?? QAST::Regex.new( $hexlit, :rxtype('enumcharlist'),
  267
+                                  :negate(1), :node($/) )
  268
+             !! QAST::Regex.new( $hexlit, :rxtype('literal'), :node($/) );
  269
+    }
  270
+
  271
+    method backslash:sym<c>($/) {
  272
+        make QAST::Regex.new( $<charspec>.ast, :rxtype('literal'), :node($/) );
  273
+    }
  274
+
  275
+    method backslash:sym<misc>($/) {
  276
+        my $qast := QAST::Regex.new( ~$/ , :rxtype('literal'), :node($/) );
  277
+        make $qast;
  278
+    }
  279
+
  280
+    method assertion:sym<?>($/) {
  281
+        my $qast;
  282
+        if $<assertion> {
  283
+            $qast := $<assertion>.ast;
  284
+            $qast.subtype('zerowidth');
  285
+        }
  286
+        else {
  287
+            $qast := QAST::Regex.new( :rxtype<anchor>, :subtype<pass>, :node($/) );
  288
+        }
  289
+        make $qast;
  290
+    }
  291
+
  292
+    method assertion:sym<!>($/) {
  293
+        my $qast;
  294
+        if $<assertion> {
  295
+            $qast := $<assertion>.ast;
  296
+            $qast.negate( !$qast.negate );
  297
+            $qast.subtype('zerowidth');
  298
+        }
  299
+        else {
  300
+            $qast := QAST::Regex.new( :rxtype<anchor>, :subtype<fail>, :node($/) );
  301
+        }
  302
+        make $qast;
  303
+    }
  304
+
  305
+    method assertion:sym<|>($/) {
  306
+        my $qast;
  307
+        my $name := ~$<identifier>;
  308
+        if $name eq 'c' {
  309
+            # codepoint boundaries alway match in
  310
+            # our current Unicode abstraction level
  311
+            $qast := 0;
  312
+        }
  313
+        elsif $name eq 'w' {
  314
+            $qast := QAST::Regex.new(:rxtype<subrule>, :subtype<method>,
  315
+                                     :node($/), PAST::Node.new('wb'), 
  316
+                                     :name('') );
  317
+        }
  318
+        make $qast;
  319
+    }
  320
+
  321
+    method assertion:sym<method>($/) {
  322
+        my $qast := $<assertion>.ast;
  323
+        $qast.subtype('method');
  324
+        $qast.name('');
  325
+        make $qast;
  326
+    }
  327
+
  328
+    method assertion:sym<name>($/) {
  329
+        my $name := ~$<longname>;
  330
+        my $qast;
  331
+        if $<assertion> {
  332
+            $qast := $<assertion>[0].ast;
  333
+            self.subrule_alias($qast, $name);
  334
+        }
  335
+        elsif $name eq 'sym' {
  336
+            my $loc := nqp::index(%*RX<name>, ':sym<');
  337
+            $loc := nqp::index(%*RX<name>, ':sym«')
  338
+                if $loc < 0;
  339
+            my $rxname := pir::chopn__Ssi(nqp::substr(%*RX<name>, $loc + 5), 1);
  340
+            $qast := QAST::Regex.new(:name('sym'), :rxtype<subcapture>, :node($/),
  341
+                QAST::Regex.new(:rxtype<literal>, $rxname, :node($/)));
  342
+        }
  343
+        else {
  344
+            $qast := QAST::Regex.new(:rxtype<subrule>, :subtype<capture>,
  345
+                                     :node($/), PAST::Node.new($name), 
  346
+                                     :name($name) );
  347
+            if $<arglist> {
  348
+                for $<arglist>[0].ast.list { $qast[0].push( $_ ) }
  349
+            }
  350
+            elsif $<nibbler> {
  351
+                $name eq 'after' ??
  352
+                    $qast[0].push(buildsub(self.flip_ast($<nibbler>[0].ast), :anon(1))) !!
  353
+                    $qast[0].push(buildsub($<nibbler>[0].ast, :anon(1)));
  354
+            }
  355
+        }
  356
+        make $qast;
  357
+    }
  358
+
  359
+    method assertion:sym<[>($/) {
  360
+        my $clist := $<cclass_elem>;
  361
+        my $qast  := $clist[0].ast;
  362
+        if $qast.negate && $qast.rxtype eq 'subrule' {
  363
+            $qast.subtype('zerowidth');
  364
+            $qast := QAST::Regex.new(:rxtype<concat>, :node($/),
  365
+                                     $qast, 
  366
+                                     QAST::Regex.new( :rxtype<cclass>, :subtype<.> ));
  367
+        }
  368
+        my $i := 1;
  369
+        my $n := +$clist;
  370
+        while $i < $n {
  371
+            my $ast := $clist[$i].ast;
  372
+            if $ast.negate {
  373
+                $ast.subtype('zerowidth');
  374
+                $qast := QAST::Regex.new( $ast, $qast, :rxtype<concat>, :node($/));
  375
+            }
  376
+            else {
  377
+                $qast := QAST::Regex.new( $qast, $ast, :rxtype<altseq>, :node($/));
  378
+            }
  379
+            $i++;
  380
+        }
  381
+        make $qast;
  382
+    }
  383
+    
  384
+    method arg($/) {
  385
+        make $<quote_EXPR> ?? $<quote_EXPR>.ast !! +$<val>;
  386
+    }
  387
+
  388
+    method arglist($/) {
  389
+        my $past := PAST::Op.new( :pasttype('list') );
  390
+        for $<arg> { $past.push( $_.ast ); }
  391
+        make $past;
  392
+    }
  393
+
  394
+    method cclass_elem($/) {
  395
+        my $str := '';
  396
+        my $qast;
  397
+        if $<name> {
  398
+            my $name := ~$<name>;
  399
+            $qast := QAST::Regex.new( PAST::Node.new($name), :rxtype<subrule>, :subtype<method>,
  400
+                                      :negate( $<sign> eq '-' ), :node($/) );
  401
+        }
  402
+        elsif $<uniprop> {
  403
+            my $uniprop := ~$<uniprop>;
  404
+            $qast := QAST::Regex.new( $uniprop, :rxtype<uniprop>,
  405
+                                      :negate( $<sign> eq '-' && $<invert> ne '!' # $<sign> ^^ $<invert>
  406
+                                        || $<sign> ne '-' && $<invert> eq '!' ), :node($/) );
  407
+        }
  408
+        else {
  409
+            my @alts;
  410
+            for $<charspec> {
  411
+                if $_[1] {
  412
+                    my $node;
  413
+                    my $lhs;
  414
+                    my $rhs;
  415
+                    if $_[0]<backslash> {
  416
+                        $node := $_[0]<backslash>.ast;
  417
+                        $/.CURSOR.panic("Illegal range endpoint in regex: " ~ ~$_)
  418
+                            if $node.rxtype ne 'literal' && $node.rxtype ne 'enumcharlist'
  419
+                                || $node.negate || nqp::chars($node[0]) != 1;
  420
+                        $lhs := $node[0];
  421
+                    }
  422
+                    else {
  423
+                        $lhs := ~$_[0][0];
  424
+                    }
  425
+                    if $_[1][0]<backslash> {
  426
+                        $node := $_[1][0]<backslash>.ast;
  427
+                        $/.CURSOR.panic("Illegal range endpoint in regex: " ~ ~$_)
  428
+                            if $node.rxtype ne 'literal' && $node.rxtype ne 'enumcharlist'
  429
+                                || $node.negate || nqp::chars($node[0]) != 1;
  430
+                        $rhs := $node[0];
  431
+                    }
  432
+                    else {
  433
+                        $rhs := ~$_[1][0][0];
  434
+                    }
  435
+                    my $ord0 := nqp::ord($lhs);
  436
+                    my $ord1 := nqp::ord($rhs);
  437
+                    $/.CURSOR.panic("Illegal reversed character range in regex: " ~ ~$_)
  438
+                        if $ord0 > $ord1;
  439
+                    $str := nqp::concat($str, nqp::chr($ord0++)) while $ord0 <= $ord1;
  440
+                }
  441
+                elsif $_[0]<backslash> {
  442
+                    my $bs := $_[0]<backslash>.ast;
  443
+                    $bs.negate(!$bs.negate) if $<sign> eq '-';
  444
+                    @alts.push($bs);
  445
+                }
  446
+                else { $str := $str ~ ~$_[0]; }
  447
+            }
  448
+            @alts.push(QAST::Regex.new( $str, :rxtype<enumcharlist>, :node($/), :negate( $<sign> eq '-' ) ))
  449
+                if nqp::chars($str);
  450
+            $qast := +@alts == 1 ?? @alts[0] !!
  451
+                $<sign> eq '-' ??
  452
+                    QAST::Regex.new( :rxtype<concat>, :node($/),
  453
+                        QAST::Regex.new( :rxtype<conj>, :subtype<zerowidth>, |@alts ), 
  454
+                        QAST::Regex.new( :rxtype<cclass>, :subtype<.> ) ) !!
  455
+                    QAST::Regex.new( :rxtype<altseq>, |@alts );
  456
+        }
  457
+        #$qast.negate( $<sign> eq '-' );
  458
+        make $qast;
  459
+    }
  460
+
  461
+    method mod_internal($/) {
  462
+        my $n := $<n>[0] gt '' ?? +$<n>[0] !! 1;
  463
+        %*RX{ ~$<mod_ident><sym> } := $n;
  464
+        make 0;
  465
+    }
  466
+
  467
+    sub backmod($ast, $backmod) {
  468
+        if $backmod eq ':' { $ast.backtrack('r') }
  469
+        elsif $backmod eq ':?' || $backmod eq '?' { $ast.backtrack('f') }
  470
+        elsif $backmod eq ':!' || $backmod eq '!' { $ast.backtrack('g') }
  471
+        $ast;
  472
+    }
  473
+
  474
+    our sub buildsub($qast, $block = PAST::Block.new(:blocktype<method>), :$anon) {
  475
+        my $blockid := $block.subid;
  476
+        my $hashpast := PAST::Op.new( :pasttype<hash> );
  477
+        for capnames($qast, 0) {
  478
+            if $_.key gt '' { 
  479
+                $hashpast.push($_.key); 
  480
+                $hashpast.push(
  481
+                    nqp::iscclass(pir::const::CCLASS_NUMERIC, $_.key, 0) + ($_.value > 1) * 2); 
  482
+            }
  483
+        }
  484
+        my $initpast := PAST::Stmts.new();
  485
+        my $capblock := PAST::Block.new( :hll<nqp>, :namespace(['Sub']), :lexical(0),
  486
+                                         :name($blockid ~ '_caps'),  $hashpast );
  487
+        $initpast.push(PAST::Stmt.new($capblock));
  488
+
  489
+        my $nfapast := QRegex::NFA.new.addnode($qast).past;
  490
+        if $nfapast {
  491
+            my $nfablock := PAST::Block.new( 
  492
+                                :hll<nqp>, :namespace(['Sub']), :lexical(0),
  493
+                                :name($blockid ~ '_nfa'), $nfapast);
  494
+            $initpast.push(PAST::Stmt.new($nfablock));
  495
+        }
  496
+        alt_nfas($qast, $blockid, $initpast);
  497
+
  498
+        unless $block.symbol('$¢') {
  499
+            $initpast.push(PAST::Var.new(:name<$¢>, :scope<lexical>, :isdecl(1)));
  500
+            $block.symbol('$¢', :scope<lexical>);
  501
+        }
  502
+
  503
+        $block<orig_qast> := $qast;
  504
+        
  505
+        $qast := QAST::Regex.new( :rxtype<concat>,
  506
+                     QAST::Regex.new( :rxtype<scan> ),
  507
+                     $qast,
  508
+                     ($anon ??
  509
+                          QAST::Regex.new( :rxtype<pass> ) !!
  510
+                          QAST::Regex.new( :rxtype<pass>, :name(%*RX<name>) )));
  511
+        $block.push($initpast);
  512
+        $block.push(PAST::QAST.new($qast));
  513
+        $block;
  514
+    }
  515
+    
  516
+    our sub qbuildsub($qast, $block = QAST::Block.new(), :$anon, :$addself) {
  517
+        my $blockid := $block.cuid;
  518
+        my $hashpast := QAST::Op.new( :op<hash> );
  519
+        for capnames($qast, 0) {
  520
+            if $_.key gt '' { 
  521
+                $hashpast.push(QAST::SVal.new( :value($_.key) )); 
  522
+                $hashpast.push(QAST::IVal.new( :value(
  523
+                    nqp::iscclass(pir::const::CCLASS_NUMERIC, $_.key, 0) + ($_.value > 1) * 2) )); 
  524
+            }
  525
+        }
  526
+        my $initpast := QAST::Stmts.new();
  527
+        if $addself {
  528
+            $initpast.push(QAST::Var.new( :name('self'), :scope('local'), :decl('param') ));
  529
+        }
  530
+        my $capblock := QAST::BlockMemo.new( :name($blockid ~ '_caps'),  $hashpast );
  531
+        $initpast.push(QAST::Stmt.new($capblock));
  532
+
  533
+        my $nfapast := QRegex::NFA.new.addnode($qast).qast;
  534
+        if $nfapast {
  535
+            my $nfablock := QAST::BlockMemo.new( :name($blockid ~ '_nfa'), $nfapast);
  536
+            $initpast.push(QAST::Stmt.new($nfablock));
  537
+        }
  538
+        qalt_nfas($qast, $blockid, $initpast);
  539
+
  540
+        unless $block.symbol('$¢') {
  541
+            $initpast.push(QAST::Var.new(:name<$¢>, :scope<lexical>, :decl('var')));
  542
+            $block.symbol('$¢', :scope<lexical>);
  543
+        }
  544
+
  545
+        $block<orig_qast> := $qast;
  546
+        
  547
+        $qast := QAST::Regex.new( :rxtype<concat>,
  548
+                     QAST::Regex.new( :rxtype<scan> ),
  549
+                     $qast,
  550
+                     ($anon ??
  551
+                          QAST::Regex.new( :rxtype<pass> ) !!
  552
+                          QAST::Regex.new( :rxtype<pass>, :name(%*RX<name>) )));
  553
+        $block.push($initpast);
  554
+        $block.push($qast);
  555
+        $block;
  556
+    }
  557
+
  558
+    sub capnames($ast, $count) {
  559
+        my %capnames;
  560
+        my $rxtype := $ast.rxtype;
  561
+        if $rxtype eq 'concat' {
  562
+            for $ast.list {
  563
+                my %x := capnames($_, $count);
  564
+                for %x { %capnames{$_.key} := +%capnames{$_.key} + $_.value; }
  565
+                $count := %x{''};
  566
+            } 
  567
+        }
  568
+        elsif $rxtype eq 'altseq' || $rxtype eq 'alt' {
  569
+            my $max := $count;
  570
+            for $ast.list {
  571
+                my %x := capnames($_, $count);
  572
+                for %x {
  573
+                    %capnames{$_.key} := +%capnames{$_.key} < 2 && %x{$_.key} == 1 ?? 1 !! 2;
  574
+                }
  575
+                $max := %x{''} if %x{''} > $max;
  576
+            }
  577
+            $count := $max;
  578
+        }
  579
+        elsif $rxtype eq 'subrule' && $ast.subtype eq 'capture' {
  580
+            my $name := $ast.name;
  581
+            if $name eq '' { $name := $count; $ast.name($name); }
  582
+            my @names := nqp::split('=', $name);
  583
+            for @names {
  584
+                if $_ eq '0' || $_ > 0 { $count := $_ + 1; }
  585
+                %capnames{$_} := 1;
  586
+            }
  587
+        }
  588
+        elsif $rxtype eq 'subcapture' {
  589
+            for nqp::split(' ', $ast.name) {
  590
+                if $_ eq '0' || $_ > 0 { $count := $_ + 1; }
  591
+                %capnames{$_} := 1;
  592
+            }
  593
+            my %x := capnames($ast[0], $count);
  594
+            for %x { %capnames{$_.key} := +%capnames{$_.key} + %x{$_.key} }
  595
+            $count := %x{''};
  596
+        }
  597
+        elsif $rxtype eq 'quant' {
  598
+            my %astcap := capnames($ast[0], $count);
  599
+            for %astcap { %capnames{$_} := 2 }
  600
+            $count := %astcap{''};
  601
+        }
  602
+        %capnames{''} := $count;
  603
+        nqp::deletekey(%capnames, '$!from');
  604
+        nqp::deletekey(%capnames, '$!to');
  605
+        %capnames;
  606
+    }
  607
+    
  608
+    sub alt_nfas($ast, $subid, $initpast) {
  609
+        my $rxtype := $ast.rxtype;
  610
+        if $rxtype eq 'alt' {
  611
+            my $nfapast := PAST::Op.new( :pasttype('list') );
  612
+            $ast.name(PAST::Node.unique('alt_nfa_') ~ '_' ~ ~nqp::time_n());
  613
+            for $ast.list {
  614
+                alt_nfas($_, $subid, $initpast);
  615
+                $nfapast.push(QRegex::NFA.new.addnode($_).past(:non_empty));
  616
+            }
  617
+            my $nfablock := PAST::Block.new(
  618
+                                :hll<nqp>, :namespace(['Sub']), :lexical(0),
  619
+                                :name($subid ~ '_' ~ $ast.name), $nfapast);
  620
+            $initpast.push(PAST::Stmt.new($nfablock));
  621
+        }
  622
+        elsif $rxtype eq 'subcapture' || $rxtype eq 'quant' {
  623
+            alt_nfas($ast[0], $subid, $initpast)
  624
+        }
  625
+        elsif $rxtype eq 'concat' || $rxtype eq 'altseq' || $rxtype eq 'conj' || $rxtype eq 'conjseq' {
  626
+            for $ast.list { alt_nfas($_, $subid, $initpast) }
  627
+        }
  628
+    }
  629
+    
  630
+    sub qalt_nfas($ast, $subid, $initpast) {
  631
+        my $rxtype := $ast.rxtype;
  632
+        if $rxtype eq 'alt' {
  633
+            my $nfapast := QAST::Op.new( :op('list') );
  634
+            $ast.name(QAST::Node.unique('alt_nfa_') ~ '_' ~ ~nqp::time_n());
  635
+            for $ast.list {
  636
+                qalt_nfas($_, $subid, $initpast);
  637
+                $nfapast.push(QRegex::NFA.new.addnode($_).qast(:non_empty));
  638
+            }
  639
+            my $nfablock := QAST::BlockMemo.new( :name($subid ~ '_' ~ $ast.name), $nfapast);
  640
+            $initpast.push(QAST::Stmt.new($nfablock));
  641
+        }
  642
+        elsif $rxtype eq 'subcapture' || $rxtype eq 'quant' {
  643
+            qalt_nfas($ast[0], $subid, $initpast)
  644
+        }
  645
+        elsif $rxtype eq 'concat' || $rxtype eq 'altseq' || $rxtype eq 'conj' || $rxtype eq 'conjseq' {
  646
+            for $ast.list { qalt_nfas($_, $subid, $initpast) }
  647
+        }
  648
+    }
  649
+
  650
+    method subrule_alias($ast, $name) {
  651
+        if $ast.name gt '' { $ast.name( $name ~ '=' ~ $ast.name ); }
  652
+        else { $ast.name($name); }
  653
+        $ast.subtype('capture');
  654
+    }
  655
+
  656
+    method flip_ast($qast) {
  657
+        return $qast unless nqp::istype($qast, QAST::Regex);
  658
+        if $qast.rxtype eq 'literal' {
  659
+            $qast[0] := $qast[0].reverse();
  660
+        }
  661
+        elsif $qast.rxtype eq 'concat' {
  662
+            my @tmp;
  663
+            while +@($qast) { @tmp.push(@($qast).shift) }
  664
+            while @tmp      { @($qast).push(self.flip_ast(@tmp.pop)) }
  665
+        }
  666
+        elsif $qast.rxtype eq 'pastnode' {
  667
+            # Don't go exploring these
  668
+        }
  669
+        else {
  670
+            for @($qast) { self.flip_ast($_) }
  671
+        }
  672
+        $qast
  673
+    }
  674
+}
11  src/QRegex/P5Regex/Compiler.nqp
... ...
@@ -0,0 +1,11 @@
  1
+class QRegex::P5Regex::Compiler is HLL::Compiler {
  2
+}
  3
+
  4
+my $p5regex := QRegex::P5Regex::Compiler.new();
  5
+$p5regex.language('QRegex::P5Regex');
  6
+$p5regex.parsegrammar(QRegex::P5Regex::Grammar);
  7
+$p5regex.parseactions(QRegex::P5Regex::Actions);
  8
+
  9
+sub MAIN(@ARGS) {
  10
+    $p5regex.command_line(@ARGS, :encoding('utf8'), :transcode('ucs4'));
  11
+}
222  src/QRegex/P5Regex/Grammar.nqp
... ...
@@ -0,0 +1,222 @@
  1
+use QRegex;
  2
+use NQPHLL;
  3
+use QAST;
  4
+use PASTRegex;
  5
+
  6
+grammar QRegex::P5Regex::Grammar is HLL::Grammar {
  7
+    token TOP {
  8
+        :my %*RX;
  9
+        <nibbler>
  10
+        [ $ || <.panic: 'Confused'> ]
  11
+    }
  12
+
  13
+    token nibbler {
  14
+        :my $OLDRX := pir::find_dynamic_lex__Ps('%*RX');
  15
+        :my %*RX;
  16
+        {
  17
+            for $OLDRX { %*RX{$_.key} := $_.value; }
  18
+        }
  19
+        [ <.ws> ['||'|'|'|'&&'|'&'] ]?
  20
+        <termaltseq>
  21
+    }
  22
+    
  23
+    token termaltseq {
  24
+        <termish>
  25
+        [ '|' <![|]> [ <termish> || <.panic: 'Null pattern not allowed'> ] ]*
  26
+    }
  27
+    
  28
+    token termish {
  29
+        <.ws>  # XXX assuming old /x here?
  30
+        <noun=.quantified_atom>+
  31
+    }
  32
+    
  33
+    token quantified_atom {
  34
+        <atom>
  35
+        [ <.ws> <quantifier=p5quantifier> ]?
  36
+        <.ws>
  37
+    }
  38
+
  39
+    proto token p5metachar { <...> }
  40
+
  41
+    proto token p5backslash { <...> }
  42
+
  43
+    proto token p5assertion { <...> }
  44
+
  45
+    proto token p5quantifier { <...> }
  46
+
  47
+    proto token p5mod_internal { <...> }
  48
+
  49
+    token ws { [ \s+ | '#' \N* ]* }
  50
+
  51
+    # XXX Below here is straight from P6Regex and unreviewed.
  52
+
  53
+    token normspace { <?before \s | '#' > <.ws> }
  54
+
  55
+    token identifier { <.ident> [ <[\-']> <.ident> ]* }
  56
+
  57
+    token arg {
  58
+        [
  59
+        | <?[']> <quote_EXPR: ':q'>
  60
+        | <?["]> <quote_EXPR: ':qq'>
  61
+        | $<val>=[\d+]
  62
+        ]
  63
+    }
  64
+
  65
+    rule arglist { <arg> [ ',' <arg>]* }
  66
+    
  67
+    token atom {
  68
+        # :dba('regex atom')
  69
+        [
  70
+        | \w [ \w+! <?before \w> ]?
  71
+        | <metachar>
  72
+        ]
  73
+    }
  74
+
  75
+    proto token quantifier { <...> }
  76
+    token quantifier:sym<*> { <sym> <backmod> }
  77
+    token quantifier:sym<+> { <sym> <backmod> }
  78
+    token quantifier:sym<?> { <sym> <backmod> }
  79
+    token quantifier:sym<{N,M}> { {} '{' (\d+) (','?) (\d*) '}'
  80
+        <.obs: '{N,M} as general quantifier', '** N..M (or ** N..*)'>
  81
+    }
  82
+    token quantifier:sym<**> {
  83
+        <sym> <normspace>? <backmod> <normspace>?
  84
+        [
  85
+        ||  $<min>=[\d+] 
  86
+            [   '..' 
  87
+                $<max>=[ 
  88
+                       || \d+ 
  89
+                       || '*' 
  90
+                       || <.panic: "Only integers or '*' allowed as range quantifier endpoint"> 
  91
+                       ] 
  92
+            ]?
  93
+        ]
  94
+    }
  95
+
  96
+    token backmod { ':'? [ '?' | '!' | <!before ':'> ] }
  97
+
  98
+    proto token metachar { <...> }
  99
+    token metachar:sym<ws> { <.normspace> }
  100
+    token metachar:sym<[ ]> { '[' <nibbler> ']' }
  101
+    token metachar:sym<( )> { '(' <nibbler> ')' }
  102
+    token metachar:sym<'> { <?[']> <quote_EXPR: ':q'> }
  103
+    token metachar:sym<"> { <?["]> <quote_EXPR: ':qq'> }
  104
+    token metachar:sym<.> { <sym> }
  105
+    token metachar:sym<^> { <sym> }
  106
+    token metachar:sym<^^> { <sym> }
  107
+    token metachar:sym<$> { <sym> }
  108
+    token metachar:sym<$$> { <sym> }
  109
+    token metachar:sym<:::> { <sym> <.panic: '::: not yet implemented'> }
  110
+    token metachar:sym<::> { <sym> <.panic: ':: not yet implemented'> }
  111
+    token metachar:sym<lwb> { $<sym>=['<<'|'«'] }
  112
+    token metachar:sym<rwb> { $<sym>=['>>'|'»'] }
  113
+    token metachar:sym<from> { '<(' }
  114
+    token metachar:sym<to>   { ')>' }
  115
+    token metachar:sym<bs> { \\ <backslash> }
  116
+    token metachar:sym<mod> { <mod_internal> }
  117
+    token metachar:sym<quantifier> {
  118
+        <quantifier> <.panic: 'Quantifier quantifies nothing'>
  119
+    }
  120
+
  121
+    ## we cheat here, really should be regex_infix:sym<~>
  122
+    token metachar:sym<~> {
  123
+        <sym>
  124
+        <.ws> <GOAL=.quantified_atom>
  125
+        <.ws> <EXPR=.quantified_atom>
  126
+    }
  127
+
  128
+    token metachar:sym<{*}> {
  129
+        <sym>
  130
+        [ \h* '#= ' \h* $<key>=[\S+ [\h+ \S+]*] ]?
  131
+    }
  132
+    token metachar:sym<assert> {
  133
+        '<' <assertion>
  134
+        [ '>' || <.panic: 'regex assertion not terminated by angle bracket'> ]
  135
+    }
  136
+
  137
+    token metachar:sym<var> {
  138
+        [
  139
+        | '$<' $<name>=[<-[>]>+] '>'
  140
+        | '$' $<pos>=[\d+]
  141
+        ]
  142
+
  143
+        [ <.ws> '=' <.ws> <quantified_atom> ]?
  144
+    }
  145
+
  146
+    token metachar:sym<PIR> {
  147
+        ':PIR{{' $<pir>=[.*?] '}}'
  148
+    }
  149
+
  150
+    proto token backslash { <...> }
  151
+    token backslash:sym<s> { $<sym>=[<[dDnNsSwW]>] }
  152
+    token backslash:sym<b> { $<sym>=[<[bB]>] }
  153
+    token backslash:sym<e> { $<sym>=[<[eE]>] }
  154
+    token backslash:sym<f> { $<sym>=[<[fF]>] }
  155
+    token backslash:sym<h> { $<sym>=[<[hH]>] }
  156
+    token backslash:sym<r> { $<sym>=[<[rR]>] }
  157
+    token backslash:sym<t> { $<sym>=[<[tT]>] }
  158
+    token backslash:sym<v> { $<sym>=[<[vV]>] }
  159
+    token backslash:sym<o> { $<sym>=[<[oO]>] [ <octint> | '[' <octints> ']' ] }
  160
+    token backslash:sym<x> { $<sym>=[<[xX]>] [ <hexint> | '[' <hexints> ']' ] }
  161
+    token backslash:sym<c> { $<sym>=[<[cC]>] <charspec> }
  162
+    token backslash:sym<A> { 'A' <.obs: '\\A as beginning-of-string matcher', '^'> }
  163
+    token backslash:sym<z> { 'z' <.obs: '\\z as end-of-string matcher', '$'> }
  164
+    token backslash:sym<Z> { 'Z' <.obs: '\\Z as end-of-string matcher', '\\n?$'> }
  165
+    token backslash:sym<Q> { 'Q' <.obs: '\\Q as quotemeta', 'quotes or literal variable match'> }
  166
+    token backslash:sym<unrec> { {} \w <.panic: 'Unrecognized backslash sequence'> }
  167
+    token backslash:sym<misc> { \W }
  168
+
  169
+    proto token assertion { <...> }
  170
+
  171
+    token assertion:sym<?> { '?' [ <?before '>' > | <assertion> ] }
  172
+    token assertion:sym<!> { '!' [ <?before '>' > | <assertion> ] }
  173
+    token assertion:sym<|> { '|' <identifier> }
  174
+
  175
+    token assertion:sym<method> {
  176
+        '.' <assertion>
  177
+    }
  178
+
  179
+    token assertion:sym<name> {
  180
+        <longname=.identifier>
  181
+            [
  182
+            | <?before '>'>
  183
+            | '=' <assertion>
  184
+            | ':' <arglist>
  185
+            | '(' <arglist> ')'
  186
+            | <.normspace> <nibbler>
  187
+            ]?
  188
+    }
  189
+
  190
+    token assertion:sym<[> { <?before '['|'+'|'-'|':'> <cclass_elem>+ }
  191
+
  192
+    token cclass_elem {
  193
+        $<sign>=['+'|'-'|<?>]
  194
+        <.normspace>?
  195
+        [
  196
+        | '[' $<charspec>=(
  197
+                  || \s* '-' <!before \s* ']'> <.obs: '- as character range','.. for range, for explicit - in character class, escape it or place as last thing'>
  198
+                  || \s* ( '\\' <backslash> || (<-[\]\\]>) )
  199
+                     [
  200
+                         \s* '..' \s*
  201
+                         ( '\\' <backslash> || (<-[\]\\]>) )
  202
+                     ]?
  203
+              )*
  204
+          \s* ']'
  205
+        | $<name>=[\w+]
  206
+        | ':' $<invert>=['!'|<?>] $<uniprop>=[\w+]
  207
+        ]
  208
+        <.normspace>?
  209
+    }
  210
+
  211
+    token mod_internal {
  212
+        [
  213
+        | ':' $<n>=('!' | \d+)**1  <mod_ident> »
  214
+        | ':' <mod_ident> [ '(' $<n>=[\d+] ')' ]?
  215
+        ]
  216
+    }
  217
+
  218
+    proto token mod_ident { <...> }
  219
+    token mod_ident:sym<ignorecase> { $<sym>=[i] 'gnorecase'? }
  220
+    token mod_ident:sym<ratchet>    { $<sym>=[r] 'atchet'? }
  221
+    token mod_ident:sym<sigspace>   { $<sym>=[s] 'igspace'? }
  222
+}
19  tools/build/Makefile.in
@@ -99,6 +99,11 @@ P6QREGEX_SOURCES = \
99 99
   src/QRegex/P6Regex/Actions.nqp \
100 100
   src/QRegex/P6Regex/Compiler.nqp \
101 101
 
  102
+P5QREGEX_SOURCES = \
  103
+  src/QRegex/P5Regex/Grammar.nqp \
  104
+  src/QRegex/P5Regex/Actions.nqp \
  105
+  src/QRegex/P5Regex/Compiler.nqp \
  106
+
102 107
 QREGEX_COMBINED = QRegex.nqp
103 108
 QREGEX_PIR = QRegex.pir
104 109
 QREGEX_PBC = QRegex.pbc
@@ -151,6 +156,10 @@ P6QREGEX_PBC          = NQPP6QRegex.pbc
151 156
 P6QREGEX_COMBINED     = gen/NQPP6QRegex.pm
152 157
 P6QREGEX_COMBINED_PIR = gen/NQPP6QRegex.pir
153 158
 
  159
+P5QREGEX_PBC          = NQPP5QRegex.pbc
  160
+P5QREGEX_COMBINED     = gen/NQPP5QRegex.pm
  161
+P5QREGEX_COMBINED_PIR = gen/NQPP5QRegex.pir
  162
+
154 163
 NQP_COMBINED     = gen/NQP.pm
155 164
 NQP_COMBINED_PIR = gen/NQP.pir
156 165
 NQP_PBC          = nqp.pbc
@@ -518,7 +527,7 @@ CLEANUPS = \
518 527
   3rdparty/sha1/*$(O) \
519 528
   $(DYNEXT_DIR)/*$(LOAD_EXT) \
520 529
 
521  
-all: $(NQP_EXE)
  530
+all: $(NQP_EXE) $(P5QREGEX_PBC)
522 531
 
523 532
 install: all
524 533
 	$(MKPATH)                   $(DESTDIR)$(PARROT_LIBRARY_DIR)
@@ -527,6 +536,7 @@ install: all
527 536
 	$(MKPATH)                   $(DESTDIR)$(NQP_LANG_DIR)/lib
528 537
 	$(CP)  $(QAST_PBC)          $(DESTDIR)$(NQP_LANG_DIR)/lib/$(QAST_PBC)
529 538
 	$(CP)  $(P6QREGEX_PBC)      $(DESTDIR)$(NQP_LANG_DIR)/lib/$(P6QREGEX_PBC)
  539
+	$(CP)  $(P5QREGEX_PBC)      $(DESTDIR)$(NQP_LANG_DIR)/lib/$(P5QREGEX_PBC)
530 540
 	$(CP)  $(HLL_PBC)           $(DESTDIR)$(NQP_LANG_DIR)/lib/$(HLL_PBC)
531 541
 	$(CP)  $(CORE_SETTING_PBC)  $(DESTDIR)$(NQP_LANG_DIR)/lib/$(CORE_SETTING_PBC)
532 542
 	$(CP)  $(NQP_MO_PBC)        $(DESTDIR)$(NQP_LANG_DIR)/lib/$(NQP_MO_PBC)
@@ -715,6 +725,13 @@ $(ALL_PBCS): $(STAGE2_PBCS)
715 725
 $(NQP_EXE): $(NQP_PBC) $(PARROT_DLL_COPY)
716 726
 	$(PBC_TO_EXE) $(NQP_PBC)
717 727
 
  728
+$(P5QREGEX_PBC): $(NQP_EXE) $(P5QREGEX_SOURCES)
  729
+	$(MKPATH) $(STAGE2)/gen
  730
+	$(PERL) tools/build/gen-cat.pl $(P5QREGEX_SOURCES) > $(STAGE2)/$(P5QREGEX_COMBINED)
  731
+	$(NQP_EXE) --target=pir --output=$(STAGE2)/$(P5QREGEX_COMBINED_PIR) \
  732
+	    $(STAGE2)/$(P5QREGEX_COMBINED)
  733
+	$(PARROT) -o $(P5QREGEX_PBC) $(STAGE2)/$(P5QREGEX_COMBINED_PIR)
  734
+
718 735
 @make_dllcopy@
719 736
 
720 737
 $(DYNEXT_TARGET): $(DYNPMC) $(DYNOPS) $(OPS_DIR)/$(BIGINT_OPS)$(LOAD_EXT) $(OPS_DIR)/$(DYNCALL_OPS)$(LOAD_EXT)

0 notes on commit b1a3a46

Please sign in to comment.
Something went wrong with that request. Please try again.