@@ -41,6 +41,24 @@ class QRegex::Optimizer {
41
41
$ node ;
42
42
}
43
43
44
+ method dont_scan ($ node ) {
45
+ while + @ ($ node ) >= 1 {
46
+ if nqp ::istype($ node , QAST ::Regex) {
47
+ if nqp ::istype($ node [0 ], QAST ::Regex) && $ node [0 ]. rxtype eq ' scan' {
48
+ $ node . shift ;
49
+ last ;
50
+ }
51
+ if $ node . rxtype eq ' concat' {
52
+ $ node := $ node [0 ];
53
+ } else {
54
+ last ;
55
+ }
56
+ } else {
57
+ last ;
58
+ }
59
+ }
60
+ }
61
+
44
62
method visit_concat ($ node ) {
45
63
# a single-child concat can become the child itself
46
64
self . visit_children($ node );
@@ -70,38 +88,52 @@ class QRegex::Optimizer {
70
88
}
71
89
72
90
method simplify_assertion ($ qast ) {
73
- if $ qast . subtype eq ' zerowidth'
74
- && nqp ::istype($ qast [0 ], QAST ::Node) && nqp ::istype($ qast [0 ][0 ], QAST ::SVal)
75
- && $ qast [0 ][0 ]. value eq ' before' {
76
- if nqp ::istype($ qast [0 ], QAST ::Node) && nqp ::istype($ qast [0 ][1 ], QAST ::Block)
77
- && nqp ::istype((my $ regex := $ qast [0 ][1 ][2 ]), QAST ::Regex)
78
- && $ regex . rxtype eq ' concat' && $ regex [0 ]. rxtype eq ' scan' && $ regex [2 ]. rxtype eq ' pass' {
79
- self . visit_children($ regex );
80
- my $ simple := $ regex [1 ];
81
- my $ result := 0 ;
82
- if $ simple . rxtype eq ' literal' && $ simple . rxtype ne ' ignorecase' && ! $ qast . negate {
83
- $ result := QAST ::Regex. new (: rxtype<literal >, : subtype<zerowidth >, : node($ simple . node),
84
- : negate($ simple . negate),
85
- $ simple [0 ]);
86
- } elsif $ simple . rxtype eq ' enumcharlist' && $ simple . rxtype ne ' ignorecase' {
87
- $ result := QAST ::Regex. new (: rxtype<enumcharlist >, : subtype<zerowidth >, : node($ simple . node),
88
- : negate(nqp ::bitxor_i($ qast . negate, $ simple . negate)),
89
- $ simple [0 ]);
90
- } elsif $ simple . rxtype eq ' charrange' && $ simple . rxtype ne ' ignorecase' {
91
- $ result := QAST ::Regex. new (: rxtype<charrange >, : subtype<zerowidth >, : node($ simple . node),
92
- : negate(nqp ::bitxor_i($ qast . negate, $ simple . negate)),
93
- $ simple [0 ],
94
- $ simple [1 ],
95
- $ simple [2 ]);
96
- } elsif $ simple . rxtype eq ' cclass' && $ simple . rxtype ne ' ignorecase' {
97
- $ result := QAST ::Regex. new (: rxtype<cclass >, : subtype<zerowidth >, : node($ simple . node),
98
- : negate(nqp ::bitxor_i($ qast . negate, $ simple . negate)), : name($ simple . name ));
99
- }
100
- if $ result {
101
- self . stub_out_block($ qast [0 ][1 ]);
102
- $ qast := $ result ;
91
+ my $ child_is_block := nqp ::istype($ qast [0 ], QAST ::Node)
92
+ && (nqp ::istype($ qast [0 ][0 ], QAST ::SVal) || nqp ::istype($ qast [0 ][0 ], QAST ::Block));
93
+ if $ child_is_block {
94
+ my $ block := nqp ::istype($ qast [0 ][0 ], QAST ::SVal) ?? $ qast [0 ][1 ] !! $ qast [0 ][0 ];
95
+ my $ regex := $ block [2 ];
96
+ # extra safety
97
+ if ! nqp ::istype($ regex , QAST ::Regex) { return $ qast }
98
+ self . visit_children($ regex );
99
+ if $ qast . subtype eq ' zerowidth'
100
+ && $ child_is_block
101
+ && $ qast [0 ][0 ]. value eq ' before' {
102
+ if nqp ::istype($ qast [0 ], QAST ::Node) && nqp ::istype($ qast [0 ][1 ], QAST ::Block)
103
+ && $ regex . rxtype eq ' concat' && $ regex [0 ]. rxtype eq ' scan' && $ regex [2 ]. rxtype eq ' pass' {
104
+ my $ simple := $ regex [1 ];
105
+ my $ result := 0 ;
106
+ if $ simple . rxtype eq ' literal' && $ simple . rxtype ne ' ignorecase' && ! $ qast . negate {
107
+ $ result := QAST ::Regex. new (: rxtype<literal >, : subtype<zerowidth >, : node($ simple . node),
108
+ : negate($ simple . negate),
109
+ $ simple [0 ]);
110
+ } elsif $ simple . rxtype eq ' enumcharlist' && $ simple . rxtype ne ' ignorecase' {
111
+ $ result := QAST ::Regex. new (: rxtype<enumcharlist >, : subtype<zerowidth >, : node($ simple . node),
112
+ : negate(nqp ::bitxor_i($ qast . negate, $ simple . negate)),
113
+ $ simple [0 ]);
114
+ } elsif $ simple . rxtype eq ' charrange' && $ simple . rxtype ne ' ignorecase' {
115
+ $ result := QAST ::Regex. new (: rxtype<charrange >, : subtype<zerowidth >, : node($ simple . node),
116
+ : negate(nqp ::bitxor_i($ qast . negate, $ simple . negate)),
117
+ $ simple [0 ],
118
+ $ simple [1 ],
119
+ $ simple [2 ]);
120
+ } elsif $ simple . rxtype eq ' cclass' && $ simple . rxtype ne ' ignorecase' {
121
+ $ result := QAST ::Regex. new (: rxtype<cclass >, : subtype<zerowidth >, : node($ simple . node),
122
+ : negate(nqp ::bitxor_i($ qast . negate, $ simple . negate)), : name($ simple . name ));
123
+ } else {
124
+ # since before is implicitly anchored, we can remove the scan.
125
+ self . dont_scan($ regex );
126
+ }
127
+ if $ result {
128
+ self . stub_out_block($ qast [0 ][1 ]);
129
+ $ qast := $ result ;
130
+ }
103
131
}
104
132
}
133
+ # positional and named captures are implicitly anchored, so we can get rid of the scan there, too.
134
+ if $ qast . subtype eq ' capture' && $ child_is_block {
135
+ self . dont_scan($ regex );
136
+ }
105
137
}
106
138
$ qast ;
107
139
}
0 commit comments