Skip to content

Commit

Permalink
First cut of Perl 5 style character classes.
Browse files Browse the repository at this point in the history
  • Loading branch information
jnthn committed Aug 23, 2012
1 parent 71ec9af commit 3c6c7ea
Show file tree
Hide file tree
Showing 2 changed files with 72 additions and 113 deletions.
150 changes: 58 additions & 92 deletions src/QRegex/P5Regex/Actions.nqp
Expand Up @@ -84,6 +84,64 @@ class QRegex::P5Regex::Actions is HLL::Actions {
);
}

method p5metachar:sym<[ ]>($/) {
make $<cclass>.ast;
}

method cclass($/) {
my $str := '';
my $qast;
my @alts;
for $<charspec> {
if $_[1] {
my $node;
my $lhs;
my $rhs;
if $_[0]<backslash> {
$node := $_[0]<backslash>.ast;
$/.CURSOR.panic("Illegal range endpoint in regex: " ~ ~$_)
if $node.rxtype ne 'literal' && $node.rxtype ne 'enumcharlist'
|| $node.negate || nqp::chars($node[0]) != 1;
$lhs := $node[0];
}
else {
$lhs := ~$_[0][0];
}
if $_[1][0]<backslash> {
$node := $_[1][0]<backslash>.ast;
$/.CURSOR.panic("Illegal range endpoint in regex: " ~ ~$_)
if $node.rxtype ne 'literal' && $node.rxtype ne 'enumcharlist'
|| $node.negate || nqp::chars($node[0]) != 1;
$rhs := $node[0];
}
else {
$rhs := ~$_[1][0][0];
}
my $ord0 := nqp::ord($lhs);
my $ord1 := nqp::ord($rhs);
$/.CURSOR.panic("Illegal reversed character range in regex: " ~ ~$_)
if $ord0 > $ord1;
$str := nqp::concat($str, nqp::chr($ord0++)) while $ord0 <= $ord1;
}
elsif $_[0]<backslash> {
my $bs := $_[0]<backslash>.ast;
$bs.negate(!$bs.negate) if $<sign> eq '^';
@alts.push($bs);
}
else { $str := $str ~ ~$_[0]; }
}
@alts.push(QAST::Regex.new( $str, :rxtype<enumcharlist>, :node($/), :negate( $<sign> eq '^' ) ))
if nqp::chars($str);
$qast := +@alts == 1 ?? @alts[0] !!
$<sign> eq '^' ??
QAST::Regex.new( :rxtype<concat>, :node($/),
QAST::Regex.new( :rxtype<conj>, :subtype<zerowidth>, |@alts ),
QAST::Regex.new( :rxtype<cclass>, :subtype<.> ) ) !!
QAST::Regex.new( :rxtype<altseq>, |@alts );
make $qast;
}


method p5quantifier:sym<*>($/) {
my $qast := QAST::Regex.new( :rxtype<quant>, :min(0), :max(-1), :node($/) );
make quantmod($qast, $<quantmod>);
Expand Down Expand Up @@ -354,31 +412,6 @@ class QRegex::P5Regex::Actions is HLL::Actions {
}
make $qast;
}

method assertion:sym<[>($/) {
my $clist := $<cclass_elem>;
my $qast := $clist[0].ast;
if $qast.negate && $qast.rxtype eq 'subrule' {
$qast.subtype('zerowidth');
$qast := QAST::Regex.new(:rxtype<concat>, :node($/),
$qast,
QAST::Regex.new( :rxtype<cclass>, :subtype<.> ));
}
my $i := 1;
my $n := +$clist;
while $i < $n {
my $ast := $clist[$i].ast;
if $ast.negate {
$ast.subtype('zerowidth');
$qast := QAST::Regex.new( $ast, $qast, :rxtype<concat>, :node($/));
}
else {
$qast := QAST::Regex.new( $qast, $ast, :rxtype<altseq>, :node($/));
}
$i++;
}
make $qast;
}

method arg($/) {
make $<quote_EXPR> ?? $<quote_EXPR>.ast !! +$<val>;
Expand All @@ -390,73 +423,6 @@ class QRegex::P5Regex::Actions is HLL::Actions {
make $past;
}

method cclass_elem($/) {
my $str := '';
my $qast;
if $<name> {
my $name := ~$<name>;
$qast := QAST::Regex.new( PAST::Node.new($name), :rxtype<subrule>, :subtype<method>,
:negate( $<sign> eq '-' ), :node($/) );
}
elsif $<uniprop> {
my $uniprop := ~$<uniprop>;
$qast := QAST::Regex.new( $uniprop, :rxtype<uniprop>,
:negate( $<sign> eq '-' && $<invert> ne '!' # $<sign> ^^ $<invert>
|| $<sign> ne '-' && $<invert> eq '!' ), :node($/) );
}
else {
my @alts;
for $<charspec> {
if $_[1] {
my $node;
my $lhs;
my $rhs;
if $_[0]<backslash> {
$node := $_[0]<backslash>.ast;
$/.CURSOR.panic("Illegal range endpoint in regex: " ~ ~$_)
if $node.rxtype ne 'literal' && $node.rxtype ne 'enumcharlist'
|| $node.negate || nqp::chars($node[0]) != 1;
$lhs := $node[0];
}
else {
$lhs := ~$_[0][0];
}
if $_[1][0]<backslash> {
$node := $_[1][0]<backslash>.ast;
$/.CURSOR.panic("Illegal range endpoint in regex: " ~ ~$_)
if $node.rxtype ne 'literal' && $node.rxtype ne 'enumcharlist'
|| $node.negate || nqp::chars($node[0]) != 1;
$rhs := $node[0];
}
else {
$rhs := ~$_[1][0][0];
}
my $ord0 := nqp::ord($lhs);
my $ord1 := nqp::ord($rhs);
$/.CURSOR.panic("Illegal reversed character range in regex: " ~ ~$_)
if $ord0 > $ord1;
$str := nqp::concat($str, nqp::chr($ord0++)) while $ord0 <= $ord1;
}
elsif $_[0]<backslash> {
my $bs := $_[0]<backslash>.ast;
$bs.negate(!$bs.negate) if $<sign> eq '-';
@alts.push($bs);
}
else { $str := $str ~ ~$_[0]; }
}
@alts.push(QAST::Regex.new( $str, :rxtype<enumcharlist>, :node($/), :negate( $<sign> eq '-' ) ))
if nqp::chars($str);
$qast := +@alts == 1 ?? @alts[0] !!
$<sign> eq '-' ??
QAST::Regex.new( :rxtype<concat>, :node($/),
QAST::Regex.new( :rxtype<conj>, :subtype<zerowidth>, |@alts ),
QAST::Regex.new( :rxtype<cclass>, :subtype<.> ) ) !!
QAST::Regex.new( :rxtype<altseq>, |@alts );
}
#$qast.negate( $<sign> eq '-' );
make $qast;
}

method mod_internal($/) {
my $n := $<n>[0] gt '' ?? +$<n>[0] !! 1;
%*RX{ ~$<mod_ident><sym> } := $n;
Expand Down
35 changes: 14 additions & 21 deletions src/QRegex/P5Regex/Grammar.nqp
Expand Up @@ -56,6 +56,20 @@ grammar QRegex::P5Regex::Grammar is HLL::Grammar {
token p5metachar:sym<$> {
'$' <?before \W | $>
}
token p5metachar:sym<[ ]> { <?before '['> <cclass> }

token cclass {
'['
$<sign>=['^'|<?>]
$<charspec>=(
\s* ( '\\' <backslash> || (<-[\]\\]>) )
[
\s* '-' \s*
( '\\' <backslash> || (<-[\]\\]>) )
]?
)*
\s* ']'
}

proto token p5backslash { <...> }

Expand Down Expand Up @@ -161,27 +175,6 @@ grammar QRegex::P5Regex::Grammar is HLL::Grammar {
]?
}

token assertion:sym<[> { <?before '['|'+'|'-'|':'> <cclass_elem>+ }

token cclass_elem {
$<sign>=['+'|'-'|<?>]
<.normspace>?
[
| '[' $<charspec>=(
|| \s* '-' <!before \s* ']'> <.obs: '- as character range','.. for range, for explicit - in character class, escape it or place as last thing'>
|| \s* ( '\\' <backslash> || (<-[\]\\]>) )
[
\s* '..' \s*
( '\\' <backslash> || (<-[\]\\]>) )
]?
)*
\s* ']'
| $<name>=[\w+]
| ':' $<invert>=['!'|<?>] $<uniprop>=[\w+]
]
<.normspace>?
}

token mod_internal {
[
| ':' $<n>=('!' | \d+)**1 <mod_ident> »
Expand Down

0 comments on commit 3c6c7ea

Please sign in to comment.