Browse files

Generate tokens from description

  • Loading branch information...
1 parent d849484 commit aaf01052a6d35d22b50bc537001e776910588a85 @pstuifzand committed Mar 16, 2012
Showing with 153 additions and 80 deletions.
  1. +8 −1 My_Actions.pm
  2. +28 −35 lib/MarpaX/CodeGen.pm
  3. +70 −0 lib/MarpaX/SimpleLexer.pm
  4. +18 −2 marpa.mp
  5. +29 −42 marparser.pl
View
9 My_Actions.pm
@@ -1,6 +1,13 @@
package My_Actions;
-sub My_Actions::Parser {shift;return {'rules' => \@_ }; }
+sub My_Actions::Parser {my $ast = shift;return $ast;}
+
+sub My_Actions::DeclRule {my $ast = shift; push @{$ast->{rules}}, $_[0]; }
+sub My_Actions::DeclToken {my $ast = shift; push @{$ast->{tokens}}, $_[0]; }
+
+sub My_Actions::TokenRule_0 {shift;return { @{$_[0]}, regex => qr/$_[3]/ }; }
+sub My_Actions::TokenRule_1 {shift;return { @{$_[0]}, char => $_[2] }; }
+
sub My_Actions::Rule {shift;return { @{$_[0]}, @{$_[2]} }; }
sub My_Actions::RuleWithCode {shift;return { @{$_[0]}, @{$_[2]}, code => $_[4] }; }
sub My_Actions::Lhs {shift;return [lhs => $_[0]];}
View
63 lib/MarpaX/CodeGen.pm
@@ -14,54 +14,45 @@ sub generate_code {
use strict;
use Marpa::XS;
use MarpaX::CodeGen 'generate_code';
+use MarpaX::SimpleLexer;
my %tokens = (
- Name => qr/(\w+)/,
- DeclareOp => qr/::=/,
- Plus => qr/\+/,
- Star => qr/\*/,
- CB => qr/{{/,
- CE => qr/}}/,
- Code => qr/(.+)(?=}})/,
-);
-
-sub parse_token_stream {
- my ($grammar, $fh) = @_;
-
- my $r= Marpa::XS::Recognizer->new( { grammar => $grammar } );
-
- LINE: while (<$fh>) {
- my $line = $_;
- chomp $line;
-
- while ($line) {
- $line =~ s/^\s+//;
- next LINE if $line =~ m/^\#/;
-
- for my $token_name (@{$r->terminals_expected}) {
- my $re = $tokens{$token_name};
+HEADER
- if ($line =~ s/^$re//) {
- $r->read($token_name, $1 ? $1 : '');
- }
- }
+ for (@{ $parse_tree->{tokens} }) {
+ if ($_->{regex}) {
+ printf(" %-30s => qr/%s/,\n",$_->{lhs}, $_->{regex});
+ }
+ else {
+ $_->{char} =~ s/^\$//;
+ printf(" %-30s => '%s',\n", $_->{lhs}, $_->{char});
}
}
-
- my $value_ref = $r->value;
- return $$value_ref;
-}
+ #Name => qr/(\w+)/,
+ #DeclareOp => qr/::=/,
+ #Plus => qr/\+/,
+ #Star => qr/\*/,
+ #CB => qr/{{/,
+ #CE => qr/}}/,
+ #Code => qr/(.+)(?=}})/,
+
+ print <<'HEADER';
+);
HEADER
print generate_actions($parse_tree, $config);
print generate_parser_code($parse_tree, $config);
print <<'OUT';
+my $simple_lexer = MarpaX::SimpleLexer->new({
+ create_grammar => \&create_grammar,
+ tokens => \%tokens,
+});
+
open my $fh, '<', $ARGV[0] or die "Can't open $ARGV[0]";
-my $grammar = create_grammar();
-my $parse_tree = parse_token_stream($grammar, $fh);
+my $parse_tree = $simple_lexer->parse($fh);
my $config = { namespace => 'My_Actions' };
generate_code($parse_tree, $config);
@@ -81,6 +72,7 @@ sub create_grammar {
{ start => 'Parser',
actions => '$namespace',
PRE
+
$out .= generate_rules($parse_tree, $config);
$out .= <<'POST';
@@ -95,7 +87,8 @@ POST
sub generate_rules {
my ($parse_tree) = @_;
- my $out = Dumper($parse_tree);
+ my $rules = $parse_tree->{rules};
+ my $out = Dumper({rules=>$rules});
$out =~ s/\$VAR\d+\s+=\s+{//;
$out =~ s/};\n$/,/s;
return $out;
View
70 lib/MarpaX/SimpleLexer.pm
@@ -0,0 +1,70 @@
+package MarpaX::SimpleLexer;
+use strict;
+use 5.14.2;
+
+sub new {
+ my ($klass, $options) = @_;
+
+ my $self = bless {
+ create_grammar => $options->{create_grammar},
+ tokens => $options->{tokens},
+ }, $klass;
+
+ $self->_create_grammar();
+ return $self;
+}
+
+sub _create_grammar {
+ my ($self) = @_;
+ $self->{grammar} = $self->{create_grammar}->([keys %{$self->{tokens}}]);
+ return;
+}
+
+sub parse {
+ my ($self, $fh) = @_;
+ return $self->_parse_token_stream($fh);
+}
+
+sub _parse_token_stream {
+ my ($self, $fh) = @_;
+
+ my $r = Marpa::XS::Recognizer->new( { grammar => $self->{grammar} } );
+
+ my $c = 0;
+ LINE: while (<$fh>) {
+ my $line = $_;
+ chomp $line;
+
+# say STDERR "=====================";
+ while ($line) {
+ $line =~ s/^\s+//;
+# say STDERR "Line: $line";
+ next LINE if $line =~ m/^\#/;
+
+ for my $token_name (@{$r->terminals_expected}) {
+# say STDERR "Token: $token_name";
+ my $re = $self->{tokens}{$token_name};
+
+ if (ref($re) eq 'Regexp') {
+ if ($line =~ s/^($re)//) {
+ $r->read($token_name, $1 ? $1 : '');
+ }
+ }
+ else {
+ my ($char, $rest) = split(//, $line, 2);
+
+ if ($re eq $char) {
+ $r->read($token_name, $char ? $char : '');
+ $line = $rest;
+ }
+ }
+ }
+ }
+ }
+
+ my $value_ref = $r->value;
+ return $$value_ref;
+}
+
+1;
+
View
20 marpa.mp
@@ -14,12 +14,28 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
-Parser ::= Rule+ {{ shift; return { 'rules' => \@_ } }}
+Name = /\w+/
+DeclareOp = /::=/
+Plus = $+
+Star = $*
+CB = /{{/
+CE = /}}/
+Code = /(?<!{{)\s*(.+)\s*(?=}})/
+SLASH = $/
+EQ = $=
+RX = /(?<!\/)(.+)(?=(?<!\/))/
+Char = /\$(.)/
+
+Parser ::= Decl+ {{ return $_[0]; }}
+Decl ::= Rule {{ push @{$_[0]->{rules}}, $_[1] }}
+Decl ::= TokenRule {{ push @{$_[0]->{tokens}}, $_[1] }}
+TokenRule ::= Lhs EQ SLASH RX SLASH {{ shift; return { @{$_[0]}, regex => qr/$_[3]/ } }}
+TokenRule ::= Lhs EQ Char {{ shift; return { @{$_[0]}, 'char' => $_[2] } }}
Rule ::= Lhs DeclareOp Rhs {{ shift; return { @{$_[0]}, @{$_[2]} } }}
Rule ::= Lhs DeclareOp Rhs CB Code CE {{ shift; return { @{$_[0]}, @{$_[2]}, code => $_[4] } }}
Lhs ::= Name {{ shift; return [ lhs => $_[0] ] }}
Rhs ::= Names {{ shift; return [ rhs => $_[0] ] }}
-Rhs ::= Names Plus {{ shift; return [ rhs => $_[0], min => 1 ] }}
Rhs ::= Names Star {{ shift; return [ rhs => $_[0], min => 0 ] }}
+Rhs ::= Names Plus {{ shift; return [ rhs => $_[0], min => 1 ] }}
Names ::= Name+ {{ shift; return [ @_ ]; }}
View
71 marparser.pl
@@ -17,6 +17,7 @@
use v5.10;
use strict;
use MarpaX::CodeGen 'generate_code';
+use MarpaX::SimpleLexer;
use Marpa::XS;
use Data::Dumper;
use My_Actions;
@@ -26,69 +27,55 @@
my %tokens = (
Name => qr/(\w+)/,
DeclareOp => qr/::=/,
- Plus => qr/\+/,
- Star => qr/\*/,
+ Plus => '+',
+ Star => '*',
CB => qr/{{/,
CE => qr/}}/,
- Code => qr/(.+)(?=}})/,
+ Code => qr/(?<!{{)\s*(.+)\s*(?=}})/,
+ EQ => '=',
+ Char => qr/\$(.)/,
+ SLASH => '/',
+ RX => qr/(?<!\/)(.+)(?=(?<!\/))/,
);
sub create_grammar {
+ my ($terminals) = @_;
+
my $grammar = Marpa::XS::Grammar->new(
{ start => 'Parser',
actions => 'My_Actions',
rules => [
- { lhs => 'Parser', rhs => [qw/Rule/], min => 1 },
- { lhs => 'Rule', rhs => [qw/Lhs DeclareOp Rhs/], action => 'Rule' },
- { lhs => 'Rule', rhs => [qw/Lhs DeclareOp Rhs CB Code CE/], action => 'RuleWithCode' },
- { lhs => 'Lhs', rhs => [qw/Name/] },
- { lhs => 'Rhs', rhs => [qw/Names/] },
- { lhs => 'Rhs', rhs => [qw/Names Plus/], action => 'Plus' },
- { lhs => 'Rhs', rhs => [qw/Names Star/], action => 'Star' },
- { lhs => 'Names', rhs => [qw/Name/], min => 1 },
+ { lhs => 'Parser', rhs => [qw/Decl/], min => 1 },
+ { lhs => 'Decl', rhs => [qw/Rule/], action => 'DeclRule' },
+ { lhs => 'Decl', rhs => [qw/TokenRule/], action => 'DeclToken' },
+ { lhs => 'TokenRule', rhs => [qw/Lhs EQ SLASH RX SLASH/], action => 'TokenRule_0' },
+ { lhs => 'TokenRule', rhs => [qw/Lhs EQ Char/], action => 'TokenRule_1' },
+ { lhs => 'Rule', rhs => [qw/Lhs DeclareOp Rhs/], action => 'Rule' },
+ { lhs => 'Rule', rhs => [qw/Lhs DeclareOp Rhs CB Code CE/], action => 'RuleWithCode' },
+ { lhs => 'Lhs', rhs => [qw/Name/] },
+ { lhs => 'Rhs', rhs => [qw/Names/] },
+ { lhs => 'Rhs', rhs => [qw/Names Plus/], action => 'Plus' },
+ { lhs => 'Rhs', rhs => [qw/Names Star/], action => 'Star' },
+ { lhs => 'Names', rhs => [qw/Name/], min => 1 },
],
- terminals => [keys %tokens],
+ terminals => $terminals,
}
);
$grammar->precompute();
return $grammar;
}
-sub parse_token_stream {
- my ($grammar, $fh) = @_;
-
- my $r= Marpa::XS::Recognizer->new( { grammar => $grammar } );
-
- LINE: while (<$fh>) {
- my $line = $_;
- chomp $line;
-
- while ($line) {
- $line =~ s/^\s+//;
- next LINE if $line =~ m/^\#/;
-
- for my $token_name (@{$r->terminals_expected}) {
- my $re = $tokens{$token_name};
-
- if ($line =~ s/^$re//) {
- $r->read($token_name, $1 ? $1 : '');
- }
- }
- }
- }
-
- my $value_ref = $r->value;
- return $$value_ref;
-}
-
+my $simple_lexer = MarpaX::SimpleLexer->new({
+ create_grammar => \&create_grammar,
+ tokens => \%tokens,
+});
open my $fh, '<', $ARGV[0] or die "Can't open $ARGV[0]";
-my $grammar = create_grammar();
-my $parse_tree = parse_token_stream($grammar, $fh);
+my $parse_tree = $simple_lexer->parse($fh);
+#print Dumper($parse_tree);
my $config = { namespace => 'My_Actions' };
generate_code($parse_tree,$config);
-

0 comments on commit aaf0105

Please sign in to comment.