Skip to content

Commit

Permalink
Generate tokens from description
Browse files Browse the repository at this point in the history
  • Loading branch information
pstuifzand committed Mar 16, 2012
1 parent d849484 commit aaf0105
Show file tree
Hide file tree
Showing 5 changed files with 153 additions and 80 deletions.
9 changes: 8 additions & 1 deletion My_Actions.pm
@@ -1,6 +1,13 @@
package My_Actions;

sub My_Actions::Parser {shift;return {'rules' => \@_ }; }
sub My_Actions::Parser {my $ast = shift;return $ast;}

sub My_Actions::DeclRule {my $ast = shift; push @{$ast->{rules}}, $_[0]; }
sub My_Actions::DeclToken {my $ast = shift; push @{$ast->{tokens}}, $_[0]; }

sub My_Actions::TokenRule_0 {shift;return { @{$_[0]}, regex => qr/$_[3]/ }; }
sub My_Actions::TokenRule_1 {shift;return { @{$_[0]}, char => $_[2] }; }

sub My_Actions::Rule {shift;return { @{$_[0]}, @{$_[2]} }; }
sub My_Actions::RuleWithCode {shift;return { @{$_[0]}, @{$_[2]}, code => $_[4] }; }
sub My_Actions::Lhs {shift;return [lhs => $_[0]];}
Expand Down
63 changes: 28 additions & 35 deletions lib/MarpaX/CodeGen.pm
Expand Up @@ -14,54 +14,45 @@ sub generate_code {
use strict;
use Marpa::XS;
use MarpaX::CodeGen 'generate_code';
use MarpaX::SimpleLexer;
my %tokens = (
Name => qr/(\w+)/,
DeclareOp => qr/::=/,
Plus => qr/\+/,
Star => qr/\*/,
CB => qr/{{/,
CE => qr/}}/,
Code => qr/(.+)(?=}})/,
);
sub parse_token_stream {
my ($grammar, $fh) = @_;
my $r= Marpa::XS::Recognizer->new( { grammar => $grammar } );
LINE: while (<$fh>) {
my $line = $_;
chomp $line;
while ($line) {
$line =~ s/^\s+//;
next LINE if $line =~ m/^\#/;
for my $token_name (@{$r->terminals_expected}) {
my $re = $tokens{$token_name};
HEADER

if ($line =~ s/^$re//) {
$r->read($token_name, $1 ? $1 : '');
}
}
for (@{ $parse_tree->{tokens} }) {
if ($_->{regex}) {
printf(" %-30s => qr/%s/,\n",$_->{lhs}, $_->{regex});
}
else {
$_->{char} =~ s/^\$//;
printf(" %-30s => '%s',\n", $_->{lhs}, $_->{char});
}
}
my $value_ref = $r->value;
return $$value_ref;
}

#Name => qr/(\w+)/,
#DeclareOp => qr/::=/,
#Plus => qr/\+/,
#Star => qr/\*/,
#CB => qr/{{/,
#CE => qr/}}/,
#Code => qr/(.+)(?=}})/,

print <<'HEADER';
);
HEADER

print generate_actions($parse_tree, $config);
print generate_parser_code($parse_tree, $config);

print <<'OUT';
my $simple_lexer = MarpaX::SimpleLexer->new({
create_grammar => \&create_grammar,
tokens => \%tokens,
});
open my $fh, '<', $ARGV[0] or die "Can't open $ARGV[0]";
my $grammar = create_grammar();
my $parse_tree = parse_token_stream($grammar, $fh);
my $parse_tree = $simple_lexer->parse($fh);
my $config = { namespace => 'My_Actions' };
generate_code($parse_tree, $config);
Expand All @@ -81,6 +72,7 @@ sub create_grammar {
{ start => 'Parser',
actions => '$namespace',
PRE

$out .= generate_rules($parse_tree, $config);

$out .= <<'POST';
Expand All @@ -95,7 +87,8 @@ POST

sub generate_rules {
my ($parse_tree) = @_;
my $out = Dumper($parse_tree);
my $rules = $parse_tree->{rules};
my $out = Dumper({rules=>$rules});
$out =~ s/\$VAR\d+\s+=\s+{//;
$out =~ s/};\n$/,/s;
return $out;
Expand Down
70 changes: 70 additions & 0 deletions lib/MarpaX/SimpleLexer.pm
@@ -0,0 +1,70 @@
package MarpaX::SimpleLexer;
use strict;
use 5.14.2;

sub new {
my ($klass, $options) = @_;

my $self = bless {
create_grammar => $options->{create_grammar},
tokens => $options->{tokens},
}, $klass;

$self->_create_grammar();
return $self;
}

sub _create_grammar {
my ($self) = @_;
$self->{grammar} = $self->{create_grammar}->([keys %{$self->{tokens}}]);
return;
}

sub parse {
my ($self, $fh) = @_;
return $self->_parse_token_stream($fh);
}

sub _parse_token_stream {
my ($self, $fh) = @_;

my $r = Marpa::XS::Recognizer->new( { grammar => $self->{grammar} } );

my $c = 0;
LINE: while (<$fh>) {
my $line = $_;
chomp $line;

# say STDERR "=====================";
while ($line) {
$line =~ s/^\s+//;
# say STDERR "Line: $line";
next LINE if $line =~ m/^\#/;

for my $token_name (@{$r->terminals_expected}) {
# say STDERR "Token: $token_name";
my $re = $self->{tokens}{$token_name};

if (ref($re) eq 'Regexp') {
if ($line =~ s/^($re)//) {
$r->read($token_name, $1 ? $1 : '');
}
}
else {
my ($char, $rest) = split(//, $line, 2);

if ($re eq $char) {
$r->read($token_name, $char ? $char : '');
$line = $rest;
}
}
}
}
}

my $value_ref = $r->value;
return $$value_ref;
}

1;

20 changes: 18 additions & 2 deletions marpa.mp
Expand Up @@ -14,12 +14,28 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

Parser ::= Rule+ {{ shift; return { 'rules' => \@_ } }}
Name = /\w+/
DeclareOp = /::=/
Plus = $+
Star = $*
CB = /{{/
CE = /}}/
Code = /(?<!{{)\s*(.+)\s*(?=}})/
SLASH = $/
EQ = $=
RX = /(?<!\/)(.+)(?=(?<!\/))/
Char = /\$(.)/

Parser ::= Decl+ {{ return $_[0]; }}
Decl ::= Rule {{ push @{$_[0]->{rules}}, $_[1] }}
Decl ::= TokenRule {{ push @{$_[0]->{tokens}}, $_[1] }}
TokenRule ::= Lhs EQ SLASH RX SLASH {{ shift; return { @{$_[0]}, regex => qr/$_[3]/ } }}
TokenRule ::= Lhs EQ Char {{ shift; return { @{$_[0]}, 'char' => $_[2] } }}
Rule ::= Lhs DeclareOp Rhs {{ shift; return { @{$_[0]}, @{$_[2]} } }}
Rule ::= Lhs DeclareOp Rhs CB Code CE {{ shift; return { @{$_[0]}, @{$_[2]}, code => $_[4] } }}
Lhs ::= Name {{ shift; return [ lhs => $_[0] ] }}
Rhs ::= Names {{ shift; return [ rhs => $_[0] ] }}
Rhs ::= Names Plus {{ shift; return [ rhs => $_[0], min => 1 ] }}
Rhs ::= Names Star {{ shift; return [ rhs => $_[0], min => 0 ] }}
Rhs ::= Names Plus {{ shift; return [ rhs => $_[0], min => 1 ] }}
Names ::= Name+ {{ shift; return [ @_ ]; }}

71 changes: 29 additions & 42 deletions marparser.pl
Expand Up @@ -17,6 +17,7 @@
use v5.10;
use strict;
use MarpaX::CodeGen 'generate_code';
use MarpaX::SimpleLexer;
use Marpa::XS;
use Data::Dumper;
use My_Actions;
Expand All @@ -26,69 +27,55 @@
my %tokens = (
Name => qr/(\w+)/,
DeclareOp => qr/::=/,
Plus => qr/\+/,
Star => qr/\*/,
Plus => '+',
Star => '*',
CB => qr/{{/,
CE => qr/}}/,
Code => qr/(.+)(?=}})/,
Code => qr/(?<!{{)\s*(.+)\s*(?=}})/,
EQ => '=',
Char => qr/\$(.)/,
SLASH => '/',
RX => qr/(?<!\/)(.+)(?=(?<!\/))/,
);

sub create_grammar {
my ($terminals) = @_;

my $grammar = Marpa::XS::Grammar->new(
{ start => 'Parser',
actions => 'My_Actions',
rules => [
{ lhs => 'Parser', rhs => [qw/Rule/], min => 1 },
{ lhs => 'Rule', rhs => [qw/Lhs DeclareOp Rhs/], action => 'Rule' },
{ lhs => 'Rule', rhs => [qw/Lhs DeclareOp Rhs CB Code CE/], action => 'RuleWithCode' },
{ lhs => 'Lhs', rhs => [qw/Name/] },
{ lhs => 'Rhs', rhs => [qw/Names/] },
{ lhs => 'Rhs', rhs => [qw/Names Plus/], action => 'Plus' },
{ lhs => 'Rhs', rhs => [qw/Names Star/], action => 'Star' },
{ lhs => 'Names', rhs => [qw/Name/], min => 1 },
{ lhs => 'Parser', rhs => [qw/Decl/], min => 1 },
{ lhs => 'Decl', rhs => [qw/Rule/], action => 'DeclRule' },
{ lhs => 'Decl', rhs => [qw/TokenRule/], action => 'DeclToken' },
{ lhs => 'TokenRule', rhs => [qw/Lhs EQ SLASH RX SLASH/], action => 'TokenRule_0' },
{ lhs => 'TokenRule', rhs => [qw/Lhs EQ Char/], action => 'TokenRule_1' },
{ lhs => 'Rule', rhs => [qw/Lhs DeclareOp Rhs/], action => 'Rule' },
{ lhs => 'Rule', rhs => [qw/Lhs DeclareOp Rhs CB Code CE/], action => 'RuleWithCode' },
{ lhs => 'Lhs', rhs => [qw/Name/] },
{ lhs => 'Rhs', rhs => [qw/Names/] },
{ lhs => 'Rhs', rhs => [qw/Names Plus/], action => 'Plus' },
{ lhs => 'Rhs', rhs => [qw/Names Star/], action => 'Star' },
{ lhs => 'Names', rhs => [qw/Name/], min => 1 },
],
terminals => [keys %tokens],
terminals => $terminals,
}
);

$grammar->precompute();
return $grammar;
}

sub parse_token_stream {
my ($grammar, $fh) = @_;

my $r= Marpa::XS::Recognizer->new( { grammar => $grammar } );

LINE: while (<$fh>) {
my $line = $_;
chomp $line;

while ($line) {
$line =~ s/^\s+//;
next LINE if $line =~ m/^\#/;

for my $token_name (@{$r->terminals_expected}) {
my $re = $tokens{$token_name};

if ($line =~ s/^$re//) {
$r->read($token_name, $1 ? $1 : '');
}
}
}
}

my $value_ref = $r->value;
return $$value_ref;
}

my $simple_lexer = MarpaX::SimpleLexer->new({
create_grammar => \&create_grammar,
tokens => \%tokens,
});

open my $fh, '<', $ARGV[0] or die "Can't open $ARGV[0]";

my $grammar = create_grammar();
my $parse_tree = parse_token_stream($grammar, $fh);
my $parse_tree = $simple_lexer->parse($fh);
#print Dumper($parse_tree);

my $config = { namespace => 'My_Actions' };
generate_code($parse_tree,$config);


0 comments on commit aaf0105

Please sign in to comment.