Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP

Comparing changes

Choose two branches to see what’s changed or to start a new pull request. If you need to, you can also compare across forks.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also compare across forks.
base fork: pstuifzand/Marpa-Simple-Lexer
base: 0260f56bb4
...
head fork: pstuifzand/Marpa-Simple-Lexer
compare: 31a7de7e1c
  • 6 commits
  • 6 files changed
  • 0 commit comments
  • 2 contributors
Commits on Apr 10, 2012
@ruz ruz Merge pull request #1 from pstuifzand/master
Added testcases as actual test
8311a72
Commits on Apr 12, 2012
@ruz ruz refactor tests and add more tests c644686
@ruz ruz properly handle exhaustion
* parser can be exhausted after first round
* exhauseted method should be checked after every earleme_complete
* buffer was not properly cut on exhaustion

Instead we catch exception from earleme_complete and propery cut buffer
7bb1014
@ruz ruz call end_input when parser is exhausted b8c572a
@ruz ruz update MANIFEST and README 57c8abc
Commits on Apr 14, 2012
@pstuifzand Allow for recognition in parts
Added partial_recognize method
- Recognize a bit of input and return to the caller
Added at_end method
- This allows you to say that your at then end of the input
31a7de7
View
3  MANIFEST
@@ -9,8 +9,9 @@ inc/Module/Install/ReadmeFromPod.pm
inc/Module/Install/Win32.pm
inc/Module/Install/WriteAll.pm
lib/MarpaX/Simple/Lexer.pm
-t/test-cases.t
+lib/MarpaX/Simple/Lexer/Test.pm
Makefile.PL
MANIFEST This list of files
META.yml
README
+t/basics.t
View
31 README
@@ -34,9 +34,8 @@ SYNOPSIS
use Regexp::Common qw /delimited/;
- my $lexer = MarpaX::Simple::Lexer->new(
+ my $lexer = MyLexer->new(
recognizer => $recognizer,
- input_filter => sub { ${$_[0]} =~ s/[\r\n]+//g },
tokens => {
word => qr{\b\w+\b},
'quoted' => qr[$RE{delimited}{-delim=>qq{\'\"}}],
@@ -60,6 +59,17 @@ SYNOPSIS
return scalar @children > 1 ? \@children : shift @children;
}
+ package MyLexer;
+ use base 'MarpaX::Simple::Lexer';
+
+ sub grow_buffer {
+ my $self = shift;
+ my $rv = $self->SUPER::grow_buffer( @_ );
+ ${ $self->buffer } =~ s/[\r\n]+//g;
+ return $rv;
+ }
+
+ package main;
__DATA__
hello !world OR "he hehe hee" ( foo OR !boo )
@@ -241,14 +251,17 @@ TUTORIAL
],
Filtering input
- Input can be filtered with a callback by providing input_filter
- argument:
+ Input can be filtered with subclassing grow_buffer method:
- my $lexer = MarpaX::Simple::Lexer->new(
- recognizer => $recognizer,
- input_filter => sub { ${$_[0]} =~ s/[\r\n]+//g },
- ...
- );
+ package MyLexer;
+ use base 'MarpaX::Simple::Lexer';
+
+ sub grow_buffer {
+ my $self = shift;
+ my $rv = $self->SUPER::grow_buffer( @_ );
+ ${ $self->buffer } =~ s/[\r\n]+//g;
+ return $rv;
+ }
Actions
The simplest possible action that can produce some results:
View
42 lib/MarpaX/Simple/Lexer.pm
@@ -368,6 +368,25 @@ sub grow_buffer {
}
sub recognize {
+ my ($self, $fh) = @_;
+ my $rec = $self->_recognize($fh);
+ $rec->end_input;
+ return $rec;
+}
+
+sub partial_recognize {
+ my ($self, $fh) = @_;
+ return $self->_recognize($fh);
+}
+
+sub at_end {
+ my ($self) = @_;
+ my $rec = $self->{'recognizer'};
+ $rec->end_input;
+ return $rec;
+}
+
+sub _recognize {
my $self = shift;
my $fh = shift;
@@ -387,7 +406,6 @@ sub recognize {
if $self->{'debug'};
my $first_char = substr $$buffer, 0, 1;
- my $at_least_on_match = 0;
foreach my $token ( @$expected ) {
REDO:
@@ -427,29 +445,25 @@ sub recognize {
if $self->{'debug'};
$rec->alternative( $token, $match, $length );
- $at_least_on_match = 1;
}
say STDERR '' if $self->{'debug'};
- my $skip = 1;
- unless ( $rec->earleme_complete ) {
- if ( $rec->exhausted ) {
- # if didn't find match and parser is exhausted
- # then we lost
- die "exhausted" unless $at_least_on_match;
-
- # otherwise we won
- $rec->end_input;
- return $rec;
+ my $skip = 0;
+ {
+ local $@;
+ while ( !(my $status = eval { $rec->earleme_complete }) ) {
+ unless ( defined $status ) {
+ substr $$buffer, 0, $skip, '';
+ return $rec;
+ }
+ $skip++;
}
- $skip++ while !$rec->earleme_complete;
$skip++;
}
substr $$buffer, 0, $skip, '';
$buffer_can_grow = $self->grow_buffer( $fh )
if $buffer_can_grow && $self->{'min_buffer'} > length $$buffer;
}
- $rec->end_input;
return $rec;
}
View
60 lib/MarpaX/Simple/Lexer/Test.pm
@@ -0,0 +1,60 @@
+use 5.010; use strict; use warnings;
+
+package MarpaX::Simple::Lexer::Test;
+
+use Marpa::XS;
+use MarpaX::Simple::Lexer;
+
+sub simple_lexer {
+ my $self = shift;
+ my %args = (@_);
+ my $grammar = Marpa::XS::Grammar->new({
+ actions => 'MarpaX::Simple::Lexer::Test::Actions',
+ start => 'text',
+ default_action => 'do_what_I_mean',
+ rules => [
+ [ 'text' => [ 'word' ] ],
+ ],
+ lhs_terminals => 0,
+ (
+ map { $_ => $args{$_} } grep exists $args{$_},
+ qw(start rules lhs_terminals default_action),
+ ),
+ });
+ $grammar->precompute;
+ my $recognizer = Marpa::XS::Recognizer->new( { grammar => $grammar } );
+ my $lexer = MarpaX::Simple::Lexer->new(
+ tokens => { word => 'test' },
+ %args,
+ recognizer => $recognizer,
+ );
+
+ return ($lexer, $recognizer, $grammar);
+}
+
+sub recognize {
+ my $self = shift;
+ my %args = (@_);
+
+ my $input = delete $args{'input'};
+ my $io;
+ unless ( ref $input ) {
+ open $io, '<', \$input;
+ } else {
+ $io = $input;
+ }
+
+ my @res = $self->simple_lexer( %args );
+ $res[0]->recognize( $io );
+ return @res;
+}
+
+package MarpaX::Simple::Lexer::Test::Actions;
+
+sub do_what_I_mean {
+ shift;
+ my @children = grep defined && length, @_;
+ return scalar @children > 1 ? \@children : shift @children;
+}
+
+1;
View
48 t/basics.t
@@ -0,0 +1,48 @@
+#!/usr/bin/env perl
+
+use Test::More tests => 10;
+use MarpaX::Simple::Lexer::Test;
+my $test = 'MarpaX::Simple::Lexer::Test';
+
+# simple success cases matching whole input
+for my $case (
+ { tokens => { word => qr/X/ }, input => "X" },
+ { tokens => { word => qr/XX/ }, input => "XX" },
+) {
+ my ($lexer, $rec) = $test->recognize( %$case );
+ is_deeply( $rec->value, \$case->{'input'} );
+ is ${$lexer->buffer}, '';
+}
+
+# success case matching prefix
+{
+ my ($lexer, $rec) = $test->recognize(
+ tokens => { word => qr/X/ }, input => "XY",
+ );
+ is_deeply( $rec->value, \"X" );
+ is ${$lexer->buffer}, 'Y';
+}
+
+# simple failure case at the beginning
+{
+ my ($lexer, $rec) = $test->recognize(
+ tokens => { word => qr/X/ },
+ input => "Y",
+ );
+ is $rec->value, undef, 'failed to match';
+ is ${$lexer->buffer}, 'Y';
+}
+
+# sequence that can not continue
+# XXX: match fails and it's sad
+{
+ my ($lexer, $rec) = $test->recognize(
+ rules => [
+ { lhs => 'text', rhs => ['word'], min => 1 }
+ ],
+ tokens => { word => qr/X/ },
+ input => "XXY",
+ );
+ is $rec->value, undef;
+ is ${$lexer->buffer}, 'Y';
+}
View
44 t/test-cases.t
@@ -1,44 +0,0 @@
-use Test::More;
-use Test::Exception;
-
-use Marpa::XS;
-use MarpaX::Simple::Lexer;
-use IO::String;
-use Data::Dumper;
-
-sub X {
- return 1;
-}
-
-my $grammar = Marpa::XS::Grammar->new( {
- actions => 'main',
- start => 'Parser',
- rules => [
- [ 'Parser' => [ 'X' ], 'X' ],
- ],
- terminals => [qw/X/],
-});
-
-$grammar->precompute;
-
-my @test_cases = (
- [ {X => qr/X/}, "X" ],
- [ {X => qr/XX/}, "XX" ],
-);
-
-for my $case (@test_cases) {
- my $recognizer = Marpa::XS::Recognizer->new( { grammar => $grammar } );
- my $lexer = MarpaX::Simple::Lexer->new(
- recognizer => $recognizer,
- tokens => $case->[0],
- );
-
- my $io = IO::String->new($case->[1]);
- lives_ok {
- $lexer->recognize($io);
- };
- is(${ $recognizer->value }, 1);
-}
-
-done_testing();
-

No commit comments for this range

Something went wrong with that request. Please try again.