Permalink
Browse files

Pod::Tree parser non working commit

  • Loading branch information...
1 parent 6af1971 commit a171ebbb0420028f2e309ef4a38f0c246c074d6c Martin Berends committed Mar 27, 2009
Showing with 426 additions and 51 deletions.
  1. +1 −1 Configure.p6
  2. +9 −16 Makefile.in
  3. +9 −9 lib/Configure.pm
  4. +22 −0 lib/Pod/Tree/Man.pm
  5. +132 −0 lib/Pod/Tree/Pod6.pm
  6. +60 −0 lib/Pod/Tree/Test.pm
  7. +15 −11 lib/Test/Differences.pm
  8. +20 −14 lib/Test/Harness.pm
  9. +24 −0 t/01-parser.t
  10. +3 −0 t/p01-plain.pod
  11. +14 −0 t/p02-para.pod
  12. +8 −0 t/p03-head.pod
  13. +13 −0 t/p04-code.pod
  14. +23 −0 t/p05-pod5.pod
  15. +17 −0 t/p07-basis.pod
  16. +16 −0 t/p08-code.pod
  17. +40 −0 t/p13-link.pod
View
@@ -1,2 +1,2 @@
# Configure.p6 - installer - see documentation in ../Configure.pm
-use v6; BEGIN { @*INC.push( '..' ); }; use Configure;
+use v6; BEGIN { @*INC.push( './lib' ); }; use Configure;
View
@@ -16,37 +16,30 @@
# Configure.p6 replaces <TOKENS> when converting Makefile.in -> Makefile
PERL6 = <PERL6>
RAKUDO_DIR = <RAKUDO_DIR>
-PERL6LIB = <PERL6LIB>
-PERL6BIN = <PERL6BIN>
all: precompile
-precompile: Parser.pir to/text.pir to/man.pir to/xhtml.pir to/pod5.pir \
- to/pod6.pir \
- $(PERL6LIB)/Test.pir $(PERL6LIB)/Test/Harness.pir \
- $(PERL6LIB)/Test/Differences.pir $(PERL6LIB)/Test/Mock/Parser.pir
+precompile: lib/Pod/Tree/Pod6.pir lib/Pod/Tree/Test.pir \
+ lib/Test.pir lib/Test/Harness.pir lib/Test/Differences.pir
# define how to precompile a module from its source code
.SUFFIXES: .pm .pir
.pm.pir: $(PERL6)
@echo 'precompile $< -> $@'
- @export PERL6LIB=`pwd`/.. ; $(PERL6) --target=pir --output=$@ $<
+ @export PERL6LIB=./lib; $(PERL6) --target=pir --output=$@ $<
# avoid duplication - precompile the Test module from the Parrot source
-$(PERL6LIB)/Test.pir: $(RAKUDO_DIR)/Test.pm $(PERL6)
- @echo 'precompile $(RAKUDO_DIR)/Test.pm -> $(PERL6LIB)/Test.pir'
- @$(PERL6) --target=pir --output=$(PERL6LIB)/Test.pir $(RAKUDO_DIR)/Test.pm
+lib/Test.pir: $(RAKUDO_DIR)/Test.pm $(PERL6)
+ @echo 'precompile $(RAKUDO_DIR)/Test.pm -> lib/Test.pir'
+ @$(PERL6) --target=pir --output=lib/Test.pir $(RAKUDO_DIR)/Test.pm
test: all $(PERL6)
@echo 'starting prove --perl="$(PERL6)" --recurse t'
- @export PERL6LIB=$(PERL6LIB); $(PERL6) $(PERL6BIN)/prove --perl='$(PERL6)' --recurse t
+ @export PERL6LIB=./lib; $(PERL6) bin/prove --perl='$(PERL6)' --recurse t
clean:
- @find . -name '*.pir' -exec rm {} ';' # Parser.pir etc
- @find .. -name 'Test.pir' -exec rm {} ';'
- @find ../Test -name 'Harness.pir' -exec rm {} ';'
- @find ../Test -name 'Differences.pir' -exec rm {} ';'
- @find ../.. -name '*~' -exec rm {} ';' # editor backups
+ @find . -name '*.pir' -exec rm {} ';'
+ @find . -name '*~' -exec rm {} ';'
realclean: clean
@rm Makefile
View
@@ -1,4 +1,4 @@
-# Configure.pm
+# Configure.pm
say "\nConfigure.pm is preparing to make your Makefile.\n";
@@ -12,8 +12,8 @@ say "\nConfigure.pm is preparing to make your Makefile.\n";
my $parrot_dir = %*VM<config><build_dir>;
my $rakudo_dir;
my $perl6;
-# There are now just two possible relationships between the parrot and
-# rakudo directories: rakudo/parrot or parrot/languages/rakudo
+# There are two possible relationships between the parrot and rakudo
+# directories: rakudo/parrot or parrot/languages/rakudo
if $parrot_dir ~~ / <parrot_in_rakudo> / {
# first case, rakudo/parrot for example if installed using new
# 'git clone ...rakudo.git' then 'perl Configure.pl --gen-parrot'
@@ -23,7 +23,7 @@ elsif "$parrot_dir/languages/rakudo" ~~ :d {
# second case, parrot/languages/rakudo if installed the old way
$rakudo_dir = "$parrot_dir/languages/rakudo";
}
-else {
+else { # anything else
say "PARROT_DIR unexpected $parrot_dir";
exit(1);
}
@@ -75,15 +75,15 @@ sub squirt( Str $filename, Str $text ) {
# my/parrot/parrot my/rakudo/perl6.pbc Configure.p6
# 2. The Rakudo "Fake Executable" made by pbc_to_exe:
# my/rakudo/perl6 Configure.p6
-# The rest are variations of 1. and 2. to shorten the command line:
-# 3. A shell script perl6 for 1. 'my/parrot/parrot my/rakudo/perl6.pbc $*':
+# The rest are variations of 1. and 2. to sugar the command line:
+# 3. A shell script perl6 for 1: 'my/parrot/parrot my/rakudo/perl6.pbc $*':
# my/perl6 Configure.p6 # or 'perl6 Configure.p6' with search path
-# 4. A shell alias for 1. perl6='my/parrot/parrot my/rakudo/perl6.pbc':
+# 4. A shell alias for 1: perl6='my/parrot/parrot my/rakudo/perl6.pbc':
# perl6 Configure.p6
-# 5. A symbolic link for 2. 'sudo ln -s /path/to/rakudo/perl6 /bin':
+# 5. A symbolic link for 2: 'sudo ln -s /path/to/rakudo/perl6 /bin':
# perl6 Configure.p6
-# Are there other ways to execute Perl 6 scripts? Please tell the author.
+# Are there other ways to execute Perl 6 scripts? Please tell the maintainers.
regex parrot_in_rakudo { ( .* '/rakudo' ) '/parrot' }
# regex rakudo_in_parrot { .* '/parrot/languages/rakudo' }
View
@@ -0,0 +1,22 @@
+use Pod::Tree::Pod6; # the Pod grammar
+
+class Pod::Tree::Man { # action class whose methods append marker strings to @!output
+ has @!output; # marker strings pushed by the action methods below
+ method TOP($/) { make @!output.join("\n"); } # payload is every marker so far, newline separated
+ method pod6block($/) { push @!output, "block"; } # NOTE(review): the Pod6 grammar in this commit has no pod6block, begin, body or end rule - confirm these four are ever called
+ method begin($/) { push @!output, "begin"; }
+ method body($/) { push @!output, "body"; }
+ method end($/) { push @!output, "end"; }
+ method translate( Str $doc ) { # parse $doc with the Pod6 grammar and return the result as a string
+ return ~ Pod::Tree::Pod6.parse(
+ $doc, :action( Pod::Tree::Man.new ) # actions go to a fresh Pod::Tree::Man, not to the invocant
+ )
+ }
+}
+
+=begin pod
+
+=head1 NAME
+Pod::Tree::Man - convert Pod to man page
+
+=end pod
View
@@ -0,0 +1,132 @@
+# Warning: Under Construction: nothing works or even makes sense!
+grammar Pod::Tree::Pod6 { # experimental grammar: only =begin/=end lines and plain lines so far
+ regex TOP { ^ <directive> <content> <directive> $ } # whole input: one directive, one content line, one directive
+# regex TOP { ^ <beginpod> <content> <endpod> $ }
+# regex TOP { ^ <anyline> * $ {*} }
+ regex anyline { ^^ [ <directive> | <content> ] $$ } # referenced only by the commented-out TOP above
+ regex directive { <beginpod> | <endpod> } # either kind of delimiter line
+ regex beginpod { ^^ '=begin ' <typename> $$ \n {*} } # a full "=begin <typename>" line and its newline
+ regex endpod { ^^ '=end ' <typename> $$ \n {*} } # a full "=end <typename>" line and its newline
+ regex typename { pod | para | comment } # the only block names recognised so far
+ regex content { ^^ \N* $$ \n {*} } # one line of non-newline characters and its newline
+# # TODO: everything else
+}
+
+class Pod::Tree::Pod6::ambient { }
+class Pod::Tree::Pod6::directive { }
+class Pod::Tree::Pod6::content { }
+
+=begin pod
+
+=head1 NAME
+Pod::Tree::Pod6 - grammar for Perl 6 tree based Pod processors
+
+=head1 SYNOPSIS
+=begin code
+use Pod::Tree::Xhtml;
+say Pod::Tree::Xhtml.parse( slurp('example.pod') );
+=end code
+
+=head1 DESCRIPTION
+Under construction. Nothing works yet. No finish date promised either.
+
+This is a tree based parser for Perl 6 Pod as specified in
+L<S26|http://perlcabal.org/syn/S26.html>. The draft status of S26 is
+fine, assuming that any changes to it can be incorporated in future
+revisions. After all, Pod will always be Pod.
+
+Learning lessons from the stream based L<Pod::Parser|http://github.com/eric256/perl6-examples/blob/master/lib/Pod/Parser.pm>,
+this implementation puts all the code into a grammar and an action class.
+The grammar is designed for possible inclusion in STD.pm.
+Every implemented method must be fully tested with both valid and
+invalid cases.
+
+=head1 RATIONALE (please notify author about factual errors)
+Tree based parsers generally use more memory than stream based ones, on
+Pod as well as XML, because all the content must be retained, not just
+the data actively being processed. It's the Unix pipeline filters versus
+the load-save model. The current (March 2009) Parrot and Rakudo
+implementations, however, lack heap memory garbage collection, with dire
+consequences that hamper production use.
+
+The accepted priority for Parrot and Rakudo developers is functionality
+first, optimization second. Fair enough, but nearly every looping, long
+running process leaks memory. The operating system resorts to swapping
+hectically in a hopeless effort to provide more, but Parrot doesn't
+recycle and keeps calling malloc(). After almost hanging the computer
+for a long time the kernel usually kills the Parrot.
+
+Running L<Pod::Server|http://github.com/eric256/perl6-examples/blob/master/lib/Pod/Server.pm>
+and L<Pod::Parser|http://github.com/eric256/perl6-examples/blob/master/lib/Pod/Parser.pm>
+causes huge memory and CPU loads. Some approximate Pod::Server responses:
+
+ Document size time RAM
+ rakudo/docs/compiler_overview.pod 9k 54s 224M
+ rakudo/docs/glossary.pod 4k 19s 132M
+ rakudo/docs/guide_to_setting.pod 3k 15s 114M
+ rakudo/docs/release_guide.pod 1k 5s 94M
+ pugs/docs/Perl6/Spec/S01-overview.pod 5k 28s 158M
+ pugs/docs/Perl6/Spec/S02-bits.pod 151k 600s 1583M (died)
+ pugs/docs/Perl6/Spec/S03-operators.pod 150k
+ pugs/docs/Perl6/Spec/S04-control.pod 58k 340s 1048M (died)
+ pugs/docs/Perl6/Spec/S05-regex.pod 133k
+ pugs/docs/Perl6/Spec/S06-routine.pod 113k
+ pugs/docs/Perl6/Spec/S07-iterators.pod 7k 35s 174M
+ pugs/docs/Perl6/Spec/S09-data.pod 46k 255s 820M (died)
+ pugs/docs/Perl6/Spec/S10-packages.pod 8k 49s 204M
+ pugs/docs/Perl6/Spec/S11-modules.pod 22k 137s 433M
+
+These results were measured on a dual core amd64 4200+ with 2GB of
+memory, running Linux. Each process runs on only one CPU core :(
+
+Seeing memory use up to 15000 times document size is worrying.
+The developer guidelines that apply to Java and .NET probably apply to
+Rakudo as well - keep the application code small and shift as much of
+the heavy processing burden as possible to the standard libraries of the
+language.
+
+In L<code review of Pod::Parser|http://use.perl.org/~masak/journal/38644>
+Carl Mäsak++ wrote "all the grunt work, is made by the Pod6 grammar".
+That observation and the excellent code Matthew Walton has written in
+L<Form.pm|http://github.com/mattw/form/tree/master> inspired this tree
+parser. Through actions, the grammar will not only pattern match the Pod
+syntax but also dispatch to the handler, in most cases an emitter.
+If it works, comparing stream versus tree parsing performance will be
+interesting.
+In the spirit of "prepare to throw one away", let the best solution win.
+
+Stream processing redefines many small strings in frequent loops,
+requesting many heap allocations.
+Tree processing handles an entire document in a single call with
+grammar, regexes and action classes. It might be faster and more compact
+on the current Parrot and Rakudo.
+
+Another reason for this effort is to have another trial implementation
+to discover potential pitfalls before settling the S26 spec.
+
+=head1 TESTING
+A rigorous Test Driven Development policy must be enforced. Adding each
+functional unit begins by adding unit tests that accurately reference
+the specification. Before the arrival of suitable automated tools,
+coverage of the code and the specification must be assessed manually.
+
+=head2 Test Coverage
+
+=head3 Specification Implemented
+
+=head3 Specification To Do
+
+=head3 Code Tested
+
+=head3 Code Untested
+(covering these is always top priority)
+
+=head1 AUTHOR
+Martin Berends (mberends on CPAN, github, #perl6 and @autoexec.demon.nl).
+
+=head1 SEE ALSO
+L<S26|http://perlcabal.org/syn/S26.html>
+L<http://www.nntp.perl.org/group/perl.perl6.language/2007/07/msg27890.html>
+L<http://www.nntp.perl.org/group/perl.perl6.language/2007/07/msg27894.html>
+
+=end pod
View
@@ -0,0 +1,60 @@
+use Pod::Tree::Pod6; # the Perl 6 Pod grammar
+
+class Pod::Tree::Test { # action class that traces parsing through warn() messages
+ has @!output; # not read or written by any method in this class
+
+ method TOP($/) { # payload: the .ast of each <anyline> submatch
+ warn "starting TOP";
+ my @keys = @( $/<anyline> ); # NOTE(review): the active TOP rule of Pod::Tree::Pod6 has no <anyline> - confirm this capture exists
+ warn "KEYS: {@keys}";
+ make gather for @( $/<anyline> ) -> $m { take $m.ast; }
+ warn "leaving TOP";
+ }
+ method directive($/) { # re-makes the payload the match already carries, as a new array
+ warn "starting directive";
+ my $payload = [@( $/.ast )];
+ make $payload;
+ warn "leaving directive";
+ }
+ method beginpod($/) { # payload is built from the <typename> submatch
+ warn "starting beginpod";
+ my $payload = [@( $/<typename>.ast )]; # NOTE(review): this class has no typename method, so that .ast may be unset - confirm
+ make $payload;
+ }
+ method endpod($/) { # same shape as directive, without the trailing trace message
+ warn "starting endpod";
+ my $payload = [@( $/.ast )];
+ make $payload;
+ }
+ method content($/) { # same payload handling as directive
+ warn "starting content";
+ my $matched = ~ $/; # matched text; only used by the commented-out warn below
+ my $payload = [@( $/.ast )];
+ make $payload;
+# warn "matched '$matched'";
+ }
+
+ method parse( Str $doc ) { # parse $doc with the Pod6 grammar and return the result as a string
+ return ~ Pod::Tree::Pod6.parse(
+ $doc, :action( Pod::Tree::Test.new ) # actions go to a fresh Pod::Tree::Test, not to the invocant
+ )
+ }
+}
+
+# moritz_ but you can always make [@( $/.ast ), $new_value]
+# moritz_ that's basically the same as $/.ast.push($new_value), but it should actually work :-)
+# moritz_ .ast is the same as $(), ie the payload that you put onto the Match object by calling make
+
+=begin pod
+
+=head1 NAME
+Pod::Tree::Test - class to trace parsing of Pod
+
+=head1 SYNOPSIS
+=begin code
+# in shell
+perl6 -e'use Pod::Tree::Test; \
+say Pod::Tree::Test.new.parse("=begin pod\nHello, pod!\n=end pod\n");'
+=end code
+
+=end pod
View
@@ -2,35 +2,39 @@
use Test;
-multi sub eq_or_diff($got, $expected, Str $desc) is export()
+multi sub eq_or_diff($received, $expected, Str $desc) is export()
{
- is( $got, $expected, $desc ); # from module Test
+ is( $received, $expected, $desc ); # from module Test
- if $got ne $expected {
+ if $received ne $expected {
say "# failed $desc";
- say "# expected: ------------";
+ say "#### expected: " ~ "-" x 20;
my @expected = $expected.split("\n");
my $i = 0;
- say @expected.map({"#" ~ $i++ ~ "# $_"}).join("\n");
- say "# got: -----------------";
- my @got = $got.split("\n");
+ say @expected.map({"#" ~ $i++ ~ "## $_"}).join("\n");
+ say "#### received: " ~ "-" x 20;
+ my @received = $received.split("\n");
$i = 0;
- while $i < @got.elems {
+ while $i < @received.elems {
my $status = "!=";
- if $i < @expected.elems and @got[$i] eq @expected[$i] {
+ if $i < @expected.elems and @received[$i] eq @expected[$i] {
$status = "==";
}
- say "#$i$status {@got[$i]}";
+ say "#$i$status {@received[$i]}";
$i++;
}
}
}
=begin pod
+
=head1 NAME
-Test::Differences -
+Test::Differences - check test results and clearly contrast failures
+
=head1 TODO
Make a side by side comparison layout the way the Perl 5 version does.
+
=head1 SEE ALSO
The Perl 5 L<doc:Test::Differences> by Barrie Slaymaker.
+
=end pod
Oops, something went wrong.

0 comments on commit a171ebb

Please sign in to comment.