Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

initial import of Pod::Simple 0.90 from CPAN

git-cpan-module: Pod::Simple
git-cpan-version: 0.90
git-cpan-authorid: SBURKE
  • Loading branch information...
commit 57d957757dcb24b4c727c5ed4565dc05f0a80b59 0 parents
Sean M. Burke authored schwern committed
Showing with 12,523 additions and 0 deletions.
  1. +6 −0 ChangeLog
  2. +58 −0 MANIFEST
  3. +6 −0 MANIFEST.SKIP
  4. +34 −0 Makefile.PL
  5. +5 −0 README
  6. +1,334 −0 lib/Pod/Simple.pm
  7. +216 −0 lib/Pod/Simple.pod
  8. +1,266 −0 lib/Pod/Simple/BlackBox.pm
  9. +172 −0 lib/Pod/Simple/Checker.pm
  10. +99 −0 lib/Pod/Simple/Debug.pm
  11. +125 −0 lib/Pod/Simple/DumpAsText.pm
  12. +135 −0 lib/Pod/Simple/DumpAsXML.pm
  13. +263 −0 lib/Pod/Simple/HTML.pm
  14. +106 −0 lib/Pod/Simple/LinkSection.pm
  15. +75 −0 lib/Pod/Simple/Methody.pm
  16. +280 −0 lib/Pod/Simple/PullParser.pm
  17. +58 −0 lib/Pod/Simple/PullParserEndToken.pm
  18. +72 −0 lib/Pod/Simple/PullParserStartToken.pm
  19. +57 −0 lib/Pod/Simple/PullParserTextToken.pm
  20. +80 −0 lib/Pod/Simple/PullParserToken.pm
  21. +101 −0 lib/Pod/Simple/SimpleTree.pm
  22. +444 −0 lib/Pod/Simple/Subclassing.pod
  23. +152 −0 lib/Pod/Simple/Text.pm
  24. +86 −0 lib/Pod/Simple/TextContent.pm
  25. +61 −0 lib/Pod/Simple/TiedOutFH.pm
  26. +131 −0 lib/Pod/Simple/XMLOutStream.pm
  27. +137 −0 t/ac_c_extend.t
  28. +88 −0 t/ac_c_simple.t
  29. +87 −0 t/ac_d.t
  30. +87 −0 t/basic.t
  31. +451 −0 t/beginend.t
  32. +79 −0 t/cbacks.t
  33. +43 −0 t/chunking.t
  34. +106 −0 t/fcodes.t
  35. +92 −0 t/fcodes_ee.t
  36. +406 −0 t/fcodes_ell.t
  37. +78 −0 t/fcodes_ess.t
  38. +101 −0 t/for.t
  39. +103 −0 t/fornot.t
  40. +86 −0 t/heads.t
  41. +220 −0 t/items.t
  42. +64 −0 t/linkclass.t
  43. +352 −0 t/puller.t
  44. +145 −0 t/render.t
  45. +51 −0 t/sanity_tfh.t
  46. +151 −0 t/stree.t
  47. +8 −0 t/test_junk1.pod
  48. +5 −0 t/test_junk1_out.txt
  49. +6 −0 t/test_junk2.pod
  50. +13 −0 t/test_junk2_out.txt
  51. +592 −0 t/test_old_perlcygwin.pod
  52. +249 −0 t/test_old_perlcygwin_out.txt
  53. +814 −0 t/test_old_perlfaq3.pod
  54. +266 −0 t/test_old_perlfaq3_out.txt
  55. +1,234 −0 t/test_old_perlvar.pod
  56. +406 −0 t/test_old_perlvar_out.txt
  57. +363 −0 t/verbatims.t
  58. +218 −0 t/x_nixer.t
6 ChangeLog
@@ -0,0 +1,6 @@
+# ChangeLog for Pod::Simple dist # Time-stamp: "2002-09-11 01:19:39 MDT"
+
+2002-09-11 Sean M. Burke <sburke@cpan.org>
+ * Release 0.90
+ * Beta release. Much documentation still to write, many features
+ still to add. The APIs might change in future versions.
58 MANIFEST
@@ -0,0 +1,58 @@
+ChangeLog
+MANIFEST
+MANIFEST.SKIP
+Makefile.PL
+README
+lib/Pod/Simple.pm
+lib/Pod/Simple.pod
+lib/Pod/Simple/BlackBox.pm
+lib/Pod/Simple/Checker.pm
+lib/Pod/Simple/Debug.pm
+lib/Pod/Simple/DumpAsText.pm
+lib/Pod/Simple/DumpAsXML.pm
+lib/Pod/Simple/HTML.pm
+lib/Pod/Simple/LinkSection.pm
+lib/Pod/Simple/Methody.pm
+lib/Pod/Simple/PullParser.pm
+lib/Pod/Simple/PullParserEndToken.pm
+lib/Pod/Simple/PullParserStartToken.pm
+lib/Pod/Simple/PullParserTextToken.pm
+lib/Pod/Simple/PullParserToken.pm
+lib/Pod/Simple/SimpleTree.pm
+lib/Pod/Simple/Subclassing.pod
+lib/Pod/Simple/Text.pm
+lib/Pod/Simple/TextContent.pm
+lib/Pod/Simple/TiedOutFH.pm
+lib/Pod/Simple/XMLOutStream.pm
+t/ac_c_extend.t
+t/ac_c_simple.t
+t/ac_d.t
+t/basic.t
+t/beginend.t
+t/cbacks.t
+t/chunking.t
+t/fcodes.t
+t/fcodes_ee.t
+t/fcodes_ell.t
+t/fcodes_ess.t
+t/for.t
+t/fornot.t
+t/heads.t
+t/items.t
+t/linkclass.t
+t/puller.t
+t/render.t
+t/sanity_tfh.t
+t/stree.t
+t/test_junk1.pod
+t/test_junk1_out.txt
+t/test_junk2.pod
+t/test_junk2_out.txt
+t/test_old_perlcygwin.pod
+t/test_old_perlcygwin_out.txt
+t/test_old_perlfaq3.pod
+t/test_old_perlfaq3_out.txt
+t/test_old_perlvar.pod
+t/test_old_perlvar_out.txt
+t/verbatims.t
+t/x_nixer.t
6 MANIFEST.SKIP
@@ -0,0 +1,6 @@
+^MANIFEST\.bak$
+Makefile(\.old)?$
+\.rej$
+CVS
+blib
+~
34 Makefile.PL
@@ -0,0 +1,34 @@
+# This -*- perl -*- script writes the Makefile for Pod::Simple
+#
+# Time-stamp: "2002-09-11 01:22:50 MDT"
+#
+# See lib/ExtUtils/MakeMaker.pm for details of how to influence
+# the contents of the Makefile that is written.
+#
+require 5.004;
+use strict;
+use ExtUtils::MakeMaker;
+
+WriteMakefile
+ (
+ NAME => 'Pod::Simple',
+ VERSION_FROM => './lib/Pod/Simple.pm',
+ ABSTRACT_FROM => './lib/Pod/Simple.pod',
+ PREREQ_PM => {
+ 'Text::Wrap' => '98.112902',
+ 'Pod::Escapes' => '1.03',
+ },
+ # INSTALLDIRS => 'perl',
+ );
+
+
+package MY;
+
+sub libscan
+{ # Determine things that should *not* be installed
+ my($self, $path) = @_;
+ return '' if $path =~ m/~/;
+ $path;
+}
+
+__END__
5 README
@@ -0,0 +1,5 @@
+ Time-stamp: "2002-09-11 01:08:10 MDT"
+
+THIS IS A BETA RELEASE. The modules are wildly underdocumented, and
+the APIs may change.
+
1,334 lib/Pod/Simple.pm
@@ -0,0 +1,1334 @@
+
+require 5;
+package Pod::Simple;
+use strict;
+use Carp ();
+BEGIN { *DEBUG = sub () {0} unless defined &DEBUG }
+use integer;
+use Pod::Escapes 1.03 ();
+use Pod::Simple::LinkSection ();
+use Pod::Simple::BlackBox ();
+
+use vars qw(
+ $VERSION @ISA
+ @Known_formatting_codes @Known_directives
+ %Known_formatting_codes %Known_directives
+);
+
+@ISA = ('Pod::Simple::BlackBox');
+$VERSION = '0.90';
+
+@Known_formatting_codes = qw(I B C L E F S X Z);
+%Known_formatting_codes = map(($_=>1), @Known_formatting_codes);
+@Known_directives = qw(head1 head2 head3 head4 item over back);
+%Known_directives = map(($_=>'Plain'), @Known_directives);
+
+#-----------------------------------------------------------------------------
+# Set up some constants:
+
+BEGIN {
+ if(defined &ASCII) { }
+ elsif(chr(65) eq 'A') { *ASCII = sub () {1} }
+ else { *ASCII = sub () {''} }
+
+ unless(defined &MANY_LINES) { *MANY_LINES = sub () {20} }
+ DEBUG > 4 and print "MANY_LINES is ", MANY_LINES(), "\n";
+ unless(MANY_LINES() >= 1) {
+ die "MANY_LINES is too small (", MANY_LINES(), ")!\nAborting";
+ }
+}
+if(DEBUG > 2) {
+ print "# We are ", ASCII ? '' : 'not ', "in ASCII-land\n";
+}
+
+# Design note:
+# This is a parser for Pod. It is not a parser for the set of Pod-like
+# languages which happens to contain Pod -- it is just for Pod, plus possibly
+# some extensions.
+
+# @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @
+#@ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @
+#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+
+__PACKAGE__->_accessorize(
+ 'nbsp_for_S', # Whether to map S<...>'s to \xA0 characters
+ 'source_filename', # Filename of the source, for use in warnings
+ 'source_dead', # Whether to consider this parser's source dead
+
+ 'output_fh', # The filehandle we're writing to, if applicable.
+ # Used only in some derived classes.
+
+ 'hide_line_numbers', # For some dumping subclasses: whether to pointedly
+ # suppress the start_line attribute
+
+ 'line_count', # the current line number
+ 'pod_para_count', # count of pod paragraphs seen so far
+
+ 'no_whining', # whether to suppress whining
+ 'no_errata_section', # whether to suppress the errata section
+ 'complain_stderr', # whether to complain to stderr
+
+ 'doc_has_started', # whether we've fired the open-Document event yet
+
+ 'nix_X_codes', # whether to ignore X<...> codes
+ 'merge_text', # whether to avoid breaking a single piece of
+ # text up into several events
+
+ 'code_handler', # coderef to call when a code (non-pod) line is seen
+ 'cut_handler', # coderef to call when a =cut line is seen
+ 'content_seen', # whether we've seen any real Pod content
+ #Called like:
+ # $code_handler->($line, $self->{'line_count'}, $self) if $code_handler;
+ # $cut_handler->($line, $self->{'line_count'}, $self) if $cut_handler;
+
+);
+
+#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+# Pull in some functions that, for some reason, I expect to see here too:
+BEGIN {
+ *pretty = \&Pod::Simple::BlackBox::pretty;
+ *stringify_lol = \&Pod::Simple::BlackBox::stringify_lol;
+}
+
+#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+
+sub version_report {
+ my $class = ref($_[0]) || $_[0];
+ if($class eq __PACKAGE__) {
+ return "$class $VERSION";
+ } else {
+ my $v = $class->VERSION;
+ return "$class $v (" . __PACKAGE__ . " $VERSION)";
+ }
+}
+
+#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+
+#sub curr_open { # read-only list accessor
+# return @{ $_[0]{'curr_open'} || return() };
+#}
+#sub _curr_open_listref { $_[0]{'curr_open'} ||= [] }
+
+
+sub output_string {
+ # Works by faking out output_fh. Simplifies our code.
+ #
+ my $this = shift;
+ return $this->{'output_string'} unless @_; # GET. Yes, the FH.
+ require Pod::Simple::TiedOutFH;
+ my $x = (defined($_[0]) and ref($_[0])) ? $_[0] : \( $_[0] );
+ $$x = '' unless defined $$x;
+ DEBUG > 4 and print "# Output string set to $x ($$x)\n";
+ $this->{'output_fh'} = Pod::Simple::TiedOutFH->handle_on($_[0]);
+ return $this->{'output_string'} = ${ $this->{'output_fh'} };
+}
+
+#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+
+sub new {
+ # takes no parameters
+ my $class = ref($_[0]) || $_[0];
+ #Carp::croak(__PACKAGE__ . " is a virtual base class -- see perldoc "
+ # . __PACKAGE__ );
+ return bless {
+ 'accept_codes' => { map( ($_=>$_), @Known_formatting_codes ) },
+ 'accept_directives' => { %Known_directives },
+ 'accept_targets' => {},
+ }, $class;
+}
+
+
+
+# TODO: an option for whether to interpolate E<...>'s, or just resolve to codes.
+
+#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+
+sub _handle_element_start { # OVERRIDE IN DERIVED CLASS
+ my($self, $element_name, $attr_hash_r) = @_;
+ return;
+}
+
+sub _handle_element_end { # OVERRIDE IN DERIVED CLASS
+ my($self, $element_name) = @_;
+ return;
+}
+
+sub _handle_text { # OVERRIDE IN DERIVED CLASS
+ my($self, $text) = @_;
+ return;
+}
+
+#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+#
+# And now directives (not targets)
+
+sub accept_directive_as_verbatim { shift->_accept_directives('Verbatim', @_) }
+sub accept_directive_as_data { shift->_accept_directives('Data', @_) }
+sub accept_directive_as_processed { shift->_accept_directives('Plain', @_) }
+
+sub _accept_directives {
+ my($this, $type) = splice @_,0,2;
+ foreach my $d (@_) {
+ next unless defined $d and length $d;
+ Carp::croak "\"$d\" isn't a valid directive name"
+ unless $d =~ m/^[a-zA-Z][a-zA-Z0-9]*$/s;
+ Carp::croak "\"$d\" is already a reserved Pod directive name"
+ if exists $Known_directives{$d};
+ $this->{'accept_directives'}{$d} = $type;
+ DEBUG > 2 and print "Learning to accept \"=$d\" as directive of type $type\n";
+ }
+ DEBUG > 6 and print "$this\'s accept_directives : ",
+ pretty($this->{'accept_directives'}), "\n";
+
+ return sort keys %{ $this->{'accept_directives'} } if wantarray;
+ return;
+}
+
+#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+#
+# And now targets (not directives)
+
+sub accept_target { shift->accept_targets(@_) } # alias
+sub accept_target_as_text { shift->accept_targets_as_text(@_) } # alias
+
+
+sub accept_targets { shift->_accept_targets('1', @_) }
+
+sub accept_targets_as_text { shift->_accept_targets('force_resolve', @_) }
+ # forces them to be processed, even when there's no ":".
+
+sub _accept_targets {
+ my($this, $type) = splice @_,0,2;
+ foreach my $t (@_) {
+ next unless defined $t and length $t;
+ # TODO: enforce some limitations on what a target name can be?
+ $this->{'accept_targets'}{$t} = $type;
+ DEBUG > 2 and print "Learning to accept \"$t\" as target of type $type\n";
+ }
+ return sort keys %{ $this->{'accept_targets'} } if wantarray;
+ return;
+}
+
+
+#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+#
+# And now codes (not targets or directives)
+
+sub accept_code { shift->accept_codes(@_) } # alias
+
+sub accept_codes { # Add some codes
+ my $this = shift;
+
+ foreach my $new_code (@_) {
+ next unless defined $new_code and length $new_code;
+ if(ASCII) {
+ # A good-enough check that it's good as an XML Name symbol:
+ Carp::croak "\"$new_code\" isn't a valid element name"
+ if $new_code =~
+ m/[\x00-\x2C\x2F\x39\x3B-\x40\x5B-\x5E\x60\x7B-\x7F]/
+ # Characters under 0x80 that aren't legal in an XML Name.
+ or $new_code =~ m/^[-\.0-9]/s
+ or $new_code =~ m/:[-\.0-9]/s;
+ # The legal under-0x80 Name characters that
+ # an XML Name still can't start with.
+ }
+
+ $this->{'accept_codes'}{$new_code} = $new_code;
+
+ # Yes, map to itself -- just so that when we
+ # see "=extend W [whatever] thatelementname", we say that W maps
+ # to whatever $this->{accept_codes}{thatelementname} is,
+ # i.e., "thatelementname". Then when we go re-mapping,
+ # a "W" in the treelet turns into "thatelementname". We only
+ # remap once.
+ # If we say we accept "W", then a "W" in the treelet simply turns
+ # into "W".
+ }
+
+ return;
+}
+
+#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+
+sub parse_string_document {
+ my $self = shift;
+ my @lines;
+ foreach my $line_group (@_) {
+ next unless defined $line_group and length $line_group;
+ pos($line_group) = 0;
+ while($line_group =~
+ m/([^\n\r]*)((?:\r?\n)?)/g
+ ) {
+ #print(">> $1\n"),
+ $self->parse_lines($1)
+ if length($1) or length($2)
+ or pos($line_group) != length($line_group);
+ # I.e., unless it's a zero-length "empty line" at the very
+ # end of "foo\nbar\n" (i.e., between the \n and the EOS).
+ }
+ }
+ $self->parse_lines(undef); # to signal EOF
+ return $self;
+}
+
+
+sub parse_file {
+ my($self, $source) = (@_);
+
+ if(!defined $source) {
+ Carp::croak("Can't use empty-string as a source for parse_file");
+ } elsif(ref(\$source) eq 'GLOB') {
+ $self->{'source_filename'} = '' . ($source);
+ } elsif(ref $source) {
+ $self->{'source_filename'} = '' . ($source);
+ } elsif(!length $source) {
+ Carp::croak("Can't use empty-string as a source for parse_file");
+ } else {
+ {
+ local *PODSOURCE;
+ open(PODSOURCE, "<$source") || Carp::croak("Can't open $source: $!");
+ $self->{'source_filename'} = $source;
+ $source = *PODSOURCE{IO};
+ }
+ # TODO: file-discipline things here!
+ }
+ # By here, $source is a FH.
+
+ $self->{'source_fh'} = $source;
+
+ my($i, @lines);
+ until( $self->{'source_dead'} ) {
+ splice @lines;
+ for($i = MANY_LINES; $i--;) { # read those many lines at a time
+ push @lines, scalar(<$source>);
+ last unless defined $lines[-1];
+ # but pass thru the undef, which will set source_dead to true
+ }
+ $self->parse_lines(@lines);
+ }
+ delete($self->{'source_fh'}); # so it can be GC'd
+ return $self;
+}
+
+
+#-----------------------------------------------------------------------------
+
+sub whine {
+ #my($self,$line,$complaint) = @_;
+ my $self = shift(@_);
+ if($self->{'no_whining'}) {
+ DEBUG > 9 and print "Discarding complaint (at line $_[0]) $_[1]\n because no_whining is on.\n";
+ return;
+ }
+ return $self->_complain_warn(@_) if $self->{'complain_stderr'};
+ return $self->_complain_errata(@_);
+}
+
+sub scream { # like whine, but not suppressable
+ #my($self,$line,$complaint) = @_;
+ my $self = shift(@_);
+ return $self->_complain_warn(@_) if $self->{'complain_stderr'};
+ return $self->_complain_errata(@_);
+}
+
+sub _complain_warn {
+ my($self,$line,$complaint) = @_;
+ return printf STDERR "%s around line %s: %s\n",
+ $self->{'source_filename'} || 'Pod input', $line, $complaint;
+}
+
+sub _complain_errata {
+ my($self,$line,$complaint) = @_;
+ if( $self->{'no_errata_section'} ) {
+ DEBUG > 9 and print "Discarding erratum (at line $line) $complaint\n because no_errata_section is on.\n";
+ } else {
+ DEBUG > 9 and print "Queuing erratum (at line $line) $complaint\n";
+ push @{$self->{'errata'}{$line}}, $complaint
+ # for a report to be generated later!
+ }
+ return 1;
+}
+
+#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+
+
+sub _get_item_type { # mutates the item!!
+ my($self, $para) = @_;
+ return $para->[1]{'~type'} if $para->[1]{'~type'};
+ my $content = join "\n", @{$para}[2 .. $#$para];
+
+ if($content =~ m/^\s*\*\s*$/s or $content =~ m/^\s*$/s) {
+ # Like: "=item *", "=item * ", "=item"
+
+ splice @$para, 2; # so it ends up just being ['=item', { attrhash } ]
+ $para->[1]{'~orig_content'} = $content;
+ return $para->[1]{'~type'} = 'bullet';
+
+ } elsif($content =~ m/^\s*(\d+)\.?\s*$/s) {
+ # Like: "=item 1.", "=item 123412"
+
+ $para->[1]{'~orig_content'} = $content;
+ $para->[1]{'number'} = $1; # Yes, stores the number there!
+
+ splice @$para, 2; # so it ends up just being ['=item', { attrhash } ]
+ return $para->[1]{'~type'} = 'number';
+
+ } else {
+ # It's anything else.
+ return $para->[1]{'~type'} = 'text';
+
+ }
+}
+
+#-----------------------------------------------------------------------------
+
+sub _make_treelet {
+ my $self = shift; # and ($para, $start_line)
+ my $treelet;
+ if(!@_) {
+ return [''];
+ } if(ref $_[0] and ref $_[0][0] and $_[0][0][0] eq '~Top') {
+ # Hack so we can pass in fake-o pre-cooked paragraphs:
+ # just have the first line be a reference to a ['~Top', {}, ...]
+ # We use this feechure in gen_errata and stuff.
+
+ DEBUG and print "Applying precooked treelet hack to $_[0][0]\n";
+ $treelet = $_[0][0];
+ splice @$treelet, 0, 2; # lop the top off
+ return $treelet;
+ } else {
+ $treelet = $self->_treelet_from_formatting_codes(@_);
+ }
+
+ if( $self->_remap_sequences($treelet) ) {
+ $self->_treat_Zs($treelet); # Might as well nix these first
+ $self->_treat_Ls($treelet); # L has to precede E and S
+ $self->_treat_Es($treelet);
+ $self->_treat_Ss($treelet); # S has to come after E
+
+ $self->_wrap_up($treelet); # Nix X's and merge texties
+
+ } else {
+ DEBUG and print "Formatless treelet gets fast-tracked.\n";
+ # Very common case!
+ }
+
+ splice @$treelet, 0, 2; # lop the top off
+
+ return $treelet;
+}
+
+#:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.
+
+sub _wrap_up {
+ my($self, @stack) = @_;
+ my $nixx = $self->{'nix_X_codes'};
+ my $merge = $self->{'merge_text' };
+ return unless $nixx or $merge;
+
+ DEBUG > 2 and print "\nStarting _wrap_up traversal.\n",
+ $merge ? (" Merge mode on\n") : (),
+ $nixx ? (" Nix-X mode on\n") : (),
+ ;
+
+
+ my($i, $treelet);
+ while($treelet = shift @stack) {
+ DEBUG > 3 and print " Considering children of this $treelet->[0] node...\n";
+ for($i = 2; $i < @$treelet; ++$i) { # iterate over children
+ DEBUG > 3 and print " Considering child at $i ", pretty($treelet->[$i]), "\n";
+ if($nixx and ref $treelet->[$i] and $treelet->[$i][0] eq 'X') {
+ DEBUG > 3 and print " Nixing X node at $i\n";
+ splice(@$treelet, $i, 1); # just nix this node (and its descendants)
+ # no need to back-update the counter just yet
+ redo;
+
+ } elsif($merge and $i != 2 and # non-initial
+ !ref $treelet->[$i] and !ref $treelet->[$i - 1]
+ ) {
+ DEBUG > 3 and print " Merging ", $i-1,
+ ":[$treelet->[$i-1]] and $i\:[$treelet->[$i]]\n";
+ $treelet->[$i-1] .= ( splice(@$treelet, $i, 1) )[0];
+ DEBUG > 4 and print " Now: ", $i-1, ":[$treelet->[$i-1]]\n";
+ --$i;
+ next;
+ # since we just pulled the possibly last node out from under
+ # ourselves, we can't just redo()
+
+ } elsif( ref $treelet->[$i] ) {
+ DEBUG > 4 and print " Enqueuing ", pretty($treelet->[$i]), " for traversal.\n";
+ push @stack, $treelet->[$i];
+
+ if($treelet->[$i][0] eq 'L') {
+ my $thing;
+ foreach my $attrname ('section', 'to') {
+ if(defined($thing = $treelet->[$i][1]{$attrname}) and ref $thing) {
+ unshift @stack, $thing;
+ DEBUG > 4 and print " +Enqueuing ",
+ pretty( $treelet->[$i][1]{$attrname} ),
+ " as an attribute value to tweak.\n";
+ }
+ }
+ }
+ }
+ }
+ }
+ DEBUG > 2 and print "End of _wrap_up traversal.\n\n";
+
+ return;
+}
+
+#:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.
+
+sub _remap_sequences {
+ my($self,@stack) = @_;
+
+ if(@stack == 1 and @{ $stack[0] } == 3 and !ref $stack[0][2]) {
+ # VERY common case: abort it.
+ DEBUG and print "Skipping _remap_sequences: formatless treelet.\n";
+ return 0;
+ }
+
+ my $map = ($self->{'accept_codes'} || die "NO accept_codes in $self?!?");
+
+ my $start_line = $stack[0][1]{'start_line'};
+ DEBUG > 2 and printf
+ "\nAbout to start _remap_sequences on treelet from line %s.\n",
+ $start_line || '[?]'
+ ;
+ DEBUG > 3 and print " Map: ",
+ join('; ', map "$_=" . (
+ ref($map->{$_}) ? join(",", @{$map->{$_}}) : $map->{$_}
+ ),
+ sort keys %$map ),
+ ("B~C~E~F~I~L~S~X~Z" eq join '~', sort keys %$map)
+ ? " (all normal)\n" : "\n"
+ ;
+
+ # A recursive algorithm implemented iteratively! Whee!
+
+ my($is, $was, $i, $treelet); # scratch
+ while($treelet = shift @stack) {
+ DEBUG > 3 and print " Considering children of this $treelet->[0] node...\n";
+ for($i = 2; $i < @$treelet; ++$i) { # iterate over children
+ next unless ref $treelet->[$i]; # text nodes are uninteresting
+
+ DEBUG > 4 and print " Noting child $i : $treelet->[$i][0]<...>\n";
+
+ $is = $treelet->[$i][0] = $map->{ $was = $treelet->[$i][0] };
+ if( DEBUG > 3 ) {
+ if(!defined $is) {
+ print " Code $was<> is UNKNOWN!\n";
+ } elsif($is eq $was) {
+ DEBUG > 4 and print " Code $was<> stays the same.\n";
+ } else {
+ print " Code $was<> maps to ",
+ ref($is)
+ ? ( "tags ", map("$_<", @$is), '...', map('>', @$is), "\n" )
+ : "tag $is<...>.\n";
+ }
+ }
+
+ if(!defined $is) {
+ $self->whine($start_line, "Deleting unknown formatting code $was<>");
+ $is = $treelet->[$i][0] = '1'; # But saving the children!
+ # I could also insert a leading "$was<" and tailing ">" as
+ # children of this node, but something about that seems icky.
+ }
+ if(ref $is) {
+ my @dynasty = @$is;
+ DEBUG > 4 and print " Renaming $was node to $dynasty[-1]\n";
+ $treelet->[$i][0] = pop @dynasty;
+ my $nugget;
+ while(@dynasty) {
+ DEBUG > 4 and printf
+ " Grafting a new %s node between %s and %s\n",
+ $dynasty[-1], $treelet->[0], $treelet->[$i][0],
+ ;
+
+ #$nugget = ;
+ splice @$treelet, $i, 1, [pop(@dynasty), {}, $treelet->[$i]];
+ # relace node with a new parent
+ }
+ } elsif($is eq '0') {
+ splice(@$treelet, $i, 1); # just nix this node (and its descendants)
+ --$i; # back-update the counter
+ } elsif($is eq '1') {
+ splice(@$treelet, $i, 1 # replace this node with its children!
+ => splice @{ $treelet->[$i] },2
+ # (not catching its first two (non-child) items)
+ );
+ --$i; # back up for new stuff
+ } else {
+ # otherwise it's unremarkable
+ unshift @stack, $treelet->[$i]; # just recurse
+ }
+ }
+ }
+
+ DEBUG > 2 and print "End of _remap_sequences traversal.\n\n";
+
+ if(@_ == 2 and @{ $_[1] } == 3 and !ref $_[1][2]) {
+ DEBUG and print "Noting that the treelet is now formatless.\n";
+ return 0;
+ }
+ return 1;
+}
+
+# . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
+
+sub _ponder_extend {
+
+ # "Go to an extreme, move back to a more comfortable place"
+ # -- /Oblique Strategies/, Brian Eno and Peter Schmidt
+
+ my($self, $para) = @_;
+ my $content = join ' ', splice @$para, 2;
+ $content =~ s/^\s+//s;
+ $content =~ s/\s+$//s;
+
+ DEBUG > 2 and print "Ogling extensor: =extend $content\n";
+
+ if($content =~
+ m/^
+ (\S+) # 1 : new item
+ \s+
+ (\S+) # 2 : fallback(s)
+ (?:\s+(\S+))? # 3 : element name(s)
+ \s*
+ $
+ /xs
+ ) {
+ my $new_letter = $1;
+ my $fallbacks_one = $2;
+ my $elements_one;
+ $elements_one = defined($3) ? $3 : $1;
+
+ DEBUG > 2 and print "Extensor has good syntax.\n";
+
+ unless($new_letter =~ m/^[A-Z]$/s or $new_letter) {
+ DEBUG > 2 and print " $new_letter isn't a valid thing to entend.\n";
+ $self->whine(
+ $para->[1]{'start_line'},
+ "You can extend only formatting codes A-Z, not like \"$new_letter\""
+ );
+ return;
+ }
+
+ if(grep $new_letter eq $_, @Known_formatting_codes) {
+ DEBUG > 2 and print " $new_letter isn't a good thing to extend, because known.\n";
+ $self->whine(
+ $para->[1]{'start_line'},
+ "You can't extend an established code like \"$new_letter\""
+ );
+
+ #TODO: or allow if last bit is same?
+
+ return;
+ }
+
+ unless($fallbacks_one =~ m/^[A-Z](,[A-Z])*$/s # like "B", "M,I", etc.
+ or $fallbacks_one eq '0' or $fallbacks_one eq '1'
+ ) {
+ $self->whine(
+ $para->[1]{'start_line'},
+ "Format for second =extend parameter must be like"
+ . " M or 1 or 0 or M,N or M,N,O but like "
+ . $content
+ );
+ return;
+ }
+
+ unless($elements_one =~ m/^[^ ,]+(,[^ ,]+)*$/s) { # like "B", "M,I", etc.
+ $self->whine(
+ $para->[1]{'start_line'},
+ "Format for third =extend parameter: like foo or bar,Baz,qu:ux but not like "
+ . $elements_one
+ );
+ return;
+ }
+
+ my @fallbacks = split ',', $fallbacks_one, -1;
+ my @elements = split ',', $elements_one, -1;
+
+ foreach my $f (@fallbacks) {
+ next if exists $Known_formatting_codes{$f} or $f eq '0' or $f eq '1';
+ DEBUG > 2 and print " Can't fall back on unknown code $f\n";
+ $self->whine(
+ $para->[1]{'start_line'},
+ "Can't use unknown formatting code '$f' as a fallback for '$new_letter'"
+ );
+ return;
+ }
+
+ DEBUG > 3 and printf "Extensor: Fallbacks <%s> Elements <%s>.\n",
+ @fallbacks, @elements;
+
+ my $canonical_form;
+ foreach my $e (@elements) {
+ if(exists $self->{'accept_codes'}{$e}) {
+ DEBUG > 1 and print " Mapping '$new_letter' to known extension '$e'\n";
+ $canonical_form = $e;
+ last; # first acceptable elementname wins!
+ } else {
+ DEBUG > 1 and print " Can't map '$new_letter' to unknown extension '$e'\n";
+ }
+ }
+
+
+ if( defined $canonical_form ) {
+ # We found a good N => elementname mapping
+ $self->{'accept_codes'}{$new_letter} = $canonical_form;
+ DEBUG > 2 and print
+ "Extensor maps $new_letter => known element $canonical_form.\n";
+ } else {
+ # We have to use the fallback(s), which might be '0', or '1'.
+ $self->{'accept_codes'}{$new_letter}
+ = (@fallbacks == 1) ? $fallbacks[0] : \@fallbacks;
+ DEBUG > 2 and print
+ "Extensor maps $new_letter => fallbacks @fallbacks.\n";
+ }
+
+ } else {
+ DEBUG > 2 and print "Extensor has bad syntax.\n";
+ $self->whine(
+ $para->[1]{'start_line'},
+ "Unknown =extend syntax: $content"
+ )
+ }
+ return;
+}
+
+
+#:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.
+
+sub _treat_Zs { # Nix Z<...>'s
+ my($self,@stack) = @_;
+
+ my($i, $treelet);
+ my $start_line = $stack[0][1]{'start_line'};
+
+ # A recursive algorithm implemented iteratively! Whee!
+
+ while($treelet = shift @stack) {
+ for($i = 2; $i < @$treelet; ++$i) { # iterate over children
+ next unless ref $treelet->[$i]; # text nodes are uninteresting
+ unless($treelet->[$i][0] eq 'Z') {
+ unshift @stack, $treelet->[$i]; # recurse
+ next;
+ }
+
+ DEBUG > 1 and print "Nixing Z node @{$treelet->[$i]}\n";
+
+ # bitch UNLESS it's empty
+ unless( @{$treelet->[$i]} == 2
+ or (@{$treelet->[$i]} == 3 and $treelet->[$i][2] eq '')
+ ) {
+ $self->whine( $start_line, "A non-empty Z<>" );
+ } # but kill it anyway
+
+ splice(@$treelet, $i, 1); # thereby just nix this node.
+ --$i;
+
+ }
+ }
+
+ return;
+}
+
+# . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
+
+# Quoting perlpodspec:
+
+# In parsing an L<...> code, Pod parsers must distinguish at least four
+# attributes:
+
+############# Not used. Expressed via the element children plus
+############# the value of the "content-implicit" flag.
+# First:
+# The link-text. If there is none, this must be undef. (E.g., in "L<Perl
+# Functions|perlfunc>", the link-text is "Perl Functions". In
+# "L<Time::HiRes>" and even "L<|Time::HiRes>", there is no link text. Note
+# that link text may contain formatting.)
+#
+
+############# The element children
+# Second:
+# The possibly inferred link-text -- i.e., if there was no real link text,
+# then this is the text that we'll infer in its place. (E.g., for
+# "L<Getopt::Std>", the inferred link text is "Getopt::Std".)
+#
+
+############# The "to" attribute (which might be text, or a treelet)
+# Third:
+# The name or URL, or undef if none. (E.g., in "L<Perl
+# Functions|perlfunc>", the name -- also sometimes called the page -- is
+# "perlfunc". In "L</CAVEATS>", the name is undef.)
+#
+
+############# The "section" attribute (which might be next, or a treelet)
+# Fourth:
+# The section (AKA "item" in older perlpods), or undef if none. E.g., in
+# Getopt::Std/DESCRIPTION, "DESCRIPTION" is the section. (Note that this
+# is not the same as a manpage section like the "5" in "man 5 crontab".
+# "Section Foo" in the Pod sense means the part of the text that's
+# introduced by the heading or item whose text is "Foo".)
+#
+# Pod parsers may also note additional attributes including:
+#
+
+############# The "type" attribute.
+# Fifth:
+# A flag for whether item 3 (if present) is a URL (like
+# "http://lists.perl.org" is), in which case there should be no section
+# attribute; a Pod name (like "perldoc" and "Getopt::Std" are); or
+# possibly a man page name (like "crontab(5)" is).
+#
+
+############# Not implemented, I guess.
+# Sixth:
+# The raw original L<...> content, before text is split on "|", "/", etc,
+# and before E<...> codes are expanded.
+
+
+# For L<...> codes without a "name|" part, only E<...> and Z<> codes may
+# occur -- no other formatting codes. That is, authors should not use
+# "L<B<Foo::Bar>>".
+#
+# Note, however, that formatting codes and Z<>'s can occur in any and all
+# parts of an L<...> (i.e., in name, section, text, and url).
+
+sub _treat_Ls { # Process our dear dear friends, the L<...> sequences
+
+ # L<name>
+ # L<name/"sec"> or L<name/sec>
+ # L</"sec"> or L</sec> or L<"sec">
+ # L<text|name>
+ # L<text|name/"sec"> or L<text|name/sec>
+ # L<text|/"sec"> or L<text|/sec> or L<text|"sec">
+ # L<scheme:...>
+
+ my($self,@stack) = @_;
+
+ my($i, $treelet);
+ my $start_line = $stack[0][1]{'start_line'};
+
+ # A recursive algorithm implemented iteratively! Whee!
+
+ while($treelet = shift @stack) {
+ for(my $i = 2; $i < @$treelet; ++$i) {
+ # iterate over children of current tree node
+ next unless ref $treelet->[$i]; # text nodes are uninteresting
+ unless($treelet->[$i][0] eq 'L') {
+ unshift @stack, $treelet->[$i]; # recurse
+ next;
+ }
+
+
+ # By here, $treelet->[$i] is definitely an L node
+ DEBUG > 1 and print "Ogling L node $treelet->[$i]\n";
+
+ # bitch if it's empty
+ if( @{$treelet->[$i]} == 2
+ or (@{$treelet->[$i]} == 3 and $treelet->[$i][2] eq '')
+ ) {
+ $self->whine( $start_line, "An empty L<>" );
+ $treelet->[$i] = 'L<>'; # just make it a text node
+ next; # and move on
+ }
+
+ # Catch URLs:
+ # URLs can, alas, contain E<...> sequences, so we can't /assume/
+ # that this is one text node. But it has to START with one text
+ # node...
+ if(! ref $treelet->[$i][2] and
+ $treelet->[$i][2] =~ m/^\w+:[^:\s]\S*$/s
+ ) {
+ $treelet->[$i][1]{'type'} = 'url';
+ $treelet->[$i][1]{'content-implicit'} = 'yes';
+
+ if( 3 == @{ $treelet->[$i] } ) {
+ # But if it IS just one text node (most common case)
+ DEBUG > 1 and printf qq{Catching "%s as " as ho-hum L<URL> link.\n},
+ $treelet->[$i][2]
+ ;
+ $treelet->[$i][1]{'to'} = Pod::Simple::LinkSection->new(
+ $treelet->[$i][2]
+ ); # its own treelet
+ } else {
+ # It's a URL but complex (like "L<foo:bazE<123>bar>"). Feh.
+ #$treelet->[$i][1]{'to'} = [ @{$treelet->[$i]} ];
+ #splice @{ $treelet->[$i][1]{'to'} }, 0,2;
+ #DEBUG > 1 and printf qq{Catching "%s as " as complex L<URL> link.\n},
+ # join '~', @{$treelet->[$i][1]{'to' }};
+
+ $treelet->[$i][1]{'to'} = Pod::Simple::LinkSection->new(
+ $treelet->[$i] # yes, clone the whole content as a treelet
+ );
+ $treelet->[$i][1]{'to'}[0] = ''; # set the copy's tagname to nil
+ die "SANITY FAILURE" if $treelet->[0] eq ''; # should never happen!
+ DEBUG > 1 and print
+ qq{Catching "$treelet->[$i][1]{'to'}" as a complex L<URL> link.\n};
+ }
+
+ next; # and move on
+ }
+
+
+ # Catch some very simple and/or common cases
+ if(@{$treelet->[$i]} == 3 and ! ref $treelet->[$i][2]) {
+ my $it = $treelet->[$i][2];
+ if($it =~ m/^[-a-zA-Z0-9]+\([-a-zA-Z0-9]+\)$/s) { # man sections
+ # Hopefully neither too broad nor too restrictive a RE
+ DEBUG > 1 and print "Catching \"$it\" as manpage link.\n";
+ $treelet->[$i][1]{'type'} = 'man';
+ # This's the only place where man links can get made.
+ $treelet->[$i][1]{'content-implicit'} = 'yes';
+ $treelet->[$i][1]{'to' } =
+ Pod::Simple::LinkSection->new( $it ); # treelet!
+
+ next;
+ }
+ if($it =~ m/^[^\/\|,\$\%\@\ \"\<\>\:\#\&\*\{\}\[\]\(\)]+(\:\:[^\/\|,\$\%\@\ \"\<\>\:\#\&\*\{\}\[\]\(\)]+)*$/s) {
+ # Extremely forgiving idea of what constitutes a bare
+ # modulename link like L<Foo::Bar> or even L<Thing::1.0::Docs::Tralala>
+ DEBUG > 1 and print "Catching \"$it\" as ho-hum L<Modulename> link.\n";
+ $treelet->[$i][1]{'type'} = 'pod';
+ $treelet->[$i][1]{'content-implicit'} = 'yes';
+ $treelet->[$i][1]{'to' } =
+ Pod::Simple::LinkSection->new( $it ); # treelet!
+ next;
+ }
+ # else fall thru...
+ }
+
+
+
+ # ...Uhoh, here's the real L<...> parsing stuff...
+ # "With the ill behavior, with the ill behavior, with the ill behavior..."
+
+ DEBUG > 1 and print "Running a real parse on this non-trivial L\n";
+
+
+ my $link_text; # set to an arrayref if found
+ my $ell = $treelet->[$i];
+ my @ell_content = @$ell;
+ splice @ell_content,0,2; # Knock off the 'L' and {} bits
+
+ DEBUG > 3 and print " Ell content to start: ",
+ pretty(@ell_content), "\n";
+
+
+ # Look for the "|" -- only in CHILDREN (not all underlings!)
+ # Like L<I like the strictness|strict>
+ DEBUG > 3 and
+ print " Peering at L content for a '|' ...\n";
+ for(my $j = 0; $j < @ell_content; ++$j) {
+ next if ref $ell_content[$j];
+ DEBUG > 3 and
+ print " Peering at L-content text bit \"$ell_content[$j]\" for a '|'.\n";
+
+ if($ell_content[$j] =~ m/^([^\|]*)\|(.*)$/s) {
+ my @link_text = ($1); # might be 0-length
+ $ell_content[$j] = $2; # might be 0-length
+
+ DEBUG > 3 and
+ print " FOUND a '|' in it. Splitting into [$1] + [$2]\n";
+
+ unshift @link_text, splice @ell_content, 0, $j;
+ # leaving only things at J and after
+ @ell_content = grep ref($_)||length($_), @ell_content ;
+ $link_text = [grep ref($_)||length($_), @link_text ];
+ DEBUG > 3 and printf
+ " So link text is %s\n and remaining ell content is %s\n",
+ pretty($link_text), pretty(@ell_content);
+ last;
+ }
+ }
+
+
+ # Now look for the "/" -- only in CHILDREN (not all underlings!)
+ # And afterward, anything left in @ell_content will be the raw name
+ # Like L<Foo::Bar/Object Methods>
+ my $section_name; # set to arrayref if found
+ DEBUG > 3 and print " Peering at L-content for a '/' ...\n";
+ for(my $j = 0; $j < @ell_content; ++$j) {
+ next if ref $ell_content[$j];
+ DEBUG > 3 and
+ print " Peering at L-content text bit \"$ell_content[$j]\" for a '/'.\n";
+
+ if($ell_content[$j] =~ m/^([^\/]*)\/(.*)$/s) {
+ my @section_name = ($2); # might be 0-length
+ $ell_content[$j] = $1; # might be 0-length
+
+ DEBUG > 3 and
+ print " FOUND a '/' in it.",
+ " Splitting to page [...$1] + section [$2...]\n";
+
+ push @section_name, splice @ell_content, 1+$j;
+ # leaving only things before and including J
+
+ @ell_content = grep ref($_)||length($_), @ell_content ;
+ @section_name = grep ref($_)||length($_), @section_name ;
+
+ # Turn L<.../"foo"> into L<.../foo>
+ if(@section_name
+ and !ref($section_name[0]) and !ref($section_name[-1])
+ and $section_name[ 0] =~ m/^\"/s
+ and $section_name[-1] =~ m/\"$/s
+ and !( # catch weird degenerate case of L<"> !
+ @section_name == 1 and $section_name[0] eq '"'
+ )
+ ) {
+ $section_name[ 0] =~ s/^\"//s;
+ $section_name[-1] =~ s/\"$//s;
+ DEBUG > 3 and
+ print " Quotes removed: ", pretty(@section_name), "\n";
+ } else {
+ DEBUG > 3 and
+ print " No need to remove quotes in ", pretty(@section_name), "\n";
+ }
+
+ $section_name = \@section_name;
+ last;
+ }
+ }
+
+ # Turn L<"Foo Bar"> into L</Foo Bar>
+ if(!$section_name and @ell_content
+ and !ref($ell_content[0]) and !ref($ell_content[-1])
+ and $ell_content[ 0] =~ m/^\"/s
+ and $ell_content[-1] =~ m/\"$/s
+ and !( # catch weird degenerate case of L<"> !
+ @ell_content == 1 and $ell_content[0] eq '"'
+ )
+ ) {
+ $section_name = [splice @ell_content];
+ $section_name->[ 0] =~ s/^\"//s;
+ $section_name->[-1] =~ s/\"$//s;
+ }
+
+ # Turn L<Foo Bar> into L</Foo Bar>.
+ if(!$section_name and !$link_text and @ell_content
+ and grep !ref($_) && m/ /s, @ell_content
+ ) {
+ $section_name = [splice @ell_content];
+ # That's support for the now-deprecated syntax.
+ # (Maybe generate a warning eventually?)
+ # Note that it deliberately won't work on L<...|Foo Bar>
+ }
+
+
+ # Now make up the link_text
+ # L<Foo> -> L<Foo|Foo>
+ # L</Bar> -> L<"Bar"|Bar>
+ # L<Foo/Bar> -> L<"Bar" in Foo/Foo>
+ unless($link_text) {
+ $ell->[1]{'content-implicit'} = 'yes';
+ $link_text = [];
+ push @$link_text, '"', @$section_name, '"' if $section_name;
+
+ if(@ell_content) {
+ $link_text->[-1] .= ' in ' if $section_name;
+ push @$link_text, @ell_content;
+ }
+ }
+
+
+ # And the E resolver will have to deal with all our treeletty things:
+
+ if(@ell_content == 1 and !ref($ell_content[0])
+ and $ell_content[0] =~ m/^[-a-zA-Z0-9]+\([-a-zA-Z0-9]+\)$/s
+ ) {
+ $ell->[1]{'type'} = 'man';
+ DEBUG > 3 and print "Considering this ($ell_content[0]) a man link.\n";
+ } else {
+ $ell->[1]{'type'} = 'pod';
+ DEBUG > 3 and print "Considering this a pod link (not man or url).\n";
+ }
+
+ if( defined $section_name ) {
+ $ell->[1]{'section'} = Pod::Simple::LinkSection->new(
+ ['', {}, @$section_name]
+ );
+ DEBUG > 3 and print "L-section content: ", pretty($ell->[1]{'section'}), "\n";
+ }
+
+ if( @ell_content ) {
+ $ell->[1]{'to'} = Pod::Simple::LinkSection->new(
+ ['', {}, @ell_content]
+ );
+ DEBUG > 3 and print "L-to content: ", pretty($ell->[1]{'to'}), "\n";
+ }
+
+ # And update children to be the link-text:
+ @$ell = (@$ell[0,1], defined($link_text) ? splice(@$link_text) : '');
+
+ DEBUG > 2 and print "End of L-parsing for this node $treelet->[$i]\n";
+
+ unshift @stack, $treelet->[$i]; # might as well recurse
+ }
+ }
+
+ return;
+}
+
+# . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
+
+sub _treat_Es {
+ my($self,@stack) = @_;
+
+ my($i, $treelet, $content, $replacer);
+ my $start_line = $stack[0][1]{'start_line'};
+
+ # A recursive algorithm implemented iteratively! Whee!
+
+
+ # Has frightening side effects on L nodes' attributes.
+
+ #my @ells_to_tweak;
+
+ while($treelet = shift @stack) {
+ for(my $i = 2; $i < @$treelet; ++$i) { # iterate over children
+ next unless ref $treelet->[$i]; # text nodes are uninteresting
+ if($treelet->[$i][0] eq 'L') {
+ # SPECIAL STUFF for semi-processed L<>'s
+
+ my $thing;
+ foreach my $attrname ('section', 'to') {
+ if(defined($thing = $treelet->[$i][1]{$attrname}) and ref $thing) {
+ unshift @stack, $thing;
+ DEBUG > 2 and print " Enqueuing ",
+ pretty( $treelet->[$i][1]{$attrname} ),
+ " as an attribute value to tweak.\n";
+ }
+ }
+
+ unshift @stack, $treelet->[$i]; # recurse
+ next;
+ } elsif($treelet->[$i][0] ne 'E') {
+ unshift @stack, $treelet->[$i]; # recurse
+ next;
+ }
+
+ DEBUG > 1 and print "Ogling E node ", pretty($treelet->[$i]), "\n";
+
+ # bitch if it's empty
+ if( @{$treelet->[$i]} == 2
+ or (@{$treelet->[$i]} == 3 and $treelet->[$i][2] eq '')
+ ) {
+ $self->whine( $start_line, "An empty E<>" );
+ $treelet->[$i] = 'E<>'; # splice in a literal
+ next;
+ }
+
+ # bitch if content is weird
+ unless(@{$treelet->[$i]} == 3 and !ref($content = $treelet->[$i][2])) {
+ $self->whine( $start_line, "An E<...> surrounding strange content" );
+ $replacer = $treelet->[$i]; # scratch
+ splice(@$treelet, $i, 1, # fake out a literal
+ 'E<',
+ splice(@$replacer,2), # promote its content
+ '>'
+ );
+ # Don't need to do --$i, as the 'E<' we just added isn't interesting.
+ next;
+ }
+
+ DEBUG > 1 and print "Ogling E<$content>\n";
+
+ if(defined( $replacer = Pod::Escapes::e2char($content) )) {
+ DEBUG > 1 and print "Replacing E<$content> with $replacer\n";
+ } else {
+ DEBUG > 1 and print "I don't know how to deal with E<$content>\n";
+ $self->whine( $start_line, "Unknown E content in E<$content>" );
+ $replacer = "E<$content>"; # better than nothing
+ }
+
+ splice(@$treelet, $i, 1, $replacer); # no need to back up $i, tho
+ }
+ }
+
+ return;
+}
+
+
+# . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
+
+sub _treat_Ss {
+ my($self,$treelet) = @_;
+
+ _change_S_to_nbsp($treelet,0) if $self->{'nbsp_for_S'};
+
+ # TODO: or a change_nbsp_to_S
+ # Normalizing nbsp's to S is harder: for each text node, make S content
+ # out of anything matching m/([^ \xA0]*(?:\xA0+[^ \xA0]*)+)/
+
+
+ return;
+}
+
+
+sub _change_S_to_nbsp { # a recursive function
+ # Sanely assumes that the top node in the excursion won't be an S node.
+ my($treelet, $in_s) = @_;
+
+ my $is_s = ('S' eq $treelet->[0]);
+ $in_s ||= $is_s; # So in_s is on either by this being an S element,
+ # or by an ancestor being an S element.
+
+ for(my $i = 2; $i < @$treelet; ++$i) {
+ if(ref $treelet->[$i]) {
+ if( _change_S_to_nbsp( $treelet->[$i], $in_s ) ) {
+ my $to_pull_up = $treelet->[$i];
+ splice @$to_pull_up,0,2; # ...leaving just its content
+ splice @$treelet, $i, 1, @$to_pull_up; # Pull up content
+ $i += @$to_pull_up - 1; # Make $i skip the pulled-up stuff
+ }
+ } else {
+ $treelet->[$i] =~ tr/ /\xA0/ if ASCII and $in_s;
+ # (If not in ASCIIland, we can't assume that \xA0 == nbsp.)
+
+ # Note that if you apply nbsp_for_S to text, and so turn
+ # "foo S<bar baz> quux" into "foo bar&#160;faz quux", you
+ # end up with something that fails to say "and don't hyphenate
+ # any part of 'bar baz'". However, hyphenation is such a vexing
+ # problem anyway, that most Pod renderers just don't render it
+ # at all. But if you do want to implement hyphenation, I guess
+ # that you'd better have nbsp_for_S off.
+ }
+ }
+
+ return $is_s;
+}
+
+#-----------------------------------------------------------------------------
+
+sub _accessorize { # A simple-minded method-maker
+ no strict 'refs';
+ foreach my $attrname (@_) {
+ next if $attrname =~ m/::/; # a hack
+ *{caller() . '::' . $attrname} = sub {
+ use strict;
+ $Carp::CarpLevel = 1, Carp::croak(
+ "Accessor usage: \$obj->$attrname() or \$obj->$attrname(\$new_value)"
+ ) unless (@_ == 1 or @_ == 2) and ref $_[0];
+ (@_ == 1) ? $_[0]->{$attrname}
+ : ($_[0]->{$attrname} = $_[1]);
+ };
+ }
+ # Ya know, they say accessories make the ensemble!
+ return;
+}
+
+# . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
+# . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
+#=============================================================================
+
+sub filter {
+ my($class, $source) = @_;
+ my $new = $class->new;
+ $new->output_fh(*STDOUT{IO});
+
+ if(ref($source || '') eq 'SCALAR') {
+ $new->parse_string_document( $$source );
+ } elsif(ref($source)) { # it's a file handle
+ $new->parse_file($source);
+ } else { # it's a filename
+ $new->parse_file($source);
+ }
+
+ return $new;
+}
+
+
+#-----------------------------------------------------------------------------
+
+sub _out {
+ # For use in testing: Class->_out($source)
+ # returns the parse tree of $source
+
+ my $class = shift(@_);
+
+ my $mutor = shift(@_) if @_ and ref($_[0] || '') eq 'CODE';
+
+ DEBUG and print "\n\n", '#' x 76,
+ "\nAbout to parse source: {{\n$_[0]\n}}\n\n";
+
+
+ my $parser = $class->new;
+ $parser->hide_line_numbers(1);
+
+ my $out = '';
+ $parser->output_string( \$out );
+ DEBUG and print " _out to ", \$out, "\n";
+
+ $mutor->($parser) if $mutor;
+
+ $parser->parse_string_document( shift( @_ ) );
+ # use Data::Dumper; print Dumper($parser), "\n";
+ return $out
+}
+
+
+sub _duo {
+ # For use in testing: Class->_duo($source1, $source2)
+ # returns the parse trees of $source1 and $source2.
+ # Good in things like: &ok( Class->duo(... , ...) );
+
+ my $class = shift(@_);
+
+ Carp::croak "But $class->_duo is useful only in list context!"
+ unless wantarray;
+
+ my $mutor = shift(@_) if @_ and ref($_[0] || '') eq 'CODE';
+
+ Carp::croak "But $class->_duo takes two parameters, not: @_"
+ unless @_ == 2;
+
+ my(@out);
+
+ while( @_ ) {
+ my $parser = $class->new;
+
+ push @out, '';
+ $parser->output_string( \( $out[-1] ) );
+
+ DEBUG and print " _duo out to ", $parser->output_string(),
+ " = $parser->{'output_string'}\n";
+
+ $parser->hide_line_numbers(1);
+ $mutor->($parser) if $mutor;
+ $parser->parse_string_document( shift( @_ ) );
+ # use Data::Dumper; print Dumper($parser), "\n";
+ }
+
+ return @out;
+}
+
+
+
+#-----------------------------------------------------------------------------
+1;
+__END__
+
+TODO:
+A start_formatting_code and end_formatting_code methods, which in the
+base class call start_L, end_L, start_C, end_C, etc., if they are
+defined.
+
+have the POD FORMATTING ERRORS section note the localtime, and the
+version of Pod::Simple.
+
+option to delete all E<shy>s?
+option to scream if under-0x20 literals are found in the input, or
+under-E<32> E codes are found in the tree. And ditto \x7f-\x9f
+
+Option to turn highbit characters into their compromised form? (applies
+to E parsing too)
+
+TODO: BOM/encoding things.
+
+TODO: ascii-compat things in the XML classes?
+
216 lib/Pod/Simple.pod
@@ -0,0 +1,216 @@
+
+=head1 NAME
+
+Pod::Simple - framework for parsing Pod
+
+=head1 SYNOPSIS
+
+ TODO
+
+=head1 DESCRIPTION
+
+Pod::Simple is a Perl library for parsing text in the Pod ("plain old
+documentation") markup language that is typically used for writing
+documentation for Perl and for Perl modules. The Pod format is explained
+in the L<perlpod|perlpod> man page; the most common formatter is called
+"perldoc".
+
+Pod formatters can use Pod::Simple to parse Pod documents into produce
+renderings of them in plain ASCII, in HTML, or in any number of other
+formats. Typically, such formatters will be subclasses of Pod::Simple,
+and so they will inherit its methods, like C<parse_file>.
+
+If you're reading this document just because you have a Pod-processing
+subclass that you want to use, this document (plus the documentation for
+the subclass) is probably all you'll need to read.
+
+If you're reading this document because you want to write a formatter
+subclass, continue reading this document, and then read
+L<Pod::Simple::Subclassing>, and then possibly even read L<perlpodspec>
+(some of which is for parser-writers, but much of which is notes to
+formatter-writers).
+
+
+=head1 MAIN METHODS
+
+
+
+=over
+
+=item C<< $parser = I<SomeClass>->new(); >>
+
+This returns a new parser object, where I<C<SomeClass>> is a subclass
+of Pod::Simple.
+
+=item C<< $parser->output_fh( *OUT ); >>
+
+This sets the filehandle that C<$parser>'s output will be written to.
+You can pass C<*STDOUT>, otherwise you should probably do something
+like this:
+
+ my $outfile = "output.txt";
+ open TXTOUT, ">$outfile" or die "Can't write to $outfile: $!";
+ $parser->output_fh(*TXTOUT);
+
+...before you call one of the C<< $parser->parse_I<whatever> >> methods.
+
+=item C<< $parser->output_string( \$somestring ); >>
+
+This sets the string that C<$parser>'s output will be sent to,
+instead of any filehandle.
+
+
+=item C<< $parser->parse_file( I<$some_filename> ); >>
+
+=item C<< $parser->parse_file( *INPUT_FH ); >>
+
+This reads the Pod content of the file (or filehandle) that you specify,
+and processes it with that C<$parser> object, according to however
+C<$parser>'s class works, and according to whatever parser options you
+have set up for this C<$parser> object.
+
+=item C<< $parser->parse_string_document( I<$all_content> ); >>
+
+This works just like C<parse_file> except that it reads the Pod
+content not from a file, but from a string that you have already
+in memory.
+
+=item C<< $parser->parse_lines( I<...@lines...>, undef ); >>
+
+This processes the lines in C<@lines> (where each list item must be a
+defined value, and must contain exactly one line of content -- so no
+items like C<"foo\nbar"> are allowed). The final C<undef> is used to
+indicate the end of document being parsed.
+
+The other C<parser_I<whatever>> methods are meant to be called only once
+per C<$parser> object; but C<parse_lines> can be called as many times per
+C<$parser> object as you want, as long as the last call (and only
+the last call) ends with an C<undef> value.
+
+
+=item C<< $parser->content_seen >>
+
+This returns true only if there has been any real content seen
+for this document.
+
+
+=item C<< I<SomeClass>->filter( I<$filename> ); >>
+
+=item C<< I<SomeClass>->filter( I<*INPUT_FH> ); >>
+
+=item C<< I<SomeClass>->filter( I<\$document_content> ); >>
+
+This is a shortcut method for creating a new parser object, setting the
+output handle to STDOUT, and then processing the specified file (or
+filehandle, or in-memory document). This is handy for one-liners like
+this:
+
+ perl -MPod::Simple::Text -e "Pod::Simple::Text->filter('thingy.pod')"
+
+=back
+
+
+
+=head1 SECONDARY METHODS
+
+Some of these methods might be of interest to general users, as
+well as of interest to formatter-writers.
+
+Note that the general pattern here is that the accessor-methods
+read the attribute's value with C<< $value = $parser->I<attribute> >>
+and set the attribute's value with
+C<< $parser->I<attribute>(I<newvalue>) >>. For each accessor, I typically
+only mention one syntax or another, based on which I think you are actually
+most likely to use.
+
+
+=over
+
+=item C<< $parser->no_whining( I<SOMEVALUE> ) >>
+
+If you set this attribute to a true value, you will suppress the
+parser's complaints about irregularities in the Pod coding. By default,
+this attribute's value is false, meaning that irregularities will
+be reported.
+
+Note that turning this attribute to true won't suppress one or two kinds
+of complaints about rarely occurring unrecoverable errors.
+
+
+=item C<< $parser->no_errata_section( I<SOMEVALUE> ) >>
+
+If you set this attribute to a true value, you will stop the parser from
+generating a "POD ERRORS" section at the end of the document. By
+default, this attribute's value is false, meaning that an errata section
+will be generated, as necessary.
+
+
+=item C<< $parser->complain_stderr( I<SOMEVALUE> ) >>
+
+If you set this attribute to a true value, it will send reports of
+parsing errors to STDERR. By default, this attribute's value is false,
+meaning that no output is sent to STDERR.
+
+Note that errors can be noted in an errata section, or sent to STDERR,
+or both, or neither. So don't think that turning on C<complain_stderr>
+will turn off C<no_errata_section> or vice versa -- these are
+independent attributes.
+
+
+=item C<< $parser->source_filename >>
+
+This returns the filename that this parser object was set to read from.
+
+
+=item C<< $parser->doc_has_started >>
+
+This returns true if C<$parser> has read from a source, and has seen
+Pod content in it.
+
+
+=item C<< $parser->source_dead >>
+
+This returns true if C<$parser> has read from a source, and come to the
+end of that source.
+
+=back
+
+
+=head1 CAVEATS
+
+This is just a beta release -- there are a good number of things still
+left to do. Notably, support for EBCDIC platforms is still half-done,
+an untested.
+
+
+=head1 SEE ALSO
+
+L<Pod::Simple::Subclassing>
+
+L<perlpod|perlpod>
+
+L<perlpodspec|perlpodspec>
+
+L<Pod::Escapes|Pod::Escapes>
+
+L<perldoc>
+
+
+=head1 COPYRIGHT AND DISCLAIMERS
+
+Copyright (c) 2002 Sean M. Burke. All rights reserved.
+
+This library is free software; you can redistribute it and/or modify it
+under the same terms as Perl itself.
+
+This program is distributed in the hope that it will be useful, but
+without any warranty; without even the implied warranty of
+merchantability or fitness for a particular purpose.
+
+=head1 AUTHOR
+
+Sean M. Burke C<sburke@cpan.org>
+
+=cut
+
+
1,266 lib/Pod/Simple/BlackBox.pm
@@ -0,0 +1,1266 @@
+
+package Pod::Simple::BlackBox;
+#
+# "What's in the box?" "Pain."
+#
+###########################################################################
+#
+# This is where all the scary things happen: parsing lines into
+# paragraphs; and then into directives, verbatims, and then also
+# turning formatting sequences into treelets.
+#
+# Are you really sure you want to read this code?
+#
+#-----------------------------------------------------------------------------
+#
+# The basic work of this module Pod::Simple::BlackBox is doing the dirty work
+# of parsing Pod into treelets (generally one per non-verbatim paragraph), and
+# to call the proper callbacks on the treelets.
+#
+# Every node in a treelet is a ['name', {attrhash}, ...children...]
+
+use integer; # vroom!
+use strict;
+use Carp ();
+BEGIN {
+ require Pod::Simple;
+ *DEBUG = \&Pod::Simple::DEBUG unless defined &DEBUG
+}
+
+#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+
+sub parse_line { shift->parse_lines(@_) } # alias
+
+
+sub parse_lines { # Usage: $parser->parse_lines(@lines)
+ # an undef means end-of-stream
+ my $self = shift;
+
+ my $code_handler = $self->{'code_handler'};
+ my $cut_handler = $self->{'cut_handler'};
+ $self->{'line_count'} ||= 0;
+
+ my $scratch;
+
+ DEBUG > 4 and
+ print "# Parsing starting at line ", $self->{'line_count'}, ".\n";
+
+ DEBUG > 5 and
+ print "# About to parse lines: ",
+ join(' ', map defined($_) ? "[$_]" : "EOF", @_), "\n";
+
+ my $paras = ($self->{'paras'} ||= []);
+ # paragraph buffer. Because we need to defer processing of =over
+ # directives and verbatim paragraphs. We call _ponder_paragraph_buffer
+ # to process this.
+
+ $self->{'pod_para_count'} ||= 0;
+
+ my $line;
+ foreach my $source_line (@_) {
+ if( $self->{'source_dead'} ) {
+ DEBUG > 4 and print "# Source is dead.\n";
+ last;
+ }
+
+ unless( defined $source_line ) {
+ DEBUG > 4 and print "# Undef-line seen.\n";
+
+ push @$paras, ['~end', {'start_line' => $self->{'line_count'}}];
+ push @$paras, $paras->[-1], $paras->[-1];
+ # So that it definitely fills the buffer.
+ $self->{'source_dead'} = 1;
+ $self->_ponder_paragraph_buffer;
+ next;
+ }
+ ++$self->{'line_count'};
+
+ ($line = $source_line) =~ tr/\n\r//d;
+ # If we don't have two vars, we'll end up with that there
+ # tr/// modding the (potentially read-only) original source line!
+
+ DEBUG > 5 and print "# Parsing line: [$line]\n";
+
+ if(!$self->{'in_pod'}) {
+ if($line =~ m/^=([a-zA-Z]+)/s) {
+ if($1 eq 'cut') {
+ $self->scream(
+ $self->{'line_count'},
+ "=cut found outside a pod block. Aborting processing of this file."
+ );
+ splice @_;
+ push @_, undef;
+ next;
+ } else {
+ $self->{'in_pod'} = $self->{'start_of_pod_block'}
+ = $self->{'last_was_blank'} = 1;
+ # And fall thru to the pod-mode block further down
+ }
+ } else {
+ DEBUG > 5 and print "# It's a code-line.\n";
+ $code_handler->(map $_, $line, $self->{'line_count'}, $self)
+ if $code_handler;
+ # Note: this may cause code to be processed out of order relative
+ # to pods, but in order relative to cuts.
+
+ if( $line =~ m/^#\s*line\s+(\d+)\s*(?:\s"([^"]+)")?\s*$/ ) {
+ # That RE is from perlsyn, section "Plain Old Comments (Not!)",
+ #$fname = $2 if defined $2;
+ #DEBUG > 1 and defined $2 and print "# Setting fname to \"$fname\"\n";
+ DEBUG > 1 and print "# Setting nextline to $1\n";
+ $self->{'line_count'} = $1 - 1;
+ }
+
+ next;
+ }
+ }
+
+ # . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
+ # Else we're in pod mode:
+
+ if($line =~ m/^=cut/s) {
+ # here ends the pod block, and therefore the previous pod para
+ DEBUG > 1 and print "Noting =cut at line ${$self}{'line_count'}\n";
+ $self->{'in_pod'} = 0;
+ # ++$self->{'pod_para_count'};
+ $self->_ponder_paragraph_buffer();
+ # by now it's safe to consider the previous paragraph as done.
+ $cut_handler->(map $_, $line, $self->{'line_count'}, $self)
+ if $cut_handler;
+
+ # TODO: add to docs: Note: this may cause cuts to be processed out
+ # of order relative to pods, but in order relative to code.
+
+ } elsif($line =~ m/^\s*$/s) { # it's a blank line
+ if(!$self->{'start_of_pod_block'} and @$paras and $paras->[-1][0] eq '~Verbatim') {
+ DEBUG > 1 and print "Saving blank line at line ${$self}{'line_count'}\n";
+ push @{$paras->[-1]}, $line;
+ } # otherwise it's not interesting
+
+ if(!$self->{'start_of_pod_block'} and !$self->{'last_was_blank'}) {
+ DEBUG > 1 and print "Noting para ends with blank line at ${$self}{'line_count'}\n";
+ }
+
+ $self->{'last_was_blank'} = 1;
+
+ } elsif($self->{'last_was_blank'}) { # A non-blank line starting a new para...
+
+ if($line =~ m/^(=[a-zA-Z][a-zA-Z0-9]*)(?:\s+|$)(.*)/s) {
+ # THIS IS THE ONE PLACE WHERE WE CONSTRUCT NEW DIRECTIVE OBJECTS
+ my $new = [$1, {'start_line' => $self->{'line_count'}}, $2];
+ # Note that in "=head1 foo", the WS is lost.
+ # Example: ['=head1', {'start_line' => 123}, ' foo']
+
+ ++$self->{'pod_para_count'};
+ $self->_ponder_paragraph_buffer();
+ # by now it's safe to consider the previous paragraph as done.
+
+ push @$paras, $new; # the new incipient paragraph
+ DEBUG > 1 and print "Starting new ${$paras}[-1][0] para at line ${$self}{'line_count'}\n";
+
+ } elsif($line =~ m/^\s/s) {
+
+ if(!$self->{'start_of_pod_block'} and @$paras and $paras->[-1][0] eq '~Verbatim') {
+ DEBUG > 1 and print "Resuming verbatim para at line ${$self}{'line_count'}\n";
+ push @{$paras->[-1]}, $line;
+ } else {
+ ++$self->{'pod_para_count'};
+ $self->_ponder_paragraph_buffer();
+ # by now it's safe to consider the previous paragraph as done.
+ DEBUG > 1 and print "Starting verbatim para at line ${$self}{'line_count'}\n";
+ push @$paras, ['~Verbatim', {'start_line' => $self->{'line_count'}}, $line];
+ }
+ } else {
+ ++$self->{'pod_para_count'};
+ $self->_ponder_paragraph_buffer();
+ # by now it's safe to consider the previous paragraph as done.
+ push @$paras, ['~Para', {'start_line' => $self->{'line_count'}}, $line];
+ DEBUG > 1 and print "Starting plain para at line ${$self}{'line_count'}\n";
+ }
+ $self->{'last_was_blank'} = $self->{'start_of_pod_block'} = 0;
+
+ } else {
+ # It's a non-blank line /continuing/ the current para
+ if(@$paras) {
+ DEBUG > 2 and print "Line ${$self}{'line_count'} continues current paragraph\n";
+ push @{$paras->[-1]}, $line;
+ } else {
+ # Unexpected case!
+ die "Continuing a paragraph but \@\$paras is empty?";
+ }
+ $self->{'last_was_blank'} = $self->{'start_of_pod_block'} = 0;
+ }
+
+ } # ends the big while loop
+
+ DEBUG > 1 and print(pretty(@$paras), "\n");
+ return $self;
+}
+
+#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+
+# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+{
+my $m = -321; # magic line number
+
+sub _gen_errata {
+ my $self = $_[0];
+ # Return 0 or more fake-o paragraphs explaining the accumulated
+ # errors on this document.
+
+ return() unless $self->{'errata'} and keys %{$self->{'errata'}};
+
+ my @out;
+
+ foreach my $line (sort {$a <=> $b} keys %{$self->{'errata'}}) {
+ push @out,
+ ['=item', {'start_line' => $m}, "Around line $line:"],
+ map( ['~Para', {'start_line' => $m, '~cooked' => 1},
+ #['~Top', {'start_line' => $m},
+ $_
+ #]
+ ],
+ @{$self->{'errata'}{$line}}
+ )
+ ;
+ }
+
+ # TODO: report of unknown entities? unrenderable characters?
+
+ unshift @out,
+ ['=head1', {'start_line' => $m, 'errata' => 1}, 'POD ERRORS'],
+ ['~Para', {'start_line' => $m, '~cooked' => 1, 'errata' => 1},
+ "Hey! ",
+ ['B', {},
+ 'The above document had some coding errors, which are explained below:'
+ ]
+ ],
+ ['=over', {'start_line' => $m, 'errata' => 1}, ''],
+ ;
+
+ push @out,
+ ['=back', {'start_line' => $m, 'errata' => 1}, ''],
+ ;
+
+ DEBUG and print "\n<<\n", pretty(\@out), "\n>>\n\n";
+
+ return @out;
+}
+
+}
+
+#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+
+##############################################################################
+##
+## stop reading now stop reading now stop reading now stop reading now stop
+##
+## HERE IT BECOMES REALLY SCARY
+##
+## stop reading now stop reading now stop reading now stop reading now stop
+##
+##############################################################################
+
+sub _ponder_paragraph_buffer {
+
+ # Para-token types as found in the buffer.
+ # ~Verbatim, ~Para, ~end, =head1..4, =for, =begin, =end,
+ # =over, =back, =item
+ # and the null =pod (to be complained about if over one line)
+ #
+ # "~data" paragraphs are something we generate at this level, depending on
+ # a currently open =over region
+
+ # Events fired: Begin and end for:
+ # directivename (like head1 .. head4), item, extend,
+ # for (from =begin...=end, =for),
+ # over-bullet, over-number, over-text, over-block,
+ # item-bullet, item-number, item-text,
+ # Document,
+ # Data, Para, Verbatim
+ # B, C, longdirname (TODO -- wha?), etc. for all directives
+ #
+
+ my $self = $_[0];
+ my $paras;
+ return unless @{$paras = $self->{'paras'}};
+ my $curr_open = ($self->{'curr_open'} ||= []);
+
+ my $scratch;
+
+ DEBUG > 10 and print "# Paragraph buffer: <<", pretty($paras), ">>\n";
+
+ # We have something in our buffer. So apparently the document has started.
+ unless($self->{'doc_has_started'}) {
+ $self->{'doc_has_started'} = 1;
+
+ my $starting_contentless;
+ $starting_contentless =
+ (
+ !@$curr_open
+ and @$paras and ! grep $_->[0] ne '~end', @$paras
+ # i.e., if the paras is all ~ends
+ )
+ ;
+ DEBUG and print "# Starting ",
+ $starting_contentless ? 'contentless' : 'contentful',
+ " document\n"
+ ;
+
+ $self->_handle_element_start(
+ ($scratch = 'Document'),
+ {
+ 'start_line' => $paras->[0][1]{'start_line'},
+ $starting_contentless ? ( 'contentless' => 1 ) : (),
+ },
+ );
+ }
+
+ my($para, $para_type);
+ while(@$paras) {
+ last if @$paras == 1 and
+ ( $paras->[0][0] eq '=over' or $paras->[0][0] eq '~Verbatim'
+ or $paras->[0][0] eq '=item' )
+ ;
+ # Those're the three kinds of paragraphs that require lookahead.
+ # Actually, an "=item Foo" inside an <over type=text> region
+ # and any =item inside an <over type=block> region (rare)
+ # don't require any lookahead, but all others (bullets
+ # and numbers) do.
+
+# TODO: winge about many kinds of directives in non-resolving =for regions?
+# TODO: many? like what? =head1 etc?
+
+ $para = shift @$paras;
+ $para_type = $para->[0];
+
+ DEBUG > 1 and print "Pondering a $para_type paragraph, given the stack: (",
+ $self->_dump_curr_open(), ")\n";
+
+ if($para_type eq '=for') { #//////////////////////////////////////////////
+ # Fake it out as a begin/end
+ my $target;
+
+ if(grep $_->[1]{'~ignore'}, @$curr_open) {
+ DEBUG > 1 and print "Ignoring ignorable =for\n";
+ next;
+ }
+
+ for(my $i = 2; $i < @$para; ++$i) {
+ if($para->[$i] =~ s/^\s*(\S+)\s*//s) {
+ $target = $1;
+ last;
+ }
+ }
+ unless(defined $target) {
+ $self->whine(
+ $para->[1]{'start_line'},
+ "=for without a target?"
+ );
+ next;
+ }
+ DEBUG > 1 and
+ print "Faking out a =for $target as a =begin $target / =end $target\n";
+
+ $para->[0] = 'Data';
+
+ unshift @$paras,
+ ['=begin',
+ {'start_line' => $para->[1]{'start_line'}, '~really' => '=for'},
+ $target,
+ ],
+ $para,
+ ['=end',
+ {'start_line' => $para->[1]{'start_line'}, '~really' => '=for'},
+ $target,
+ ],
+ ;
+
+ next;
+
+ } elsif($para_type eq '=begin') { #///////////////////////////////////////
+
+ my $content = join ' ', splice @$para, 2;
+ $content =~ s/^\s+//s;
+ $content =~ s/\s+$//s;
+ unless(length($content)) {
+ $self->whine(
+ $para->[1]{'start_line'},
+ "=begin without a target?"
+ );
+ DEBUG and print "Ignoring targetless =begin\n";
+ next;
+ }
+
+ unless($content =~ m/^\S+$/s) { # i.e., unless it's one word
+ $self->whine(
+ $para->[1]{'start_line'},
+ "'=begin' only takes one parameter, not several as in '=begin $content'"
+ );
+ DEBUG and print "Ignoring unintelligible =begin $content\n";
+ next;
+ }
+
+
+ $para->[1]{'target'} = $content; # without any ':'
+
+ $content =~ s/^:!/!:/s;
+ my $neg; # whether this is a negation-match
+ $neg = 1 if $content =~ s/^!//s;
+ my $to_resolve; # whether to process formatting codes
+ $to_resolve = 1 if $content =~ s/^://s;
+
+ my $dont_ignore; # whether this target matches us
+
+ foreach my $target_name (
+ split(',', $content, -1),
+ $neg ? () : '*'
+ ) {
+ DEBUG > 2 and
+ print " Considering whether =begin $content matches $target_name\n";
+ next unless $self->{'accept_targets'}{$target_name};
+
+ DEBUG > 2 and
+ print " It DOES match the acceptable target $target_name!\n";
+ $to_resolve = 1
+ if $self->{'accept_targets'}{$target_name} eq 'force_resolve';
+ $dont_ignore = 1;
+ $para->[1]{'target_matching'} = $target_name;
+ last; # stop looking at other target names
+ }
+
+ if($neg) {
+ if( $dont_ignore ) {
+ $dont_ignore = '';
+ delete $para->[1]{'target_matching'};
+ DEBUG > 2 and print " But the leading ! means that this is a NON-match!\n";
+ } else {
+ $dont_ignore = 1;
+ $para->[1]{'target_matching'} = '!';
+ DEBUG > 2 and print " But the leading ! means that this IS a match!\n";
+ }
+ }
+
+ $para->[0] = '=for'; # Just what we happen to call these, internally
+ $para->[1]{'~really'} ||= '=begin';
+ $para->[1]{'~ignore'} = ! $dont_ignore;
+ $para->[1]{'~resolve'} = $to_resolve;
+
+ DEBUG > 1 and print " Making note to ", $dont_ignore ? 'not ' : '',
+ "ignore contents of this region\n";
+ DEBUG > 1 and $dont_ignore and print " Making note to treat contents as ",
+ ($to_resolve ? 'verbatim/plain' : 'data'), " paragraphs\n";
+ DEBUG > 1 and print " (Stack now: ", $self->_dump_curr_open(), ")\n";
+
+ push @$curr_open, $para;
+ if(!$dont_ignore or scalar grep $_->[1]{'~ignore'}, @$curr_open) {
+ DEBUG > 1 and print "Ignoring ignorable =begin\n";
+ } else {
+ $self->{'content_seen'} ||= 1;
+ $self->_handle_element_start(($scratch='for'), $para->[1]);
+ }
+
+ next;
+
+ } elsif($para_type eq '=end') { #/////////////////////////////////////////
+
+ my $content = join ' ', splice @$para, 2;
+ $content =~ s/^\s+//s;
+ $content =~ s/\s+$//s;
+ DEBUG and print "Ogling '=end $content' directive\n";
+
+ unless(length($content)) {
+ $self->whine(
+ $para->[1]{'start_line'},
+ "'=end' without a target?" . (
+ ( @$curr_open and $curr_open->[-1][0] eq '=for' )
+ ? ( " (Should be \"=end " . $curr_open->[-1][1]{'target'} . '")' )
+ : ''
+ )
+ );
+ DEBUG and print "Ignoring targetless =end\n";
+ next;
+ }
+
+ unless($content =~ m/^\S+$/) { # i.e., unless it's one word
+ $self->whine(
+ $para->[1]{'start_line'},
+ "'=end $content' is invalid. (Stack: "
+ . $self->_dump_curr_open() . ')'
+ );
+ DEBUG and print "Ignoring mistargetted =end $content\n";
+ next;
+ }
+
+ unless(@$curr_open and $curr_open->[-1][0] eq '=for') {
+ $self->whine(
+ $para->[1]{'start_line'},
+ "=end $content without matching =begin. (Stack: "
+ . $self->_dump_curr_open() . ')'
+ );
+ DEBUG and print "Ignoring mistargetted =end $content\n";
+ next;
+ }
+
+ unless($content eq $curr_open->[-1][1]{'target'}) {
+ $self->whine(
+ $para->[1]{'start_line'},
+ "=end $content doesn't match =begin "
+ . $curr_open->[-1][1]{'target'}
+ . ". (Stack: "
+ . $self->_dump_curr_open() . ')'
+ );
+ DEBUG and print "Ignoring mistargetted =end $content at line $para->[1]{'start_line'}\n";
+ next;
+ }
+
+ # Else it's okay to close...
+ if(grep $_->[1]{'~ignore'}, @$curr_open) {
+ DEBUG > 1 and print "Not firing any event for this =end $content because in an ignored region\n";
+ # And that may be because of this to-be-closed =for region, or some
+ # other one, but it doesn't matter.
+ } else {
+ $curr_open->[-1][1]{'start_line'} = $para->[1]{'start_line'};
+ # what's that for?
+
+ $self->{'content_seen'} ||= 1;
+ $self->_handle_element_end( $scratch = 'for' );
+ }
+ DEBUG > 1 and print "Popping $curr_open->[-1][0] $curr_open->[-1][1]{'target'} because of =end $content\n";
+ pop @$curr_open;
+
+ next;
+
+ } elsif($para_type eq '~end') { #/////////////////////////////////////////
+ # The virtual end-document signal
+
+ if(@$curr_open) { # Deal with things left open
+ DEBUG and print "Stack is nonempty at end-document: (",
+ $self->_dump_curr_open(), ")\n";
+
+ DEBUG > 9 and print "Stack: ", pretty($curr_open), "\n";
+ foreach my $still_open (@$curr_open) {
+ my @copy = @$still_open;
+ $copy[1] = {%{ $copy[1] }};
+ #$copy[1]{'start_line'} = -1;
+ if($copy[0] eq '=for') {
+ $copy[0] = '=end';
+ } elsif($copy[0] eq '=over') {
+ $copy[0] = '=back';
+ } else {
+ die "I don't know how to auto-close an open $copy[0] region";
+ }
+
+ unless( @copy > 2 ) {
+ push @copy, $copy[1]{'target'};
+ $copy[-1] = '' unless defined $copy[-1];
+ # since =over's don't have targets
+ }
+
+ DEBUG and print "Queuing up fake-o event: ", pretty(\@copy), "\n";
+ unshift @$paras, \@copy;
+ }
+ # Make sure there is exactly one ~end in the parastack, at the end:
+ @$paras = grep $_->[0] ne '~end', @$paras;
+ push @$paras, $para, $para;
+ # We need two -- once for the next cycle where we
+ # generate errata, and then another to be at the end
+ # when that loop back around to process the errata.
+ next;
+
+ } else {
+ DEBUG and print "Okay, stack is empty now.\n";
+ }
+
+ # Try generating errata section, if applicable
+ unless($self->{'~tried_gen_errata'}) {
+ $self->{'~tried_gen_errata'} = 1;
+ my @extras = $self->_gen_errata();
+ if(@extras) {
+ unshift @$paras, @extras;
+ DEBUG and print "Generated errata... relooping...\n";
<