Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Browse files

implement 'parse_characters' option

This option allows the user to supply POD source that has already been
decoded to Perl's internal character format
  • Loading branch information...
commit 0425bade8b8c1969ad3d7f57896a57689251858e 1 parent 18c3e79
@grantm grantm authored
View
2  lib/Pod/Simple.pm
@@ -87,6 +87,8 @@ __PACKAGE__->_accessorize(
'preserve_whitespace', # whether to try to keep whitespace as-is
'strip_verbatim_indent', # What indent to strip from verbatim
+ 'parse_characters', # Whether parser should expect chars rather than octets
+
'content_seen', # whether we've seen any real Pod content
'errors_seen', # TODO: document. whether we've seen any errors (fatal or not)
View
12 lib/Pod/Simple.pod
@@ -123,6 +123,14 @@ most likely to use.
=over
+=item C<< $parser->parse_characters( I<SOMEVALUE> ) >>
+
+The Pod parser normally expects to read octets and to convert those octets
+to characters based on the C<=encoding> declaration in the Pod source. Set
+this option to a true value to indicate that the Pod source is already a Perl
+character stream. This tells the parser to ignore any C<=encoding> command
+and to skip all the code paths involving decoding octets.
+
=item C<< $parser->no_whining( I<SOMEVALUE> ) >>
If you set this attribute to a true value, you will suppress the
@@ -335,6 +343,10 @@ attempt to guess the encoding (selecting one of UTF-8 or Latin-1) by examining
the first non-ASCII bytes and applying the heuristic described in
L<perlpodspec>.
+If you set the C<parse_characters> option to a true value, the parser will
+expect characters rather than octets, will ignore any C<=encoding> command,
+and will make no attempt to decode the input.
+
=head1 CAVEATS
This is just a beta release -- there are a good number of things still
View
4 lib/Pod/Simple/BlackBox.pm
@@ -123,7 +123,7 @@ sub parse_lines { # Usage: $parser->parse_lines(@lines)
}
}
- if(!$self->{'encoding'}) {
+ if(!$self->parse_characters && !$self->{'encoding'}) {
$self->_try_encoding_guess($line)
}
@@ -272,6 +272,8 @@ sub parse_lines { # Usage: $parser->parse_lines(@lines)
sub _handle_encoding_line {
my($self, $line) = @_;
+ return if $self->parse_characters;
+
# The point of this routine is to set $self->{'_transcoder'} as indicated.
return $line unless $line =~ m/^=encoding\s+(\S+)\s*$/s;
View
58 t/enc-chars.t
@@ -0,0 +1,58 @@
+# Tell the parser the source POD has already been decoded from bytes to chars:
+#  - the =encoding line should be ignored
+#  - utf8 characters should come through unscathed
+
+BEGIN {
+    # If PERL_CORE is set, run from t/ and load modules from ../lib only.
+    if($ENV{PERL_CORE}) {
+        chdir 't';
+        @INC = '../lib';
+    }
+}
+
+use strict;
+use Test;
+BEGIN { plan tests => 3 };
+
+use Pod::Simple::DumpAsXML;
+use Pod::Simple::XMLOutStream;
+
+
+my $parser = Pod::Simple::XMLOutStream->new;
+$parser->parse_characters(1);
+my $output = '';
+$parser->output_string( \$output );
+$parser->parse_string_document(qq{
+
+=encoding bogocode
+
+=head1 DESCRIPTION
+
+Confirm that if we tell the parser to expect character data, it avoids all
+the code paths that might attempt to decode the source from bytes to chars.
+
+The r\x{101}in in \x{15E}pain \x{FB02}oods the plain
+
+});
+
+ok(1); # parsed without exception
+
+# Fail if the output mentions POD ERRORS (e.g. about the bogus =encoding).
+if($output =~ /POD ERRORS/) {
+  ok(0);
+}
+else {
+  ok(1); # no errors
+}
+
+# Convert decimal character references (&#NNN;) back into characters.
+$output =~ s{&#(\d+);}{chr($1)}eg;
+
+if($output =~ /The r\x{101}in in \x{15E}pain \x{FB02}oods the plain/) {
+  ok(1); # data was not messed up
+}
+else {
+  ok(0);
+}
+
+exit;
Please sign in to comment.
Something went wrong with that request. Please try again.