Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse code

implement 'parse_characters' option

This option allows the user to supply POD source that has already been
decoded to Perl's internal character format
  • Loading branch information...
commit 0425bade8b8c1969ad3d7f57896a57689251858e 1 parent 18c3e79
Grant McLean authored August 11, 2012
2  lib/Pod/Simple.pm
@@ -87,6 +87,8 @@ __PACKAGE__->_accessorize(
87 87
   'preserve_whitespace', # whether to try to keep whitespace as-is
88 88
   'strip_verbatim_indent', # What indent to strip from verbatim
89 89
 
  90
+  'parse_characters',  # Whether parser should expect chars rather than octets
  91
+
90 92
  'content_seen',      # whether we've seen any real Pod content
91 93
  'errors_seen',       # TODO: document.  whether we've seen any errors (fatal or not)
92 94
 
12  lib/Pod/Simple.pod
Source Rendered
@@ -123,6 +123,14 @@ most likely to use.
123 123
 
124 124
 =over
125 125
 
  126
+=item C<< $parser->parse_characters( I<SOMEVALUE> ) >>
  127
+
  128
+The Pod parser normally expects to read octets and to convert those octets
  129
+to characters based on the C<=encoding> declaration in the Pod source.  Set
  130
+this option to a true value to indicate that the Pod source is already a Perl
  131
+character stream.  This tells the parser to ignore any C<=encoding> command
  132
+and to skip all the code paths involving decoding octets.
  133
+
126 134
 =item C<< $parser->no_whining( I<SOMEVALUE> ) >>
127 135
 
128 136
 If you set this attribute to a true value, you will suppress the
@@ -335,6 +343,10 @@ attempt to guess the encoding (selecting one of UTF-8 or Latin-1) by examining
335 343
 the first non-ASCII bytes and applying the heuristic described in
336 344
 L<perlpodspec>.
337 345
 
  346
+If you set the C<parse_characters> option to a true value the parser will
  347
+expect characters rather than octets; will ignore any C<=encoding>; and will
  348
+make no attempt to decode the input.
  349
+
338 350
 =head1 CAVEATS
339 351
 
340 352
 This is just a beta release -- there are a good number of things still
4  lib/Pod/Simple/BlackBox.pm
@@ -123,7 +123,7 @@ sub parse_lines {             # Usage: $parser->parse_lines(@lines)
123 123
       }
124 124
     }
125 125
 
126  
-    if(!$self->{'encoding'}) {
  126
+    if(!$self->parse_characters && !$self->{'encoding'}) {
127 127
       $self->_try_encoding_guess($line)
128 128
     }
129 129
 
@@ -272,6 +272,8 @@ sub parse_lines {             # Usage: $parser->parse_lines(@lines)
272 272
 sub _handle_encoding_line {
273 273
   my($self, $line) = @_;
274 274
   
  275
+  return if $self->parse_characters;
  276
+
275 277
   # The point of this routine is to set $self->{'_transcoder'} as indicated.
276 278
 
277 279
   return $line unless $line =~ m/^=encoding\s+(\S+)\s*$/s;
58  t/enc-chars.t
... ...
@@ -0,0 +1,58 @@
  1
+# tell parser the source POD has already been decoded from bytes to chars
  2
+# =encoding line should be ignored
  3
+# utf8 characters should come through unscathed
  4
+
  5
+BEGIN {
  6
+    if($ENV{PERL_CORE}) {
  7
+        chdir 't';
  8
+        @INC = '../lib';
  9
+    }
  10
+}
  11
+
  12
+use strict;
  13
+use Test;
  14
+BEGIN { plan tests => 3 };
  15
+
  16
+use Pod::Simple::DumpAsXML;
  17
+use Pod::Simple::XMLOutStream;
  18
+
  19
+
  20
+my $parser = Pod::Simple::XMLOutStream->new;
  21
+$parser->parse_characters(1);
  22
+my $output = '';
  23
+$parser->output_string( \$output );
  24
+$parser->parse_string_document(qq{
  25
+
  26
+=encoding bogocode
  27
+
  28
+=head1 DESCRIPTION
  29
+
  30
+Confirm that if we tell the parser to expect character data, it avoids all
  31
+the code paths that might attempt to decode the source from bytes to chars.
  32
+
  33
+The r\x{101}in in \x{15E}pain \x{FB02}oods the plain
  34
+
  35
+});
  36
+
  37
+ok(1); # parsed without exception
  38
+
  39
+if($output =~ /POD ERRORS/) {
  40
+  ok(0);
  41
+}
  42
+else {
  43
+  ok(1); # no errors
  44
+}
  45
+
  46
+$output =~ s{&#(\d+);}{chr($1)}eg;
  47
+
  48
+if($output =~ /The r\x{101}in in \x{15E}pain \x{FB02}oods the plain/) {
  49
+  ok(1); # data was not messed up
  50
+}
  51
+else {
  52
+  ok(0);
  53
+}
  54
+
  55
+
  56
+
  57
+warn $output;
  58
+exit;

0 notes on commit 0425bad

Please sign in to comment.
Something went wrong with that request. Please try again.