Permalink
Browse files

Add `encoding()` and `detected_encoding()`.

This allows formatters to output in the original encoding, should they wish.
The value of the detected or configured encoding is now also kept in the
output tree. Thanks to Patrice Dumas for the patch!
  • Loading branch information...
1 parent fa41915 commit fc97ecd5bbb7c02a796e6852078292d020c2d0af @theory committed Feb 25, 2013
View
@@ -6,6 +6,11 @@
Fixed another test script to skip all tests when Encode not
installed.
+ Added 'encoding()' and 'detected_encoding()' to return the current
+ encoding and the encoding actually used to decode the input file. The
+ value from "=encoding" is also now kept in the output tree. Patch
+ from Patrice Dumas (RT #74390).
+
2013-02-16 David E. Wheeler <david@justatheory.org>
* Release 3.25
View
@@ -113,6 +113,35 @@ sub any_errata_seen { # good for using as an exit() value...
return shift->{'errors_seen'} || 0;
}
+# Returns the encoding only if it was recognized as being handled and set
+sub detected_encoding {
+ return shift->{'detected_encoding'};
+}
+
+sub encoding {
+ my $this = shift;
+ return $this->{'encoding'} unless @_; # GET.
+
+ $this->_handle_encoding_line("=encoding $_[0]");
+ if ($this->{'_processed_encoding'}) {
+ delete $this->{'_processed_encoding'};
+ if(! $this->{'encoding_command_statuses'} ) {
+ DEBUG > 2 and print " CRAZY ERROR: encoding wasn't really handled?!\n";
+ } elsif( $this->{'encoding_command_statuses'}[-1] ) {
+ $this->whine( "=encoding $_[0]",
+ sprintf "Couldn't do %s: %s",
+ $this->{'encoding_command_reqs' }[-1],
+ $this->{'encoding_command_statuses'}[-1],
+ );
+ } else {
+ DEBUG > 2 and print " (encoding successfully handled.)\n";
+ }
+ return $this->{'encoding'};
+ } else {
+ return undef;
+ }
+}
+
#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
# Pull in some functions that, for some reason, I expect to see here too:
BEGIN {
View
@@ -289,6 +289,16 @@ I<Example:>
die "too many errors\n" if $parser->any_errata_seen();
+=item C<< $parser->detected_encoding() >>X<detected_encoding>
+
+Return the encoding corresponding to C<< =encoding >>, but only if the
+encoding was recognized and handled.
+
+=item C<< $parser->encoding() >>X<encoding>
+
+Return the encoding of the document, even if the encoding is not correctly
+handled.
+
=item C<< $parser->parse_from_file( $source, $to ) >>X<parse_from_file>
Parses from C<$source> file to C<$to> file. Similar to L<<
View
@@ -91,6 +91,7 @@ sub parse_lines { # Usage: $parser->parse_lines(@lines)
if( ($line = $source_line) =~ s/^\xEF\xBB\xBF//s ) {
DEBUG and print "UTF-8 BOM seen. Faking a '=encoding utf8'.\n";
$self->_handle_encoding_line( "=encoding utf8" );
+ delete $self->{'_processed_encoding'};
$line =~ tr/\n\r//d;
} elsif( $line =~ s/^\xFE\xFF//s ) {
@@ -343,6 +344,7 @@ sub _handle_encoding_line {
$@ && die( $enc_error =
"Really unexpected error setting up encoding $e: $@\nAborting"
);
+ $self->{'detected_encoding'} = $e;
} else {
my @supported = Pod::Simple::Transcode::->all_encodings;
@@ -373,8 +375,13 @@ sub _handle_encoding_line {
$self->scream( $self->{'line_count'}, $enc_error );
}
push @{ $self->{'encoding_command_statuses'} }, $enc_error;
+ if (defined($self->{'_processed_encoding'})) {
+ # Should never happen
+ die "Nested processed encoding.";
+ }
+ $self->{'_processed_encoding'} = $orig;
- return '=encoding ALREADYDONE';
+ return $line;
}
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
@@ -390,7 +397,11 @@ sub _handle_encoding_second_level {
DEBUG > 2 and print "Ogling encoding directive: =encoding $content\n";
- if($content eq 'ALREADYDONE') {
+ if (defined($self->{'_processed_encoding'})) {
+ #if($content ne $self->{'_processed_encoding'}) {
+ # Could it happen?
+ #}
+ delete $self->{'_processed_encoding'};
# It's already been handled. Check for errors.
if(! $self->{'encoding_command_statuses'} ) {
DEBUG > 2 and print " CRAZY ERROR: It wasn't really handled?!\n";
@@ -793,8 +804,7 @@ sub _ponder_paragraph_buffer {
} elsif($para_type eq '=encoding') {
# Not actually acted on here, but we catch errors here.
$self->_handle_encoding_second_level($para);
-
- next; # and skip
+ $para_type = 'Plain';
} elsif($para_type eq '~Verbatim') {
$para->[0] = 'Verbatim';
$para_type = '?Verbatim';
View
@@ -8,6 +8,9 @@
<head1 start_line="7">
DESCRIPTION
</head1>
+ <encoding start_line="9">
+ iso-2022-jp
+ </encoding>
<Para start_line="11">
This is a test Pod document in ISO-2202-JP. Its content is some Japanese
haiku by famous poets.
View
@@ -8,6 +8,9 @@
<head1 start_line="7">
DESCRIPTION
</head1>
+ <encoding start_line="9">
+ iso-2022-jp
+ </encoding>
<Para start_line="11">
This is a test Pod document in ISO-2202-JP. Its content is some Japanese
haiku by famous poets.
View
@@ -8,6 +8,9 @@
<head1 start_line="6">
DESCRIPTION
</head1>
+ <encoding start_line="8">
+ iso-2022-jp
+ </encoding>
<Para start_line="10">
This is a test Pod document in ISO-2202-JP. Its content is some Japanese
haiku by famous poets.
View
@@ -8,6 +8,9 @@
<head1 start_line="6">
DESCRIPTION
</head1>
+ <encoding start_line="8">
+ iso-2022-jp
+ </encoding>
<Para start_line="10">
This is a test Pod document in ISO-2202-JP.
</Para>
View
@@ -1,4 +1,7 @@
<Document start_line="2">
+ <encoding start_line="2">
+ iso-8859-7
+ </encoding>
<head1 start_line="4">
NAME
</head1>
View
@@ -12,6 +12,9 @@
This Pod document is a paragraph in Arabic from &#34;The Five Pillars of
Islam&#34; as CP-1256.
</Para>
+ <encoding start_line="11">
+ cp1256
+ </encoding>
<Para start_line="13">
&#1608;&#1593;&#1606; &#1593;&#1605;&#1575;&#1585;&#1577; &#1576;&#1606;
&#1581;&#1586;&#1605; &#1602;&#1575;&#1604; &#1602;&#1575;&#1604;
View
@@ -1,4 +1,7 @@
<Document start_line="5">
+ <encoding start_line="5">
+ koi8-r
+ </encoding>
<head1 start_line="7">
NAME
</head1>
@@ -12,6 +15,9 @@
<head1 start_line="11">
TEXT
</head1>
+ <encoding start_line="13">
+ Shift-JIS
+ </encoding>
<Para start_line="15">
(This is a test Pod pocument in KOI8-R.)
</Para>
View
@@ -1,4 +1,7 @@
<Document start_line="5">
+ <encoding start_line="5">
+ koi8-r
+ </encoding>
<head1 start_line="7">
NAME
</head1>
@@ -103,6 +106,9 @@
&#1042;&#1089;&#1090;&#1072;&#1074;&#1072;&#1083;&#1072;
&#1074;&#1076;&#1088;&#1091;&#1075;&#1079;&#1072;&#1088;&#1103;.
</VerbatimFormatted>
+ <encoding start_line="44">
+ koi8-r
+ </encoding>
<VerbatimFormatted start_line="46" xml:space="preserve">
&#1048; &#1074; &#1101;&#1090;&#1091;
&#1082;&#1088;&#1072;&#1089;&#1086;&#1090;&#1091;
View
@@ -12,6 +12,9 @@
This document is a paragraph in Arabic from &#34;The Five Pillars of
Islam&#34; as ISO-8859-6.
</Para>
+ <encoding start_line="11">
+ iso-8859-6
+ </encoding>
<Para start_line="13">
&#1608;&#1593;&#1606; &#1593;&#1605;&#1575;&#1585;&#1577; &#1576;&#1606;
&#1581;&#1586;&#1605; &#1602;&#1575;&#1604; &#1602;&#1575;&#1604;
View
@@ -1,4 +1,7 @@
<Document start_line="2">
+ <encoding start_line="2">
+ koi8-r
+ </encoding>
<head1 start_line="4">
NAME
</head1>
View
@@ -1,4 +1,7 @@
<Document start_line="1">
+ <encoding start_line="1">
+ big5
+ </encoding>
<head1 start_line="3">
&#32769;&#23376;&#36947;&#24503;&#32147; &#19977;&#21313;&#20843;&#31456;
-- Big5 (Chinese) encoding test
View
@@ -1,4 +1,7 @@
<Document start_line="1">
+ <encoding start_line="1">
+ big5-eten
+ </encoding>
<head1 start_line="3">
&#32769;&#23376;&#36947;&#24503;&#32147; &#19977;&#21313;&#20843;&#31456;
-- Big5 (Chinese) encoding test
View
@@ -1,4 +1,7 @@
<Document start_line="2">
+ <encoding start_line="2">
+ big5
+ </encoding>
<head1 start_line="4">
NAME
</head1>
View
@@ -1,4 +1,7 @@
<Document start_line="2">
+ <encoding start_line="2">
+ iso-8859-1
+ </encoding>
<head1 start_line="4">
NAME
</head1>
@@ -1,4 +1,7 @@
<Document start_line="2">
+ <encoding start_line="2">
+ cp1251
+ </encoding>
<head1 start_line="4">
NAME
</head1>
@@ -1,4 +1,7 @@
<Document start_line="2">
+ <encoding start_line="2">
+ ascii
+ </encoding>
<head1 start_line="4">
NAME
</head1>
@@ -1,4 +1,7 @@
<Document start_line="4">
+ <encoding start_line="4">
+ iso-8859-1
+ </encoding>
<head1 start_line="6">
NAME
</head1>
View
@@ -1,4 +1,7 @@
<Document start_line="4">
+ <encoding start_line="4">
+ utf8
+ </encoding>
<head1 start_line="6">
NAME
</head1>
View
@@ -1,4 +1,7 @@
<Document start_line="2">
+ <encoding start_line="2">
+ utf8
+ </encoding>
<head1 start_line="4">
NAME
</head1>
View
@@ -1,4 +1,7 @@
<Document start_line="2">
+ <encoding start_line="2">
+ shiftjis
+ </encoding>
<head1 start_line="4">
NAME
</head1>
View
@@ -8,6 +8,9 @@
<head1 start_line="6">
DESCRIPTION
</head1>
+ <encoding start_line="8">
+ iso-8859-11
+ </encoding>
<Para start_line="10">
This is a test Pod document in ISO-8859-11. Its content is a poem to (by?)
Khun Thong Dang

0 comments on commit fc97ecd

Please sign in to comment.