Permalink
Browse files

Decide against checking =encoding; Just try raw as utf-8

reverts 502a86f
  • Loading branch information...
1 parent eada17e commit 1ae0539ef70bf6e51b4079d8057ace2db5c8e784 @rwstauner rwstauner committed Mar 29, 2013
Showing with 8 additions and 24 deletions.
  1. +5 −9 lib/MetaCPAN/Web/Model/API.pm
  2. +3 −15 t/encoding.t
@@ -101,17 +101,13 @@ sub raw_api_response {
# we'll probably have the least number of issues if we assume utf8
try {
if( $data ){
- my $enc;
- # honor encoding if specified (as something other than utf-8)
- if( ($enc) = ($data =~ /^=encoding\s+(\S+)\s*$/m) and $enc !~ /^utf-?8$/i ){
- $data = Encode::decode( $enc, $data, $encode_check );
- }
- # theoretically we could check for a BOM here
- # else try UTF-8
- else {
+ # We could detect a POD =encoding line, but any Perl code in that file
+ # is likely ASCII or UTF-8. We could potentially check for a BOM,
+ # but those aren't used often and aren't likely to appear here.
+ # For now, just attempt to decode it as UTF-8 since that's probably
+ # what people should be using. (See also #378.)
# decode so the template doesn't double-encode and return mojibake
$data = $encoding->decode( $data, $encode_check );
- }
}
}
catch {
View
@@ -92,6 +92,9 @@ foreach my $ctype ( 'text/plain', 'application/json' ){
[ encode(utf8 => "foo\x{FFFF_FFFF}bar"), 'encoded lax perl utf8 chars' ],
[ encode(utf8 => "=encoding utf8\n\nfoo\x{FFFF_FFFF}bar"), '=encoding utf8 with bad chars' ],
[ "\225 cp1252 bullet", 'invalid utf-8 bytes' ],
+ # we ignore =encoding in raw responses, so these non-UTF-8 bytes fail to decode and come back as is
+ [ "=pod\n\n=encoding latin9\n\nsome pod \xa4\n\n=cut\n", 'non-utf8 chars' ],
+ [ "=pod\n\n=encoding foo-bar-baz\n\nsome char \xfe", 'unknown =encoding ignored' ],
){
test_raw_response($bad->[0], $bad->[0], $bad->[1] . " come back as is",
warnings => [qr/does not map to Unicode/, 'encode croaked'],
@@ -114,21 +117,6 @@ foreach my $ctype ( 'text/plain', 'application/json' ){
'utf-8 bytes decode to perl string'
);
- # EURO SIGN
- test_raw_response(
- "=pod\n\n=encoding latin9\n\nsome pod \xa4\n\n=cut\n",
- "=pod\n\n=encoding latin9\n\nsome pod \x{20ac}\n\n=cut\n",
- 'pod in other encoding converted to utf-8',
- );
-
- # bad encoding
- test_raw_response(
- ("=pod\n\n=encoding foo-bar-baz\n\nsome char \xfe") x 2,
- 'pod with unknown encoding left as is',
- warnings => [qr/Unknown encoding/, 'unknown encoding'],
- not_utf8 => 1,
- );
-
# not sure if we'll ever actually get undef
is get_raw(undef), '', 'undef becomes blank';
ok !@warnings, 'no warnings for undef' or diag shift @warnings;

0 comments on commit 1ae0539

Please sign in to comment.