Skip to content

Commit

Permalink
Guess encoding of changes file (#27)
Browse files Browse the repository at this point in the history
It can happen that the distribution's Changes file is encoded in
Latin1, but our .changes is in utf8.
We should let perl guess when reading, and be specific when
writing.
  • Loading branch information
perlpunk committed Mar 3, 2021
1 parent e6526a8 commit 48a5bdd
Showing 1 changed file with 26 additions and 7 deletions.
33 changes: 26 additions & 7 deletions cpanspec
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,8 @@ use File::Temp;
use File::Path qw(rmtree);
use Intrusive;
use Perl::PrereqScanner;
use Encode qw/ decode_utf8 /;
use Encode::Guess;

require Carp;

Expand Down Expand Up @@ -456,14 +458,18 @@ sub is_in_core($$) {
return $ret;
}

sub readfile($) {
sub readfile {
my ($filename, $encoding) = @_;
$encoding //= '';
local $/ = undef;
my $filename = shift;
die "empty filename" unless length($filename);
open FILE, $basedir . $filename or return undef;
binmode FILE;
my $string = <FILE>;
close FILE;
open my $fh, "<", $basedir . $filename or return undef;
binmode $fh;
my $string = <$fh>;
if ($encoding eq 'guess') {
$string = decode_latin_or_utf8($string);
}
close $fh;
return $string;
}

Expand Down Expand Up @@ -901,7 +907,7 @@ for my $ofile (@args) {
my $candidate = lc $entry;
if (!$changes && ($candidate =~ m/^changes/ || $candidate =~ m/^changelog/ || $candidate =~ m/^history/)) {
$changesfile = $entry;
$changes = readfile("$path/$entry");
$changes = readfile("$path/$entry", 'guess');
}
if (-x "$basedir$path/$entry" && -f "$basedir$path/$entry") {
Expand Down Expand Up @@ -1703,13 +1709,15 @@ END
my $changes_diff;

my ($tfh, $tmpfile) = File::Temp::tempfile;
binmode $tfh, ':encoding(UTF-8)';
my $cltxt = "";

if (-f $changelogfile) {
my $txt = "- updated to $version\n see $defaultdocdir/$basename/$changesfile\n\n";
$cltxt .= $txt;
if ($old_file && $changes) {
my $old_changes = extract_old_changes($old_file, $changesfile);
$old_changes = decode_latin_or_utf8($old_changes);
$old_changes =~ s,\r\n,\n,g;
$cltxt .= diff_changes($old_changes, $changes) if $old_changes;
}
Expand Down Expand Up @@ -1738,4 +1746,15 @@ END
}
}

sub decode_latin_or_utf8 {
my ($string) = @_;
my $enc = guess_encoding($string, qw/ utf8 latin1 /);
unless (ref $enc) {
return decode_utf8 $string;
}
#my $name = $enc->name;
$string = $enc->decode($string);
return $string;
}

# vi: set ai et:

0 comments on commit 48a5bdd

Please sign in to comment.