Skip to content

Commit

Permalink
Merge pull request #45 from ilmari/speedup-parse
Browse files Browse the repository at this point in the history
Parsing speedups from @ilmari.
  • Loading branch information
theory committed Feb 13, 2013
2 parents 6b6c9d6 + 66b6076 commit 5089872
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 26 deletions.
38 changes: 14 additions & 24 deletions lib/Pod/Simple/BlackBox.pm
Expand Up @@ -123,8 +123,20 @@ sub parse_lines { # Usage: $parser->parse_lines(@lines)
}
}

if(!$self->parse_characters && !$self->{'encoding'}) {
$self->_try_encoding_guess($line)
if(!$self->{'parse_characters'} && !$self->{'encoding'}
&& ($self->{'in_pod'} || $line =~ /^=/s)
&& $line =~ /[^\x00-\x7f]/
) {
my $encoding = $line =~ /^[\x00-\x7f]*[\xC0-\xFD][\x80-\xBF]/ ? 'UTF-8' : 'ISO8859-1';
$self->_handle_encoding_line( "=encoding $encoding" );
$self->{'_transcoder'} && $self->{'_transcoder'}->($line);

my ($word) = $line =~ /(\S*[^\x00-\x7f]\S*)/;

$self->whine(
$self->{'line_count'},
"Non-ASCII character seen before =encoding in '$word'. Assuming $encoding"
);
}

DEBUG > 5 and print "# Parsing line: [$line]\n";
Expand Down Expand Up @@ -401,28 +413,6 @@ sub _handle_encoding_second_level {
return;
}

# Guess an encoding for a line containing non-ASCII bytes when it is called
# before any encoding has been chosen, register that guess as if an
# "=encoding" line had been seen, and emit a warning naming the offending word.
#
# Arguments: the parser object and the current line. The line is copied into
# a lexical, so the transcoder call below operates on this sub's own copy.
#
# Returns an empty list/undef when nothing is done; otherwise the value of the
# final whine() call.
sub _try_encoding_guess {
    my $self = shift;
    my $line = shift;

    # Outside of pod, only lines that start with "=" are of interest:
    # don't whine about non-ASCII bytes in code/comments.
    return unless $self->{'in_pod'} or $line =~ /^=/m;

    # Nothing to guess if there is no non-ASCII byte at all.
    return if $line !~ /[^\x00-\x7f]/;

    # Default to Latin-1; switch to UTF-8 when the first non-ASCII byte is a
    # lead byte (0xC0-0xFD) immediately followed by a continuation byte.
    my $encoding = 'ISO8859-1';
    if ($line =~ /^[\x00-\x7f]*[\xC0-\xFD][\x80-\xBF]/) {
        $encoding = 'UTF-8';
    }

    $self->_handle_encoding_line( "=encoding $encoding" );

    # Read the transcoder only after the encoding line has been handled.
    if (my $transcoder = $self->{'_transcoder'}) {
        $transcoder->($line);
    }

    # First whitespace-delimited token that contains a non-ASCII character.
    my ($word) = $line =~ /(\S*[^\x00-\x7f]\S*)/;

    $self->whine(
        $self->{'line_count'},
        "Non-ASCII character seen before =encoding in '$word'. Assuming $encoding"
    );
}

#~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`

{
Expand Down
4 changes: 2 additions & 2 deletions lib/Pod/Simple/TranscodeSmart.pm
Expand Up @@ -27,12 +27,12 @@ sub encmodver {
}

sub make_transcoder {
my($e) = $_[1];
my $e = Encode::find_encoding($_[1]);
die "WHAT ENCODING!?!?" unless $e;
my $x;
return sub {
foreach $x (@_) {
$x = Encode::decode($e, $x) unless Encode::is_utf8($x);
$x = $e->decode($x) unless Encode::is_utf8($x);
}
return;
};
Expand Down

0 comments on commit 5089872

Please sign in to comment.