diff --git a/Makefile.PL b/Makefile.PL
index 869868d5..1e28c7c7 100644
--- a/Makefile.PL
+++ b/Makefile.PL
@@ -17,7 +17,7 @@ WriteMakefile(
         'HTTP::Date' => 6,
         'MIME::Base64' => "2.1",
         'MIME::QuotedPrint' => 0,
-        'HTML::Parser' => "3.33",
+        'IO::HTML' => 0,
         'Encode' => "2.12",
         'Encode::Locale' => 1,
         'LWP::MediaTypes' => 6,
diff --git a/lib/HTTP/Message.pm b/lib/HTTP/Message.pm
index d85f5a3b..1241716e 100644
--- a/lib/HTTP/Message.pm
+++ b/lib/HTTP/Message.pm
@@ -236,34 +236,11 @@ sub content_charset
     elsif ($self->content_is_html) {
         # look for <META charset="..."> or <META content="...">
         # http://dev.w3.org/html5/spec/Overview.html#determining-the-character-encoding
-        my $charset;
-        require HTML::Parser;
-        my $p = HTML::Parser->new(
-            start_h => [sub {
-                my($tag, $attr, $self) = @_;
-                $charset = $attr->{charset};
-                unless ($charset) {
-                    # look at $attr->{content} ...
-                    if (my $c = $attr->{content}) {
-                        require HTTP::Headers::Util;
-                        my @v = HTTP::Headers::Util::split_header_words($c);
-                        return unless @v;
-                        my($ct, undef, %ct_param) = @{$v[0]};
-                        $charset = $ct_param{charset};
-                    }
-                    return unless $charset;
-                }
-                if ($charset =~ /^utf-?16/i) {
-                    # converted document, assume UTF-8
-                    $charset = "UTF-8";
-                }
-                $self->eof;
-            }, "tagname, attr, self"],
-            report_tags => [qw(meta)],
-            utf8_mode => 1,
-        );
-        $p->parse($$cref);
-        return $charset if $charset;
+        require IO::HTML;
+        # Use relaxed search to match previous versions of HTTP::Message:
+        my $encoding = IO::HTML::find_charset_in($$cref, { encoding => 1,
+                                                           need_pragma => 0 });
+        return $encoding->mime_name if $encoding;
     }
     elsif ($self->content_type eq "application/json") {
         for ($$cref) {