diff --git a/lib/HTML/Entities.pm b/lib/HTML/Entities.pm index 73fbdb6..b1f1c48 100644 --- a/lib/HTML/Entities.pm +++ b/lib/HTML/Entities.pm @@ -443,23 +443,46 @@ sub encode_entities } else { $ref = \$_[0]; # modify in-place } + my $regex; if (defined $_[1] and length $_[1]) { - unless (exists $subst{$_[1]}) { - # Because we can't compile regex we fake it with a cached sub + $regex = $subst{$_[1]}; + unless (defined $regex) { my $chars = $_[1]; - $chars =~ s,(?', ''' and '"' - $$ref =~ s/([^\n\r\t !\#\$%\(-;=?-~])/$char2entity{$1} || num_entity($1)/ge; + $regex = qr/([^\n\r\t !\#\$%\(-;=?-~])/; } + $$ref =~ s/$regex/$char2entity{$1} || num_entity($1)/eg; $$ref; } diff --git a/t/entities.t b/t/entities.t index 7ded0a9..92d284b 100644 --- a/t/entities.t +++ b/t/entities.t @@ -3,7 +3,7 @@ use warnings; use utf8; use HTML::Entities qw(decode_entities encode_entities encode_entities_numeric); -use Test::More tests => 20; +use Test::More tests => 29; my $x = "Våre norske tegn bør æres"; @@ -33,6 +33,21 @@ is(encode_entities($x, '\\/'), "[24/7]\\"); is(encode_entities($x, '\\'), "[24/7]\"); is(encode_entities($x, ']\\'), "[24/7]\"); +# https://github.com/libwww-perl/HTML-Parser/issues/44 +$x = '<]$a/b\c/d$'; +is(encode_entities($x, '$'), '<]$a/b\\c/d$'); +is(encode_entities($x, '\\\\/'), '<]$a/b\c/d$'); +is(encode_entities($x, '\\\\/$'), '<]$a/b\c/d$'); +is(encode_entities($x, '<\\\\]'), '<]$a/b\c/d$'); + +# POSIX character classes +$x = "'), "<Våre123[=]"); +is(encode_entities($x, '[:Vlower:]'), "<Våre123[=]"); + # See how well it does against rfc1866... my $ent = ''; my $plain = '';