Skip to content

Commit

Permalink
Tidies many files.
Browse files Browse the repository at this point in the history
  • Loading branch information
oalders committed Mar 1, 2017
1 parent d1a86f6 commit 0e6bb80
Show file tree
Hide file tree
Showing 11 changed files with 130 additions and 108 deletions.
4 changes: 2 additions & 2 deletions examples/sanitize_file.pl
Expand Up @@ -9,10 +9,10 @@
my $filename = shift @ARGV;
die "usage: perl $0 path/to/file > path/to/new/file" if !$filename;

my $text = read_file( $filename );
my $text = read_file($filename);

my $hr = HTML::Restrict->new;
print $hr->process( $text );
print $hr->process($text);

=pod
Expand Down
74 changes: 38 additions & 36 deletions lib/HTML/Restrict.pm
Expand Up @@ -35,16 +35,16 @@ has 'debug' => (
);

has 'parser' => (
is => 'ro',
lazy => 1,
builder => '_build_parser',
is => 'ro',
lazy => 1,
builder => '_build_parser',
);

has 'rules' => (
is => 'rw',
isa => HashRef,
required => 0,
default => quote_sub( q{ {} } ),
default => quote_sub(q{ {} }),
trigger => \&_build_parser,
reader => 'get_rules',
writer => 'set_rules',
Expand All @@ -53,12 +53,12 @@ has 'rules' => (
has 'strip_enclosed_content' => (
is => 'rw',
isa => ArrayRef,
default => sub { ['script', 'style'] },
default => sub { [ 'script', 'style' ] },
);

has 'replace_img' => (
is => 'rw',
isa => Bool | CodeRef,
isa => Bool | CodeRef,
default => 0,
);

Expand Down Expand Up @@ -101,7 +101,7 @@ sub _build_parser {

# don't allow any upper case tag or attribute names
# these rules would otherwise silently be ignored
if ( $rules ) {
if ($rules) {
foreach my $tag_name ( keys %{$rules} ) {
if ( lc $tag_name ne $tag_name ) {
croak "All tag names must be lower cased";
Expand All @@ -110,8 +110,8 @@ sub _build_parser {
my @attr_names;
foreach my $attr_item ( @{ $rules->{$tag_name} } ) {
ref $attr_item eq 'HASH'
? push(@attr_names, keys(%$attr_item))
: push(@attr_names, $attr_item);
? push( @attr_names, keys(%$attr_item) )
: push( @attr_names, $attr_item );
}
for (@attr_names) {
croak "All attribute names must be lower cased"
Expand All @@ -121,7 +121,7 @@ sub _build_parser {
}
}

weaken( $self );
weaken($self);
return HTML::Parser->new(

start_h => [
Expand All @@ -135,39 +135,41 @@ sub _build_parser {

foreach my $source_type ( 'href', 'src', 'cite' ) {

if ( $attr->{$source_type} )
{
if ( $attr->{$source_type} ) {
my $uri = URI->new( $attr->{$source_type} );
if (defined $uri->scheme) {
if ( defined $uri->scheme ) {
delete $attr->{$source_type}
if none { $_ eq $uri->scheme }
grep defined, @{ $self->get_uri_schemes };
grep defined, @{ $self->get_uri_schemes };
}
else { # relative uri
else { # relative uri
delete $attr->{$source_type}
unless grep !defined,
@{ $self->get_uri_schemes };
@{ $self->get_uri_schemes };
}
}
}

foreach
my $attr_item ( @{ $self->get_rules->{$tagname} } )
{
if (ref $attr_item eq 'HASH') {
my $attr_item ( @{ $self->get_rules->{$tagname} } ) {
if ( ref $attr_item eq 'HASH' ) {

# validate against regex constraints
for my $attr_name (sort keys %$attr_item) {
for my $attr_name ( sort keys %$attr_item ) {
if ( exists $attr->{$attr_name} ) {
my $value = encode_entities($attr->{$attr_name});
my $value = encode_entities(
$attr->{$attr_name} );
$more .= qq[ $attr_name="$value" ]
if $attr->{$attr_name} =~ $attr_item->{$attr_name};
if $attr->{$attr_name}
=~ $attr_item->{$attr_name};
}
}
}
else {
my $attr_name = $attr_item;
if ( exists $attr->{$attr_name} ) {
my $value = encode_entities($attr->{$attr_name});
my $value
= encode_entities( $attr->{$attr_name} );
$more .= qq[ $attr_name="$value" ]
unless $attr_name eq q{/};
}
Expand All @@ -188,17 +190,19 @@ sub _build_parser {
elsif ( $tagname eq 'img' && $self->replace_img ) {
my $alt;
if ( ref $self->replace_img ) {
$alt = $self->replace_img->($tagname, $attr, $text);
$alt = $self->replace_img->( $tagname, $attr, $text );
}
else {
$alt = defined( $attr->{alt} ) ? ": $attr->{alt}" : "";
$alt
= defined( $attr->{alt} ) ? ": $attr->{alt}" : "";
$alt = "[IMAGE$alt]";
}
$self->_processed( ( $self->_processed || q{} ) . $alt );
}
elsif (
any { $_ eq $tagname } @{ $self->strip_enclosed_content } )
{
any { $_ eq $tagname }
@{ $self->strip_enclosed_content }
) {
print "adding $tagname to strippers" if $self->debug;
push @{ $self->_stripper_stack }, $tagname;
}
Expand All @@ -214,10 +218,8 @@ sub _build_parser {
if ( any { $_ eq $tagname } keys %{ $self->get_rules } ) {
$self->_processed( ( $self->_processed || q{} ) . $text );
}
elsif (
any { $_ eq $tagname } @{ $self->_stripper_stack } )
{
$self->_delete_tag_from_stack( $tagname );
elsif ( any { $_ eq $tagname } @{ $self->_stripper_stack } ) {
$self->_delete_tag_from_stack($tagname);
}

},
Expand All @@ -228,7 +230,7 @@ sub _build_parser {
sub {
my ( $p, $text ) = @_;
print "text: $text\n" if $self->debug;
if ( !@{$self->_stripper_stack} ) {
if ( !@{ $self->_stripper_stack } ) {
$self->_processed( ( $self->_processed || q{} ) . $text );
}
},
Expand Down Expand Up @@ -269,13 +271,13 @@ sub process {
return if !@_;
return $_[0] if !$_[0];

my ( $content ) = @_;
my ($content) = @_;
die 'content must be a string!'
unless ref( \$content ) eq 'SCALAR';
$self->_clear_processed;

my $parser = $self->parser;
$parser->parse( $content );
$parser->parse($content);
$parser->eof;

my $text = $self->_processed;
Expand All @@ -284,10 +286,10 @@ sub process {
$text =~ s{\A\s*}{}gxms;
$text =~ s{\s*\z}{}gxms;
}
$self->_processed( $text );
$self->_processed($text);

# ensure stripper stack is reset in case of broken html
$self->_stripper_stack([ ]);
$self->_stripper_stack( [] );

return $self->_processed;

Expand Down
37 changes: 25 additions & 12 deletions t/attribute_constraints.t
Expand Up @@ -8,45 +8,58 @@ my $hr = HTML::Restrict->new(
rules => {
iframe => [
qw( width height ),
{ src => qr{^http://www\.youtube\.com},
{
src => qr{^http://www\.youtube\.com},
frameborder => qr{^(0|1)$},
}
],
},
);

cmp_ok(
$hr->process('<iframe width="560" height="315" frameborder="0" src="http://www.youtube.com/embed/9gKeRZM2Iyc"></iframe>'),
'eq', '<iframe width="560" height="315" frameborder="0" src="http://www.youtube.com/embed/9gKeRZM2Iyc"></iframe>',
$hr->process(
'<iframe width="560" height="315" frameborder="0" src="http://www.youtube.com/embed/9gKeRZM2Iyc"></iframe>'
),
'eq',
'<iframe width="560" height="315" frameborder="0" src="http://www.youtube.com/embed/9gKeRZM2Iyc"></iframe>',
'all constraints pass',
);

cmp_ok(
$hr->process('<iframe width="560" height="315" src="http://www.hostile.com/" frameborder="0"></iframe>'),
'eq', '<iframe width="560" height="315" frameborder="0"></iframe>',
$hr->process(
'<iframe width="560" height="315" src="http://www.hostile.com/" frameborder="0"></iframe>'
),
'eq',
'<iframe width="560" height="315" frameborder="0"></iframe>',
'one constraint fails',
);

cmp_ok(
$hr->process('<iframe width="560" height="315" src="http://www.hostile.com/" frameborder="A"></iframe>'),
'eq', '<iframe width="560" height="315"></iframe>',
$hr->process(
'<iframe width="560" height="315" src="http://www.hostile.com/" frameborder="A"></iframe>'
),
'eq',
'<iframe width="560" height="315"></iframe>',
'two constraints fail',
);

$hr = HTML::Restrict->new(
rules => {
iframe => [
{ src => qr{^http://www\.youtube\.com} },
{ frameborder => qr{^(0|1)$} },
{ height => qr{^315$} },
{ width => qr{^560$} },
{ frameborder => qr{^(0|1)$} },
{ height => qr{^315$} },
{ width => qr{^560$} },
],
},
);

cmp_ok(
$hr->process('<iframe width="560" height="315" frameborder="0" src="http://www.youtube.com/embed/9gKeRZM2Iyc"></iframe>'),
'eq', '<iframe src="http://www.youtube.com/embed/9gKeRZM2Iyc" frameborder="0" height="315" width="560"></iframe>',
$hr->process(
'<iframe width="560" height="315" frameborder="0" src="http://www.youtube.com/embed/9gKeRZM2Iyc"></iframe>'
),
'eq',
'<iframe src="http://www.youtube.com/embed/9gKeRZM2Iyc" frameborder="0" height="315" width="560"></iframe>',
'possible to maintain order',
);

Expand Down
23 changes: 11 additions & 12 deletions t/comments.t
Expand Up @@ -8,27 +8,26 @@ use Test::More;

my $hr = HTML::Restrict->new;

my $text = '<!-- comment here -->stuff';
$hr->debug( 0 );
my $text = '<!-- comment here -->stuff';
$hr->debug(0);

is $hr->process( $text ), 'stuff', 'comments allowed';
$hr->allow_comments( 1 );
is $hr->process( $text ), $text, 'comments allowd';
is $hr->process($text), 'stuff', 'comments allowed';
$hr->allow_comments(1);
is $hr->process($text), $text, 'comments allowd';

$text = 'before<!-- This is a comment -- -- So is this -->after';
$hr->allow_comments( 0 );
$hr->allow_comments(0);

is $hr->process( $text ), 'beforeafter', 'comment allowed';
is $hr->process($text), 'beforeafter', 'comment allowed';

$hr->allow_comments( 1 );
is $hr->process( $text ), $text, 'comments allowd';
$hr->allow_comments(1);
is $hr->process($text), $text, 'comments allowd';

$hr->allow_comments( 0 );
$hr->allow_comments(0);
$text = '<!-- <script> <h1> -->';
is $hr->process( $text ), undef, 'tags nested in comments removed';
is $hr->process($text), undef, 'tags nested in comments removed';

#$hr->set_rules({ script => [], 'h1' => [] });
#is $hr->process( $text ), $text, 'tags nested in comments not removed when explicitly allowed';


done_testing();
15 changes: 8 additions & 7 deletions t/declaration.t
Expand Up @@ -8,14 +8,15 @@ use Test::More;

my $hr = HTML::Restrict->new;

my $text = '<!DOCTYPE HTML> ';
$hr->debug( 1 );
my $text = '<!DOCTYPE HTML> ';
$hr->debug(1);

is $hr->process( $text ), '', 'declaration not preserved';
$hr->allow_declaration( 1 );
is $hr->process( $text ), '<!DOCTYPE HTML>', 'declaration is preserved';
is $hr->process($text), '', 'declaration not preserved';
$hr->allow_declaration(1);
is $hr->process($text), '<!DOCTYPE HTML>', 'declaration is preserved';

$text = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">';
is $hr->process( $text ), $text, 'declaration preserved';
$text
= '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">';
is $hr->process($text), $text, 'declaration preserved';

done_testing();
4 changes: 2 additions & 2 deletions t/js.t
Expand Up @@ -15,10 +15,10 @@ $(document).ready(function() {
});
</script>];

is( $hr->process( $html ), undef, "content of script tags removed by default" );
is( $hr->process($html), undef, "content of script tags removed by default" );

$hr->set_rules( { script => ['type'] } );

is( $hr->process( $html ), $html, "content of script preserved" );
is( $hr->process($html), $html, "content of script preserved" );

done_testing();
13 changes: 9 additions & 4 deletions t/replace_img.t
Expand Up @@ -14,7 +14,7 @@ my @texts = (
},
{
label => "<img ... ></img>",
html => q{<img alt="foo bar" src="http://example.com/foo.jpg"></img>},
html => q{<img alt="foo bar" src="http://example.com/foo.jpg"></img>},
},
);

Expand Down Expand Up @@ -47,10 +47,15 @@ sub replacer {
}

for my $c (@cases) {
ok( my $hr = HTML::Restrict->new( debug => 0, %{ $c->{args} } ),
"$c->{label}: HTML::Restrict->new(...)" );
ok(
my $hr = HTML::Restrict->new( debug => 0, %{ $c->{args} } ),
"$c->{label}: HTML::Restrict->new(...)"
);
for my $t (@texts) {
is( $hr->process( $t->{html} ), $c->{expect}, "$c->{label}: $t->{label}" );
is(
$hr->process( $t->{html} ), $c->{expect},
"$c->{label}: $t->{label}"
);
}
}

Expand Down

0 comments on commit 0e6bb80

Please sign in to comment.