Skip to content
This repository has been archived by the owner on Apr 14, 2019. It is now read-only.

Commit

Permalink
Merge branch 'master' of github.com:petdance/html-lint into nolint
Browse files Browse the repository at this point in the history
Conflicts:
	.gitignore
	lib/HTML/Lint.pm
	lib/HTML/Lint/Error.pm
  • Loading branch information
petdance committed Mar 29, 2012
2 parents cfd80f9 + a5b4bc0 commit 14a094d
Show file tree
Hide file tree
Showing 13 changed files with 243 additions and 71 deletions.
5 changes: 1 addition & 4 deletions .gitignore
Expand Up @@ -2,7 +2,4 @@ Makefile
Makefile.old
blib/
pm_to_blib

# These two are my symlinks
Error.pm
Lint.pm
MYMETA.*
6 changes: 6 additions & 0 deletions Changes
Expand Up @@ -6,6 +6,12 @@ NOTE: All bugs and requests are now being handled through GitHub.

DO NOT send bug reports to http://rt.cpan.org/ or http://code.google.com/

NEXT
Added check for unknown entities (&foo;) and unclosed entities
(&amp without a semicolon).

Added a check for a bare ampersand that should be written as &amp;

2.10 Tue Dec 6 11:16:16 CST 2011

HTML::Lint is now explicitly licensed under Artistic License 2.0,
Expand Down
2 changes: 2 additions & 0 deletions MANIFEST
Expand Up @@ -35,4 +35,6 @@ t/pod-coverage.t
t/random-nobr.t
t/strong-id.t
t/text-use-entity.t
t/text-unknown-entity.t
t/text-unclosed-entity.t
t/xhtml-html.t
26 changes: 13 additions & 13 deletions Makefile.PL
Expand Up @@ -2,10 +2,8 @@ use strict;
use ExtUtils::MakeMaker qw( WriteMakefile );
use 5.006001;

eval { require LWP::Simple; };

if ( $@ ) {
print <<EOF;
if ( not eval { require LWP::Simple; 1; } ) {
print <<'EOF';
NOTE: It seems that you don't have LWP::Simple installed.
The weblint program will not be able to retrieve web pages.
Expand All @@ -21,12 +19,14 @@ my %parms = (
PMLIBDIRS => [qw(lib/)],
AUTHOR => 'Andy Lester <andy@petdance.com>',
PREREQ_PM => {
'Exporter' => 0,
'Test::More' => 0,
'Test::Builder' => 0,
'HTML::Parser' => '3.47',
'HTML::Tagset' => '3.03',
'File::Find' => 0,
'Exporter' => 0,
'Test::More' => 0,
'Test::Builder' => 0,

'File::Find' => 0,
'HTML::Entities' => 0,
'HTML::Parser' => '3.47',
'HTML::Tagset' => '3.03',
},
EXE_FILES => [qw(bin/weblint)],
dist => {
Expand All @@ -43,9 +43,9 @@ if ( $ExtUtils::MakeMaker::VERSION =~ /^\d\.\d\d$/ and $ExtUtils::MakeMaker::VER
if ( $ExtUtils::MakeMaker::VERSION ge '6.46' ) {
$parms{META_ADD} = {
resources => {
homepage => 'http://search.cpan.org/dist/html-lint',
homepage => 'http://search.cpan.org/dist/html-lint',
bugtracker => 'https://github.com/petdance/html-lint/issues',
license => 'http://www.opensource.org/licenses/artistic-license-2.0.php',
license => 'http://www.opensource.org/licenses/artistic-license-2.0.php',
Repository => 'https://github.com/petdance/html-lint',
}
};
Expand All @@ -66,7 +66,7 @@ tags:
--languages=Perl --langmap=Perl:+.t \
critic:
perlcritic -1 -q -profile perlcriticrc bin/weblint lib/
perlcritic -1 -q -profile perlcriticrc lib/ bin/weblint Makefile.PL
PROF_ARGS = -Mblib blib/script/weblint index.html
Expand Down
93 changes: 75 additions & 18 deletions lib/HTML/Lint.pm
Expand Up @@ -4,18 +4,19 @@ use warnings;
use strict;

use HTML::Lint::Error;
use HTML::Entities ();

=head1 NAME
HTML::Lint - check for HTML errors in a string or file
=head1 VERSION
Version 2.10
Version 2.11_01
=cut

our $VERSION = '2.10';
our $VERSION = '2.11_01';

=head1 SYNOPSIS
Expand Down Expand Up @@ -80,7 +81,6 @@ sub new {
_types => [],
_flags => {},
};
$self->{_parser} = HTML::Lint::Parser->new( sub { $self->gripe( @_ ) } );
bless $self, $class;

if ( my $only = $args{only_types} ) {
Expand All @@ -93,7 +93,24 @@ sub new {
return $self;
}

=head2 $lint->parse( $chunk )
=head2 $lint->parser()
Returns the parser object for this object, creating one if necessary.
=cut

sub parser {
    my ($self) = @_;

    # Hand back the cached parser when this object already has one.
    return $self->{_parser} if $self->{_parser};

    # Otherwise build one on demand.  Its callback forwards whatever it is
    # given to this object's gripe() method, and the new parser is asked to
    # ignore <script> and <style> elements.
    $self->{_parser} = HTML::Lint::Parser->new( sub { $self->gripe( @_ ) } );
    $self->{_parser}->ignore_elements( qw(script style) );

    return $self->{_parser};
}

=head2 $lint->parse( $text )
=head2 $lint->parse( $code_ref )
Expand All @@ -105,7 +122,8 @@ See L<HTML::Parser>'s C<parse_file> method for details.

sub parse {
my $self = shift;
return $self->_parser->parse( @_ );

return $self->parser->parse( @_ );
}

=head2 $lint->parse_file( $file )
Expand All @@ -118,7 +136,7 @@ See L<HTML::Parser>'s C<parse_file> method for details.

sub parse_file {
my $self = shift;
return $self->_parser->parse_file( @_ );
return $self->parser->parse_file( @_ );
}

=head2 $lint->eof
Expand All @@ -135,9 +153,9 @@ sub eof {
my $self = shift;

my $rc;
my $parser = $self->_parser;
my $parser = $self->parser;
if ( $parser ) {
$rc = $self->_parser->eof(@_);
$rc = $self->parser->eof(@_);
delete $self->{_parser};
}

Expand Down Expand Up @@ -231,10 +249,8 @@ in case, here you go.
sub gripe {
my $self = shift;

my $parser = $self->_parser;

my $error = HTML::Lint::Error->new(
$self->{_file}, $parser->{_line}, $parser->{_column}, @_ );
$self->{_file}, $self->parser->{_line}, $self->parser->{_column}, @_ );

my @keeps = @{$self->{_types}};
if ( !@keeps || $error->is_type(@keeps) ) {
Expand All @@ -260,7 +276,6 @@ sub newfile {
my $file = shift;

delete $self->{_parser};
$self->{_parser} = HTML::Lint::Parser->new( sub { $self->gripe( @_ ) } );
$self->{_file} = $file;
$self->{_line} = 0;
$self->{_column} = 0;
Expand All @@ -269,12 +284,6 @@ sub newfile {
return $self->{_file};
} # newfile

sub _parser {
my $self = shift;

return $self->{_parser};
}

=pod
HTML::Lint::Parser is a class only for this module.
Expand Down Expand Up @@ -386,6 +395,10 @@ sub _start {
sub _text {
my ($self,$text) = @_;

while ( $text =~ /&(?![#0-9a-z])/ig ) {
$self->gripe( 'text-use-entity', char => '&', entity => '&amp;' );
}

while ( $text =~ /([^\x09\x0A\x0D -~])/g ) {
my $bad = $1;
$self->gripe(
Expand All @@ -395,6 +408,50 @@ sub _text {
);
}

if ( not $self->{_unclosed_entities_regex} ) {
# Get Gisle's list
my @entities = sort keys %HTML::Entities::entity2char;

# Strip his semicolons
s/;$// for @entities;

# Build a regex
my $entities = join( '|', @entities );
$self->{_unclosed_entities_regex} = qr/&($entities)(?!;)/;

$self->{_entity_lookup} = { map { ($_,1) } @entities };
}

while ( $text =~ m/$self->{_unclosed_entities_regex}/g ) {
my $ent = $1;
$self->gripe( 'text-unclosed-entity', entity => "&$ent;" );
}

while ( $text =~ m/&([^;]+);/g ) {
my $ent = $1;

# Numeric entities are fine, if they're not too large.
if ( $ent =~ /^#(\d+)$/ ) {
if ( $1 > 65536 ) {
$self->gripe( 'text-invalid-entity', entity => "&$ent;" );
}
next;
}

# Hex entities are fine, if they're not too large.
if ( $ent =~ /^#x([\dA-F]+)$/i ) {
if ( length($1) > 4 ) {
$self->gripe( 'text-invalid-entity', entity => "&$ent;" );
}
next;
}

# If it's not a numeric entity, then check the lookup table.
if ( !exists $self->{_entity_lookup}{$ent} ) {
$self->gripe( 'text-unknown-entity', entity => "&$ent;" );
}
}

return;
}

Expand Down
5 changes: 4 additions & 1 deletion lib/HTML/Lint/Error.pm
Expand Up @@ -261,7 +261,10 @@ Andy Lester, C<andy at petdance.com>
'attr-repeated' => ['${attr} attribute in <${tag}> is repeated', STRUCTURE],
'attr-unknown' => ['Unknown attribute "${attr}" for tag <${tag}>', FLUFF],

'text-use-entity' => ['Invalid character ${char} should be written as ${entity}', STRUCTURE],
'text-invalid-entity' => ['Entity ${entity} is invalid', STRUCTURE],
'text-unclosed-entity' => ['Entity ${entity} is missing its closing semicolon', STRUCTURE],
'text-unknown-entity' => ['Entity ${entity} is unknown', STRUCTURE],
'text-use-entity' => ['Character "${char}" should be written as ${entity}', STRUCTURE],
);

1; # happy
Expand Down
6 changes: 3 additions & 3 deletions lib/Test/HTML/Lint.pm
Expand Up @@ -6,7 +6,7 @@ use strict;
use Test::Builder;
use Exporter;

use HTML::Lint 2.10;
use HTML::Lint;

use vars qw( @ISA $VERSION @EXPORT );

Expand All @@ -18,11 +18,11 @@ Test::HTML::Lint - Test::More-style wrapper around HTML::Lint
=head1 VERSION
Version 2.10
Version 2.11_01
=cut

$VERSION = '2.10';
$VERSION = '2.11_01';

my $Tester = Test::Builder->new;

Expand Down
2 changes: 1 addition & 1 deletion t/00-load.t
Expand Up @@ -10,4 +10,4 @@ BEGIN {
use_ok( 'Test::HTML::Lint' );
}

diag( "Testing HTML::Lint $HTML::Lint::VERSION" );
diag( "Testing HTML::Lint $HTML::Lint::VERSION, Perl $], $^X" );
45 changes: 45 additions & 0 deletions t/text-invalid-entity.t
@@ -0,0 +1,45 @@
#!perl

use warnings;
use strict;

require 't/LintTest.pl';

# Expected results for the HTML document under __DATA__.  Each entry pairs
# an error code with a regex for the error's message text.
# NOTE(review): checkit() lives in t/LintTest.pl, which is not visible here;
# presumably it lints the DATA lines and compares the reported errors against
# this list in order -- confirm against that helper.
checkit( [
# &metalhorns; in the <title> is not a known named entity.
[ 'text-unknown-entity' => qr/Entity &metalhorns; is unknown/ ],
# Decimal entity that the DATA document marks as over the 65536 cap.
[ 'text-invalid-entity' => qr/Entity &#8675309; is invalid/ ],
# Hex entity that the DATA document marks as over the xFFFF cap.
[ 'text-invalid-entity' => qr/Entity &#xdeadbeef; is invalid/ ],
# No leading '#', so this is treated as a named entity and is unknown.
[ 'text-unknown-entity' => qr/Entity &xdeadbeef; is unknown/ ],
], [<DATA>] );

__DATA__
<html>
<head>
<title>Ace of &spades;: A tribute to Mot&ouml;rhead. &#174; &metalhorns;</title>
<script>
function foo() {
if ( 6 == 9 && 25 == 6 ) {
x = 14;
}
}
</script>
</head>
<body bgcolor="white">
<p>
Thanks for visiting Ace of &#9824; <!-- Numeric version of &spades; -->
<p>
Ace of &#x2660; is your single source for everything related to Mot&ouml;rhead.
<p>
Here's an icon of my girlfriend Jenny: &#8675309; <!-- invalid because we cap at 65536 -->
<p>
And here's an icon of a deceased cow: &#xdeadbeef; <!-- Invalid because we cap at xFFFF -->
<p>
Another deceased cow: &xdeadbeef; <!-- Not a valid hex entity, but unknown to our lookup tables -->
<p>
Here's an awesome link to <a href="http://www.youtube.com/watch?v=8yLhA0ROGi4&feature=related">"You Better Swim"</a> from the SpongeBob movie.
<!--
Here in the safety of comments, we can put whatever &invalid; and &malformed entities we want, &
nobody can stop us. Except maybe Cheech & Chong.
-->
</body>
</html>
33 changes: 33 additions & 0 deletions t/text-unclosed-entity.t
@@ -0,0 +1,33 @@
#!perl

use warnings;
use strict;

require 't/LintTest.pl';

# Expected results for the HTML document under __DATA__: a single error for
# "Mot&oumlrhead" in the <body>, where &ouml has no closing semicolon.
# NOTE(review): checkit() lives in t/LintTest.pl, which is not visible here;
# presumably it lints the DATA lines and compares the reported errors against
# this list -- confirm against that helper.
checkit( [
[ 'text-unclosed-entity' => qr/Entity &ouml; is missing its closing semicolon/ ],
], [<DATA>] );

__DATA__
<html>
<head>
<title>Ace of &spades;: A tribute to Mot&ouml;rhead.</title>
<script>
function foo() {
if ( 6 == 9 && 25 == 6 ) {
x = 14;
}
}
</script>
</head>
<body bgcolor="white">
Mot&oumlrhead rulez!
<p>
Here's an awesome link to <a href="http://www.youtube.com/watch?v=8yLhA0ROGi4&feature=related">"You Better Swim"</a> from the SpongeBob movie.
<!--
Here in the safety of comments, we can put whatever &invalid; and &malformed entities we want, &
nobody can stop us. Except maybe Cheech & Chong.
-->
</body>
</html>

0 comments on commit 14a094d

Please sign in to comment.