Permalink
Browse files

Adding ability to turn off individual errors. Releasing 2.12

  • Loading branch information...
1 parent 1726551 commit 6aa246fdd32af27e0155e5193a5c6bd419517278 @petdance committed Apr 5, 2012
Showing with 646 additions and 311 deletions.
  1. +11 −2 Changes
  2. +3 −0 MANIFEST
  3. +32 −259 lib/HTML/Lint.pm
  4. +120 −45 lib/HTML/Lint/Error.pm
  5. +389 −0 lib/HTML/Lint/Parser.pm
  6. +3 −3 lib/Test/HTML/Lint.pm
  7. +2 −0 t/01-coverage.t
  8. +20 −0 t/config-unknown-directive.t
  9. +23 −0 t/config-unknown-value.t
  10. +2 −2 t/elem-img-sizes-missing.t
  11. +41 −0 t/nolint.t
View
@@ -6,10 +6,19 @@ NOTE: All bugs and requests are now being handled through GitHub.
DO NOT send bug reports to http://rt.cpan.org/ or http://code.google.com/
-
-2.11_01 Thu Mar 29 15:46:59 CDT 2012
+2.12 Thu Apr 5 18:12:10 CDT 2012
[ENHANCEMENTS]
+ Sometimes creating HTML::Lint-compliant HTML just isn't possible.
+ Now, you can turn individual errors on and off in your HTML
+ via comment directives, like so:
+
+ <!-- html-lint elem-img-sizes-missing: off, attr-unknown: off -->
+
+ And if you have a batch of code that's hopeless:
+
+ <!-- html-lint all: off -->
+
Added check for unknown entities (&foo;) and unclosed entities
(&amp without a semicolon).
View
@@ -6,6 +6,7 @@ bin/weblint
lib/HTML/Lint.pm
lib/HTML/Lint/Error.pm
lib/HTML/Lint/HTML4.pm
+lib/HTML/Lint/Parser.pm
lib/Test/HTML/Lint.pm
t/LintTest.pl
t/00-load.t
@@ -21,6 +22,8 @@ t/40-where.t
t/50-multiple-files.t
t/attr-repeated.t
t/attr-unknown.t
+t/config-unknown-directive.t
+t/config-unknown-value.t
t/doc-tag-required.t
t/elem-empty-but-closed.t
t/elem-img-alt-missing.t
View
@@ -4,6 +4,8 @@ use warnings;
use strict;
use HTML::Lint::Error;
+use HTML::Lint::Parser ();
+
use HTML::Entities ();
=head1 NAME
@@ -12,11 +14,11 @@ HTML::Lint - check for HTML errors in a string or file
=head1 VERSION
-Version 2.11_01
+Version 2.12
=cut
-our $VERSION = '2.11_01';
+our $VERSION = '2.12';
=head1 SYNOPSIS
@@ -247,6 +249,7 @@ sub gripe {
return;
}
+
=head2 $lint->newfile( $filename )
Call C<newfile()> whenever you switch to another file in a batch
@@ -271,275 +274,45 @@ sub newfile {
return $self->{_file};
} # newfile
-=pod
-
-HTML::Lint::Parser is a class only for this module.
-
-=cut
-
-package HTML::Lint::Parser;
-
-use HTML::Parser 3.20;
-use HTML::Tagset 3.03;
-
-use HTML::Lint::HTML4 qw( %isKnownAttribute %isRequired %isNonrepeatable %isObsolete );
-use HTML::Entities qw( %char2entity );
-
-use base 'HTML::Parser';
-
-sub new {
- my $class = shift;
- my $gripe = shift;
-
- my $self =
- HTML::Parser->new(
- api_version => 3,
- start_document_h => [ \&_start_document, 'self' ],
- end_document_h => [ \&_end_document, 'self,line,column' ],
- start_h => [ \&_start, 'self,tagname,line,column,@attr' ],
- end_h => [ \&_end, 'self,tagname,line,column,tokenpos,@attr' ],
- text_h => [ \&_text, 'self,text' ],
- strict_names => 0,
- empty_element_tags => 1,
- );
- bless $self, $class;
-
- $self->{_gripe} = $gripe;
- $self->{_stack} = [];
-
- return $self;
-}
-
-sub gripe {
- my $self = shift;
-
- return $self->{_gripe}->( @_ );
-}
-
-sub _start_document {
-}
-
-sub _end_document {
- my ($self,$line,$column) = @_;
-
- for my $tag ( keys %isRequired ) {
- if ( !$self->{_first_seen}->{$tag} ) {
- $self->gripe( 'doc-tag-required', tag => $tag );
- }
- }
-
- return;
-}
-
-sub _start {
- my ($self,$tag,$line,$column,@attr) = @_;
-
- $self->{_line} = $line;
- $self->{_column} = $column;
-
- my $validattr = $isKnownAttribute{ $tag };
- if ( $validattr ) {
- my %seen;
- my $i = 0;
- while ( $i < @attr ) {
- my ($attr,$val) = @attr[$i++,$i++];
- if ( $seen{$attr}++ ) {
- $self->gripe( 'attr-repeated', tag => $tag, attr => $attr );
- }
-
- if ( $validattr && ( !$validattr->{$attr} ) ) {
- $self->gripe( 'attr-unknown', tag => $tag, attr => $attr );
- }
- } # while attribs
- }
- else {
- $self->gripe( 'elem-unknown', tag => $tag );
- }
- $self->_element_push( $tag ) unless $HTML::Tagset::emptyElement{ $tag };
-
- if ( my $where = $self->{_first_seen}{$tag} ) {
- if ( $isNonrepeatable{$tag} ) {
- $self->gripe( 'elem-nonrepeatable',
- tag => $tag,
- where => HTML::Lint::Error::where( @{$where} )
- );
- }
- }
- else {
- $self->{_first_seen}{$tag} = [$line,$column];
- }
-
- # Call any other overloaded func
- my $tagfunc = "_start_$tag";
- if ( $self->can($tagfunc) ) {
- $self->$tagfunc( $tag, @attr );
- }
-
- return;
-}
-
-sub _text {
- my ($self,$text) = @_;
-
- while ( $text =~ /&(?![#0-9a-z])/ig ) {
- $self->gripe( 'text-use-entity', char => '&', entity => '&amp;' );
- }
-
- while ( $text =~ /([^\x09\x0A\x0D -~])/g ) {
- my $bad = $1;
- $self->gripe(
- 'text-use-entity',
- char => sprintf( '\x%02lX', ord($bad) ),
- entity => $char2entity{ $bad },
- );
- }
-
- if ( not $self->{_unclosed_entities_regex} ) {
- # Get Gisle's list
- my @entities = sort keys %HTML::Entities::entity2char;
-
- # Strip his semicolons
- s/;$// for @entities;
-
- # Build a regex
- my $entities = join( '|', @entities );
- $self->{_unclosed_entities_regex} = qr/&($entities)(?!;)/;
-
- $self->{_entity_lookup} = { map { ($_,1) } @entities };
- }
-
- while ( $text =~ m/$self->{_unclosed_entities_regex}/g ) {
- my $ent = $1;
- $self->gripe( 'text-unclosed-entity', entity => "&$ent;" );
- }
-
- while ( $text =~ m/&([^;]+);/g ) {
- my $ent = $1;
-
- # Numeric entities are fine, if they're not too large.
- if ( $ent =~ /^#(\d+)$/ ) {
- if ( $1 > 65536 ) {
- $self->gripe( 'text-invalid-entity', entity => "&$ent;" );
- }
- next;
- }
-
- # Hex entities are fine, if they're not too large.
- if ( $ent =~ /^#x([\dA-F]+)$/i ) {
- if ( length($1) > 4 ) {
- $self->gripe( 'text-invalid-entity', entity => "&$ent;" );
- }
- next;
- }
-
- # If it's not a numeric entity, then check the lookup table.
- if ( !exists $self->{_entity_lookup}{$ent} ) {
- $self->gripe( 'text-unknown-entity', entity => "&$ent;" );
- }
- }
-
- return;
-}
-
-sub _end {
- my ($self,$tag,$line,$column,$tokenpos,@attr) = @_;
-
- $self->{_line} = $line;
- $self->{_column} = $column;
-
- if ( !$tokenpos ) {
- # This is a dummy end event for something like <img />.
- # Do nothing.
- }
- elsif ( $HTML::Tagset::emptyElement{ $tag } ) {
- $self->gripe( 'elem-empty-but-closed', tag => $tag );
- }
- else {
- if ( $self->_in_context($tag) ) {
- my @leftovers = $self->_element_pop_back_to($tag);
- for ( @leftovers ) {
- my ($tag,$line,$col) = @{$_};
- $self->gripe( 'elem-unclosed', tag => $tag,
- where => HTML::Lint::Error::where($line,$col) )
- unless $HTML::Tagset::optionalEndTag{$tag};
- } # for
- }
- else {
- $self->gripe( 'elem-unopened', tag => $tag );
- }
- } # is empty element
-
- # Call any other overloaded func
- my $tagfunc = "_end_$tag";
- if ( $self->can($tagfunc) ) {
- $self->$tagfunc( $tag, $line );
- }
+1;
- return;
-}
+=head1 MODIFYING HTML::LINT'S BEHAVIOR
-sub _element_push {
- my $self = shift;
- for ( @_ ) {
- push( @{$self->{_stack}}, [$_,$self->{_line},$self->{_column}] );
- } # while
+Sometimes you'll have HTML that for some reason cannot conform to
+HTML::Lint's expectations. For those instances, you can use HTML
+comments to modify HTML::Lint's behavior.
- return;
-}
+Say you have an image where for whatever reason you can't get
+dimensions for the image. This HTML snippet:
-sub _find_tag_in_stack {
- my $self = shift;
- my $tag = shift;
- my $stack = $self->{_stack};
+ <img src="logo.png" height="120" width="50" alt="Company logo">
+ <img src="that.png">
- my $offset = @{$stack} - 1;
- while ( $offset >= 0 ) {
- if ( $stack->[$offset][0] eq $tag ) {
- return $offset;
- }
- --$offset;
- } # while
+causes this error:
- return;
-}
+ foo.html (14:20) <img src="that.png"> tag has no HEIGHT and WIDTH attributes
-sub _element_pop_back_to {
- my $self = shift;
- my $tag = shift;
+But if for some reason you can't get those dimensions when you build
+the page, you can at least stop HTML::Lint complaining about it.
- my $offset = $self->_find_tag_in_stack($tag) or return;
+ <img src="logo.png" height="120" width="50" alt="Company logo">
+ <!-- html-lint elem-img-sizes-missing: off, elem-img-alt-missing: off -->
+ <img src="that.png">
+ <!-- html-lint elem-img-sizes-missing: on, elem-img-alt-missing: on -->
- my @leftovers = splice( @{$self->{_stack}}, $offset + 1 );
- pop @{$self->{_stack}};
+If you want to turn off all HTML::Lint warnings for a block of code, use
- return @leftovers;
-}
+ <!-- html-lint all: off -->
-sub _in_context {
- my $self = shift;
- my $tag = shift;
+And turn them back on with
- my $offset = $self->_find_tag_in_stack($tag);
- return defined $offset;
-}
+ <!-- html-lint all: on -->
-# Overridden tag-specific stuff
-sub _start_img {
- my ($self,$tag,%attr) = @_;
-
- my ($h,$w,$src) = @attr{qw( height width src )};
- if ( defined $h && defined $w ) {
- # Check sizes
- }
- else {
- $self->gripe( 'elem-img-sizes-missing', src=>$src );
- }
- if ( not defined $attr{alt} ) {
- $self->gripe( 'elem-img-alt-missing', src=>$src );
- }
+You don't have to use "on" and "off". For "on", you can use "true"
+or "1". For "off", you can use "0" or "false".
- return;
-}
+For a list of possible errors and their codes, see L<HTML::Lint::Error>,
+or run F<perldoc HTML::Lint::Error>.
=head1 BUGS, WISHES AND CORRESPONDENCE
@@ -575,8 +348,8 @@ DO NOT send bug reports to http://rt.cpan.org/ or http://code.google.com/
Copyright 2005-2012 Andy Lester.
-This program is free software; you can redistribute it and/or modify
-it under the terms of the Artistic License v2.0.
+This program is free software; you can redistribute it and/or modify it
+under the terms of the Artistic License v2.0.
http://www.opensource.org/licenses/Artistic-2.0
Oops, something went wrong.

0 comments on commit 6aa246f

Please sign in to comment.