Skip to content

Commit

Permalink
experimenting with HTML::Parser
Browse files (browse the repository at this point in the history)
  • Loading branch information
Nicholas Bamber committed Oct 4, 2010
1 parent aef9780 commit 309c024
Showing 1 changed file with 8 additions and 8 deletions.
16 changes: 8 additions & 8 deletions lib/HTML/Acid.pm
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ Readonly my $URL_REGEX => qr{
/ # internal URLs only by default
\w # at least one normal character
[\w\-/]* # then any run of word characters, hyphens or slashes
(?:\.\w{1,5})? # optional file extension
(?:\#[\w\-]+)? # optional anchor
\z # end of string
}xms;
Expand Down Expand Up @@ -66,16 +67,15 @@ sub new {
api_version => 3,
empty_element_tags=>1,
strict_comment=>1,
attr_encoded=>1,
handlers => {
text=>['_text_process', 'self,dtext'],
start=>['_start_process', 'self,tagname,attr'],
end=>['_end_process', 'self,tagname'],
end_document=>['_end_document', 'self'],
start_document=>['_reset', 'self'],
},
);

# Set up HTML::Parser handlers
$self->handler(text=>'_text_process', 'self,dtext');
$self->handler(start=>'_start_process', 'self,tagname,attr');
$self->handler(end=>'_end_process', 'self,tagname');
$self->handler(end_document=>'_end_document', 'self');
$self->handler(start_document=>'_reset', 'self');

# Bypass as much as possible
$self->ignore_elements('script','style');
my @tags = sort {
Expand Down

0 comments on commit 309c024

Please sign in to comment.