From 84735693cd3ef8fd2de062f68e9dcc9d8743b70b Mon Sep 17 00:00:00 2001 From: Sebastian Riedel Date: Wed, 14 Jul 2010 12:38:44 +0200 Subject: [PATCH] fixed Mojo::DOM to support escaped selectors --- Changes | 1 + lib/Mojo/DOM.pm | 40 +++++++++++++++++++++++++++++----------- t/mojo/dom.t | 34 ++++++++++++++++++++++++++++++---- 3 files changed, 60 insertions(+), 15 deletions(-) diff --git a/Changes b/Changes index 57e1c37c70..8d461f5f14 100644 --- a/Changes +++ b/Changes @@ -18,6 +18,7 @@ This file documents the revision history for Perl extension Mojolicious. - Improved Mojo::Template error messages. - Improved generated multipart messages to be 2 bytes shorter. (John Kingsley) + - Fixed Mojo::DOM to support escaped selectors. - Fixed json/data rendering with layouts in MojoX::Renderer. - Fixed Mojo::IOLoop to not stop unexpectedly. - Fixed graceful shutdown in Mojo::Server::Daemon::Prefork. diff --git a/lib/Mojo/DOM.pm b/lib/Mojo/DOM.pm index a1aa2fb953..d7a14f24db 100644 --- a/lib/Mojo/DOM.pm +++ b/lib/Mojo/DOM.pm @@ -19,11 +19,11 @@ __PACKAGE__->attr(tree => sub { ['root'] }); # Regex my $CSS_ATTR_RE = qr/ \[ - (\w+) # Key + (\w+) # Key (?: - (\W)? # Operator + (\W)? # Operator = - "([^"]+)" # Value + "((?:[^"]|\\")+)" # Value )? \] /x; @@ -32,13 +32,13 @@ my $CSS_ELEMENT_RE = qr/^([^\.\#]+)/; my $CSS_ID_RE = qr/\#([^\#]+)/; my $CSS_PSEUDO_CLASS_RE = qr/(?:\:(\w+)(?:\(([^\)]+)\))?)/; my $CSS_TOKEN_RE = qr/ - (\s*,\s*)? # Separator - ([\w\.\*\#]+)? # Element - ((?:\:\w+(?:\([^\)]+\))?)*)? # Pseudo Class - ((?:\[\w+(?:\W?="[^"]+")?\])*)? # Attributes + (\s*,\s*)? # Separator + ((?:[\w\.\*\#]|\\[^0-9a-fA-F]|\\[0-9a-fA-F]{1,6}\s?)+)? # Element + ((?:\:\w+(?:\([^\)]+\))?)*)? # Pseudo Class + ((?:\[\w+(?:\W?="(?:[^"]|\\")+")?\])*)? # Attributes (?: \s* - ([\>\+\~]) # Combinator + ([\>\+\~]) # Combinator )? /x; my $XML_ATTR_RE = qr/ @@ -278,6 +278,21 @@ sub _compare { return 1; } +sub _css_unescape { + my ($self, $value) = @_; + + # Remove escaped newlines + $value =~ s/\\\n//g; + + # Unescape unicode characters + $value =~ s/\\([0-9a-fA-F]{1,6})\s?/pack('U', hex $1)/gex; + + # Remove backslash + $value =~ s/\\//g; + + return $value; +} + sub _doctype { my ($self, $doctype, $current) = @_; @@ -415,12 +430,15 @@ sub _parse_css { # Classes while ($element =~ /$CSS_CLASS_RE/g) { - push @$selector, ['attribute', 'class', qr/(?:^|\W+)$1(?:\W+|$)/]; + my $class = $self->_css_unescape($1); + push @$selector, + ['attribute', 'class', qr/(?:^|\W+)$class(?:\W+|$)/]; } # ID if ($element =~ /$CSS_ID_RE/) { - push @$selector, ['attribute', 'id', qr/^$1$/]; + my $id = $self->_css_unescape($1); + push @$selector, ['attribute', 'id', qr/^$id$/]; } # Pseudo classes @@ -441,7 +459,7 @@ sub _parse_css { if ($value) { # Quote - $value = quotemeta $value; + $value = quotemeta $self->_css_unescape($value); # "^=" (begins with) if ($op eq '^') { $regex = qr/^$value/ } diff --git a/t/mojo/dom.t b/t/mojo/dom.t index e8730d6411..a72d240516 100644 --- a/t/mojo/dom.t +++ b/t/mojo/dom.t @@ -7,7 +7,7 @@ use warnings; use utf8; -use Test::More tests => 59; +use Test::More tests => 81; # Homer gave me a kidney: it wasn't his, I didn't need it, # and it came postage due- but I appreciated the gesture! @@ -163,6 +163,32 @@ is($dom->at('[foo="bar"]')->text, 'works', 'right text'); is($dom->at('[foo="ba"]'), undef, 'no result'); is($dom->at('.tset')->text, 'works', 'right text'); -# Already decoded unicode snowman -$dom->charset(undef)->parse('
'); -is($dom->at('#snowman')->text, '☃', 'right text'); +# Already decoded unicode snowman and quotes in selector +$dom->charset(undef)->parse('
'); +is($dom->at('[id="sno\"wman"]')->text, '☃', 'right text'); + +# Unicode and escaped id selectors +$dom->parse( + qq/

Snowman<\/div>
Heart<\/div><\/p>/); +is($dom->at("#\\\n\\002603x")->text, 'Snowman', 'right text'); +is($dom->at('#\\2603 x')->text, 'Snowman', 'right text'); +is($dom->at("#\\\n\\2603 x")->text, 'Snowman', 'right text'); +is($dom->at(qq/[id="\\\n\\2603 x"]/)->text, 'Snowman', 'right text'); +is($dom->at(qq/[id="\\\n\\002603x"]/)->text, 'Snowman', 'right text'); +is($dom->at(qq/[id="\\\\2603 x"]/)->text, 'Snowman', 'right text'); +is($dom->at("p #\\\n\\002603x")->text, 'Snowman', 'right text'); +is($dom->at('p #\\2603 x')->text, 'Snowman', 'right text'); +is($dom->at("p #\\\n\\2603 x")->text, 'Snowman', 'right text'); +is($dom->at(qq/p [id="\\\n\\2603 x"]/)->text, 'Snowman', 'right text'); +is($dom->at(qq/p [id="\\\n\\002603x"]/)->text, 'Snowman', 'right text'); +is($dom->at(qq/p [id="\\\\2603 x"]/)->text, 'Snowman', 'right text'); +is($dom->at(".\\\n\\002665")->text, 'Heart', 'right text'); +is($dom->at('.\\2665')->text, 'Heart', 'right text'); +is($dom->at("p .\\\n\\002665")->text, 'Heart', 'right text'); +is($dom->at('p .\\2665')->text, 'Heart', 'right text'); +is($dom->at(qq/p [class\$="\\\n\\002665"]/)->text, 'Heart', 'right text'); +is($dom->at(qq/p [class\$="\\2665"]/)->text, 'Heart', 'right text'); +is($dom->at(qq/[class\$="\\\n\\002665"]/)->text, 'Heart', 'right text'); +is($dom->at(qq/[class\$="\\2665"]/)->text, 'Heart', 'right text'); +is($dom->at('.x')->text, 'Heart', 'right text'); +is($dom->at('p .x')->text, 'Heart', 'right text');