Skip to content

Commit

Permalink
fixed Mojo::DOM to support escaped selectors
Browse files Browse the repository at this point in the history
  • Loading branch information
kraih committed Jul 14, 2010
1 parent 44a9909 commit 8473569
Show file tree
Hide file tree
Showing 3 changed files with 60 additions and 15 deletions.
1 change: 1 addition & 0 deletions Changes
Expand Up @@ -18,6 +18,7 @@ This file documents the revision history for Perl extension Mojolicious.
- Improved Mojo::Template error messages.
- Improved generated multipart messages to be 2 bytes shorter.
(John Kingsley)
- Fixed Mojo::DOM to support escaped selectors.
- Fixed json/data rendering with layouts in MojoX::Renderer.
- Fixed Mojo::IOLoop to not stop unexpectedly.
- Fixed graceful shutdown in Mojo::Server::Daemon::Prefork.
Expand Down
40 changes: 29 additions & 11 deletions lib/Mojo/DOM.pm
Expand Up @@ -19,11 +19,11 @@ __PACKAGE__->attr(tree => sub { ['root'] });
# Regex
my $CSS_ATTR_RE = qr/
\[
(\w+) # Key
(\w+) # Key
(?:
(\W)? # Operator
(\W)? # Operator
=
"([^"]+)" # Value
"((?:[^"]|\\")+)" # Value
)?
\]
/x;
Expand All @@ -32,13 +32,13 @@ my $CSS_ELEMENT_RE = qr/^([^\.\#]+)/;
my $CSS_ID_RE = qr/\#([^\#]+)/;
my $CSS_PSEUDO_CLASS_RE = qr/(?:\:(\w+)(?:\(([^\)]+)\))?)/;
my $CSS_TOKEN_RE = qr/
(\s*,\s*)? # Separator
([\w\.\*\#]+)? # Element
((?:\:\w+(?:\([^\)]+\))?)*)? # Pseudo Class
((?:\[\w+(?:\W?="[^"]+")?\])*)? # Attributes
(\s*,\s*)? # Separator
((?:[\w\.\*\#]|\\[^0-9a-fA-F]|\\[0-9a-fA-F]{1,6}\s?)+)? # Element
((?:\:\w+(?:\([^\)]+\))?)*)? # Pseudo Class
((?:\[\w+(?:\W?="(?:[^"]|\\")+")?\])*)? # Attributes
(?:
\s*
([\>\+\~]) # Combinator
([\>\+\~]) # Combinator
)?
/x;
my $XML_ATTR_RE = qr/
Expand Down Expand Up @@ -278,6 +278,21 @@ sub _compare {
return 1;
}

# Decode the CSS escape sequences in a selector fragment and return the
# resulting plain text.
#
# Three passes run in a fixed order:
#   1. A backslash directly followed by a newline is dropped entirely.
#   2. A backslash followed by one to six hex digits (and at most one
#      trailing whitespace character) is replaced by the character with
#      that code point.
#   3. Every backslash still present is deleted, leaving the character it
#      preceded in place.
#
# The invocant is accepted so the sub can be called as a method, but it is
# not used.
sub _css_unescape {
    my ($self, $text) = @_;

    # Alias the working copy to $_ so the three passes read as a pipeline.
    for ($text) {
        s/\\\n//g;
        s/\\([0-9a-fA-F]{1,6})\s?/pack('U', hex $1)/gex;
        s/\\//g;
    }

    return $text;
}

sub _doctype {
my ($self, $doctype, $current) = @_;

Expand Down Expand Up @@ -415,12 +430,15 @@ sub _parse_css {

# Classes
while ($element =~ /$CSS_CLASS_RE/g) {
push @$selector, ['attribute', 'class', qr/(?:^|\W+)$1(?:\W+|$)/];
my $class = $self->_css_unescape($1);
push @$selector,
['attribute', 'class', qr/(?:^|\W+)$class(?:\W+|$)/];
}

# ID
if ($element =~ /$CSS_ID_RE/) {
push @$selector, ['attribute', 'id', qr/^$1$/];
my $id = $self->_css_unescape($1);
push @$selector, ['attribute', 'id', qr/^$id$/];
}

# Pseudo classes
Expand All @@ -441,7 +459,7 @@ sub _parse_css {
if ($value) {

# Quote
$value = quotemeta $value;
$value = quotemeta $self->_css_unescape($value);

# "^=" (begins with)
if ($op eq '^') { $regex = qr/^$value/ }
Expand Down
34 changes: 30 additions & 4 deletions t/mojo/dom.t
Expand Up @@ -7,7 +7,7 @@ use warnings;

use utf8;

use Test::More tests => 59;
use Test::More tests => 81;

# Homer gave me a kidney: it wasn't his, I didn't need it,
# and it came postage due- but I appreciated the gesture!
Expand Down Expand Up @@ -163,6 +163,32 @@ is($dom->at('[foo="bar"]')->text, 'works', 'right text');
is($dom->at('[foo="ba"]'), undef, 'no result');
is($dom->at('.tset')->text, 'works', 'right text');

# Already decoded unicode snowman
# (charset is cleared so the character string is parsed as-is; the text
# node must therefore come back as the same snowman character)
$dom->charset(undef)->parse('<div id="snowman">☃</div>');
is($dom->at('#snowman')->text, '☃', 'right text');
# Already decoded unicode snowman and quotes in selector
# (the id contains a double quote, written as &quot; in the markup and as
# an escaped \" inside the attribute selector value)
$dom->charset(undef)->parse('<div id="sno&quot;wman">☃</div>');
is($dom->at('[id="sno\"wman"]')->text, '☃', 'right text');

# Unicode and escaped selectors: every selector below is an escaped
# spelling that must resolve to one of the two <div> elements in this
# fixture.
$dom->parse(
    qq/<p><div id="☃x">Snowman<\/div><div class="x ♥">Heart<\/div><\/p>/);

# Selectors addressing the element whose id is "☃x", written as id and
# attribute selectors, with and without the "p" ancestor.
for my $selector (
    "#\\\n\\002603x",
    '#\\2603 x',
    "#\\\n\\2603 x",
    qq/[id="\\\n\\2603 x"]/,
    qq/[id="\\\n\\002603x"]/,
    qq/[id="\\\\2603 x"]/,
    "p #\\\n\\002603x",
    'p #\\2603 x',
    "p #\\\n\\2603 x",
    qq/p [id="\\\n\\2603 x"]/,
    qq/p [id="\\\n\\002603x"]/,
    qq/p [id="\\\\2603 x"]/,
  )
{
    is($dom->at($selector)->text, 'Snowman', 'right text');
}

# Selectors addressing the element with class "x ♥", written as class and
# "ends with" attribute selectors, plus the unescaped ".x" form.
for my $selector (
    ".\\\n\\002665",
    '.\\2665',
    "p .\\\n\\002665",
    'p .\\2665',
    qq/p [class\$="\\\n\\002665"]/,
    qq/p [class\$="\\2665"]/,
    qq/[class\$="\\\n\\002665"]/,
    qq/[class\$="\\2665"]/,
    '.x',
    'p .x',
  )
{
    is($dom->at($selector)->text, 'Heart', 'right text');
}

0 comments on commit 8473569

Please sign in to comment.